# Importing file data to an SQL Database (PostgreSQL)

In [1]:
from sqlalchemy import create_engine
import geopandas as gpd
from shapely.geometry import LineString
import numpy as np

from scrollstats import calc_dist, meanfilt, calc_cubic_spline
from config import DB_PW

In [2]:
# SQLAlchemy engine for the `scroll` PostgreSQL database on localhost:5432 (user `postgres`).
# The password is read from `config.DB_PW` so it is not hard-coded in the notebook.
engine = create_engine(f"postgresql://postgres:{DB_PW}@localhost:5432/scroll")

## Importing ridges
- ridges need to have a sufficient point density for the transecting algorithm to work properly
    - density should be 1 point per meter (1 pt/m)
    - if the provided ridges are not at the required density, they will be automatically smoothed and densified
- ridges need to have the following fields
    - `ridge_id`: str : ex. `r_###`
    - `deposit_date`: str : ex. `DD_MM_YYYY`
    - `geometry`: LineString : ex. `LINESTRING([(x,y), (x,y)])`

In [20]:
def check_ridges_for_sql(in_ridges):
    """
    Validate a ridge GeoDataFrame and prepare it for insertion into the SQL database.

    The following checks are made, in order:
        1. `in_ridges` is a GeoDataFrame.
        2. The required columns exist - "ridge_id", "deposit_date", "geometry".
            - a missing "ridge_id" column is created and filled with `r_###` ids
            - an existing "ridge_id" column must contain only values starting with "r_"
            - a missing "deposit_date" column is created and filled with `np.nan`
        3. A CRS is set and it is projected. Point density is measured in points per
           CRS length unit, so it cannot be evaluated in a geographic (degree based) CRS.
           The 1 pt/m target assumes the projected CRS uses metres.
        4. Every geometry is present, non-empty and has a non-zero length.
        5. Every ridge has a point density that rounds to 1 point per unit length.
           If not, all ridges are smoothed with `meanfilt` and resampled with
           `calc_cubic_spline`.

    Because this tool has the potential to handle bends from all over the world, the
    database tables store the data in a global geographic CRS - EPSG:4326.
    Any ridges returned by this function will be in EPSG:4326.

    Parameters
    ----------
    in_ridges : geopandas.GeoDataFrame
        Ridge lines in a projected CRS. The input is not modified.

    Returns
    -------
    geopandas.GeoDataFrame
        Copy of the ridges containing only "ridge_id", "deposit_date" and "geometry",
        sorted by "ridge_id" and reprojected to EPSG:4326.

    Raises
    ------
    ValueError
        If the input is not a GeoDataFrame, "ridge_id" does not follow the `r_###`
        pattern, the "geometry" column is missing, the CRS is missing or geographic,
        or any geometry is null, empty or of zero length.
    """

    req_density = 1
    req_column_names = ["ridge_id", "deposit_date", "geometry"]
    req_crs = "EPSG:4326"

    if not isinstance(in_ridges, gpd.GeoDataFrame):
        raise ValueError(f"Input ridges is of type {type(in_ridges)}, not GeoDataFrame.")

    # Copy in_ridges to not modify the input.
    # drop=True: the old index is not needed and re-inserting it as a column
    # would fail if the input already has a column named "index".
    ridges = in_ridges.copy().reset_index(drop=True)

    # Check for ridge_id
    if "ridge_id" not in ridges.columns:
        print(f"Column 'ridge_id' not found in ridges. Creating 'ridge_id' and populating now")
        ridges["ridge_id"] = [f"r_{i:03d}" for i in range(len(ridges))]
    # Cast to str and use na=False so that missing or non-string ids fail the
    # pattern check instead of slipping through or raising an AttributeError.
    elif not ridges["ridge_id"].astype(str).str.startswith("r_", na=False).all():
        raise ValueError("Column 'ridge_id' in ridges does not follow the r_### pattern.")

    # Check deposit date
    if "deposit_date" not in ridges.columns:
        print("Column 'deposit_date' not found in ridges. Creating and filling with `np.nan` now")
        ridges["deposit_date"] = np.nan

    # Check geometry column
    if "geometry" not in ridges.columns:
        raise ValueError("Column 'geometry' not found in ridges.")

    # Check CRS: it is needed both to measure point density and to reproject the output
    if ridges.crs is None:
        raise ValueError(
            "Input ridges have no CRS. Assign the CRS the coordinates are stored in "
            "with `ridges.set_crs(...)` before calling this function."
        )
    if ridges.crs.is_geographic:
        raise ValueError(
            f"Input ridges are in a geographic CRS ({ridges.crs.to_string()}), so point density "
            "cannot be measured in pt/m. Reproject to a local projected CRS with "
            "`ridges.to_crs(...)` before calling this function."
        )

    # Reject geometries for which a point density cannot be computed
    geoms = ridges.geometry
    unusable = geoms.isna() | geoms.is_empty | (geoms.length == 0)
    if unusable.any():
        bad_ids = ridges.loc[unusable, "ridge_id"].tolist()
        raise ValueError(f"Ridges with missing, empty or zero-length geometry found: {bad_ids}.")

    # Point density = number of vertices / line length, one value per ridge
    point_densities = np.array([len(line.coords) / line.length for line in geoms])
    if not np.all(np.round(point_densities) == req_density):
        print(f"Point density of input ridges was not sufficient. Needed: {req_density}pt/m, Found: ~{round(point_densities.mean(), 2)}pt/m on average. Densifying and smoothing now.")

        # Smooth first (window of 5), then resample at the required spacing
        ridges.geometry = ridges.geometry.apply(lambda x: meanfilt(x, 5))
        ridges.geometry = ridges.geometry.apply(calc_cubic_spline, spacing=req_density)

    return ridges[req_column_names].sort_values("ridge_id").to_crs(req_crs)

    


In [17]:
# Load every ridge of one bend from the `ridges` table and reproject it to a local CRS.
# NOTE(review): the query is built with an f-string. That is fine for this hard-coded id,
# but use bound parameters if `bend_id` ever comes from outside the notebook.
bend_id = "LBR_025"
local_crs = "EPSG:32140"
ridges = gpd.GeoDataFrame.from_postgis(f"SELECT * FROM ridges WHERE bend_id='{bend_id}'", engine, geom_col="geometry").to_crs(local_crs)

In [18]:
# Validate the loaded ridges; the returned GeoDataFrame is only displayed, not assigned.
# NOTE(review): this cell's execution count is lower than the one on the function
# definition, so the recorded output may come from an older version - re-run to confirm.
check_ridges_for_sql(ridges)

Column 'deposit_date' not found in ridges. Creating and filling with `np.nan` now


Unnamed: 0,ridge_id,deposit_date,geometry
0,r_000,,"LINESTRING (-96.50682 30.61456, -96.50682 30.6..."
1,r_001,,"LINESTRING (-96.50421 30.61420, -96.50420 30.6..."
5,r_002,,"LINESTRING (-96.50629 30.61662, -96.50628 30.6..."
6,r_003,,"LINESTRING (-96.50589 30.61735, -96.50588 30.6..."
11,r_004,,"LINESTRING (-96.49788 30.61558, -96.49787 30.6..."
2,r_005,,"LINESTRING (-96.50290 30.61448, -96.50289 30.6..."
3,r_006,,"LINESTRING (-96.50183 30.61376, -96.50182 30.6..."
4,r_007,,"LINESTRING (-96.50543 30.61791, -96.50542 30.6..."
7,r_008,,"LINESTRING (-96.50263 30.61631, -96.50263 30.6..."
8,r_009,,"LINESTRING (-96.49730 30.61454, -96.49729 30.6..."


In [19]:
# Display the input frame as loaded above (still in `local_crs`, with its `bend_id` column).
ridges

Unnamed: 0,ridge_id,geometry,bend_id
0,r_000,"LINESTRING (839070.995 4310807.624, 839071.131...",LBR_025
1,r_001,"LINESTRING (839322.240 4310773.193, 839323.154...",LBR_025
2,r_005,"LINESTRING (839447.357 4310805.922, 839448.121...",LBR_025
3,r_006,"LINESTRING (839551.716 4310728.702, 839552.580...",LBR_025
4,r_007,"LINESTRING (839196.750 4311181.295, 839197.067...",LBR_025
5,r_002,"LINESTRING (839117.293 4311036.916, 839117.872...",LBR_025
6,r_003,"LINESTRING (839153.794 4311118.980, 839154.387...",LBR_025
7,r_008,"LINESTRING (839468.356 4311009.465, 839468.823...",LBR_025
8,r_009,"LINESTRING (839983.724 4310824.877, 839984.463...",LBR_025
9,r_010,"LINESTRING (839819.114 4310720.226, 839820.085...",LBR_025


In [27]:
# Split the coordinates of `geom` into separate x and y sequences.
# NOTE(review): `geom` is not defined in any cell visible here - presumably a single
# ridge LineString left over in the kernel; define it explicitly so Run All works.
x, y = geom.xy

In [28]:
# NOTE(review): `GetS` is neither imported nor defined in the cells visible here.
# Judging by the recorded output it returns cumulative along-line distance starting at 0.0 - confirm.
GetS(x,y)

[0.0,
 1.002839498272528,
 2.005288275560189,
 3.007405687574272,
 4.009245350024806,
 5.010855425839884,
 6.012278883036351,
 7.01355379689236,
 8.014713613516891,
 9.015787448618314,
 10.016800348551913,
 11.017773607762289,
 12.018725028613739,
 13.019669226009333,
 14.020617910264082,
 15.021580172635652,
 16.02256278084746,
 17.023570465658704,
 18.02460620746346,
 19.02567152855705,
 20.02676677721514,
 21.027891441364844,
 22.029044392015475,
 23.03022421706402,
 24.031429484976595,
 25.032659040950676,
 26.03391227336471,
 27.035183706310068,
 28.036455404554292,
 29.03770804387389,
 30.038925259255656,
 31.040096479045662,
 32.04121980101558,
 33.04230485835793,
 34.04337567635478,
 35.044469764592954,
 36.045606898735954,
 37.04679131802329,
 38.0480242842989,
 39.049304252276364,
 40.05062692997716,
 41.05198534334536,
 42.05336563965299,
 43.054737753633674,
 44.05607364225651,
 45.057352545834966,
 46.058561351209775,
 47.0596950069396,
 48.06075692217921,
 49.061774099198

In [31]:
# Running sum of `calc_dist` between each vertex of `geom` and the next one.
# The recorded output matches `GetS(x, y)` without its leading 0.0.
# presumably `calc_dist` returns the per-segment distance - verify against scrollstats.
np.cumsum(calc_dist(np.asarray(geom.coords[:-1]), np.asarray(geom.coords[1:])))

array([  1.0028395 ,   2.00528828,   3.00740569,   4.00924535,
         5.01085543,   6.01227888,   7.0135538 ,   8.01471361,
         9.01578745,  10.01680035,  11.01777361,  12.01872503,
        13.01966923,  14.02061791,  15.02158017,  16.02256278,
        17.02357047,  18.02460621,  19.02567153,  20.02676678,
        21.02789144,  22.02904439,  23.03022422,  24.03142948,
        25.03265904,  26.03391227,  27.03518371,  28.0364554 ,
        29.03770804,  30.03892526,  31.04009648,  32.0412198 ,
        33.04230486,  34.04337568,  35.04446976,  36.0456069 ,
        37.04679132,  38.04802428,  39.04930425,  40.05062693,
        41.05198534,  42.05336564,  43.05473775,  44.05607364,
        45.05735255,  46.05856135,  47.05969501,  48.06075692,
        49.0617741 ,  50.06280471,  51.06388863,  52.06504388,
        53.06627108,  54.06755792,  55.06888361,  56.07022336,
        57.07155291,  58.07285801,  59.07413262,  60.07536932,
        61.07656167,  62.07770747,  63.07881203,  64.07