In [1]:
import pandas as pd
import csv 
import geopandas as gpd
from datetime import timedelta
from shapely.geometry import Point
import glob
from shapely import wkt

ModuleNotFoundError: No module named 'geopandas'

In [None]:
# Load every Ramsey input table and make sure its date column is a datetime.

# Imagery dates: parsed on read, then passed through to_datetime once more.
R_im_date = pd.read_csv('../data/Ramsey/RamseyImageryDates.csv', parse_dates=['date'])
R_im_date = R_im_date.assign(date=pd.to_datetime(R_im_date['date']))

#years = range(2015, 2025)
#pd.DataFrame([pd.Timestamp(year=year, month=6, day=15) for year in years], columns = ['date'])
# Survey dates: semicolon-separated file; rows whose date cannot be read as
# month/day/year are coerced to NaT and dropped.
R_sur_date = pd.read_csv(
    '../data/Ramsey/RamseySurveyDates.csv',
    delimiter=';',
    index_col=False,
    parse_dates=['Ramsey wet/dry date'],
).rename(columns={'Ramsey wet/dry date': 'date'})
R_sur_date['date'] = pd.to_datetime(R_sur_date['date'], format='%m/%d/%Y', errors='coerce')
R_sur_date = R_sur_date.dropna(subset=['date'])

# Hydrological record: the unnamed first CSV column holds the dates.
R_hyd = pd.read_csv('../data/Ramsey/RamseyHydroData.csv').rename(columns={'Unnamed: 0': 'date'})
R_hyd['date'] = pd.to_datetime(R_hyd['date'])

# Precipitation export: rename the value and time columns to 'P' and 'day'.
R_precipitation = pd.read_csv('../data/Ramsey/daymet_precip.csv').rename(
    columns={'00000000000000000000': 'P', 'system:time_start': 'day'}
)
R_precipitation['day'] = pd.to_datetime(R_precipitation['day'])

# Survey observations (dates are attached in a later cell).
R_surveyData = pd.read_csv('../data/Ramsey/Ramsey_surveyData.csv')


In [None]:
#find matching dates between survey and imagery
# For every survey date the imagery dates are searched in three tiers and only
# the first non-empty tier is kept:
#   1. imagery taken on exactly the survey date,
#   2. imagery within +/- 5 days (inclusive),
#   3. imagery strictly less than 10 days away.
# The windows are deliberately NOT stored under the name `tolerance`: a later
# cell defines a function called `tolerance`, and re-running this cell would
# otherwise replace that function with a timedelta.
match_window = timedelta(days = 5)
fallback_window = timedelta(days = 10)

matching_dates = []
imagery_dates = list(R_im_date['date'])

for survey_date in R_sur_date['date']:
    tiers = (
        [img for img in imagery_dates if img == survey_date],
        [img for img in imagery_dates if abs(survey_date - img) <= match_window],
        [img for img in imagery_dates if abs(survey_date - img) < fallback_window],
    )
    # first tier that found anything; empty list when no imagery is close enough
    chosen = next((tier for tier in tiers if tier), [])
    matching_dates.extend({'Survey': survey_date, 'Imagery': img} for img in chosen)

# Explicit columns keep 'Survey'/'Imagery' present even when nothing matched.
matching_df = pd.DataFrame(matching_dates, columns=['Survey', 'Imagery'])


In [None]:
# Attach the hydrological record to each matched pair, once keyed on the survey
# date and once keyed on the imagery date. Both merges are left joins so that
# every row of matching_df survives: the cell that builds Rh reads these frames
# by row position, which only lines up if no pair is dropped when its date is
# missing from R_hyd (missing dates simply get NaN hydrology values).
R_datessurData = pd.merge(matching_df, R_hyd, left_on = 'Survey', right_on = 'date', how = 'left')
R_datesimData = pd.merge(matching_df, R_hyd, left_on = 'Imagery', right_on = 'date', how = 'left')
#R_datessurData = R_datessurData.drop(columns = ['Imagery','date'])
#R_datesimData = R_datesimData.drop(columns = ['Survey','date'])


In [None]:
#sum precipitation for dates in between survey and imagery
def sum_pdatesbetween(d1, d2, hydro=None):
    """Sum the 'P [mm]' column over the days from one date to the other, inclusive.

    Parameters
    ----------
    d1, d2 : datetime-like
        The two end dates, in either order.
    hydro : DataFrame, optional
        Table with a 'date' column and a 'P [mm]' column. Defaults to the
        notebook-level ``R_hyd`` so existing two-argument calls keep working.

    Returns
    -------
    The sum of 'P [mm]' over all rows whose 'date' is one of the daily
    timestamps between the earlier and the later date (both included).
    """
    if hydro is None:
        hydro = R_hyd
    first, last = min(d1, d2), max(d1, d2)
    window = pd.date_range(start=first, end=last)
    return hydro.loc[hydro['date'].isin(window), 'P [mm]'].sum()

In [None]:
#making a dataframe to determine which imagery dates to use
# One row per matched (survey, imagery) pair.
Rh = pd.DataFrame({'Survey': matching_df['Survey'], 'Imagery': matching_df['Imagery']})

# Precipitation summed over the days separating the survey from the imagery.
Rh['sum_P'] = [
    sum_pdatesbetween(R_datessurData.at[row, 'Survey'], R_datesimData.at[row, 'Imagery'])
    for row in range(len(Rh))
]

# Discharge difference (survey minus imagery) as a percentage of the survey-day discharge.
survey_q = R_datessurData['Q [mm/d]']
Rh['Q_s-i'] = survey_q.sub(R_datesimData['Q [mm/d]']).div(survey_q).mul(100)

# Hand-made keep/discard decision for each pair, in row order.
Rh['Use/not'] = [
    'use', 'use', 'use', 'use', 'not',
    'not', 'not', 'use', 'not', 'use',
]

Rh = Rh.fillna('')

# Keep only the pairs flagged 'use'; the flag column is not needed afterwards.
conditions = Rh['Use/not'].eq('not')
Rh = Rh.loc[~conditions].drop(columns=['Use/not'])

Rh

In [None]:
# index must be the boolean False: the string 'False' is truthy, so pandas
# would still write the row index as an extra unnamed first column.
Rh.to_csv('../data/Ramsey/Ramsey_survey_imagery_hydro.csv', index = False)

In [None]:
#merging a data frame to match the survey data to imagery dates
#First adjusting the dates in surveyData, only logged with the respective year and not exact date
R_sur_date['Year'] = R_sur_date['date'].dt.year
R_surveyData['Year'] = R_surveyData['Year'].astype(int)

# Join on the calendar year, then let the full survey date take over the
# 'Year' column name and drop the helper columns and any duplicate rows.
Rdata = (
    pd.merge(R_surveyData, R_sur_date[['date', 'Year']], on='Year', how='left')
    .drop(columns=['Year', 'Unnamed: 0'])
    .rename(columns={'date': 'Year'})
    .drop_duplicates()
)
Rdata

In [None]:
#making the survey data with imagery dates into a geodataframe and adding x and y from the geometry to 
#facilitate merge

# The WKT strings are parsed into a new frame instead of being written back
# into Rdata, so this cell can be re-run without calling wkt.loads on
# geometries that were already parsed.
gdf = gpd.GeoDataFrame(
    Rdata.assign(geometry=Rdata['geometry'].apply(wkt.loads)),
    geometry = 'geometry',
    crs='EPSG:26912',
)

gdf['x'] = gdf.geometry.x
gdf['y'] = gdf.geometry.y
# .copy() makes the column subset an independent frame; a later cell adds a
# column to gdf.
gdf = gdf[['geometry', 'x', 'y', 'wetdry', 'Year']].copy()
#gdf['Year'] = gdf['Year'].astype(int) 
#gdf['Year'] = pd.to_datetime(gdf['Year'])
gdf

In [None]:
#Checking for perennial reaches by comparing surveys of each year
# Per geometry, count the survey rows recorded as 'wet'.
perennial = pd.DataFrame(
    gdf.groupby('geometry')['wetdry'].apply(lambda obs: (obs == 'wet').sum())
).reset_index(drop=False)

# whichever number is reasonable based on data?
# Keep only geometries with exactly 6 wet records, then label them 'wet'
#assume always wet
perennial = perennial.loc[perennial['wetdry'] == 6].assign(wetdry = 'wet')

#perennial['geometry'] = perennial['geometry'].apply(wkt.loads)
gdf_perennial = gpd.GeoDataFrame(perennial, geometry = 'geometry', crs='EPSG:26912')
gdf_perennial['x'] = gdf_perennial.geometry.x
gdf_perennial['y'] = gdf_perennial.geometry.y

#making the gdf matching the perennial reaches to the imagery dates available 
# One copy of the perennial points per imagery date, skipping imagery dates
# that are already paired with a survey in Rh.
per_date_frames = [gdf_perennial.assign(imagery = image_date) for image_date in R_im_date['date']]
imagery_perennial = pd.concat(per_date_frames, ignore_index=True)
already_matched = imagery_perennial['imagery'].isin(Rh['Imagery'])
imagery_perennial = imagery_perennial[~already_matched]
imagery_perennial['assumption'] = 'assumed perennial'
imagery_perennial

In [None]:
# Histogram of the number of 'wet' records per geometry.
filtered_gdf = gdf.loc[gdf['wetdry'] == 'wet']
filtered_gdf.groupby('geometry')['wetdry'].count().hist()

In [None]:
# merging to be able to use the new precipitation data
# The 'P' column from R_precipitation replaces the original 'P [mm]' column,
# and the result is indexed by date.
R_new_hyd = (
    R_hyd
    .merge(R_precipitation, left_on='date', right_on='day')
    .drop(columns=['day', 'P [mm]'])
    .rename(columns={'P': 'P [mm]'})
    .set_index('date')
)
R_new_hyd

In [None]:
#function to define assumptions around dates to choose, based on streamflow and precipitation
def tolerance(Q_P_data, date, start, adjust, tolerance_p, P_condition = -999, Q_condition = -999):
    """Return the unbroken run of rows, beginning at ``start``, that meet the conditions.

    The data are walked forwards (``adjust='start'``) or backwards
    (``adjust='end'``) from ``start``. A row qualifies when it satisfies both
    the threshold condition(s) on 'Q [mm/d]' / 'P [mm]' and the tolerance
    condition on the row-to-row change of 'Q [mm/d]'. The result is the run of
    qualifying rows from the first row up to, but not including, the first row
    that does not qualify.

    Parameters
    ----------
    Q_P_data : DataFrame
        Must contain 'Q [mm/d]' (and 'P [mm]' when ``P_condition`` is used).
        It is sliced by label with ``start``, so its index must support that
        (presumably a sorted DatetimeIndex - verify against the caller).
    date : unused
        Kept so existing calls keep working.
    start : index label
        Where the walk begins.
    adjust : {'start', 'end'}
        'start' walks from ``start`` to the end of the data; 'end' walks from
        ``start`` back to the beginning (rows are returned in reversed order).
    tolerance_p : float
        Allowed relative change of Q between consecutive rows of the walk:
        'start' requires ``Q_diff < tolerance_p * Q``, 'end' requires
        ``Q_diff > -tolerance_p * Q``.
    P_condition, Q_condition : number
        -999 means "not used". A positive value v requires the column to be
        ``> v``; any other value v requires the column to be ``< -v``.
        At least one of the two must be given.

    Returns
    -------
    DataFrame
        The qualifying rows with a fresh 0..n-1 index; the former index is
        turned into a column by ``reset_index`` and the helper columns
        'Q_diff', 'tolerance_condition' and 'condition' are included. An empty
        DataFrame is returned whenever no run can be produced (invalid
        arguments, nothing qualifies, or the first row does not qualify).
    """

    def _threshold(values, limit):
        # positive limit -> "greater than limit"; otherwise "less than -limit"
        return values > limit if limit > 0 else values < -limit

    if adjust == 'start':
        sub_grupp = Q_P_data.loc[start:]
    elif adjust == 'end': #reverse index to loop backwards
        sub_grupp = Q_P_data.loc[:start].iloc[::-1]
    else:
        print('Invalid adjust parameter. Please use "start" or "end"')
        # an empty frame (not None) so callers can always take len() of the result
        return pd.DataFrame()

    if P_condition == -999 == Q_condition:
        print('not a valid condition')
        return pd.DataFrame()

    # New frame with a positional 0..n-1 index in walking order.
    sub_grupp = sub_grupp.reset_index()
    discharge = sub_grupp['Q [mm/d]']

    # Change of Q relative to the previous row of the walk (0 for the first row).
    sub_grupp['Q_diff'] = discharge.diff().fillna(0)

    # NOTE(review): the comparisons are strict, so a row with Q == 0 and no
    # change in Q never passes - confirm this is intended for zero-flow days.
    if adjust == 'start':
        sub_grupp['tolerance_condition'] = sub_grupp['Q_diff'] < tolerance_p * discharge
    else:
        sub_grupp['tolerance_condition'] = sub_grupp['Q_diff'] > -tolerance_p * discharge

    # Combine whichever thresholds were supplied.
    masks = []
    if Q_condition != -999:
        masks.append(_threshold(discharge, Q_condition))
    if P_condition != -999:
        masks.append(_threshold(sub_grupp['P [mm]'], P_condition))
    condition = masks[0]
    for extra in masks[1:]:
        condition = condition & extra
    sub_grupp['condition'] = condition

    #where conditions are met
    meets = sub_grupp['condition'] & sub_grupp['tolerance_condition']

    if not meets.any():
        print('No data where conditions are met')
        return pd.DataFrame()

    #where conditions are not met
    if meets.all():
        print('nogroup = 0')
        return sub_grupp

    if meets.iloc[0]:
        print('everything is fine')
        # Position of the first failing row. iloc slicing excludes it; the
        # label-based .loc[:k] used previously is end-inclusive and therefore
        # also returned the first row that broke the conditions.
        first_failure = int((~meets).idxmax())
        return sub_grupp.iloc[:first_failure]

    print('No valid range found between yesgroup and nogroup indices')
    return pd.DataFrame()




In [None]:
#assumption of dry dates after the survey date
# For every survey date, find the run of following days that pass the
# `tolerance` conditions, keep the imagery dates inside that run (excluding
# imagery already paired with a survey in Rh), and repeat the points that were
# surveyed as 'dry' once per such imagery date.

dry_list = []

for survey_date in Rdata['Year'].dropna().unique():
    # .unique() may yield numpy datetimes; Timestamp gives .strftime and clean comparisons
    survey_date = pd.Timestamp(survey_date)

    dry1 = tolerance(R_new_hyd, 'date', survey_date, 'start', 0.05, Q_condition = -999, P_condition = -1)
    if dry1 is None or len(dry1) == 0:
        print('dry1 is empty')
        continue
    dry1 = dry1[~dry1['date'].isin(Rh['Imagery'])]
    dry_imagery = pd.merge(dry1, R_im_date, on = ['date'], how = 'inner')

    # Only the points actually recorded as 'dry' in this survey are carried
    # forward. (Previously every surveyed point of the year was relabelled
    # 'dry', because the dry/not-dry mask was overwritten instead of applied.)
    surveyed = gdf[gdf['Year'] == survey_date]
    dry_points = pd.DataFrame(
        surveyed.loc[surveyed['wetdry'] == 'dry', ['geometry', 'wetdry']]
    ).reset_index(drop = True)

    if dry_imagery.empty or dry_points.empty:
        print('No data for date '+ survey_date.strftime('%Y-%m-%d'))
        continue

    dry_im_points = [dry_points.assign(imagery = image_date) for image_date in dry_imagery['date']]
    dry_list.append(pd.concat(dry_im_points))


# pd.concat raises on an empty list, so fall back to an empty frame that still
# has the columns the following cells expect.
if dry_list:
    dry_df = pd.concat(dry_list)
else:
    dry_df = pd.DataFrame(columns = ['geometry', 'wetdry', 'imagery'])
dry_df['assumption'] = 'assumed dry'




In [None]:

# Label the surveyed points, attach the imagery date matched to their survey,
# and stack them with the assumed-perennial and assumed-dry points.
gdf['assumption'] = 'survey/imagery match'
gdf_imagery = pd.merge(gdf, Rh, left_on = 'Year', right_on = 'Survey', how = 'left').drop(
    columns=['Survey', 'sum_P', 'Q_s-i', 'Year']
)

stacked = [gdf_imagery, imagery_perennial, dry_df]
all_expanded = gpd.GeoDataFrame(pd.concat(stacked), geometry = 'geometry', crs='EPSG:26912')
all_expanded['x'] = all_expanded.geometry.x
all_expanded['y'] = all_expanded.geometry.y

# 'Imagery' comes from the survey matches and 'imagery' (renamed 'date_first')
# from the assumed points; 'date' takes 'Imagery' where present, else 'date_first'.
all_expanded = all_expanded.rename(columns = {'imagery':'date_first'})
all_expanded['date'] = all_expanded['Imagery'].combine_first(all_expanded['date_first'])
all_expanded

In [None]:
#reading and concatenating the processed imagery 
path = '../data/Ramsey/processed_imagery'

# Files are ordered by the integer that follows '_buffer_' in their name.
processed_imagery = sorted(
    glob.glob(path + '/*.csv'),
    key = lambda csv_path: int(csv_path.split('_buffer_')[1].split('.')[0]),
)

con_ready_imagery = [pd.read_csv(csv_path) for csv_path in processed_imagery]

concatenated = pd.concat(con_ready_imagery)

In [None]:
#turning the processed imagery into a gdf with x and y columns 
# The WKT strings are parsed into a new frame instead of being written back
# into `concatenated`, so this cell can be re-run without calling wkt.loads on
# geometries that were already parsed.
gdf_processed = gpd.GeoDataFrame(
    concatenated.assign(geometry=concatenated['geometry'].apply(wkt.loads)),
    geometry = 'geometry',
    crs='EPSG:26912',
)
# dates are stored as YYYYMMDD
gdf_processed['date'] = pd.to_datetime(gdf_processed['date'], format='%Y%m%d')
gdf_processed['x'] = gdf_processed.geometry.x
gdf_processed['y'] = gdf_processed.geometry.y

In [None]:
# Remove rows that are exact repeats of an earlier row (the first occurrence is kept).
gdf_processed = gdf_processed.drop_duplicates(keep='first')
gdf_processed

In [None]:
# Round the coordinates of both tables to the same number of decimals so that
# they can be compared for equality in the merge.
precision = 5
all_expanded[['x', 'y']] = all_expanded[['x', 'y']].round(precision)
gdf_processed[['x', 'y']] = gdf_processed[['x', 'y']].round(precision)

In [None]:
#merged = all_expanded.merge(gdf2, on=['date', 'x', 'y'])
#merged['geometry'] = merged['geometry'].apply(wkt.loads)
#merged = gpd.GeoDataFrame(merged, geometry = 'geometry', crs='EPSG:26912')
#merged['x'] = merged.geometry.x
#merged['y'] = merged.geometry.y


In [None]:
# Attach the processed imagery rows to the survey/assumption rows.
# First try an exact match on date and rounded coordinates against
# gdf_processed (the name `gdf2` used here before is not defined anywhere in
# this notebook and raised a NameError on a fresh kernel).
merged = all_expanded.merge(gdf_processed, on=['date', 'x', 'y'])
if len(merged) == 0:
    print("Merge returned empty. Doing a spatial join based on proximity.")
    # Perform a nearest spatial join
    result = gpd.sjoin_nearest(all_expanded, gdf_processed, how='inner', max_distance=0.4)  # Adjust max_distance as needed
    # keep only pairs whose dates agree
    result = result[result['date_left'] == result['date_right']]
    result = result.drop(columns = ['geometry', 'x_right', 'y_right', 'date_right', 'index_right', 'Imagery', 'date_first'])
else:
    # `result` must exist for the following cells in this branch too. The exact
    # merge already carries plain 'x', 'y' and 'date' columns, so only the
    # geometry and helper columns are removed.
    # NOTE(review): a non-empty but partial exact match skips the proximity
    # join entirely - confirm that is acceptable.
    result = merged.drop(columns = ['geometry_x', 'geometry_y', 'Imagery', 'date_first'], errors='ignore')

In [None]:
# Strip the '_left' suffixes added by the spatial join, then order the rows by date.
left_suffix_names = {'x_left': 'x', 'y_left': 'y', 'date_left': 'date'}
result = result.rename(columns=left_suffix_names)
result_sorted = result.sort_values(by=['date'])

In [None]:
# Drop repeated rows, then any row that still has a missing value.
result_sorted = result_sorted.drop_duplicates().dropna()
result_sorted

In [None]:
# Write the final table (without the row index) as UTF-8 CSV.
result_sorted.to_csv(
    '../data/Ramsey/processed_assumptions/processed_with_dates_and_assumptions.csv',
    index=False,
    encoding='utf-8',
)

In [None]:
#in case geometry gets fixed and the file increases in size as a result 
#start = 0
#splitnum = ?
#for i in range(1,splitnum+1):
#    newstart = int(len(result_sorted)/splitnum*i)
#    result_sorted.iloc[start:newstart].to_csv('../data/Ramsey/processed_assumptions/processed_with_dates_and_assumptions'+str(i)+'.csv',index=False,
#                      float_format='%.2f')
#    start = newstart

