In [1]:
import pandas as pd
import csv 
import geopandas as gpd
from datetime import timedelta
from shapely.geometry import Point
from shapely import wkt
import glob
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'geopandas'

In [None]:
# Load the Cienega input tables and coerce their date columns to datetimes.

# Imagery acquisition dates (rows without a date are discarded).
C_im_date = pd.read_csv('../data/Cienega/CienegaImageryDates.csv', parse_dates=['date'])
C_im_date['date'] = pd.to_datetime(C_im_date['date'])
C_im_date = C_im_date.dropna(subset=['date'])

# Field survey dates (rows without a date are discarded).
C_sur_date = pd.read_csv('../data/Cienega/Cienega_survey_dates.csv', parse_dates=['Cienega date'])
C_sur_date['Cienega date'] = pd.to_datetime(C_sur_date['Cienega date'])
C_sur_date = C_sur_date.dropna(subset=['Cienega date'])

# Hydrological time series.
C_hyd = pd.read_csv('../data/Cienega/CienegaHydroData.csv')
C_hyd['datetime'] = pd.to_datetime(C_hyd['datetime'])

# Daymet precipitation export; its columns are renamed to 'P' and 'day'.
C_precipitation = pd.read_csv('../data/Cienega/daymet_precip.csv').rename(
    columns={'00000000000000000000': 'P', 'system:time_start': 'day'}
)
C_precipitation['day'] = pd.to_datetime(C_precipitation['day'])

# Survey observations; 'Year' is parsed as a datetime and used as the survey date below.
C_surveyData = pd.read_csv('../data/Cienega/Cienega_surveyData.csv')
C_surveyData['Year'] = pd.to_datetime(C_surveyData['Year'])


In [None]:
# Find the imagery dates that match each survey date.
#
# For every survey date the search widens in three steps and stops at the
# first step that yields at least one imagery date:
#   1. identical date,
#   2. within 5 days (inclusive),
#   3. within 10 days (exclusive).
# Every imagery date accepted at that step becomes one Survey/Imagery pair.
#
# The window is deliberately NOT stored in a variable called `tolerance`:
# a function with that name is defined further down, and rebinding the name
# here would break it whenever this cell is re-run afterwards.
match_steps = (
    lambda gap: gap == timedelta(0),
    lambda gap: gap <= timedelta(days=5),
    lambda gap: gap < timedelta(days=10),
)

imagery_dates = list(C_im_date['date'])
matching_dates = []

for survey_date in C_sur_date['Cienega date']:
    for accepts in match_steps:
        hits = [im_date for im_date in imagery_dates if accepts(abs(survey_date - im_date))]
        if hits:
            break
    matching_dates.extend({'Survey': survey_date, 'Imagery': im_date} for im_date in hits)

# Naming the columns keeps them present even when nothing matched.
matching_dates_df = pd.DataFrame(matching_dates, columns=['Survey', 'Imagery'])



In [None]:
# Attach the hydro record of the survey day and of the imagery day to every
# Survey/Imagery pair.
#
# Both merges are LEFT joins so that each result keeps exactly one row per row
# of matching_dates_df, in the same order. Later cells combine the two frames
# by row position; an inner join would drop pairs whose date is missing from
# C_hyd and shift every following row out of alignment.
C_datessurData = pd.merge(matching_dates_df, C_hyd, left_on='Survey', right_on='datetime', how='left')
C_datesimData = pd.merge(matching_dates_df, C_hyd, left_on='Imagery', right_on='datetime', how='left')

# Keep only the date each frame is keyed on plus the hydro columns.
C_datessurData = C_datessurData.drop(columns=['Imagery', 'datetime'])
C_datesimData = C_datesimData.drop(columns=['Survey', 'datetime'])

In [None]:
def sum_pdatesbetween(d1, d2):
    """Sum 'P [mm]' from C_hyd over the days spanned by two dates.

    The two dates may be given in either order; both end points are part of
    the daily range that is matched against C_hyd['datetime'].
    """
    days = pd.date_range(start=min(d1, d2), end=max(d1, d2))
    in_window = C_hyd['datetime'].isin(days)
    return C_hyd.loc[in_window, 'P [mm]'].sum()

In [None]:
# Build the table used to decide which imagery date represents each survey.
Ch = pd.DataFrame({
    'Survey': matching_dates_df['Survey'],
    'Imagery': matching_dates_df['Imagery'],
})

# Precipitation summed over the days between survey and imagery (end points
# included). The dates are paired from Ch itself so the sum cannot be affected
# by row misalignment in the separately merged hydro frames.
Ch['sum_P'] = [
    sum_pdatesbetween(survey_date, imagery_date)
    for survey_date, imagery_date in zip(Ch['Survey'], Ch['Imagery'])
]

# Relative streamflow difference, in percent of the survey-day value.
Ch['Q_diff [%]'] = (C_datessurData['Q [mm/d]'] - C_datesimData['Q [mm/d]']) / C_datessurData['Q [mm/d]'] * 100

# Manual verdict per Survey/Imagery pair, in row order of matching_dates_df.
use_flags = ['use', 'use', 'use', 'use', 'not', 'not', 'use?',
             'not', 'only option', 'not', 'not', 'use', 'not',
             'not', 'not', 'use', 'not', 'not', 'not', 'not',
             'use', 'use', 'use', 'only option', 'use', 'use',
             'use', 'use', 'not', 'not', 'not', 'not', 'use',
             'not', 'use', 'use', 'use', 'use', 'use', 'not',
             'not', 'not', 'not', 'use', 'not', 'not', 'not',
             'not', 'not', 'use', 'not', 'not', 'not', 'use',
             'not', 'use', 'use']
if len(use_flags) != len(Ch):
    raise ValueError(
        'The manual Use/not list has ' + str(len(use_flags)) + ' entries but '
        + str(len(Ch)) + ' survey/imagery pairs were found; update the list.'
    )
Ch['Use/not'] = use_flags

# A pair is rejected when more than 3 mm fell in between, when the survey-day
# streamflow exceeds the imagery-day streamflow by more than 8 %, or when it
# was manually flagged 'not'.
# NOTE(review): the streamflow test is one-sided, so pairs where the imagery-day
# flow is much HIGHER than the survey-day flow pass it - confirm this is intended.
conditions = (Ch['sum_P'] > 3) | (Ch['Q_diff [%]'] > 8) | (Ch['Use/not'] == 'not')

Ch = Ch[~conditions]

Ch = Ch.drop(columns=['Use/not'])


In [None]:
# Persist the accepted survey/imagery pairs together with their hydro metrics.
Ch.to_csv(
    '../data/Cienega/Cienega_survey_imagery_HydroData.csv',
    index=False,
    encoding='utf-8',
)

In [None]:
# Turn the survey table into a GeoDataFrame and expose the point coordinates
# as plain x / y columns to make the merge with the imagery easier later on.

# Parse WKT only where the value is still text, so the cell can be re-run
# after the column has already been converted to geometry objects.
C_surveyData['geometry'] = C_surveyData['geometry'].apply(
    lambda geom: wkt.loads(geom) if isinstance(geom, str) else geom
)
gdf = gpd.GeoDataFrame(C_surveyData, geometry='geometry', crs='EPSG:26912')
gdf['x'] = gdf.geometry.x
gdf['y'] = gdf.geometry.y

# Take an explicit copy of the column subset so the assignment below acts on
# an independent frame rather than on a slice of the full table.
gdf = gdf[['geometry', 'x', 'y', 'wetdry', 'Year']].copy()
gdf['Year'] = pd.to_datetime(gdf['Year'])

gdf

In [None]:
# Swap the precipitation column of the hydro data for the Daymet series and
# index the result by date.
C_new_hyd = (
    C_hyd
    .merge(C_precipitation, left_on='datetime', right_on='day')
    .drop(columns=['day', 'P [mm]'])        # discard the join key copy and the old precipitation
    .rename(columns={'P': 'P [mm]'})        # Daymet values take over the 'P [mm]' name
    .set_index(['datetime'])
)

In [None]:
# function to define assumptions around dates to choose, based on streamflow and precipitation
# Q_condition could be completely removed
def _threshold_mask(values, threshold):
    """Compare ``values`` against a signed threshold.

    A positive ``threshold`` selects ``values > threshold``; a zero or negative
    one selects ``values < -threshold``.
    """
    if threshold > 0:
        return values > threshold
    return values < -threshold


def tolerance(Q_P_data, date, start, adjust, tolerance_p, P_condition = -999, Q_condition = -999):
    """Select the rows adjacent to ``start`` that satisfy the flow/precipitation rules.

    Parameters
    ----------
    Q_P_data : DataFrame
        Indexed by date; must contain 'Q [mm/d]' and, when ``P_condition`` is
        used, 'P [mm]'. It is not modified.
    date : any
        Unused; kept so existing calls keep working.
    start : label of the index
        Date the selection starts from.
    adjust : {'start', 'end'}
        'start' walks forward in time from ``start``; 'end' walks backwards
        (the rows up to ``start`` are reversed so that ``start`` comes first).
    tolerance_p : float
        Allowed day-to-day change of streamflow, as a fraction of that day's
        streamflow. Walking forward the change must stay below
        ``+tolerance_p * Q``; walking backwards it must stay above
        ``-tolerance_p * Q``.
    P_condition, Q_condition : number, default -999
        Signed thresholds on 'P [mm]' / 'Q [mm/d]'. A positive value requires
        the column to be greater than it, a zero or negative value requires the
        column to be smaller than its magnitude. -999 means "not used"; at
        least one of the two must be given.

    Returns
    -------
    DataFrame
        The selected rows with the former index as a column plus the helper
        columns 'Q_diff', 'tolerance_condition' and 'condition'. An empty
        DataFrame is returned when the arguments are invalid or nothing
        qualifies, so callers can always test ``len(result) == 0``.
    """
    unset = -999

    # Validate before doing any work, and always hand back a DataFrame.
    if adjust not in ('start', 'end'):
        print('Invalid adjust parameter. Please use "start" or "end"')
        return pd.DataFrame()

    if P_condition == unset and Q_condition == unset:
        print('not a valid condition')
        return pd.DataFrame()

    if adjust == 'start':
        window = Q_P_data.loc[start:]
    else:
        # reverse the order so that row 0 is `start` and later rows go back in time
        window = Q_P_data.loc[:start].iloc[::-1]

    # reset_index returns a new frame (the caller's data stays untouched) and
    # gives positional labels 0..n-1 in walking order
    sub_grupp = window.reset_index()

    # change of streamflow relative to the previous row in walking order
    sub_grupp['Q_diff'] = sub_grupp['Q [mm/d]'].diff().fillna(0)

    # streamflow may change against the expected direction by at most tolerance_p
    if adjust == 'start':
        sub_grupp['tolerance_condition'] = sub_grupp['Q_diff'] < tolerance_p * sub_grupp['Q [mm/d]']
    else:
        sub_grupp['tolerance_condition'] = sub_grupp['Q_diff'] > -tolerance_p * sub_grupp['Q [mm/d]']

    # Each supplied threshold is interpreted on its own sign, also when both are given.
    if P_condition == unset:
        sub_grupp['condition'] = _threshold_mask(sub_grupp['Q [mm/d]'], Q_condition)
    elif Q_condition == unset:
        sub_grupp['condition'] = _threshold_mask(sub_grupp['P [mm]'], P_condition)
    else:
        sub_grupp['condition'] = (
            _threshold_mask(sub_grupp['Q [mm/d]'], Q_condition)
            & _threshold_mask(sub_grupp['P [mm]'], P_condition)
        )

    # rows where both rules hold / rows where at least one fails
    meets_both = sub_grupp['condition'] & sub_grupp['tolerance_condition']
    yesgroup = sub_grupp[meets_both]
    nogroup = sub_grupp[~meets_both]

    if len(yesgroup) == 0:
        print('No data where conditions are met')
        return pd.DataFrame()

    if len(nogroup) == 0:
        print('nogroup = 0')
        return sub_grupp.loc[yesgroup.index[0]:]

    if yesgroup.index[0] < nogroup.index[0]:
        print('everything is fine')
        # NOTE(review): .loc slicing includes its end label, so the first row
        # that FAILS the rules is part of the result - confirm this is intended.
        return sub_grupp.loc[:nogroup.index[0]]

    # the very first row already fails, so there is no run starting at `start`
    print('No valid range found between yesgroup and nogroup indices')
    return pd.DataFrame()



In [None]:
# Perennial reaches: points that were recorded as wet in every survey that
# has an observation for them.
wetdry_by_point = gdf.groupby('geometry')['wetdry']
perennial = pd.DataFrame(wetdry_by_point.apply(lambda obs: sum(obs == 'wet'))).reset_index(drop=False)
perennialcount = pd.DataFrame(wetdry_by_point.count()).reset_index(drop=False)

# The threshold could be relaxed if the data suggest a different cut-off;
# for now the wet count has to reach the number of observations.
always_wet = perennial['wetdry'] >= perennialcount['wetdry']

# These points are treated as wet on every date.
perennial = perennial[always_wet].assign(wetdry='wet')

# The geometry column already holds geometry objects, so no WKT parsing is needed.
gdf_perennial = gpd.GeoDataFrame(perennial, geometry='geometry', crs='EPSG:26912')
gdf_perennial['x'] = gdf_perennial.geometry.x
gdf_perennial['y'] = gdf_perennial.geometry.y

# Repeat the perennial points once per available imagery date, skipping the
# imagery dates that are already paired with a survey.
per_date_copies = [gdf_perennial.assign(imagery=im_date) for im_date in C_im_date['date']]
imagery_perennial = pd.concat(per_date_copies, ignore_index=True)
already_paired = imagery_perennial['imagery'].isin(Ch['Imagery'])
imagery_perennial = imagery_perennial[~already_paired]
imagery_perennial['assumption'] = 'assumed perennial'
imagery_perennial

In [None]:
# Assume that points surveyed as wet were also wet on the imagery dates BEFORE
# the survey, as long as the hydrograph is in a recession: walking back from
# the survey date, daily precipitation stays below 1 mm and streamflow stays
# within the 5 % tolerance.

wet_list = []

for date in C_surveyData['Year'].unique():
    # unique() may yield numpy datetimes; a Timestamp supports strftime and
    # compares/slices the same way
    survey_date = pd.Timestamp(date)

    wet1 = tolerance(C_new_hyd, 'datetime', survey_date, 'end', 0.05, Q_condition = -999, P_condition = -1)
    if wet1 is None or len(wet1) == 0:
        print('wet1 is empty')
        continue

    # imagery dates already paired with a survey are handled elsewhere
    wet1 = wet1[~wet1['datetime'].isin(Ch['Imagery'])]
    wet_imagery = pd.merge(wet1, C_im_date, left_on = ['datetime'], right_on = ['date'], how = 'inner')

    # points with exactly one 'wet' observation in this survey
    surveyed = gdf[gdf['Year'] == survey_date]
    wet_points = pd.DataFrame(
        surveyed.groupby('geometry')['wetdry'].apply(lambda obs: sum(obs == 'wet'))
    ).reset_index(drop = False)
    wet_points = wet_points[wet_points['wetdry'] == 1].assign(wetdry = 'wet')

    # one copy of the wet points per qualifying imagery date
    wet_im_points = [wet_points.assign(imagery = imagery_date) for imagery_date in wet_imagery['date']]

    if len(wet_im_points) == 0:
        print('No data for date ' + survey_date.strftime('%Y-%m-%d'))
        continue

    # 'level_1' only exists if the groupby produced a second index level;
    # ignoring its absence keeps ALL imagery dates instead of falling back to
    # the first one only.
    wet = pd.concat(wet_im_points).drop(columns = ['level_1'], errors = 'ignore')
    wet_list.append(wet)


wet_df = pd.concat(wet_list)

wet_df['assumption'] = 'assumed wet'




In [None]:
# Assume that points surveyed as dry were also dry on the imagery dates AFTER
# the survey, as long as the hydrograph is in a recession: walking forward from
# the survey date, daily precipitation stays below 1 mm and streamflow stays
# within the 5 % tolerance.

dry_list = []

for date in C_surveyData['Year'].unique():
    # unique() may yield numpy datetimes; a Timestamp supports strftime and
    # compares/slices the same way
    survey_date = pd.Timestamp(date)

    dry1 = tolerance(C_new_hyd, 'datetime', survey_date, 'start', 0.05, Q_condition = -999, P_condition = -1)
    if dry1 is None or len(dry1) == 0:
        print('dry1 is empty')
        continue

    # imagery dates already paired with a survey are handled elsewhere
    dry1 = dry1[~dry1['datetime'].isin(Ch['Imagery'])]
    dry_imagery = pd.merge(dry1, C_im_date, left_on = ['datetime'], right_on = ['date'], how = 'inner')

    # points with exactly one 'dry' observation in this survey
    surveyed = gdf[gdf['Year'] == survey_date]
    dry_points = pd.DataFrame(
        surveyed.groupby('geometry')['wetdry'].apply(lambda obs: sum(obs == 'dry'))
    ).reset_index(drop = False)
    dry_points = dry_points[dry_points['wetdry'] == 1].assign(wetdry = 'dry')

    # one copy of the dry points per qualifying imagery date
    dry_im_points = [dry_points.assign(imagery = imagery_date) for imagery_date in dry_imagery['date']]

    if len(dry_im_points) == 0:
        print('No data for date ' + survey_date.strftime('%Y-%m-%d'))
        continue

    # 'level_1' only exists if the groupby produced a second index level;
    # ignoring its absence keeps ALL imagery dates instead of falling back to
    # the first one only.
    dry = pd.concat(dry_im_points).drop(columns = ['level_1'], errors = 'ignore')
    dry_list.append(dry)


dry_df = pd.concat(dry_list)

dry_df['assumption'] = 'assumed dry'





In [None]:
# Stack the surveyed points (matched to their imagery date) with the three
# assumption-based tables and turn the result into one GeoDataFrame.
gdf['assumption'] = 'survey/imagery match'

# Surveys without an accepted imagery date keep an empty 'Imagery' value.
gdf_imagery = pd.merge(
    gdf, Ch, left_on='Year', right_on='Survey', how='left'
).drop(columns=['Survey', 'sum_P', 'Q_diff [%]', 'Year'])

stacked = pd.concat([gdf_imagery, imagery_perennial, wet_df, dry_df])
all_expanded = gpd.GeoDataFrame(stacked, geometry='geometry', crs='EPSG:26912')

# Recompute the coordinates for every row from the geometry.
all_expanded['x'] = all_expanded.geometry.x
all_expanded['y'] = all_expanded.geometry.y

# Survey rows carry their date in 'Imagery', assumption rows in 'imagery';
# 'date' takes the former and falls back to the latter.
all_expanded = all_expanded.rename(columns={'imagery': 'date_first'})
all_expanded['date'] = all_expanded['Imagery'].combine_first(all_expanded['date_first'])
all_expanded

In [None]:
# Read every processed-imagery CSV and stack them into one table.
path = '../data/Cienega/processed_imagery'

# Files are ordered by the integer that follows '_buffer_' in their name.
processed_imagery = sorted(
    glob.glob(path + '/*.csv'),
    key=lambda csv_path: int(csv_path.split('_buffer_')[1].split('.')[0]),
)

con_ready_imagery = [pd.read_csv(csv_path) for csv_path in processed_imagery]

concatenated = pd.concat(con_ready_imagery)

In [None]:
# Turn the stacked imagery table into a GeoDataFrame with a parsed date and
# plain x / y coordinate columns.

# Parse WKT only where the value is still text, so the cell can be re-run
# after the column has already been converted to geometry objects.
concatenated['geometry'] = concatenated['geometry'].apply(
    lambda geom: wkt.loads(geom) if isinstance(geom, str) else geom
)
gdf_processed = gpd.GeoDataFrame(concatenated, geometry = 'geometry', crs='EPSG:26912')

# dates are stored as YYYYMMDD
gdf_processed['date'] = pd.to_datetime(gdf_processed['date'], format='%Y%m%d')
gdf_processed['x'] = gdf_processed.geometry.x
gdf_processed['y'] = gdf_processed.geometry.y

In [None]:
# Remove exact duplicate rows from the processed imagery before merging.
# NOTE(review): this cell used to operate on `gdf2`, a name that is not defined
# anywhere in the notebook, so it failed on a fresh kernel. It is assumed that
# the processed imagery frame was meant - confirm.
gdf_processed = gdf_processed.drop_duplicates()
gdf_processed

In [None]:
# Round the coordinates of both tables to the same number of decimals so that
# they can be used as exact merge keys.
precision = 6
for frame in (all_expanded, gdf_processed):
    for axis in ('x', 'y'):
        frame[axis] = frame[axis].round(precision)

In [None]:
# Keep only the rows that exist in both tables for the same date and location.
merged = all_expanded.merge(
    gdf_processed,
    how='inner',
    on=['date', 'x', 'y'],
)

In [None]:
# Drop the helper columns that are no longer needed after the merge, then
# order the rows chronologically.
unused_columns = ['geometry_x', 'geometry_y', 'Imagery', 'date_first']
merged = merged.drop(columns=unused_columns)
merged_sorted = merged.sort_values('date')

In [None]:
# Remove duplicate rows and every row that still has a missing value.
merged_sorted = merged_sorted.drop_duplicates().dropna()
merged_sorted

In [None]:
# Write the final table as `splitnum` consecutive CSV files of (almost) equal
# size. Values are written with two decimals.
splitnum = 10
total_rows = len(merged_sorted)

start = 0
for i in range(1, splitnum + 1):
    # Integer arithmetic keeps the boundaries exact; the last one is always
    # total_rows, so no trailing row can be lost to float rounding.
    newstart = total_rows * i // splitnum
    merged_sorted.iloc[start:newstart].to_csv(
        '../data/Cienega/processed_assumptions/processed_with_dates_and_assumptions' + str(i) + '.csv',
        index=False,
        float_format='%.2f',
    )
    start = newstart


