### Merging turbidity with SSC samples

In [1]:
import pandas as pd
import numpy as np
# Load the SSC sample tables and the turbidity records from CSV.
# All four files are read the same way: the 'Date_Time' column is parsed as
# datetimes and used as the row index.
read_opts = dict(parse_dates=['Date_Time'], index_col='Date_Time')

spring_SSC = pd.read_csv('../../../data/SSC/corrected_SSC/samples_spring_SSC_corrected.csv', **read_opts)
summer_SSC = pd.read_csv('../../../data/SSC/corrected_SSC/samples_summer_SSC_corrected.csv', **read_opts)
spring_turbidity = pd.read_csv('../../../data/SSC/turb_SP23_clean_complete.csv', **read_opts)
summer_turbidity = pd.read_csv('../../../data/SSC/turb_SM23_clean_complete.csv', **read_opts)

In [2]:
# Flag every index entry that repeats an earlier timestamp. duplicated()
# defaults to keep='first', so the first occurrence is never flagged.
spring_dupes = spring_turbidity.index.duplicated()
summer_dupes = summer_turbidity.index.duplicated()
print("Spring turbidity duplicates:", spring_dupes.sum())
print("Summer turbidity duplicates:", summer_dupes.sum())

# Drop the flagged rows, which leaves the first row for each timestamp.
spring_turbidity = spring_turbidity.loc[~spring_dupes]
summer_turbidity = summer_turbidity.loc[~summer_dupes]

# Re-count on the cleaned frames to confirm nothing is left.
print("Spring turbidity duplicates:", spring_turbidity.index.duplicated().sum())
print("Summer turbidity duplicates:", summer_turbidity.index.duplicated().sum())

Spring turbidity duplicates: 0
Summer turbidity duplicates: 47
Spring turbidity duplicates: 0
Summer turbidity duplicates: 0


In [3]:
# Put both turbidity records on a 1-minute grid: average the readings that
# fall in each minute, then fill the minutes left empty (NaN) with
# interpolate(), which is linear by default.
spring_turbidity, summer_turbidity = (
    record.resample('1min').mean().interpolate()
    for record in (spring_turbidity, summer_turbidity)
)

In [4]:
# Attach the turbidity record to each SSC sample by matching the Date_Time
# index of both frames. how='inner' keeps only the SSC samples whose exact
# timestamp also appears in the turbidity index.
spring_merged = pd.merge(spring_SSC, spring_turbidity, left_index=True, right_index=True, how='inner')
summer_merged = pd.merge(summer_SSC, summer_turbidity, left_index=True, right_index=True, how='inner')

# An inner join discards unmatched samples without any warning, so report how
# many distinct SSC timestamps did not make it into each merged frame.
spring_unmatched = spring_SSC.index.difference(spring_merged.index)
summer_unmatched = summer_SSC.index.difference(summer_merged.index)
print("Spring SSC timestamps without a turbidity match:", len(spring_unmatched))
print("Summer SSC timestamps without a turbidity match:", len(summer_unmatched))

In [5]:
# Write each merged table to a CSV file in the current working directory.
for merged_frame, out_name in (
    (spring_merged, 'spring_merged_SSC_turbidity.csv'),
    (summer_merged, 'summer_merged_SSC_turbidity.csv'),
):
    merged_frame.to_csv(out_name)