In [9]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from ipywidgets import interact
import ipywidgets as widgets
from ipywidgets import interact, FloatSlider
from IPython.display import display, clear_output

file_path = 'data/Hurricane_Irene_Hudson_River.xlsx'
do_data = pd.read_excel('data/Hurricane_Irene_Hudson_River.xlsx', sheet_name = 5).drop(['Piermont D.O. (ppm)'], axis = 1)
rainfall_data = pd.read_excel(file_path, sheet_name='Rainfall').drop(['Piermont  Rainfall Daily Accumulation (Inches)'], axis = 1)
turbidity_data = pd.read_excel(file_path, sheet_name='Turbidity').drop(['Piermont Turbidity in NTU'], axis = 1)

In [10]:
do_data

Unnamed: 0,Date Time (ET),Port of Albany D.O. (ppm),Norrie Point D.O. (ppm)
0,2011-08-25 00:00:00,7.68,7.81
1,2011-08-25 00:15:00,7.60,7.73
2,2011-08-25 00:30:00,7.57,7.63
3,2011-08-25 00:45:00,7.72,7.67
4,2011-08-25 01:00:00,7.74,7.63
...,...,...,...
1147,2011-09-05 22:45:00,8.73,6.84
1148,2011-09-05 23:00:00,8.76,6.78
1149,2011-09-05 23:15:00,8.66,6.83
1150,2011-09-05 23:30:00,8.75,6.79


In [12]:
# Merge the two datasets
data = do_data.merge(rainfall_data, on = 'Data Time (ET)')
data = data.merge(turbidity_data, on = 'Date Time (ET)')
data.head()

# Update the column names
data.columns = ['date', 'albany_do', 'norrie_do', 'albany_rainfall', 'norrie_rainfall', 'albany_turbidity', 'norrie_turbidity']

# Conver data to datetime format and set it as index
date['date'] = pd.to_datetime(data['date'])

# Set the date as the index
data.set_index('data', inplace = True)


KeyError: 'Data Time (ET)'

In [None]:
# Define predictors and the target variable
X = data['albany_do', 'albany_rainfall']
y = data['albany_turbidity']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create and fit the model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
print(f"R_Squared: {r2_score(y_test, y_pred)}")

rmse = np.sqrt(mean_squared_error(y_test, y_pred=))
rmse

r2 = r2_score(y_test, y_pred)
r2

In [None]:
# Create a widget for selecting predictors
predictor_selector = widgets.SelectMultiple(
    options = data.columns,
    value = data.columns[0],
    description = 'Predictors'
)

# Create a dropdown for selecting the target variable
target_selector = widgets.Dropdown(
    option = data.columns,
    value = data.columns[1],
    description = 'Target'
)

# Button to evaluate the model
evaluate_button = widgets.Button(description = 'Evaluate Model')


# Output widget to display results
output = widgets.Output()

# Define the function to handle button clciks 
def evaluate_model(b):
    with output:
        clear_output(wait=True) # Clear output of display area

        # Make sure the target is not in the predictors
        selected_predictors = [item for item in predictor_selector.value]
        if target_selector.value in selected_predictors:
            print('Target variable must not be in the predictors.')
            return
        
        # Prepare the data
        X = data[[selected_predictors]]
        y = data[[target_selector.value]]

        # Split data into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

        # create and fit the model
        model = LinearRegression()
        model.fit(X_train, y_train)

        # Predict and calculate R^2 and MSE
        y_pred = model.predict(X_test)
        r2 = r2_score(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)

        # Display the R^2 score and MSE
        print(f"R^2: {r2:.4f}")
        print(f"MSE: {mse:.4f}")

# Display the widgets and connect the button to the function
display(predictor_selector, target_selector, evaluate_button, output)
evaluate_button.on_click(evaluate_model)