In [None]:
import pandas as pd
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
import pickle

In [None]:
# Load the CSV into a DataFrame; the relative path is resolved against the
# notebook's current working directory.
csv_path = "data.csv"
data = pd.read_csv(csv_path)

# Show the first rows as a quick check of what was parsed.
data.head()

In [None]:
# Remove the wind_direction column. The result is bound back to `data`, so
# later cells keep using the same name.
unwanted_columns = ["wind_direction"]
data = data.drop(columns=unwanted_columns)

# Preview the frame without the dropped column.
data.head()

In [None]:
# Aggregate the readings to one row per calendar day.
#
# The datetime conversion and indexing happen on a derived frame rather than
# on `data` itself. `data` therefore keeps its 'time' column, and this cell can
# be re-run without raising KeyError: 'time'.
timestamped = data.assign(time=pd.to_datetime(data['time'])).set_index('time')

# 'D' groups rows into calendar-day bins; mean() averages each column per bin.
# Days that contain no rows come out as all-NaN rows.
daily_data = timestamped.resample('D').mean()

# Turn 'time' back into an ordinary column so the frame keeps the layout the
# following cell expects.
daily_data = daily_data.reset_index()
daily_data.head()

In [None]:
# Make sure 'time' is datetime-typed, then promote it from a column to the
# row index of the daily frame.
daily_data = (
    daily_data
    .assign(time=pd.to_datetime(daily_data['time']))
    .set_index('time')
)

In [None]:
# Smooth every column with a rolling mean over 7 consecutive rows of the
# daily frame.
weekly_mean = daily_data.rolling(window=7).mean()

# shift(-1) moves each value one row earlier, so the number stored at a given
# date also covers the row that follows it.
# NOTE(review): this labels every window one day early - confirm it is intended.
shifted_mean = weekly_mean.shift(-1)

# Discard rows containing any NaN (incomplete windows and the row emptied by
# the shift).
rolling_avg = shifted_mean.dropna()
rolling_avg.head(10)

In [None]:
# Draw one line chart per smoothed column, two panels per row.
#
# The grid is sized from the number of columns instead of being fixed at 3x2,
# so frames with more than six columns do not run past the end of `axes`, and
# frames with fewer do not leave empty panels on screen.
n_panels = len(rolling_avg.columns)
n_grid_cols = 2
# Ceiling division; keep at least one row so plt.subplots always gets a valid grid.
n_grid_rows = max(1, -(-n_panels // n_grid_cols))

fig, axes = plt.subplots(
    nrows=n_grid_rows,
    ncols=n_grid_cols,
    figsize=(12, 4 * n_grid_rows),  # 4 inches per row: 12x12 for a 3-row grid
    squeeze=False,  # always return a 2-D array so flatten() is safe
)
axes = axes.flatten()

for ax, column in zip(axes, rolling_avg.columns):
    sns.lineplot(ax=ax, data=rolling_avg, x=rolling_avg.index, y=column)
    ax.set_title(f'7-Day Rolling Average of {column}')
    ax.set_xlabel('Date')
    ax.set_ylabel('Average Value')

# Hide any grid cell that did not receive a column.
for spare_ax in axes[n_panels:]:
    spare_ax.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# Ordered 60/40 split: the first 60% of rows (rounded down) form the training
# set and the remaining rows are held out for testing. Rows are not shuffled.
train_fraction = 0.6
n_rows = len(rolling_avg)
train_size = int(n_rows * train_fraction)
test_size = n_rows - train_size

train, test = rolling_avg.iloc[:train_size], rolling_avg.iloc[train_size:]

In [None]:
def _next_step_pairs(frame):
    """Pair each row of ``frame`` with the following row's ``water_amount``.

    Returns a ``(features, target)`` tuple. ``features`` is every column of
    ``frame`` (including ``water_amount`` itself) for all rows but the last.
    ``target`` is ``water_amount`` shifted up by one row and trimmed the same
    way, so it keeps the index labels of the feature rows. The last row is
    dropped from both because it has no successor.
    """
    features = frame.iloc[:-1]
    target = frame['water_amount'].shift(-1).iloc[:-1]
    return features, target


X_train, y_train = _next_step_pairs(train)
X_test, y_test = _next_step_pairs(test)

In [None]:
# Random forest with 100 trees; random_state is fixed so repeated runs build
# the same forest.
forest_settings = {"n_estimators": 100, "random_state": 42}
model = RandomForestRegressor(**forest_settings)

# Fit on the training features/targets; as the last expression, the fitted
# estimator is also displayed by the notebook.
model.fit(X_train, y_train)


In [None]:
# Predict on the held-out features and report the mean squared error against
# the held-out targets.
predictions = model.predict(X_test)
error = mean_squared_error(y_true=y_test, y_pred=predictions)
print(f"Test MSE: {error}")

In [None]:
# Overlay the model's predictions on the actual target values for the test rows.
fig, ax = plt.subplots(figsize=(10, 6))

# y_test and predictions have one entry fewer than `test`, so the x values
# omit the last index label.
# NOTE(review): each target is the *following* row's water_amount, so points
# are drawn at the date of the feature row - confirm this labelling is intended.
plot_dates = test.index[:-1]
ax.plot(plot_dates, y_test, label='Actual')
ax.plot(plot_dates, predictions, label='Predicted', linestyle='--')

ax.set_title('Random Forest Predictions vs Actual')
# Label both axes so the figure can be read on its own, as the per-column
# rolling-average charts already are.
ax.set_xlabel('Date')
ax.set_ylabel('water_amount')
ax.legend()
plt.show()

In [None]:
# Serialize the fitted model to a binary pickle file in the working directory.
# Only unpickle this file from a trusted source: loading a pickle can execute
# arbitrary code.
with open('random_forest.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)
