In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the Lake Bilancino CSV into a DataFrame, then print the column dtypes and
# non-null counts so missing data is visible right away.
lake_bilancino_csv = '../input/acea-water-prediction/Lake_Bilancino.csv'
df = pd.read_csv(lake_bilancino_csv)
df.info()

In [None]:
# Preview the first rows to check column names and the raw value formats.
df.head()

In [None]:
# List the distinct values present in the Rainfall_Mangona column.
df['Rainfall_Mangona'].unique()

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

# Data visualization

In [None]:
# Draw one histogram per numeric column on a single large grid to compare distributions.
hist_figsize = (20, 15)
df.hist(figsize=hist_figsize)

In [None]:
# Rank every feature by the absolute Spearman correlation with Lake_Level.
# Only numeric columns are correlated: 'Date' is still a string column at this point,
# and pandas >= 2.0 raises on non-numeric columns instead of silently dropping them.
numeric_features = df.select_dtypes(include='number')
lake_level_corr = (
    numeric_features
    .corr(method='spearman')
    .abs()[['Lake_Level']]
    .sort_values('Lake_Level')
)
sns.heatmap(lake_level_corr)

In [None]:
# Convert the textual Date column to datetimes and derive calendar features from it.
# NOTE(review): the ACEA CSVs appear to store dates day-first (DD/MM/YYYY) - confirm
# against df.head(). Without dayfirst=True pandas reads ambiguous values such as
# "03/06/2002" month-first, which swaps day and month (or fails once it meets a
# day > 12, depending on the pandas version) and corrupts the Month feature.
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
# Re-print the schema to confirm the new dtypes and the two added columns.
df.info()

In [None]:
# Temperature at Le Croci per year. Rows sharing a Year are aggregated by seaborn's
# default estimator (mean line with a confidence band).
sns.set_theme(style="darkgrid")
sns.lineplot(data=df, x="Year", y="Temperature_Le_Croci")

In [None]:
# Lake level per year. Rows sharing a Year are aggregated by seaborn's default
# estimator (mean line with a confidence band).
sns.set_theme(style="darkgrid")
sns.lineplot(data=df, x="Year", y="Lake_Level")

In [None]:
# Flow rate per year. Rows sharing a Year are aggregated by seaborn's default
# estimator (mean line with a confidence band).
sns.set_theme(style="darkgrid")
sns.lineplot(data=df, x='Year', y='Flow_Rate')

In [None]:
# Average each rainfall column per calendar year; Year is kept as a regular column
# so it can be used directly as the x-axis when plotting.
rain_gauge_columns = [
    'Rainfall_S_Piero',
    'Rainfall_Mangona',
    'Rainfall_S_Agata',
    'Rainfall_Cavallina',
    'Rainfall_Le_Croci',
]
yearly_groups = df.groupby(['Year'])
df_rain = yearly_groups[rain_gauge_columns].mean().reset_index()
df_rain.head(3)

In [None]:
# Plot the yearly mean of every rainfall column on one set of axes.
import matplotlib.pyplot as plt

# The figure must be created BEFORE plotting: calling plt.figure() afterwards opens a
# new, empty figure and leaves the actual chart at the default size.
plt.figure(figsize=(20, 15))

yearly_line_colors = {
    'Rainfall_S_Piero': 'blue',
    'Rainfall_Mangona': 'orange',
    'Rainfall_S_Agata': 'yellow',
    'Rainfall_Cavallina': 'olive',
    'Rainfall_Le_Croci': 'red',
}
for rain_column, line_color in yearly_line_colors.items():
    # With data=..., matplotlib labels each line with its y column name,
    # which is what plt.legend() picks up below.
    plt.plot('Year', rain_column, data=df_rain, color=line_color)

plt.xlabel('Year')
plt.ylabel('Mean rainfall')
plt.legend()

In [None]:
# Average each rainfall column per calendar month. This rebinds df_rain, replacing the
# per-year table built earlier, so any Year-based plot must be run before this cell.
rain_gauge_columns = [
    'Rainfall_S_Piero',
    'Rainfall_Mangona',
    'Rainfall_S_Agata',
    'Rainfall_Cavallina',
    'Rainfall_Le_Croci',
]
monthly_groups = df.groupby(['Month'])
df_rain = monthly_groups[rain_gauge_columns].mean().reset_index()
df_rain.head(3)

In [None]:
# Overlay the monthly mean of every rainfall column, one coloured line per column.
monthly_line_colors = {
    'Rainfall_S_Piero': 'blue',
    'Rainfall_Mangona': 'orange',
    'Rainfall_S_Agata': 'yellow',
    'Rainfall_Cavallina': 'olive',
    'Rainfall_Le_Croci': 'red',
}
for rain_column, line_color in monthly_line_colors.items():
    plt.plot('Month', rain_column, data = df_rain, color=line_color)
plt.legend()


# Preparing the data for modeling

In [None]:
# Collapse the five rainfall columns into two per-row summaries: their total
# (Rainfall_net) and their average (Rainfall_mean).
rain_gauge_columns = [
    'Rainfall_S_Piero',
    'Rainfall_Mangona',
    'Rainfall_S_Agata',
    'Rainfall_Cavallina',
    'Rainfall_Le_Croci',
]
rain_by_gauge = df[rain_gauge_columns]
df['Rainfall_net'] = rain_by_gauge.sum(axis=1)
df['Rainfall_mean'] = rain_by_gauge.mean(axis=1)
df.head()

# Linear Regression for Modeling


In [None]:
import sklearn
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score



In [None]:

# Chronological hold-out: fit on 2017-2019 and evaluate on 2020.
# The previous split tested on 2017 while also training on it, so the reported R^2
# was computed on rows the models had already seen (data leakage).
TRAIN_YEARS = [2017, 2018, 2019]
TEST_YEARS = [2020]
assert not set(TRAIN_YEARS) & set(TEST_YEARS), "train and test years must not overlap"

FEATURE_COLUMNS = ["Year", "Month", "Flow_Rate", "Rainfall_mean", "Rainfall_net"]
TARGET_COLUMNS = ["Lake_Level"]

train = df[df['Year'].isin(TRAIN_YEARS)]
test = df[df['Year'].isin(TEST_YEARS)]

x_train = train[FEATURE_COLUMNS]
# Selected with a list, so the targets stay single-column DataFrames (2-D).
y_train = train[TARGET_COLUMNS]

x_test = test[FEATURE_COLUMNS]
y_test = test[TARGET_COLUMNS]

In [None]:
# Ordinary least-squares model. The `normalize` argument was deprecated in
# scikit-learn 1.0 and removed in 1.2, so passing it raises TypeError on current
# releases. Dropping it does not change the predictions: an unregularised
# least-squares fit is unaffected by rescaling the feature columns.
lnrg = LinearRegression()

In [None]:
# (rows, columns) of the training feature matrix.
x_train.shape

In [None]:
# (rows, columns) of the test feature matrix.
x_test.shape

In [None]:
# Fit the linear model on the training features and the lake-level target.
lnrg.fit(X=x_train, y=y_train)

In [None]:
# Predict the lake level for the held-out rows with the fitted linear model.
y_pred = lnrg.predict(X=x_test)

In [None]:
# Coefficient of determination (R^2) of the linear model's predictions on the test
# rows; the bare name on the last line displays the value as the cell output.
rs = r2_score(y_true=y_test, y_pred=y_pred)
rs

# Random Forest for Modeling


In [None]:
from sklearn.ensemble import RandomForestRegressor

# random_state pins the forest's randomness so reruns give the same model.
regr = RandomForestRegressor(max_depth=100, random_state=0)
# y_train is a single-column frame; flatten it to 1-D so scikit-learn does not emit
# a DataConversionWarning about a column-vector target.
regr.fit(x_train, np.ravel(y_train))
# Rebinds y_pred: from here on it holds the forest's predictions, not the linear model's.
y_pred = regr.predict(x_test)

In [None]:
# R^2 of the most recent predictions in y_pred (the random forest's when the cells
# are run in order); the bare name on the last line displays the value.
rs = r2_score(y_true=y_test, y_pred=y_pred)
rs