# ***Sreeram Shenai RA1811003010290 Water Pollution Project***

Chennai has one of the worst availabilities of potable and household water in the country, with the city having one of the lowest per capita water consumptions among all major cities of India. This project aims to estimate the future availability of water, given the water available to the city between 2004 and 2020.

In [None]:


# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# ***Exploratory Data Analysis***

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import warnings
# Suppress every warning for the rest of the session to keep cell output clean.
# NOTE(review): this also hides deprecation and model-fitting warnings from the
# libraries used below — consider narrowing the filter.
warnings.filterwarnings("ignore")

In [None]:
# Load the reservoir-level and rainfall CSVs into `lvl` and `rain` respectively.
lvl, rain = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/chennai-water-management/chennai_reservoir_levels.csv",
        "../input/chennai-water-management/chennai_reservoir_rainfall.csv",
    )
)

In [None]:
# Preview the first rows of the level data to see which columns are available.
lvl_preview = lvl.head()
lvl_preview

In [None]:
# Preview the first rows of the rainfall data.
rain_preview = rain.head()
rain_preview

In [None]:
# Summary statistics of the level data, to get a feel for the dataset.
lvl_summary = lvl.describe()
lvl_summary

In [None]:
# Summary statistics of the rainfall data.
rain_summary = rain.describe()
rain_summary

# Using EDA to understand water plots for all areas

In [None]:
# Line chart of the POONDI column of the level data against Date.
fig = px.line(
    data_frame=lvl,
    x="Date",
    y="POONDI",
    title="Water levels in the poondi water reservoir",
)
fig.show()

In [None]:
# Line chart of the POONDI column of the rainfall data against Date.
fig = px.line(
    data_frame=rain,
    x="Date",
    y="POONDI",
    title="Rain levels in the poondi water reservoir",
)
fig.show()

In [None]:
# Same chart for Cholavaram: level data against Date.
fig = px.line(
    data_frame=lvl,
    x="Date",
    y="CHOLAVARAM",
    title="Water levels in the cholavaram water reservoir",
)
fig.show()

In [None]:
# Same chart for Cholavaram: rainfall data against Date.
fig = px.line(
    data_frame=rain,
    x="Date",
    y="CHOLAVARAM",
    title="rain levels in the cholavaram water reservoir",
)
fig.show()

In [None]:
# Same chart for Red Hills: level data against Date.
fig = px.line(
    data_frame=lvl,
    x="Date",
    y="REDHILLS",
    title="Water levels in the RED HILLS water reservoir",
)
fig.show()

In [None]:
# Same chart for Red Hills: rainfall data against Date.
fig = px.line(
    data_frame=rain,
    x="Date",
    y="REDHILLS",
    title="rain levels in the RED HILLS water reservoir",
)
fig.show()

In [None]:
# Same chart for Chembarambakkam: level data against Date.
fig = px.line(
    data_frame=lvl,
    x="Date",
    y="CHEMBARAMBAKKAM",
    title="Water levels in the chembarakkam water reservoir",
)
fig.show()

In [None]:
# Same chart for Chembarambakkam: rainfall data against Date.
fig = px.line(
    data_frame=rain,
    x="Date",
    y="CHEMBARAMBAKKAM",
    title="rain levels in the chembarakkam water reservoir",
)
fig.show()

In [None]:
# Prepare the rainfall table for summing across reservoirs: parse the Date
# column into real datetimes and use it as the index.
# NOTE(review): dayfirst=True assumes the CSV stores dates as DD-MM-YYYY —
# confirm against the file. Without the hint, an ambiguous value such as
# 01-02-2004 would be read month-first.
rain["Date"] = pd.to_datetime(rain["Date"], dayfirst=True)
rain = rain.set_index("Date")

In [None]:
# Sum the four reservoir columns row by row and plot the combined series.
# The sum is kept in a standalone Series: assigning to `rain.total` would only
# set a Python attribute on the DataFrame object, not create a column.
total_rainfall = (
    rain["POONDI"] + rain["CHOLAVARAM"] + rain["REDHILLS"] + rain["CHEMBARAMBAKKAM"]
)
ax = total_rainfall.plot(figsize=(20, 10), linewidth=3, fontsize=15)
ax.set_xlabel('Year', fontsize=15)
ax.set_ylabel('Rainfall', fontsize=15)

In [None]:
# Correlation between the reservoir water-level columns.
# `lvl` still carries the textual Date column at this point, so restrict the
# frame to numeric columns first; recent pandas versions raise when corr()
# meets a non-numeric column instead of dropping it.
corr = lvl.select_dtypes(include="number").corr()
corr.style.background_gradient(cmap='PuBu')

In [None]:
# Pairwise correlation of the rainfall columns, rendered as a shaded table.
corr = rain.corr()
styled_corr = corr.style.background_gradient(cmap='PuBu')
styled_corr

# ***Time Series Analysis starts here***

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
import statsmodels.api as sm
import matplotlib

# Apply the 'fivethirtyeight' style sheet first, then override individual
# rcParams on top of it (label sizes and text colour).
plt.style.use('fivethirtyeight')
matplotlib.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'text.color': 'k',
})

import warnings
# Suppress every warning for the time-series section as well.
# NOTE(review): this hides any warnings raised while the models below are
# fitted — consider narrowing the filter.
warnings.filterwarnings("ignore")
              

In [None]:
# Reload both CSVs so the time-series section starts from unmodified frames.
lvl, rain = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/chennai-water-management/chennai_reservoir_levels.csv",
        "../input/chennai-water-management/chennai_reservoir_rainfall.csv",
    )
)

In [None]:
# Add a 'Total' column to each frame: the row-wise sum of the four reservoirs.
for frame in (lvl, rain):
    frame['Total'] = (
        frame['POONDI']
        + frame['CHOLAVARAM']
        + frame['REDHILLS']
        + frame['CHEMBARAMBAKKAM']
    )

In [None]:
# Drop the per-reservoir columns now that only the total is needed.
cols = ['POONDI', 'CHOLAVARAM', 'REDHILLS', 'CHEMBARAMBAKKAM']
lvl = lvl.drop(columns=cols)
rain = rain.drop(columns=cols)


In [None]:
# Sort both datasets chronologically and index them by date.
# The Date column is read from CSV as text, so it is parsed first: sorting the
# raw strings would order them lexicographically, not by calendar date.
# NOTE(review): dayfirst=True assumes the CSV stores dates as DD-MM-YYYY —
# confirm against the file.
lvl['Date'] = pd.to_datetime(lvl['Date'], dayfirst=True)
lvl = lvl.sort_values('Date').set_index('Date')

rain['Date'] = pd.to_datetime(rain['Date'], dayfirst=True)
rain = rain.sort_values('Date').set_index('Date')

In [None]:
# Build the candidate parameter grid for the seasonal ARIMA model.
# A non-seasonal order is written (p, d, q):
#   p - autoregressive order
#   d - degree of differencing
#   q - moving-average order
# Each of the three takes the values 0 and 1 here, giving 8 combinations.
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))

# Seasonal orders reuse the same combinations with a seasonal period of 12.
# The third element must be the combination's own q value (x[2]); the list
# literal [2] used previously put a list into every tuple.
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in itertools.product(p, d, q)]

In [None]:
# Print a few (order, seasonal order) pairings taken from the parameter grid.
print('Examples of parameter combinations for seasonal ARIMA as in this case...')
example_template = 'SARIMA: {} x {}'
for order_idx, seasonal_idx in ((1, 1), (1, 2), (2, 3), (2, 4)):
    print(example_template.format(pdq[order_idx], seasonal_pdq[seasonal_idx]))

In [None]:
# Fit a seasonal ARIMA model to the total water level and print the
# coefficient table of the fit summary.
# The keyword is `enforce_stationarity`; the earlier spelling
# `enforce_stationary` is not a SARIMAX parameter, so the intended setting was
# never applied.
# NOTE(review): a seasonal period of 12 suggests monthly observations —
# confirm the series frequency matches, or resample before fitting.
lvl_model = sm.tsa.statespace.SARIMAX(
    lvl,
    order=(1, 0, 0),
    seasonal_order=(1, 1, 1, 12),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
lvl_results = lvl_model.fit()
print(lvl_results.summary().tables[1])

In [None]:
# Draw the diagnostic plots for the fitted water-level model.
lvl_diagnostics_fig = lvl_results.plot_diagnostics(figsize=(16, 8))
plt.show()

In [None]:
# Fit a seasonal ARIMA model to the total rainfall and print the coefficient
# table of the fit summary.
# The keyword is `enforce_stationarity`; the earlier spelling
# `enforce_stationary` is not a SARIMAX parameter, so the intended setting was
# never applied.
# NOTE(review): a seasonal period of 12 suggests monthly observations —
# confirm the series frequency matches, or resample before fitting.
rain_model = sm.tsa.statespace.SARIMAX(
    rain,
    order=(1, 0, 1),
    seasonal_order=(0, 1, 1, 12),
    enforce_stationarity=False,
    enforce_invertibility=False,
)

rain_results = rain_model.fit()
print(rain_results.summary().tables[1])

In [None]:
# Draw the diagnostic plots for the fitted rainfall model.
rain_diagnostics_fig = rain_results.plot_diagnostics(figsize=(16, 8))
plt.show()