In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Shell escape: print the working directory the notebook kernel is running in.
!pwd

In [None]:
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from plotly.subplots import make_subplots

## Reading data

In [None]:
# Load the reservoir levels; the 'Date' column is parsed day-first into datetimes.
df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/chennai-water-management/chennai_reservoir_levels.csv',
    dayfirst=True,
    parse_dates=['Date'],
)
df.head()

In [None]:
# Show the dtype of each column of the reservoir-level frame.
df.dtypes

In [None]:
# Count the missing values in each column.
df.isna().sum()

## 1. Compare the water levels over time

In [None]:
# Columns of `df` to plot, in panel order (left-to-right, then top-to-bottom).
reservoir_columns = ['POONDI', 'REDHILLS', 'CHEMBARAMBAKKAM', 'CHOLAVARAM']
n_cols = 2

# Every panel gets its own title so that none of the four is left unlabelled.
fig = make_subplots(
    rows=2, cols=n_cols,
    subplot_titles=[f'{column.title()} Reservoir (in mcft)' for column in reservoir_columns],
)

for position, column in enumerate(reservoir_columns):
    # divmod maps the flat position onto the grid; plotly rows/cols are 1-based.
    grid_row, grid_col = divmod(position, n_cols)
    fig.add_trace(
        go.Scatter(x=df['Date'], y=df[column], name=column.title()),
        row=grid_row + 1, col=grid_col + 1,
    )

# The year range in the title is derived from the data rather than hard-coded.
fig.update_layout(
    title_text=(
        "Water availability of Chennai's four major water reservoirs "
        f"({df['Date'].dt.year.min()} - {df['Date'].dt.year.max()})"
    )
)
fig.show()

In [None]:
# Display the first rows of the wide-format frame again.
df.head()

In [None]:
# Reshape wide -> long: 'Date' is kept as the identifier, every other column name
# goes into 'Reservoir' and its value into 'Water_level'.
df_tidy = pd.melt(
    df,
    id_vars=['Date'],
    var_name='Reservoir',
    value_name='Water_level',
)
df_tidy.head()

In [None]:
# Plotly Express ships inside the plotly package (already imported above as
# plotly.graph_objs); the standalone `plotly_express` distribution is a legacy
# shim that may not be installed.
import plotly.express as px

# One stacked panel per reservoir. `facet_col_wrap` is omitted because plotly
# ignores it whenever `facet_row` is set.
fig = px.line(
    df_tidy,
    x='Date',
    y='Water_level',
    facet_row='Reservoir',
    color='Reservoir',
    height=1200, width=1500,
    title=(
        "Water availability of Chennai's four major water reservoirs "
        f"({df.Date.dt.year.min()} - {df.Date.dt.year.max()})"
    ),
)
# Unlink the y-axes so each panel scales to its own reservoir's range.
fig.update_yaxes(matches=None)
fig.show()

## 2. Look at the overall water availability

In [None]:
# Row-wise sum of every column except 'Date'. An existing 'Total' is excluded as
# well (errors='ignore' makes that a no-op on the first run), so re-running this
# cell does not fold the previous total back into the sum.
df['Total'] = df.drop(columns=['Date', 'Total'], errors='ignore').sum(axis=1)
df.head()

In [None]:
# Line chart of the summed 'Total' column over time.
px.line(
    data_frame=df,
    x='Date',
    y='Total',
    title='Total water availability from all four reservoirs (in mcft)',
)

## 3. Rainfall levels

In [None]:
# Load the rainfall readings; the 'Date' column is parsed day-first into datetimes.
rain_df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/chennai-water-management/chennai_reservoir_rainfall.csv',
    dayfirst=True,
    parse_dates=['Date'],
)
rain_df.head()

In [None]:
# Show the dtype of each column of the rainfall frame.
rain_df.dtypes

### Daily rainfall in Chennai

In [None]:
# Melt to long format on the fly (one row per Date/Reservoir pair) and draw one
# panel per reservoir, wrapping after two columns.
fig = px.line(
    data_frame=pd.melt(
        rain_df,
        id_vars='Date',
        var_name='Reservoir',
        value_name='Rainfall',
    ),
    x='Date',
    y='Rainfall',
    color='Reservoir',
    facet_col='Reservoir',
    facet_col_wrap=2,
    title='Daily rainfall in Chennai',
)
# Unlink the y-axes so each panel uses its own scale.
fig.update_yaxes(matches=None)
fig.show()

In [None]:
# Truncate every date to the first day of its month. Working through the period
# accessors stays in datetime space (and passes NaT through) instead of
# concatenating year and unpadded month strings and re-parsing them.
rain_df['YearMonth'] = rain_df['Date'].dt.to_period('M').dt.to_timestamp()
rain_df.head()

In [None]:
# Number of rows that fall into each month bucket.
rain_df['YearMonth'].value_counts()

In [None]:
# Row-wise sum of the rainfall columns. Besides the two date columns, the derived
# 'Year' and 'Total' columns are excluded when they already exist
# (errors='ignore' skips the missing ones), so re-running this cell neither
# double-counts the old total nor tries to add a datetime column.
rain_df['Total'] = (
    rain_df
    .drop(columns=['Date', 'YearMonth', 'Year', 'Total'], errors='ignore')
    .sum(axis=1)
)
rain_df.head()

In [None]:
# Sum 'Total' within each month; as_index=False keeps 'YearMonth' as a regular column.
rain_df_monthly = rain_df.groupby('YearMonth', as_index=False)['Total'].sum()
rain_df_monthly.head()

In [None]:
def season_convert(dt):
    """Return the season label for a date-like object.

    Parameters
    ----------
    dt : object exposing a ``month`` attribute (e.g. ``datetime.date`` or
        ``pandas.Timestamp``).

    Returns
    -------
    str
        'Winter' for months 1-2, 'Summer' for 3-5, 'Monsoon' for 6-9, and
        'Post-Monsoon' for any other month value.
    """
    month = dt.month

    if 1 <= month <= 2:
        return 'Winter'
    if 3 <= month <= 5:
        return 'Summer'
    if 6 <= month <= 9:
        return 'Monsoon'
    # Everything not matched above falls through to the last season.
    return 'Post-Monsoon'

In [None]:
# Label each month with its season by applying season_convert element-wise.
rain_df_monthly['Season'] = rain_df_monthly['YearMonth'].apply(season_convert)
rain_df_monthly.head()

In [None]:
# Monthly rainfall totals, one bar per month, coloured by season. The title is
# set so the figure can be read on its own, like the other charts here.
px.bar(
    rain_df_monthly,
    x='YearMonth',
    y='Total',
    color='Season',
    title='Monthly rainfall in Chennai by season',
)

### Total yearly rainfall

In [None]:
# Truncate every date to 1 January of its year. The period accessors keep the
# values as datetimes (and pass NaT through) without a string round trip.
rain_df['Year'] = rain_df['Date'].dt.to_period('Y').dt.to_timestamp()
rain_df.head()

In [None]:
# Sum 'Total' per year (keeping 'Year' as a column) and draw one bar per year.
px.bar(
    data_frame=rain_df.groupby('Year', as_index=False)['Total'].sum(),
    x='Year',
    y='Total',
    title='Yearly rainfall in Chennai',
)

## 4. Water shortage estimation

In [None]:
# Keep only the rows dated 1 March, i.e. the first day of the first month that
# season_convert labels 'Summer'. An explicit boolean mask is used so the filter
# does not depend on `.dt` attribute access inside a query string.
is_first_of_march = (df['Date'].dt.month == 3) & (df['Date'].dt.day == 1)

px.bar(
    df[is_first_of_march],
    x='Date',
    y='Total',
    title='Total water availability in the beginning of the summer',
)

## Activity

 - Delhi air pollution (Is it higher in winter or during the monsoon? What is the impact of agricultural activities? How does it compare with other cities such as Beijing or Singapore?)
 - US accidents dataset