In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# IPython shell escape: print the notebook's current working directory.
!pwd

In [None]:
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.express as px


## Reading the data

In [None]:
# Load the reservoir levels. `dayfirst=True` tells the parser that the Date
# column is written day-first, so it is converted to datetime while reading.
df = pd.read_csv(
    '/kaggle/input/chennai-water-management/chennai_reservoir_levels.csv',
    dayfirst=True,
    parse_dates=['Date'],
)
df.head()

In [None]:
# Show the dtype pandas assigned to each column of the reservoir-level frame.
df.dtypes

In [None]:
# Number of missing values in each column.
df.isnull().sum()

## 1. Compare the water levels over time

In [None]:
# First and last calendar year present in the Date column.
(df['Date'].dt.year.min(), df['Date'].dt.year.max())

In [None]:
# One panel per reservoir on a 2x2 grid. `subplot_titles` is consumed row by
# row, so the order below must match the (row, col) positions of the traces.
# All four panels get a title and every trace gets an explicit name, so the
# bottom row is no longer untitled and the legend no longer shows "trace N".
fig = make_subplots(rows=2, cols=2,
                    subplot_titles=['Poondi Reservoir (in mcft)',
                                    'Redhills Reservoir (in mcft)',
                                    'Chembarambakkam Reservoir (in mcft)',
                                    'Cholavaram Reservoir (in mcft)'])

fig.add_trace(go.Scatter(x=df.Date, y=df.POONDI, name='Poondi'), row=1, col=1)
fig.add_trace(go.Scatter(x=df.Date, y=df.REDHILLS, name='Redhills'), row=1, col=2)
fig.add_trace(go.Scatter(x=df.Date, y=df.CHEMBARAMBAKKAM, name='Chembarambakkam'), row=2, col=1)
fig.add_trace(go.Scatter(x=df.Date, y=df.CHOLAVARAM, name='Cholavaram'), row=2, col=2)

# The figure title carries the first and last year covered by the data.
fig.update_layout(
    title_text="Water availability in Chennai's four major reservoirs {}-{}".format(
        df.Date.dt.year.min(), df.Date.dt.year.max()))

fig.show()

In [None]:
# Peek at the first five rows of the wide (one column per reservoir) frame.
df.head(n=5)

In [None]:
# Reshape from wide to long: one row per (Date, reservoir) pair, with the
# former column name in 'Resoviour' and its value in 'Water_Level'.
df_tidy = pd.melt(
    df,
    id_vars=['Date'],
    var_name='Resoviour',
    value_name='Water_Level',
)
df_tidy.head()

In [None]:

# Same comparison as a faceted plotly express figure: one coloured panel per
# reservoir, wrapped into two columns.
px.line(
    data_frame=df_tidy,
    x="Date",
    y='Water_Level',
    color='Resoviour',
    facet_col="Resoviour",
    facet_col_wrap=2,
    title="Water availability in Chennai's four major reserviours {}-{}".format(
        df.Date.dt.year.min(),
        df.Date.dt.year.max(),
    ),
)

In [None]:
# One full-width panel per reservoir, stacked in a single column.
fig = px.line(
    pd.melt(df, id_vars=['Date'], var_name='Resoviour', value_name='Water_Level'),
    x="Date",
    y="Water_Level",
    facet_col="Resoviour",
    facet_col_wrap=1,
    color="Resoviour",
    width=1200,
    height=1200,
)
# Unlink the y-axes so each panel scales to its own data.
fig.update_yaxes(matches=None)
fig.show()

## 2. Look at the overall water level

In [None]:
# Combined storage per row = sum over every reservoir column.
# 'total' itself is dropped before summing so that re-running this cell does
# not add the previous total back into the result; errors='ignore' covers the
# first run, when the column does not exist yet.
df['total'] = df.drop(columns=['Date', 'total'], errors='ignore').sum(axis=1)
df.head()

In [None]:
# Combined storage of all reservoirs over time.
px.line(
    data_frame=df,
    y='total',
    x='Date',
    title='Total water availability from all four reserviours in mcft',
)

## 3. Rainfall levels

In [None]:
# Load the rainfall readings, parsing the day-first Date column while reading.
rain_df = pd.read_csv(
    '/kaggle/input/chennai-water-management/chennai_reservoir_rainfall.csv',
    dayfirst=True,
    parse_dates=['Date'],
)
rain_df.head()

In [None]:
# Show the dtype pandas assigned to each column of the rainfall frame.
rain_df.dtypes

### Daily rainfall

In [None]:

# Melt to long form on the fly and draw one coloured panel per column,
# wrapped into two columns of panels.
px.line(
    pd.melt(rain_df, id_vars=['Date'], var_name='Resoviour', value_name='Rainfall'),
    x="Date",
    y='Rainfall',
    color='Resoviour',
    facet_col="Resoviour",
    facet_col_wrap=2,
    title='Daily rainfall in Chennai',
)

### Monthly rainfall

In [None]:
# Truncate every date to the first day of its month. Going through a monthly
# period stays in datetime space, instead of concatenating unpadded year and
# month strings (e.g. "20041") and relying on the parser to split them again.
rain_df['YearMonth'] = rain_df['Date'].dt.to_period('M').dt.to_timestamp()
rain_df.head()

In [None]:
# Row-wise rainfall total over the measurement columns.
# The helper columns this notebook adds to rain_df ('YearMonth', 'Year' and
# 'total' itself) are excluded, and errors='ignore' tolerates whichever of
# them do not exist yet, so the cell gives the same result however often and
# in whatever order it is re-run.
rain_df['total'] = (
    rain_df
    .drop(columns=['Date', 'YearMonth', 'Year', 'total'], errors='ignore')
    .sum(axis=1)
)
# Preview only the first rows rather than rendering the whole frame.
rain_df.head()

In [None]:
# Rainfall summed per month, returned as a two-column frame.
rain_df.groupby('YearMonth')['total'].sum().reset_index()

In [None]:
# Bar chart of the per-month rainfall totals.
px.bar(
    data_frame=rain_df.groupby('YearMonth')['total'].sum().reset_index(),
    y='total',
    x='YearMonth',
    title='Monthly rainfall in Chennai',
)

### Add a season column and use it as the color in the bar plot

Reference: https://stackoverflow.com/a/51289428/8210613

In [None]:
# Month number (1-12) -> season label, built from one entry per season so
# each season's months are listed together.
month_to_season = {
    month: season
    for season, months in (
        ('winter', (1, 2)),
        ('summer', (3, 4, 5)),
        ('monsoon', (6, 7, 8, 9)),
        ('post-monsoon', (10, 11, 12)),
    )
    for month in months
}

In [None]:
# Materialise the per-month totals as their own frame so a season column can
# be attached to it.
monthly_rain_df = rain_df.groupby('YearMonth')['total'].sum().reset_index()
monthly_rain_df.head()

In [None]:
# Label every month with its season by looking up the month number in
# month_to_season.
monthly_rain_df['season'] = (
    monthly_rain_df['YearMonth']
    .dt.month
    .map(month_to_season)
)
monthly_rain_df

In [None]:
# Monthly totals again, with bars coloured by season.
px.bar(
    data_frame=monthly_rain_df,
    y='total',
    x='YearMonth',
    color='season',
    title='Monthly rainfall in Chennai by season',
)

### Total yearly rainfall

In [None]:
# Truncate every date to 1 January of its year. The yearly period keeps the
# column as datetime without formatting the year as a string and re-parsing it.
rain_df['Year'] = rain_df['Date'].dt.to_period('Y').dt.to_timestamp()
rain_df.head()

In [None]:
# Rainfall summed per year, returned as a two-column frame.
rain_df.groupby('Year')['total'].sum().reset_index()

In [None]:
# Bar chart of the per-year rainfall totals.
px.bar(
    data_frame=rain_df.groupby('Year')['total'].sum().reset_index(),
    y='total',
    x='Year',
    title='Year rainfall in Chennai',
)

## 4. Water Shortage estimation

In [None]:
# Keep only the 1 March row of each year and plot the combined storage on
# that date.
px.bar(
    df[(df['Date'].dt.month == 3) & (df['Date'].dt.day == 1)],
    x='Date',
    y='total',
    title='Availability of water in total at the begining of summer ',
)

## Activity
Can you think of a similar large-scale urban problem with a real-time effect that you would like to analyze and solve with the help of data? Note such problems down, and break down the possible approaches and steps to solve them.

- Delhi air pollution (is it higher in winter or in the monsoon?)
    - correlate with Beijing and Singapore
- https://www.kaggle.com/sobhanmoosavi/us-accidents
- https://www.kaggle.com/sudalairajkumar/covid19-in-india?select=StatewiseTestingDetails.csv
