In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# setting colors for plots.
# Four-colour custom palette, registered as seaborn's default colour cycle.
colors = ['#794C74', '#00AF91', '#F39233', '#0F9D58']
sns.set_palette(colors)
# Global matplotlib font size for every figure in the notebook.
plt.rc('font', size=14)

In [None]:
# Load the rainfall CSV and preview the first rows.
csv_path = "../input/rainfall-data-from-1901-to-2017-for-india/Rainfall_Data_LL.csv"
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Dimensions of the loaded frame.
n_rows, n_cols = data.shape
print("#rows:", n_rows)
print("#columns:", n_cols)

In [None]:
# Number of distinct values in each object-dtype (text) column.
object_cols = data.select_dtypes(include=object)
for col_name, n_unique in object_cols.nunique().items():
    print(col_name, n_unique)

In [None]:
# Drop Name column.
# Reassigning (rather than dropping in place) together with errors='ignore'
# keeps this cell idempotent: running it a second time no longer raises
# KeyError because 'Name' has already been removed.
data = data.drop(columns='Name', errors='ignore')

In [None]:
# Normalise column names: every '-' becomes '_' (applied to all columns in one call).
data.rename(columns=lambda name: name.replace('-', '_'), inplace=True)

In [None]:
# Preview the frame again after the column drop and rename above.
data.head()

In [None]:
# Statistical Description.
# Summary statistics (count, mean, std, min, quartiles, max) of the columns.
data.describe()

In [None]:
# Summary
# Column dtypes and non-null counts; used to check for missing values.
data.info()

There are no missing values in data.

In [None]:
# Distribution of rainfall in each of the four seasons, one histogram (with KDE) per panel.
season_panels = [
    ('Jan_Feb', 'Winter'),
    ('Mar_May', 'Summer/Pre-Monsoon'),
    ('June_September', 'Rainy-season/Monsoon'),
    ('Oct_Dec', 'Autumn/Post-monsoon'),
]

fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(16,12))

# axs.flat walks the 2x2 grid row by row, which matches the order of season_panels.
for panel_ax, (season_col, season_label) in zip(axs.flat, season_panels):
    sns.histplot(data[season_col], ax=panel_ax, kde=True, label=season_label)
    panel_ax.legend()

fig.suptitle("Rainfall, by seasons")
plt.show()


The June–September (rainy season) distribution has the highest rainfall values, with observations exceeding ~1000 mm. The months between March and May also show some high rainfall observations.

In [None]:
# Yearly averages: mean of every numeric column over all rows sharing the same YEAR.
# numeric_only=True skips the text SUBDIVISION column, which pandas >= 2.0
# refuses to average (it raises TypeError instead of silently dropping it).
year_data = data.groupby('YEAR').mean(numeric_only=True)

# Percentage rainfall in each month over years

In [None]:
# % rainfall in each month over the years.
months_data = year_data.loc[:, 'JAN':'ANNUAL']
cols = months_data.columns[:-1]   # every column except the trailing 'ANNUAL'

# For each year compute month / ANNUAL, then average those shares over all
# years and scale to percent. (Summing the ratios, as was done before, yields
# the share multiplied by the number of years, which is not a percentage.)
percent_rain = (
    months_data[cols]
    .div(months_data['ANNUAL'], axis=0)
    .mean()
    .mul(100)
    .round(2)
    .to_dict()
)

percent_df = pd.DataFrame({'Percent_rainfall': pd.Series(percent_rain)})
percent_df.style.background_gradient()

In [None]:
# Bar chart of the per-month values held in percent_df.
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(percent_df.index, percent_df.Percent_rainfall)

# Write each bar's value just above its top edge.
for month_label, share in zip(percent_df.index, percent_df.Percent_rainfall):
    ax.text(month_label, share, s=round(share, 2), horizontalalignment='center', verticalalignment='bottom')

ax.set_xlabel('Months')
ax.set_ylabel('Percentage')
ax.set_title("% of Rainfall in each months over the years.", fontdict={'size':20})
plt.show()

**The bar plot above shows the percentage of rainfall in each month. We can see that average rainfall is highest in "July" and "August", followed by June and September. Together, these months contribute 88% of the rainfall over the years across India.**

# Pre-Monsoon analysis (Mar-May)

In [None]:
# Let's examine rainfall trends in 'Mar_May'(pre-monsoon)
pre_monsoon = year_data['Mar_May']

# Lowest value; used only to place the year labels near the bottom of the plot.
min_data = pre_monsoon.min()

# All-time peak and the year in which it occurred.
max_data = pre_monsoon.max()
max_data_yr = pre_monsoon.idxmax()

# Peak within the 2010-2020 label range of the YEAR index. idxmax is taken on
# the slice itself so the reported year always lies inside that window; matching
# the value against the full series could return an earlier year with the same value.
recent = pre_monsoon.loc[2010:2020]
last_max_data = recent.max()
last_max_data_yr = recent.idxmax()

plt.figure(figsize=(14, 8))
sns.lineplot(data=year_data, x=year_data.index, y='Mar_May', linewidth=2, label='Mar-May')

# Mark both peaks with a dashed vertical line, the rainfall value and the year.
for peak_yr, peak_val in ((max_data_yr, max_data), (last_max_data_yr, last_max_data)):
    plt.axvline(x=peak_yr, linewidth=2, linestyle='--', color='#222')
    plt.text(peak_yr + 1, y=peak_val, s=f"{peak_val:.1f}", color='r')
    plt.text(peak_yr, y=min_data - 5, s=str(peak_yr), color='r')

plt.ylabel("Rainfall in (mm)")
plt.title("All over India Pre-Monsoon Annual average rainfall over the years")

plt.show()

**The highest average pre-monsoon rainfall was received in 1990, at ~245 mm. In the last 10 years, the highest was recorded in 2015, at ~203 mm.**

# Monsoon Rainfall Analysis

In [None]:

def monsoon_plots(month, window=30, min_periods=10):
    """Scatter one month's yearly average rainfall and overlay its moving average.

    Parameters
    ----------
    month : str
        Column of the module-level ``year_data`` frame to plot, e.g. ``'JUL'``.
    window : int, default 30
        Number of rows (years) in the rolling-mean window.
    min_periods : int, default 10
        Minimum number of observations required before a rolling mean is produced.

    Side effects
    ------------
    Stores the rolling mean in ``year_data[f'{month}_SMA_10']`` (the column name
    is kept for backward compatibility even though the default window is 30)
    and displays a figure.
    """

    months = {
        'JAN': 'January', 'FEB': 'February', 'MAR': 'March', 'APR': 'April',
        'MAY': 'May', 'JUN': 'June', 'JUL': 'July', 'AUG': 'August',
        'SEP': 'September', 'OCT': 'October', 'NOV': 'November', 'DEC': 'December',
    }
    # Fall back to the raw column name so that any column can be plotted
    # instead of raising KeyError for everything except JUL/AUG.
    month_name = months.get(month, month)

    sma_col = f'{month}_SMA_10'
    year_data[sma_col] = year_data[month].rolling(window, min_periods=min_periods).mean()

    plt.figure(figsize=(14, 6))
    sns.scatterplot(data=year_data, x=year_data.index, y=month, label=month_name)
    sns.lineplot(data=year_data, x=year_data.index, y=sma_col, linewidth=2, label='movAvg')

    plt.ylabel("Rainfall in (mm)")
    plt.title(f"Yearly Average Rainfall trend for {month_name} month all over India.")

    plt.legend()
    plt.xticks(rotation=45)
    plt.show()

## Monthly Average Rainfall in JULY & AUGUST

In [None]:
# Yearly average rainfall and moving average for the two peak monsoon months.
for peak_month in ('JUL', 'AUG'):
    monsoon_plots(peak_month)

**We can see that rainfall in July shows an increasing trend, while rainfall in August shows a decreasing trend, across India over the years.**  

In [None]:
# Annual observations for Rainfall?
# Rolling mean over a 20-row (20-year) window, emitted once at least 10 values
# are available. The column is called 'SMA_10' although the window is 20.
year_data['SMA_10'] = year_data['ANNUAL'].rolling(20, min_periods=10).mean()

plt.figure(figsize=(14, 8))

sns.lineplot(data=year_data, x=year_data.index, y='ANNUAL', linewidth=2, label='Average')
sns.lineplot(data=year_data, x=year_data.index, y='SMA_10', linewidth=2, label='movAvg')

plt.ylabel("Rainfall in (mm)")
plt.title("Annual Average Rainfall in all over India", fontdict={'size':20})

# plt.show() renders the figure; the former plt.plot() drew an empty line
# and left a stray `[]` as the cell output.
plt.show()

**We can see that there is a decreasing trend in annual average rainfall across India in recent years.**

## Rainfall by Subdivision 

In [None]:
# Average rainfall by subdivision.
# Select 'June_September' before aggregating so mean/std are computed for that
# one column only (previously every column was aggregated and then discarded).
# CV = std / mean; rows are sorted by mean, highest first.
avgSubDiv = (
    data.groupby('SUBDIVISION')['June_September']
    .agg(['mean', 'std'])
    .assign(CV=lambda stats: stats['std'] / stats['mean'])
    .sort_values(by='mean', ascending=False)
)

# plot
plt.figure(figsize=(14, 8))
sns.barplot(data=avgSubDiv, x=avgSubDiv.index, y='mean', color='#4C4C6D')

plt.ylabel('Rainfall (in mm)')
plt.title("Average Rainfall by SubDivisions.", fontdict={'size':20})

plt.xticks(rotation=90)
plt.show()

**High average rainfall is observed in the north-east and south-west regions of India.**

In [None]:
# Bar chart of the coefficient of variation (the 'CV' column of avgSubDiv)
# for every subdivision, in avgSubDiv's row order.
fig, ax = plt.subplots(figsize=(14, 8))
sns.barplot(data=avgSubDiv, x=avgSubDiv.index, y='CV', color='#4C4C6D', ax=ax)

ax.set_ylabel('Coefficient of Variation')
ax.set_title("Coefficient of Variation by SubDivisions", fontdict={'size':20})

plt.xticks(rotation=90)
plt.show()

**Subdivisions with higher average rainfall have a lower coefficient of variation, whereas subdivisions with lower average rainfall have a higher coefficient of variation.**

## Maharashtra rainfall southwest monsoon analysis.

#### Let's examine the last 30 years of Rainfall in Maharashtra and find trends.

In [None]:
# Keep only rows after 1987 (the "last 30 years" of this notebook), indexed by subdivision.
recent_rows = data.loc[data.YEAR > 1987]
sub_div = recent_rows.set_index('SUBDIVISION')

# Label slice from 'Konkan & Goa' through 'Vidarbha' (inclusive).
# NOTE(review): this relies on the rows of each subdivision being contiguous in
# the file and on the Maharashtra subdivisions sitting between these two labels.
MahaData = sub_div.loc['Konkan & Goa':'Vidarbha'].reset_index()
MahaData.head()

In [None]:
# percent average rainfall in Maharashtra in each month over last 30 years.
Maha_months_data = MahaData.loc[:, 'JAN':'ANNUAL']
cols = Maha_months_data.columns[:-1]   # every column except the trailing 'ANNUAL'

# For each row compute month / ANNUAL, then average those shares over all rows
# and scale to percent. (Summing the ratios, as was done before, yields the
# share multiplied by the number of rows, which is not a percentage.)
percent_rain = (
    Maha_months_data[cols]
    .div(Maha_months_data['ANNUAL'], axis=0)
    .mean()
    .mul(100)
    .round(2)
    .to_dict()
)

percent_df = pd.DataFrame({'Percent_rainfall': pd.Series(percent_rain)})
percent_df.style.background_gradient()

In [None]:
# Lollipop chart (thick vertical line plus a dot) of the per-month values in percent_df.
fig, ax = plt.subplots(figsize=(14,8))
ax.vlines(x=percent_df.index, ymin=-1, ymax=percent_df.Percent_rainfall, alpha = 0.8, linewidth=25)
ax.scatter(x=percent_df.index, y=percent_df.Percent_rainfall, s=60, color='#293B5F')

# Title, Label, and Ticks.
ax.set_xlabel('Months')
# The plotted quantity is percent_df.Percent_rainfall, so the axis is labelled
# as a percentage (it was previously mislabelled 'Rainfall (in mm)').
ax.set_ylabel('Percentage')
ax.set_title('% Rainfall in each Month all over Maharashtra for last 30 years', fontdict={'size':20})

ax.set_xticks(percent_df.index)

# Annotate: write each value slightly above its dot.
for row in percent_df.itertuples():
    ax.text(row.Index, row.Percent_rainfall+.5, s=round(row.Percent_rainfall, 2), horizontalalignment= 'center', verticalalignment='bottom')

plt.show()

**This plot shows the percentage of rainfall in each month across Maharashtra over the last 30 years. We can see that July and August have the highest average percentages, followed by June and September. July and August together contribute almost 65% of the rainfall across Maharashtra. This is mainly because Konkan & Goa has a high average rainfall compared with the rest of India.**

In [None]:
# Average rainfall trend in Maharashtra.
# Select the columns of interest before aggregating: MahaData also carries the
# text SUBDIVISION column, and pandas >= 2.0 raises TypeError when asked to
# take its mean/std. The result has (column, statistic) MultiIndex columns.
trend_cols = ['JUN', 'JUL', 'AUG', 'SEP', 'ANNUAL', 'June_September']
MahaGrp = MahaData.groupby(by=['YEAR'])[trend_cols].agg(['mean', 'std'])

# (month column, moving-average column, legend label, line colour)
monsoon_months = [
    ('JUN', 'JunSMA_10', 'June', '#FF2442'),
    ('JUL', 'JulSMA_10', 'July', '#3DB2FF'),
    ('AUG', 'AugSMA_10', 'August', '#F39233'),
    ('SEP', 'SepSMA_10', 'September', '#81B214'),
]

# Trends in June, July, August and September.
# Simple moving average (10-row window, at least 3 observations) of the yearly mean.
for month_col, sma_col, _label, _colour in monsoon_months:
    MahaGrp[sma_col] = MahaGrp[month_col]['mean'].rolling(10, min_periods=3).mean()

plt.figure(figsize=(14, 8))

# Solid line = yearly mean, dashed line of the same colour = its moving average.
for month_col, sma_col, month_label, colour in monsoon_months:
    sns.lineplot(data=MahaGrp, x=MahaGrp.index, y=MahaGrp[month_col]['mean'], color=colour, linewidth=1.8, alpha=0.6, label=month_label)
    sns.lineplot(data=MahaGrp, x=MahaGrp.index, y=MahaGrp[sma_col], color=colour, linewidth=2, linestyle='--')

# x carries the YEAR index and y the rainfall; the two labels were swapped before.
plt.xlabel("Years")
plt.ylabel('Rainfall (in mm)')
plt.title("Maharashtra state monthly rainfall trends", fontdict={'size':22})
plt.show()

**These are the monthly trend lines for rainfall in Maharashtra. The monthly rainfall in July and September shows an increasing trend, while rainfall in June and August shows a decreasing trend.**

In [None]:
# Seasonal (June-September) and annual rainfall trends for Maharashtra.

# (source column, moving-average column, legend label, colour, opacity of the raw line)
trend_specs = [
    ('ANNUAL', 'AnnualSMA10', 'Annual', '#40394A', 0.6),
    ('June_September', 'JJAS_SMA10', 'JJAS', '#9B3675', 0.7),
]

# Rolling mean of the yearly mean with a 30-row window and min_periods=2
# (the column names say "SMA10", but the window used here is 30).
for source_col, sma_col, *_unused in trend_specs:
    MahaGrp[sma_col] = MahaGrp[source_col]['mean'].rolling(30, min_periods=2).mean()

# Solid line = yearly mean, dashed line of the same colour = its rolling mean.
plt.figure(figsize=(14, 8))
for source_col, sma_col, legend_label, colour, opacity in trend_specs:
    sns.lineplot(data=MahaGrp, x=MahaGrp.index, y=MahaGrp[source_col]['mean'], color=colour, linewidth=2.6, alpha=opacity, label=legend_label)
    sns.lineplot(data=MahaGrp, x=MahaGrp.index, y=sma_col, linewidth=2, color=colour, linestyle='--')

plt.ylabel('Rainfall (in mm)')
plt.title("Maharashtra state southwest monsoon", fontdict={'size':20})

plt.show()

**We can see that both seasonal and annual rainfall show neither an increasing nor a decreasing trend across Maharashtra in recent years.**

In [None]:
# Let's examine the Percent average of each SubDivisions of Maharashtra.
# Mean of every numeric column per subdivision.
subdiv_means = MahaData.groupby('SUBDIVISION').mean(numeric_only=True)

# Each subdivision's mean June-September rainfall expressed as a percentage of
# the summed mean ANNUAL rainfall of all subdivisions in MahaData.
total_avg = subdiv_means['ANNUAL'].sum()
monsoon_share = (subdiv_means['June_September'] / total_avg * 100).to_frame()

# plot
monsoon_share.plot(kind='bar', figsize=(14, 8))

# The plotted quantity is a percentage, so the label no longer claims millimetres.
plt.ylabel("Percent of total annual rainfall (%)")
plt.title("Maharashtra's subdivision percentage rainfall in SW monsoon")
plt.xticks(rotation=0)
plt.show()

**Almost 50% of the annual rainfall is received in the Konkan and Goa region of Maharashtra during the SW monsoon season, while Vidarbha, Madhya Maharashtra, and Marathwada each receive less than 20% of the rainfall in the monsoon.**

## Konkan and Goa SW Monsoon Rainfall

In [None]:
# Let's examine rainfall trends in konkan and goa in southwest monsoon.

konkan_data = MahaData.loc[MahaData.SUBDIVISION == 'Konkan & Goa']
# numeric_only=True skips the text SUBDIVISION column, which pandas >= 2.0
# refuses to average (TypeError).
konkan_grpdata = konkan_data.groupby('YEAR').mean(numeric_only=True)
# Rolling mean of ANNUAL over a 20-row window (min 2 observations); the column
# is called 'SMA_10' although the window is 20.
konkan_grpdata['SMA_10'] = konkan_grpdata['ANNUAL'].rolling(20, min_periods=2).mean()

# Lowest value; used only to place the year label near the bottom of the plot.
min_data = konkan_grpdata['ANNUAL'].min()

# Peak annual value and the (first) year in which it occurred.
max_data = konkan_grpdata['ANNUAL'].max()
max_data_yr = konkan_grpdata['ANNUAL'].idxmax()

plt.figure(figsize=(14, 8))
sns.lineplot(data=konkan_grpdata, x=konkan_grpdata.index, y='ANNUAL', linewidth=2, color='#184D47', label='ANNUAL')
sns.lineplot(data=konkan_grpdata, x=konkan_grpdata.index, y='SMA_10', linewidth=2, linestyle='--', color='#A685E2', label='mov_avg2')

# Mark the peak year with a dashed vertical line and annotate value and year.
plt.axvline(x=max_data_yr, linewidth=2, linestyle='--', color='#96BB7C')
plt.text(max_data_yr+0.2, y=max_data-2, s=max_data, color='#6155A6')
plt.text(max_data_yr+0.2, y=min_data-2, s=max_data_yr, color='#6155A6')

plt.ylabel("Rainfall in (mm)")
plt.title("Konkan&Goa annual rainfall in sw monsoon seasons")

plt.show()

**We can see that there is an increasing rainfall trend in the Konkan and Goa region in the south-west monsoon season. The highest rainfall received in Konkan & Goa is 3825.2 mm, in the year 2011.**

## Marathwada SW Monsoon Rainfall

In [None]:
# Let's examine rainfall trends in Marathwada in southwest monsoon.

# NOTE(review): 'Matathwada' is presumably the spelling used in the dataset's
# SUBDIVISION column - confirm before "correcting" it, otherwise the filter
# returns no rows.
marathwada_data = MahaData.loc[MahaData.SUBDIVISION == 'Matathwada']
# numeric_only=True skips the text SUBDIVISION column, which pandas >= 2.0
# refuses to average (TypeError).
marathwada_grpdata = marathwada_data.groupby('YEAR').mean(numeric_only=True)
# Rolling mean of ANNUAL over a 20-row window (min 2 observations); the column
# is called 'SMA_10' although the window is 20.
marathwada_grpdata['SMA_10'] = marathwada_grpdata['ANNUAL'].rolling(20, min_periods=2).mean()

# Lowest value; used only to place the year label near the bottom of the plot.
min_data = marathwada_grpdata['ANNUAL'].min()

# Peak annual value and the (first) year in which it occurred.
max_data = marathwada_grpdata['ANNUAL'].max()
max_data_yr = marathwada_grpdata['ANNUAL'].idxmax()

plt.figure(figsize=(14, 8))
sns.lineplot(data=marathwada_grpdata, x=marathwada_grpdata.index, y='ANNUAL', linewidth=2, color='#184D47', label='ANNUAL')
sns.lineplot(data=marathwada_grpdata, x=marathwada_grpdata.index, y='SMA_10', linewidth=2, linestyle='--', color='#A685E2', label='mov_avg2')

# Mark the peak year with a dashed vertical line and annotate value and year.
plt.axvline(x=max_data_yr, linewidth=2, linestyle='--', color='#96BB7C')
plt.text(max_data_yr+0.2, y=max_data-2, s=max_data, color='#6155A6')
plt.text(max_data_yr+0.2, y=min_data-2, s=max_data_yr, color='#6155A6')

plt.ylabel("Rainfall in (mm)")
plt.title("Marathwada annual rainfall in sw monsoon seasons")

plt.show()

**Marathwada receives the lowest rainfall in the SW monsoon season; the highest rainfall it received was 1198.1 mm, in the year 1990. We can see that there is a decreasing rainfall trend in Marathwada.**