# Rainfall Trend Analysis in India

This notebook explores historical rainfall patterns in India from 1901 to 2015, identifies anomalies, and forecasts future rainfall using time series models like Prophet.

### Importing Libraries

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from prophet import Prophet
from prophet.plot import plot_components_plotly, plot_plotly
from sklearn.ensemble import IsolationForest


### Understanding Data

In [39]:
# Path of the rainfall CSV, resolved relative to the notebook's working directory.
RAINFALL_CSV = "rainfall_area-wt_India_1901-2015.csv"

# Load the full table into the frame every later cell works from.
df = pd.read_csv(RAINFALL_CSV)

In [40]:
# Preview the first five rows to confirm the column layout.
df.head(n=5)

Unnamed: 0,REGION,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ANNUAL,Jan-Feb,Mar-May,Jun-Sep,Oct-Dec
0,INDIA,1901,34.7,37.7,18.0,39.3,50.8,113.4,242.2,272.9,124.4,52.7,38.0,8.3,1032.3,72.4,108.1,752.8,99.0
1,INDIA,1902,7.4,4.3,19.0,43.5,48.3,108.8,284.0,199.7,201.5,61.5,27.9,24.4,1030.2,11.7,110.8,794.0,113.8
2,INDIA,1903,17.0,8.3,31.3,17.1,59.5,118.3,297.0,270.4,199.1,117.9,36.9,17.7,1190.5,25.3,107.9,884.8,172.5
3,INDIA,1904,14.4,9.6,31.8,33.1,72.4,164.8,261.0,206.4,129.6,69.0,11.2,16.3,1019.8,24.0,137.4,761.8,96.6
4,INDIA,1905,25.3,20.9,42.7,33.7,55.7,93.3,252.8,200.8,178.4,51.4,9.7,10.5,975.3,46.2,132.2,725.4,71.6


In [41]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ANNUAL,Jan-Feb,Mar-May,Jun-Sep,Oct-Dec
count,115.0,115.0,115.0,115.0,115.0,115.0,115.0,115.0,115.0,115.0,115.0,115.0,115.0,115.0,115.0,115.0,115.0,115.0
mean,1958.0,19.75913,23.434783,28.254783,38.241739,62.193913,168.36,291.022609,258.40087,172.473043,75.701739,29.205217,14.98,1182.034783,43.189565,128.694783,890.26087,119.882609
std,33.341666,9.992628,11.512739,12.286408,10.35304,15.673378,35.569654,41.16139,34.975419,36.641234,28.268152,16.101056,8.788761,110.686214,14.476335,22.895134,89.179055,32.467126
min,1901.0,2.7,2.7,7.2,16.1,32.1,86.5,138.9,191.7,96.9,20.0,3.6,1.6,920.8,11.7,84.5,679.5,52.7
25%,1929.5,13.0,13.3,19.75,31.6,51.6,144.05,267.35,233.95,144.85,55.6,17.3,9.6,1102.4,33.8,112.35,823.55,97.05
50%,1958.0,17.8,22.5,25.5,37.4,59.5,165.6,295.8,259.3,173.1,69.2,26.1,14.1,1190.5,41.3,125.1,897.8,116.2
75%,1986.5,24.85,30.3,34.4,43.85,71.2,192.05,318.65,287.95,198.3,92.85,39.65,19.0,1243.55,51.4,139.65,959.65,142.15
max,2015.0,58.5,53.8,63.3,69.4,114.5,275.5,383.4,335.5,281.0,158.8,74.2,54.4,1480.3,86.3,209.7,1094.5,207.5


In [42]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

(115, 19)

In [43]:
# Per-column dtypes, to check which columns were parsed as numeric.
df.dtypes

REGION      object
YEAR         int64
JAN        float64
FEB        float64
MAR        float64
APR        float64
MAY        float64
JUN        float64
JUL        float64
AUG        float64
SEP        float64
OCT        float64
NOV        float64
DEC        float64
ANNUAL     float64
Jan-Feb    float64
Mar-May    float64
Jun-Sep    float64
Oct-Dec    float64
dtype: object

In [44]:
# Count missing values per column.
df.isna().sum()

REGION     0
YEAR       0
JAN        0
FEB        0
MAR        0
APR        0
MAY        0
JUN        0
JUL        0
AUG        0
SEP        0
OCT        0
NOV        0
DEC        0
ANNUAL     0
Jan-Feb    0
Mar-May    0
Jun-Sep    0
Oct-Dec    0
dtype: int64

### Visualising Annual and Monthly Trends Using Plotly

#### Annual Rainfall Trend

Using Plotly, we visualize annual rainfall over the years, alongside the mean line to observe general patterns and deviations.

In [45]:
# Keep only the two columns this chart needs.
annual_rainfall = df[['YEAR', 'ANNUAL']]
trend_years = annual_rainfall['YEAR']
overall_mean = annual_rainfall['ANNUAL'].mean()

# Observed annual totals, one marker per year.
observed_trace = go.Scatter(
    x=trend_years,
    y=annual_rainfall['ANNUAL'],
    mode='lines+markers',
    name='Annual Rainfall',
    line={'color': 'royalblue', 'width': 1},
)

# Flat reference line: the all-years mean repeated once per year.
mean_trace = go.Scatter(
    x=trend_years,
    y=[overall_mean for _ in range(len(annual_rainfall))],
    mode='lines',
    name='Mean Rainfall',
    line={'color': 'red', 'dash': 'dash'},
)

fig = go.Figure(data=[observed_trace, mean_trace])
fig.update_layout(
    title='Annual Rainfall Trend in India (1901 - 2015)',
    xaxis_title='Year',
    yaxis_title='Rainfall (mm)',
    template='plotly_white',
    hovermode='x unified',
    height=500,
)

fig.show()

#### Monthly Rainfall Analysis

We calculate and plot the average rainfall for each month across the entire time period. This helps us understand rainfall seasonality.


In [50]:
# Calendar-month columns, in display order; later cells reuse this list.
monthly_columns = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
                   'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']

# Mean rainfall per calendar month across all rows of the frame.
monthly_avg = df[monthly_columns].mean()
monthly_avg_df = monthly_avg.reset_index()
monthly_avg_df.columns = ['Month', 'Average Rainfall (mm)']

fig = px.bar(
    monthly_avg_df,
    x='Month',
    y='Average Rainfall (mm)',
    text='Average Rainfall (mm)',
    title='Average Monthly Rainfall in India (1901 - 2015)',
    template='plotly_white'
)

# Print bar labels with one decimal instead of the raw float repr, matching
# the rounding used on the seasonal chart. Bar heights are left unrounded.
fig.update_traces(texttemplate='%{y:.1f}')

# Reference line at the mean of the twelve monthly averages.
fig.add_hline(
    y=monthly_avg.mean(),
    line_dash="dash",
    line_color="red",
    annotation_text="Mean Rainfall",
    annotation_position="top right"
)

fig.update_layout(
    xaxis_title='Month',
    yaxis_title='Rainfall (mm)',
    height=500
)

fig.show()

#### Seasonal Rainfall Distribution

Rainfall is grouped into 4 major seasons:
	•	Winter (Jan-Feb)
	•	Pre-Monsoon (Mar-May)
	•	Monsoon (Jun-Sep)
	•	Post-Monsoon (Oct-Dec)

We plot average seasonal rainfall to highlight dominant periods.

In [59]:
# Map each seasonal column in the dataset to the label shown on the chart.
season_labels = {
    'Jan-Feb': 'Winter (Jan-Feb)',
    'Mar-May': 'Pre-Monsoon (Mar-May)',
    'Jun-Sep': 'Monsoon (Jun-Sep)',
    'Oct-Dec': 'Post-Monsoon (Oct-Dec)',
}
seasonal_columns = list(season_labels.keys())
season_names = list(season_labels.values())

# Mean rainfall per season, rounded to one decimal for the bar labels.
seasonal_avg = df[seasonal_columns].mean().round(1)

seasonal_df = pd.DataFrame({
    'Season': season_names,
    'Average Rainfall (mm)': seasonal_avg.values,
})

fig = px.bar(
    seasonal_df,
    x='Season',
    y='Average Rainfall (mm)',
    text='Average Rainfall (mm)',
    title='Average Seasonal Rainfall in India (1901 - 2015)',
    template='plotly_white',
)
fig.update_layout(xaxis_title='Season', yaxis_title='Rainfall (mm)', height=500)

fig.show()

#### Annual Rainfall with 10-Year Rolling Average

To smooth short-term fluctuations and highlight long-term trends, we overlay a 10-year rolling average on the annual rainfall plot.


In [76]:
# Width of the smoothing window, in years.
ROLLING_WINDOW_YEARS = 10

# Annual total recomputed as the sum of the twelve monthly columns.
df['Annual'] = df[['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
                   'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']].sum(axis=1)

fig = px.line(
    df, x='YEAR',
    y='Annual',
    title='Annual Rainfall Trend in India (1901–2015)',
    labels={'YEAR': 'Year', 'Annual': 'Rainfall (mm)'},
    template='plotly_white')

# Style the yearly series before the overlay is added so that markers (and the
# legend entry) apply to the annual line only, not to the smoothed curve.
fig.update_traces(mode='lines+markers', name='Annual Rainfall', showlegend=True)

# Smooth the same series that is plotted above; the original smoothed the
# dataset's separate 'ANNUAL' column, mixing two different series on one chart.
# The first ROLLING_WINDOW_YEARS - 1 points are NaN because the window is not
# yet full, so the red line starts later than the annual line.
fig.add_trace(go.Scatter(
    x=df['YEAR'],
    y=df['Annual'].rolling(window=ROLLING_WINDOW_YEARS).mean(),
    mode='lines',
    name='10-Year Rolling Avg',
    line=dict(color='red', width=3)
))

fig.show()

#### Seasonal Rainfall Heatmap

A heatmap helps visualize rainfall intensity across years and seasons. This gives an intuitive idea of changes in seasonal rainfall patterns over time.

In [111]:
# Long format: one row per (year, season) pair.
heatmap_df = df[['YEAR'] + seasonal_columns].copy()
heatmap_df = heatmap_df.melt(id_vars='YEAR', var_name='Season', value_name='Rainfall')

# Draw one cell per (year, season) with the actual rainfall value.
# px.density_heatmap is a 2-D histogram: it bins the numeric YEAR axis and sums
# Rainfall inside each bin, so a cell can represent several years added together.
# go.Heatmap with equal-length x/y/z columns plots the values as given.
fig = go.Figure(go.Heatmap(
    x=heatmap_df['Season'],
    y=heatmap_df['YEAR'],
    z=heatmap_df['Rainfall'],
    colorscale='Viridis',
    colorbar=dict(title='Rainfall (mm)')
))

fig.update_layout(
    title='Seasonal Rainfall Distribution (Heatmap)',
    xaxis_title='Season',
    yaxis_title='Year',
    height=700,
    yaxis_nticks=20
)
fig.show()

#### Anomaly Detection (Statistical)

Using standard-deviation thresholds, we classify years whose annual rainfall lies more than 1.5 standard deviations above or below the long-term mean as high or low anomalies, and plot them along with the mean line.

In [None]:
annual_totals = df['ANNUAL']
mean_rainfall = annual_totals.mean()
std_rainfall = annual_totals.std()

# Anomaly band: 1.5 sample standard deviations either side of the mean.
band_halfwidth = 1.5 * std_rainfall
upper_limit = mean_rainfall + band_halfwidth
lower_limit = mean_rainfall - band_halfwidth

# Label each year in one vectorised pass; anything inside the band is 'Normal'.
df['Anomaly'] = np.select(
    [annual_totals > upper_limit, annual_totals < lower_limit],
    ['High', 'Low'],
    default='Normal',
)

anomaly_colours = {'High': 'red', 'Low': 'blue', 'Normal': 'gray'}
fig = px.scatter(
    df,
    x='YEAR',
    y='ANNUAL',
    color='Anomaly',
    color_discrete_map=anomaly_colours,
    title='Anomaly Detection in Annual Rainfall (India)',
    labels={'YEAR': 'Year', 'ANNUAL': 'Rainfall (mm)'},
    template='plotly_white',
)

fig.add_hline(y=mean_rainfall, line_dash='dot', annotation_text='Mean', line_color='green')

# Attach a year label, with an arrow, to every point outside the band.
flagged_years = df[df['Anomaly'] != 'Normal']
for _, flagged in flagged_years.iterrows():
    fig.add_annotation(
        x=flagged['YEAR'],
        y=flagged['ANNUAL'],
        text=flagged['YEAR'],
        showarrow=True,
        arrowhead=2,
        font={'color': 'black', 'size': 10},
        ax=0,
        ay=-30,
    )

fig.show()

#### Anomaly Detection using Isolation Forest

Using Isolation Forest, a machine learning-based outlier detection technique, we identify abnormal rainfall patterns in:
	•	Annual data
	•	Monthly data

These anomalies may indicate unusual climatic events or data collection issues.

In [None]:
# Model inputs: the twelve monthly totals plus the annual total.
iforest_columns = [
    'JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
    'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC',
    'ANNUAL',
]
features = df[iforest_columns]

# contamination=0.05 asks the model to flag about 5% of rows; the fixed seed
# keeps the flagged set reproducible between runs.
iso_forest = IsolationForest(random_state=42, contamination=0.05)
df['anomaly_iforest'] = iso_forest.fit_predict(features)

# fit_predict labels outliers as -1 and inliers as 1; keep the outliers.
df_anomalies_iforest = df.loc[df['anomaly_iforest'].eq(-1)]

df_anomalies_iforest.head()

Unnamed: 0,REGION,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,...,AUG_Z,SEP_Z,OCT_Z,NOV_Z,DEC_Z,anomaly_iforest,Monthly_Anomaly,Pre_Monsoon,Post_Monsoon,Annual
6,INDIA,1907,16.2,46.0,37.8,62.8,32.6,154.4,225.4,310.4,...,1.486734,-2.062514,-1.874963,-0.416446,-0.327691,-1,-1,133.2,57.3,1039.8
10,INDIA,1911,45.7,5.6,49.9,22.8,47.6,191.9,162.7,213.5,...,-1.283784,0.268194,-0.180477,0.844341,-0.339069,-1,-1,120.3,125.4,1047.4
16,INDIA,1917,8.7,38.7,22.8,43.2,75.0,231.8,285.2,296.5,...,1.089312,2.96188,2.939642,-0.062432,-0.532498,-1,-1,141.0,197.3,1480.2
17,INDIA,1918,12.2,4.4,41.6,38.8,102.8,212.6,183.8,242.7,...,-0.448912,-1.71318,-1.970477,0.738758,0.16157,-1,-1,183.2,77.5,1026.1
66,INDIA,1967,11.2,13.4,63.3,29.1,42.4,144.9,304.6,262.9,...,0.128637,-0.056577,-1.252354,-1.105842,4.485274,-1,1,134.8,106.1,1148.3


In [114]:
# Second pass: fit on the twelve monthly totals only (no ANNUAL column).
# Calling fit_predict again refits `iso_forest` from scratch on this input.
monthly_data = df[['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']]
df['Monthly_Anomaly'] = iso_forest.fit_predict(monthly_data)

# Years flagged as outliers (-1), with their monthly values.
# The plotting cell that follows reads `monthly_anomalies_df`; the original
# stored the result only as `df_monthly_anomalies_df`, which made that cell
# raise NameError on a fresh kernel. Both names are bound here.
monthly_anomalies_df = df.loc[df['Monthly_Anomaly'] == -1, ['YEAR'] + monthly_columns]
df_monthly_anomalies_df = monthly_anomalies_df
monthly_anomalies_df.head()

Unnamed: 0,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC
6,1907,16.2,46.0,37.8,62.8,32.6,154.4,225.4,310.4,96.9,22.7,22.5,12.1
10,1911,45.7,5.6,49.9,22.8,47.6,191.9,162.7,213.5,182.3,70.6,42.8,12.0
16,1917,8.7,38.7,22.8,43.2,75.0,231.8,285.2,296.5,281.0,158.8,28.2,10.3
17,1918,12.2,4.4,41.6,38.8,102.8,212.6,183.8,242.7,109.7,20.0,41.1,16.4
66,1967,11.2,13.4,63.3,29.1,42.4,144.9,304.6,262.9,170.4,40.3,11.4,54.4


#### Monthly Rainfall Anomalies

Using Isolation Forest, we flag years whose twelve-month rainfall profile is unusual, and highlight every monthly value from those years over the monthly trends. Note that the model flags whole years, not individual months.

In [115]:
# Select the flagged years straight from `df` so this cell does not depend on
# how an earlier cell named its intermediate frame.
anomalous_years = df.loc[df['Monthly_Anomaly'] == -1, ['YEAR'] + monthly_columns]

# Reshape to long format (one row per flagged year and month) with melt rather
# than a nested iterrows loop. Row order is month-major, as before, and YEAR
# keeps its integer dtype instead of being upcast to float by iterrows.
monthly_anomalies_df_long = (
    anomalous_years
    .melt(id_vars='YEAR', value_vars=monthly_columns,
          var_name='Month', value_name='Rainfall')
    .rename(columns={'YEAR': 'Year'})
)

# Same records as a list of dicts, kept under the name the original cell defined.
monthly_anomalies = monthly_anomalies_df_long.to_dict('records')

# One line per calendar month across all years.
fig_monthly_anomalies = px.line(
    df,
    x='YEAR',
    y=monthly_columns,
    labels={'YEAR': 'Year', 'value': 'Rainfall (mm)', 'variable': 'Month'},
    title='Monthly Rainfall Anomalies in India (1901-2015)',
    color_discrete_sequence=px.colors.qualitative.Set3
)

# Red markers on every monthly value belonging to a flagged year.
fig_monthly_anomalies.add_trace(go.Scatter(
    x=monthly_anomalies_df_long['Year'],
    y=monthly_anomalies_df_long['Rainfall'],
    mode='markers',
    name='Anomalous Months',
    marker=dict(color='red', size=5, symbol='circle')
))

fig_monthly_anomalies.update_layout(
    template='plotly_white',
    legend=dict(title="Legend"),
    height=500
)

fig_monthly_anomalies.show()

#### Seasonal Rainfall Correlation with Annual Total

We analyze how each season’s rainfall correlates with the total annual rainfall, using a correlation matrix and bar plot.

In [None]:
# Seasonal totals rebuilt from the monthly columns; the decadal-trend cell
# reads these four columns from `df`.
df['Winter'] = df[['JAN', 'FEB']].sum(axis=1)
df['Pre_Monsoon'] = df[['MAR', 'APR', 'MAY']].sum(axis=1)
df['Monsoon'] = df[['JUN', 'JUL', 'AUG', 'SEP']].sum(axis=1)
df['Post_Monsoon'] = df[['OCT', 'NOV', 'DEC']].sum(axis=1)

# Annual total as the sum of the twelve months, so it is consistent with the
# seasonal sums above.
df['Annual'] = df[['JAN','FEB','MAR','APR','MAY','JUN',
                   'JUL','AUG','SEP','OCT','NOV','DEC']].sum(axis=1)

# Pairwise (Pearson, the pandas default) correlations among seasons and the total.
correlation = df[['Winter', 'Pre_Monsoon', 'Monsoon', 'Post_Monsoon', 'Annual']].corr()

# Keep each season's correlation with the annual total; drop the trivial self-correlation.
season_corr = correlation['Annual'].drop('Annual').reset_index()
season_corr.columns = ['Season', 'Correlation']

fig = px.bar(season_corr, x='Season', y='Correlation', title='Correlation of Seasonal Rainfall with Annual Total')

# A correlation can be negative. Extend the axis below zero when needed so such
# a bar is not clipped; when all values are non-negative the range stays [0, 1].
y_floor = min(0.0, float(season_corr['Correlation'].min()))
fig.update_layout(yaxis=dict(range=[y_floor, 1]))
fig.show()

#### Decadal Trends in Seasonal Rainfall

To observe long-term changes, we aggregate seasonal rainfall by decade. This helps identify macro trends like increasing/decreasing monsoon strength.

In [117]:
season_cols = ['Winter', 'Pre_Monsoon', 'Monsoon', 'Post_Monsoon']

# Bucket each year into its decade's starting year (e.g. 1907 -> 1900).
# NOTE(review): if the data spans 1901-2015 as the notebook states, the first
# and last buckets cover fewer than ten years, so their means use fewer points.
df['Decade'] = (df['YEAR'] // 10) * 10

# Mean seasonal rainfall per decade.
season_trend = df.groupby('Decade')[season_cols].mean().reset_index()

# `go` comes from the notebook's import cell; the duplicate import that used to
# sit here was removed.
fig = go.Figure()
for season in season_cols:
    fig.add_trace(go.Scatter(x=season_trend['Decade'], y=season_trend[season], mode='lines+markers', name=season))
fig.update_layout(title='Decadal Trend of Seasonal Rainfall', xaxis_title='Decade', yaxis_title='Average Rainfall (mm)')
fig.show()

#### Forecasting Annual Rainfall using Prophet

We use Facebook Prophet to model and forecast future rainfall for the next 20 years. Prophet handles seasonality and trend shifts well.
	•	Training is done on data from 1901–2015.
	•	Forecast is visualized interactively using Plotly.

In [142]:
# Number of future years to forecast.
FORECAST_YEARS = 20

# Give each observation a timestamp: 1 January of its year.
df['DATE'] = pd.to_datetime(df['YEAR'], format='%Y')
annual_rainfall_ts = df.set_index('DATE')['ANNUAL']

# Prophet expects exactly two columns: 'ds' (timestamp) and 'y' (value).
prophet_data = annual_rainfall_ts.reset_index()
prophet_data.columns = ['ds', 'y']

# With one observation per year there is no within-year signal to learn, so the
# seasonal components are switched off explicitly. Left on 'auto', Prophet would
# fit a yearly seasonality from a single day-of-year and then evaluate it at
# whatever day-of-year the future dates fall on.
prophet_model = Prophet(
    yearly_seasonality=False,
    weekly_seasonality=False,
    daily_seasonality=False,
)
prophet_model.fit(prophet_data)

# 'YS' (year start) keeps future timestamps on 1 January, matching the training
# data. The original 'Y' is year-END: its first "future" point was 2015-12-31,
# and newer pandas versions deprecate that alias.
future = prophet_model.make_future_dataframe(periods=FORECAST_YEARS, freq='YS')
forecast = prophet_model.predict(future)

# plot_plotly is imported in the notebook's import cell.
fig_forecast = plot_plotly(prophet_model, forecast)

fig_forecast.update_layout(
    title='Annual Rainfall Forecast Using Prophet',
    xaxis_title='Year',
    yaxis_title='Rainfall (mm)',
    template='plotly_white',
    height=500
)

fig_forecast.show()

13:43:05 - cmdstanpy - INFO - Chain [1] start processing
13:43:05 - cmdstanpy - INFO - Chain [1] done processing


### Conclusion
	•	The Indian rainfall pattern shows strong seasonality with significant contribution from the monsoon.
	•	Anomalies are present, especially during certain years and months.
	•	Forecasting models like Prophet can help estimate future rainfall trends, though accuracy should be validated and compared with other models like ARIMA or LSTM.