### Import necessary libraries

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    file_paths = [os.path.join(dirname, filename) for filename in filenames]
    for file_path in file_paths:
        print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import plotly.graph_objects as go
%matplotlib inline
from plotly.offline import plot, iplot, init_notebook_mode
# Switch plotly into offline notebook mode so the plotly figures below render inline
# (connected=True presumably loads plotly.js from a CDN instead of embedding it — confirm in the plotly docs).
init_notebook_mode(connected=True)

### Loading data


In [None]:
# Read the rainfall CSV from the Kaggle input directory, then display its column names.
rainfall_csv = '../input/rainfall-data-from-1901-to-2017-for-india/Rainfall_Data_LL.csv'
data = pd.read_csv(rainfall_csv)
data.columns

In [None]:
# Preview the first 10 rows of the loaded table.
data.head(10)

### Compare rainfall in each year

In [None]:
# Sum the ANNUAL column within each YEAR group (one total per year).
yearly_totals = [(year, group['ANNUAL'].sum()) for year, group in data.groupby('YEAR')]
years = [year for year, _total in yearly_totals]
rainfalls = [total for _year, total in yearly_totals]

# Bar chart of the per-year totals; bar fill and outline share one colour.
year_bar_colour = '#03bafc'
year_trace = go.Bar(
    x=years,
    y=rainfalls,
    marker={'color': year_bar_colour, 'line': {'color': year_bar_colour, 'width': 1.2}},
)
year_layout = go.Layout(
    template='presentation',
    title='ANNUAL RAINFALLS IN INDIA',
    xaxis={'title': 'Year'},
    yaxis={'title': 'Annual Rainfall'},
)
year_fig = go.Figure(data=[year_trace], layout=year_layout)
year_fig.show()

### Compare the rainfall in each area

In [None]:
# Sum the ANNUAL column within each SUBDIVISION group (one total per area, over all years).
area_totals = [(area, group['ANNUAL'].sum()) for area, group in data.groupby('SUBDIVISION')]
areas = [area for area, _total in area_totals]
rainfalls_ = [total for _area, total in area_totals]

# Bar chart of the per-subdivision totals; bar fill and outline share one colour.
area_bar_colour = '#f5f558'
area_trace = go.Bar(
    x=areas,
    y=rainfalls_,
    marker={'color': area_bar_colour, 'line': {'color': area_bar_colour, 'width': 1.2}},
)
area_layout = go.Layout(
    template='plotly_white',
    title='RAINFALLS IN EACH AREA IN INDIA',
    xaxis={'title': 'Subdivision'},
    yaxis={'title': 'Rainfall'},
)
area_fig = go.Figure(data=[area_trace], layout=area_layout)
area_fig.show()

In [None]:
# Re-list the column names for reference before selecting the columns used in the next cells.
data.columns

### Rainfall over months from 2014 to 2017 in India

In [None]:
# Seasonal columns of the dataset that are totalled for each year below.
hist_months = ['Jan-Feb','Mar-May', 'June-September', 'Oct-Dec']

# Rows for each of the four years of interest.
hist_df1 = data[data['YEAR'] == 2014]
hist_df2 = data[data['YEAR'] == 2015]
hist_df3 = data[data['YEAR'] == 2016]
hist_df4 = data[data['YEAR'] == 2017]

# For each year, one total per seasonal column (summed over that year's rows),
# in the same order as hist_months.
values1 = [hist_df1[season].sum() for season in hist_months]
values2 = [hist_df2[season].sum() for season in hist_months]
values3 = [hist_df3[season].sum() for season in hist_months]
values4 = [hist_df4[season].sum() for season in hist_months]


In [None]:
# Grouped bar chart: one group per seasonal column, one bar per year (2014-2017).
labels = hist_months
x = np.arange(len(labels))  # one tick position per group
width = 0.18  # width of a single bar

fig, ax = plt.subplots(figsize=(15, 8))

# Offsets are symmetric around each tick so every group is centred on its label
# and the bars read left-to-right in chronological order, matching the legend.
rects1 = ax.bar(x - 1.5 * width, values1, width, label='2014')
rects2 = ax.bar(x - 0.5 * width, values2, width, label='2015')
rects3 = ax.bar(x + 0.5 * width, values3, width, label='2016')
rects4 = ax.bar(x + 1.5 * width, values4, width, label='2017')

# Axis label, title and the per-group tick labels.
ax.set_ylabel('Rainfall', fontsize = 20)
ax.set_title('RAINFALL OVER MONTHS FROM 2014 TO 2017 IN INDIA', fontsize = 25)
ax.set_xticks(x)
ax.set_xticklabels(labels, fontsize = 20)
ax.legend()

# Print each bar's value just above it.
for rects in (rects1, rects2, rects3, rects4):
    ax.bar_label(rects, padding=2)

plt.show()


### When did the rainfall reach its highest?

In [None]:
# Keep only the subdivision name and annual total from the 2017 rows, then preview 10 of them.
area2017_columns = ['SUBDIVISION', 'ANNUAL']
area2017 = hist_df4[area2017_columns]
area2017.head(10)

In [None]:
# Monthly columns of the dataset, in calendar order.
months = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']

# One total per month (summed over that year's rows), in the same order as `months`.
pie1 = [hist_df1[month_col].sum() for month_col in months]  #2014
pie2 = [hist_df2[month_col].sum() for month_col in months]  #2015
pie3 = [hist_df3[month_col].sum() for month_col in months]  #2016
pie4 = [hist_df4[month_col].sum() for month_col in months]  #2017

In [None]:
from matplotlib import cm
# One pie chart per year showing how that year's total rainfall splits across the months.
# plt.get_cmap is used instead of matplotlib.cm.get_cmap, which was deprecated in
# matplotlib 3.7 and removed in 3.9; the second argument resamples the map to 12 colours.
viridis = plt.get_cmap('viridis', 12)
magma = plt.get_cmap('magma', 12)
plasma = plt.get_cmap('plasma', 12)
cividis = plt.get_cmap('cividis', 12)

# Evenly spaced positions in [0, 1] used to pick one colour per month from each map.
shade_positions = np.linspace(0, 1, 12)

# (title, monthly totals, colormap) for each figure; the list index is the figure number.
pie_specs = [
    ('2014', pie1, magma),
    ('2015', pie2, viridis),
    ('2016', pie3, plasma),
    ('2017', pie4, cividis),
]

for figure_number, (year_title, monthly_totals, colormap) in enumerate(pie_specs):
    plt.figure(figure_number)
    plt.pie(x = monthly_totals, labels = months, colors = colormap(shade_positions), radius = 1.5)
    plt.title(year_title)

plt.show()

In [None]:
# Show the column names of the loaded table once more.
data.columns

### There was no specific task for this dataset, so I only did a little exploratory analysis. It may not be thorough enough to learn from, but I hope it gives you some ideas about possible tasks and problems.
### This is a clear and great dataset; thanks for your contribution.