In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as py # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the Pakistan rainfall CSV and print its column/dtype summary.
PAK_RAINFALL_CSV = "/kaggle/input/rainfall-in-pakistan/Rainfall_1901_2016_PAK.csv"
data = pd.read_csv(PAK_RAINFALL_CSV)
data.info()

In [None]:
# Replace the raw CSV headers with short lowercase names.
# The original year header carries a leading space (' Year').
column_aliases = {
    'Rainfall - (MM)': 'rainfall',
    ' Year': 'year',
    'Month': 'month',
}
data = data.rename(columns=column_aliases)
data.columns

In [None]:
# Preview the first rows of the Pakistan rainfall frame.
data.head()

In [None]:
# Distinct month and year values, in order of first appearance in the data.
getmonths = data['month'].unique()
getyears = data['year'].unique()

# Report how many distinct months/years exist and the rainfall extremes.
rainfall_values = data['rainfall']
for label, value in (
    ("Total Months: ", len(getmonths)),
    ("Total Years: ", len(getyears)),
    ("Maximum Rainfall: ", rainfall_values.max()),
    ("Minimum Rainfall: ", rainfall_values.min()),
):
    print(label, value)

In [None]:
# Compute the 1901 average two ways (manual sum/count and Series.mean)
# so the two printed numbers can be compared.
rain_1901 = data.loc[data.year == 1901, "rainfall"]

print("Average Rainfall of Year 1901: ", rain_1901.sum() / len(rain_1901))
print("Mean Rainfall of Year 1901:    ", rain_1901.mean())


# Yearly Mean Rainfall

In [None]:
# Mean rainfall per year, one row per year with columns
# ["year", "meanRainfall"].
#
# A single groupby replaces the former row-by-row DataFrame.append loop:
# DataFrame.append no longer exists in pandas >= 2.0, and growing a frame
# one row at a time is quadratic. sort=False keeps the years in the order
# they first appear in `data`, matching the previous loop over
# data['year'].unique().
yearData = (
    data.groupby("year", sort=False)["rainfall"]
    .mean()
    .rename("meanRainfall")
    .reset_index()
)
yearData.head()

# Monthly Mean Rainfall

In [None]:
# Mean rainfall per month, one row per month with columns
# ["month", "meanRainfall"].
#
# A single groupby replaces the former row-by-row DataFrame.append loop
# (DataFrame.append was removed in pandas 2.0). sort=False is essential here:
# it keeps the months in the order they first appear in `data` instead of
# sorting the month labels alphabetically.
monthData = (
    data.groupby("month", sort=False)["rainfall"]
    .mean()
    .rename("meanRainfall")
    .reset_index()
)
monthData

In [None]:
# Bar chart of the mean rainfall for each month in monthData.
sns.set_style('whitegrid')
fig, ax = plt.subplots(figsize=(20, 8))
ax.set_title("Monthly Average Rainfall", fontsize=40)
sns.barplot(x="month", y="meanRainfall", data=monthData, ax=ax)
# Tilt and enlarge the month labels once the bars (and their ticks) exist.
plt.setp(ax.get_xticklabels(), rotation=45, fontsize=18)
ax.set_ylabel("Mean Rainfall", fontsize=20)
ax.set_xlabel("Months", fontsize=20)

In [None]:
# Bar chart of the mean rainfall for each year in yearData.
sns.set_style('whitegrid')
fig, ax = plt.subplots(figsize=(20, 8))
ax.set_title("Yearly Average Rainfall", fontsize=40)
sns.barplot(x="year", y="meanRainfall", data=yearData, ax=ax)
# Rotate the year labels only after the bars are drawn, so the rotation is
# applied to the final tick labels rather than to the placeholder ticks of
# the still-empty axes (same ordering as the monthly chart).
plt.setp(ax.get_xticklabels(), rotation=90)
ax.set_ylabel("Mean Rainfall", fontsize=20)
ax.set_xlabel("Years", fontsize=20)

# Last Five (5) Years

In [None]:
# Keep only the most recent LAST_N_YEARS years of yearData.
#
# The cut-off used to be max()-16, which selects 17 years even though the
# section header, the variable name and the plot title all say five.
# Subtracting (N - 1) makes the range inclusive of exactly N years.
LAST_N_YEARS = 5
filterYear = yearData['year'].max() - (LAST_N_YEARS - 1)
last5year = yearData[yearData['year'] >= filterYear]
last5year

In [None]:
# Bar chart of the mean rainfall for the years selected in last5year.
sns.set_style('whitegrid')
fig, ax = plt.subplots(figsize=(20, 8))
ax.set_title("Last 5-Years Rainfall", fontsize=40)
sns.barplot(x="year", y="meanRainfall", data=last5year, ax=ax)
plt.setp(ax.get_xticklabels(), rotation=45, fontsize=18)
ax.set_xlabel("Years", fontsize=20)
plt.setp(ax.get_yticklabels(), fontsize=18)
ax.set_ylabel("Average Rain Fall", fontsize=20)

In [None]:
# Line plot of the yearly mean rainfall.
# The years are passed as x values so the axis shows calendar years rather
# than the positional index 0..N-1, and the figure carries a title and axis
# labels so it can be read on its own.
fig, ax = plt.subplots(figsize=(20, 8))
ypoints = py.array(yearData['meanRainfall'])
ax.plot(yearData['year'].astype(int), ypoints, linestyle='solid')
ax.set_title("Yearly Mean Rainfall", fontsize=40)
ax.set_xlabel("Years", fontsize=20)
ax.set_ylabel("Mean Rainfall", fontsize=20)
plt.show()

In [None]:
# Line plot of the monthly mean rainfall.
# The month labels are passed as x values so the axis shows month names
# rather than the positional index 0..11, and the figure carries a title and
# axis labels so it can be read on its own.
fig, ax = plt.subplots(figsize=(20, 8))
ypoints = py.array(monthData['meanRainfall'])
ax.plot(monthData['month'], ypoints, linestyle='solid')
ax.set_title("Monthly Mean Rainfall", fontsize=40)
ax.set_xlabel("Months", fontsize=20)
ax.set_ylabel("Mean Rainfall", fontsize=20)
plt.show()

In [None]:
# Animated bubble chart of monthly rainfall, one animation frame per year,
# restricted to rows whose year is 2000 or later.
rain_since_2000 = data.loc[data.year >= 2000]
bubble_options = dict(
    x="year",
    y="rainfall",
    animation_frame="year",
    animation_group="month",
    size="rainfall",
    color="month",
    hover_name="month",
    title=' Rainfall in (mm) of each month from 2000 to 2016',
    log_x=True,
    size_max=100,
    range_x=[1998, 2018],
    range_y=[0, 155],
)
px.scatter(rain_since_2000, **bubble_options)

# **Neighbour Country Data - [INDIA]**

In [None]:
# List every file under the Kaggle input directory again, to locate the
# Indian rainfall dataset.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

In [None]:
# Load the Indian rainfall CSV and print its column/dtype summary.
IND_RAINFALL_CSV = "/kaggle/input/rainfall-in-india/rainfall in india 1901-2015.csv"
indData = pd.read_csv(IND_RAINFALL_CSV)
indData.info()

In [None]:
# Lowercase month abbreviations; these are the per-month column names of
# indData once the rename below has run.
getMonth = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
            'aug', 'sep', 'oct', 'nov', 'dec']

# Map every known header to its lowercase form: the non-month headers are
# listed explicitly, the month headers are the uppercase abbreviations.
other_headers = ['SUBDIVISION', 'YEAR', 'ANNUAL',
                 'Jan-Feb', 'Mar-May', 'Jun-Sep', 'Oct-Dec']
lowercase_names = {header: header.lower() for header in other_headers}
lowercase_names.update({month.upper(): month for month in getMonth})

indData = indData.rename(columns=lowercase_names)
print("Columns Name: ", indData.columns)
print ("\n\nMonths: ", getMonth)

In [None]:
# Count the distinct years, then keep them as an ascending array.
india_years = indData['year']
print("Total Number of Years: ", len(india_years.unique()))
getYears = india_years.sort_values().unique()
getYears

In [None]:
# Long-format Indian rainfall: one row per (year, month) with columns
# ["rainfall", "year", "month"]. Each value is the sum of that month's column
# over every indData row of the year, rounded to 5 decimals.
#
# This replaces a nested loop that called DataFrame.append once per
# (year, month) pair; DataFrame.append was removed in pandas 2.0 and the loop
# was quadratic in the number of rows.
yearly_month_totals = (
    indData.groupby("year")[getMonth]   # groupby sorts the years ascending
    .sum()
    .round(5)
    .reset_index()
)
indRainfall = (
    yearly_month_totals
    .melt(id_vars="year", value_vars=getMonth,
          var_name="month", value_name="rainfall")
    # melt emits all years for 'jan', then all years for 'feb', ...; a stable
    # sort by year restores the year-major order (jan..dec within each year)
    # that the former loop produced.
    .sort_values("year", kind="stable")
    .reset_index(drop=True)
    [["rainfall", "year", "month"]]
)
print("Yearly Monthwise Rainfall in India")
indRainfall

In [None]:
# Month abbreviation -> full month name.
monTranslate = {
    'jan': 'January',
    'feb': 'February',
    'mar': 'March',
    'apr': 'April',
    'may': 'May',
    'jun': 'June',
    'jul': 'July',
    'aug': 'August',
    'sep': 'September',
    'oct': 'October',
    'nov': 'November',
    'dec': 'December',
}
# Swap the abbreviations for full names; values not in the mapping are kept.
indRainfall['month'] = indRainfall['month'].replace(monTranslate)
indRainfall

In [None]:
# Give the year column an integer dtype in both frames, then show the
# resulting dtypes of the Indian frame.
for frame in (indRainfall, data):
    frame['year'] = frame['year'].astype(int)
indRainfall.info()

In [None]:
# Pakistan rows for every year except 2016.
print("Pakistan Rainfall Data Excluding Year 2016 as Indian Rainfall Data")
not_2016 = data['year'].ne(2016)
pakData = data.loc[not_2016]
pakData

In [None]:
# Print the number of distinct months and years in each rainfall frame.
for heading, frame in (
    ("Indian Rainfall Data\n--------------------", indRainfall),
    ("\n\n\nPakistan Rainfall Data\n--------------------", pakData),
):
    month_count = len(frame.month.unique())
    year_count = len(frame.year.unique())
    print(heading + "\nNumber of Unique Months: ", month_count,
          "\nNumber of Unique Years: ", year_count)

In [None]:
# Pairwise correlation of the numeric columns of each frame.
# Both frames also hold the text column 'month'; since pandas 2.0
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# instead, so the numeric columns are selected explicitly.
pak_corr = pakData.select_dtypes(include="number").corr()
ind_corr = indRainfall.select_dtypes(include="number").corr()
print ("Pakistan Data Correlation\n--------------------------\n",
       pak_corr,
      "\n\n\nIndian Data Correlation\n------------------------\n",
       ind_corr )

In [None]:
# Correlation between Pakistan and Indian rainfall.
# NOTE(review): Series.corr pairs the two series by index label, so this
# presumably relies on both frames listing the same (year, month) rows in
# the same order - confirm.
india_rain = indRainfall["rainfall"]
pakData.rainfall.corr(india_rain)

In [None]:
# Heatmap of the correlations between pakData's numeric columns.
# The text column 'month' is excluded explicitly because DataFrame.corr()
# raises on non-numeric columns from pandas 2.0 onwards.
sns.heatmap(pakData.select_dtypes(include="number").corr())
plt.show()

In [None]:
# Heatmap of the correlations between indRainfall's numeric columns.
# The text column 'month' is excluded explicitly because DataFrame.corr()
# raises on non-numeric columns from pandas 2.0 onwards.
sns.heatmap(indRainfall.select_dtypes(include="number").corr())
plt.show()

In [None]:
# Same correlation as before with the operands swapped.
# NOTE(review): values are paired by index label - confirm both frames share
# the same row order.
pakistan_rain = pakData["rainfall"]
indRainfall.rainfall.corr(pakistan_rain)

In [None]:
# Scatter of Pakistan rainfall (x) against Indian rainfall (y); the two
# columns are paired positionally, so they must have the same length.
plt.scatter(x=pakData['rainfall'], y=indRainfall['rainfall'])

In [None]:
# KDE joint plots of rainfall against year, first Pakistan and then India.
kde_joint_options = dict(x="rainfall", y="year", kind='kde')
sns.jointplot(data=pakData, **kde_joint_options)
sns.jointplot(data=indRainfall, **kde_joint_options)

In [None]:
# Pair plots with KDE diagonals for each country; the lower triangle is
# overlaid with 4-level KDE contours.
contour_options = dict(levels=4, color=".2")

pak = sns.pairplot(pakData, diag_kind="kde")
pak.map_lower(sns.kdeplot, **contour_options)

ind = sns.pairplot(indRainfall, diag_kind="kde")
ind.map_lower(sns.kdeplot, **contour_options)

# **Joining 2 Datasets**

In [None]:
# Outer-join the Pakistan and Indian frames on (year, month).
# Both frames have a 'rainfall' column; explicit suffixes name the results
# 'rainfall_pak' and 'rainfall_ind' instead of the default, uninformative
# 'rainfall_x' / 'rainfall_y', so downstream plots are self-explanatory.
joinData = pd.merge(
    pakData,
    indRainfall,
    how='outer',
    on=['year', 'month'],
    suffixes=('_pak', '_ind'),
)
joinData

In [None]:
# Pair plot of the joined frame with KDE diagonals and 4-level KDE contours
# on the lower triangle.
lower_contours = dict(levels=4, color=".2")
jData = sns.pairplot(joinData, diag_kind="kde")
jData.map_lower(sns.kdeplot, **lower_contours)

In [None]:
# Correlation heatmap of the joined frame's numeric columns.
# joinData contains the text column 'month'; DataFrame.corr() raises on
# non-numeric columns from pandas 2.0 onwards, so only the numeric columns
# are used.
joinData_correlation = joinData.select_dtypes(include="number").corr()
sns.heatmap(joinData_correlation)
plt.show()

**Like -> Upvote ---> Motivation**

I am a student of Data Science/Analysis, trying out Python code for learning. I need all of your support and appreciation, with lots of love, and I am looking for your guidance, likes, and UPVOTES for motivation. The comments, likes, and upvotes on my previous notebooks were highly appreciated and very motivating. Thanks with lots of LOVE :)

Please also visit:

Uber Drives - EDA & Viusalization
https://www.kaggle.com/methoomirza/uber-drives-eda-viusalization

Pakistan-Temperature-Visualization
https://www.kaggle.com/methoomirza/pakistan-temperature-visualization

Data Cleaning & Visualization
https://www.kaggle.com/methoomirza/data-cleaning-visualization

Pakistan COVID19 - EDA & Visuallization of CoronaVirus
https://www.kaggle.com/methoomirza/covid19-datavisualization-pakistan-till-june2020

cities Visualization
https://www.kaggle.com/methoomirza/cities-visualization

cleaning of cities name
https://www.kaggle.com/methoomirza/cleaning-of-cities-name

variant_name_of_pakistan
https://www.kaggle.com/methoomirza/variant-name-of-pakistan

Top10-Books
https://www.kaggle.com/methoomirza/top10-books

Gufhtugu-Analysis
https://www.kaggle.com/methoomirza/gufhtugu-analysis


