# Exploring the dataset
<img src="https://www.travelmate.com.bd/wp-content/uploads/2018/09/Weather-Of-Bangladesh.jpg" alt="Weather" class="center">

In [None]:
import numpy as np
import pandas as pd
import os
import pandas_profiling
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.io as pio
%matplotlib inline
from plotly.offline import plot, iplot, init_notebook_mode
# Initialise plotly's notebook output mode. Per the plotly docs, connected=True
# loads plotly.js from an online CDN instead of embedding it in the notebook.
init_notebook_mode(connected=True)

In [None]:
# Read the weather CSV into the notebook-wide frame `df` and preview the first rows.
csv_path = '../input/bangladesh-weather-dataset/Temp_and_rain.csv'
df = pd.read_csv(csv_path)
df.head(n=5)

In [None]:
# Translate the numeric "Month" column (1-12) into English month names and store
# them as plain strings in a new lowercase "month" column.
month_names = [
    "January", "February", "March", "April", "May", "June", "July",
    "August", "September", "October", "November", "December",
]
mapping = dict(enumerate(month_names, start=1))

df["month"] = df["Month"].map(mapping).astype(str)

In [None]:
# Map each month number to a season label and store it in a new "Season" column.
#
# NOTE: the previous literal listed key 4 twice (4:"Spring" and 4:"Summer").
# In a Python dict literal the last duplicate wins, so April was always labelled
# "Summer" at runtime and the "Spring" entry for April was dead code. The dead
# entry is removed so the mapping states what it actually does.
# TODO(review): confirm April is meant to be "Summer" rather than "Spring".
mapping = {
    12: "Winter", 1: "Winter", 2: "Winter",
    3: "Spring",
    4: "Summer", 5: "Summer",
    6: "Rainy", 7: "Rainy",
    8: "Autumn", 9: "Autumn",
    10: "Late autumn", 11: "Late autumn",
}
# Guard against a month being dropped from the table; an unmapped month would
# otherwise become the string "nan" after .astype(str).
assert set(mapping) == set(range(1, 13)), "season mapping must cover months 1-12"

df["Season"] = df["Month"].map(mapping).astype(str)

In [None]:
# Summarise the frame's columns (dtypes and non-null counts) to spot missing data.
df.info()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

In [None]:
# Report the smallest and largest value of the "Year" column.
first_year = df['Year'].min()
last_year = df['Year'].max()
print("The dataset started from: ", first_year, "\nThe dataset Ends at: ", last_year)

### Judging from the data, I think I can answer a few questions about:
- Temperature of Bangladesh
- Rainfall of Bangladesh
- Relation between Temperature and Rainfall

# Let's analyze the temperature

In [None]:
# Extremes of the "tem" column over the whole dataset.
highest_tem = df['tem'].max()
lowest_tem = df['tem'].min()
print("Highest Temparature: ", highest_tem)
print("Lowest Temparature: ", lowest_tem)

**The max temp was recorded in May 1979**

**The min temp was recorded in January 1978**

In [None]:
# Show the full row(s) where "tem" equals its maximum, then its minimum.
tem_values = df['tem']
print(df.loc[tem_values == tem_values.max()])
print(df.loc[tem_values == tem_values.min()])

In [None]:
# Split the rows by whether "tem" reaches the column mean (>=) and report both counts.
tem_reaches_mean = df['tem'] >= df['tem'].mean()
tem_rows_at_or_above = len(df[tem_reaches_mean])
print("Total entries when temparature is above average: ", tem_rows_at_or_above)
print("Total entries when temparature is less than average: ", len(df) - tem_rows_at_or_above)

#### 'January', 'February', 'November', 'December' are the cold months
#### And 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October' are the warm months

In [None]:
# Scatter of temperature against month name, coloured by temperature, using the dark template.
pio.templates.default = "plotly_dark"
fig = px.scatter(
    data_frame=df,
    x="month",
    y="tem",
    color="tem",
    title="Measurement of Temparature per Month",
)
fig.show()

In [None]:
# Scatter of temperature against season label, coloured by temperature, using the dark template.
pio.templates.default = "plotly_dark"
season_tem_opts = dict(x="Season", y="tem", color="tem",
                       title="Measurement of Temparature per Season")
fig = px.scatter(df, **season_tem_opts)
fig.show()

## We can conclude that Bangladesh is a warm country 

In [None]:
# Bar chart of temperature by year, limited to the first 300 rows of the frame.
pio.templates.default = "plotly_dark"
first_rows = df[0:300]
fig = px.bar(
    first_rows,
    x='Year',
    y='tem',
    color="tem",
    title="Measurement of Temparature throughout the year",
)
fig.show()

#### Max and Min Temperature per month

In [None]:
# For every month number 1-12, print the highest and lowest "tem" among that month's rows.
for month_no in range(1, 13):
    month_tem = df.loc[df['Month'] == month_no, 'tem']
    print("Max value for month ", month_no, " is: ", month_tem.max())
    print("Min value for month ", month_no, " is: ", month_tem.min())
    print("____________________________________")

#### Max and Min Temperature per year

In [None]:
# For every year 1901-2015, print the highest and lowest "tem" among that year's rows.
for year in range(1901, 2016):
    year_tem = df.loc[df['Year'] == year, 'tem']
    print("Max value for Year ", year, " is: ", year_tem.max())
    print("Min value for Year ", year, " is: ", year_tem.min())
    print("____________________________________")

# Let's analyze the Rainfall data

**Highest rainfall: 1012 was recorded in August 2011**

**Lowest rainfall: 0 was recorded in December 1927**

In [None]:
# Show rain, temperature, month name and year for the row(s) at the rainfall maximum and minimum.
rain_values = df['rain']
rain_report_cols = ['rain', 'tem', 'month', 'Year']
print(df.loc[rain_values == rain_values.max(), rain_report_cols])
print(df.loc[rain_values == rain_values.min(), rain_report_cols])

In [None]:
# Split the rows by whether "rain" reaches the column mean (>=) and report both counts.
rain_reaches_mean = df['rain'] >= df['rain'].mean()
rain_rows_at_or_above = len(df[rain_reaches_mean])
print("Total entries when rainfall is above average: ", rain_rows_at_or_above)
print("Total entries when rainfall is less than average: ", len(df) - rain_rows_at_or_above)

### 'May', 'June', 'July','August', 'September' are the months when heavy rainfall takes place

In [None]:
# Scatter of rainfall against month name, coloured by rainfall, using the dark template.
pio.templates.default = "plotly_dark"
fig = px.scatter(
    data_frame=df,
    x="month",
    y="rain",
    color="rain",
    title="Measurement of Rain",
)
fig.show()

In [None]:
# Scatter of rainfall against season label, coloured by rainfall, using the dark template.
# A title is supplied so the figure stands on its own, matching the other
# scatter plots in this notebook (this one was previously untitled).
pio.templates.default = "plotly_dark"
fig = px.scatter(
    df,
    x="Season",
    y="rain",
    color="rain",
    title="Measurement of Rain per Season",
)
fig.show()

In [None]:
# Pie chart of total rainfall per month name, with percentage and label drawn inside each slice.
fig = px.pie(
    data_frame=df,
    values="rain",
    names='month',
    title="Rainfall according to  Month",
)
slice_text_opts = dict(textposition='inside', textinfo='percent+label')
fig.update_traces(**slice_text_opts)
fig.show()

## Rainfall occurs throughout the year

In [None]:
# Bar chart of rainfall by year, limited to the first 300 rows of the frame.
# A title is supplied so the figure stands on its own, matching the equivalent
# temperature bar chart (this one was previously untitled).
pio.templates.default = "plotly_dark"
fig = px.bar(
    df[0:300],
    x='Year',
    y='rain',
    color="rain",
    title="Measurement of Rain by Year (first 300 records)",
)
fig.show()

#### Max and Min Rainfall per month

In [None]:
# For every month number 1-12, print the highest and lowest "rain" among that month's rows.
for month_no in range(1, 13):
    month_rain = df.loc[df['Month'] == month_no, 'rain']
    print("Max value for month ", month_no, " is: ", month_rain.max())
    print("Min value for month ", month_no, " is: ", month_rain.min())
    print("____________________________________")

#### Max and Min Rainfall per Year

In [None]:
# For every year 1901-2015, print the highest and lowest "rain" among that year's rows.
for year in range(1901, 2016):
    year_rain = df.loc[df['Year'] == year, 'rain']
    print("Max value for Year ", year, " is: ", year_rain.max())
    print("Min value for Year ", year, " is: ", year_rain.min())
    print("____________________________________")

# Correlation between rain and temperature

### Strong correlation between temperature and rain

In [None]:
# Pearson correlation matrix of the numeric columns, drawn as a heatmap.
#
# The frame also carries the string columns "month" and "Season" created in
# earlier cells. Older pandas silently dropped non-numeric columns in
# DataFrame.corr, but pandas >= 2.0 raises on them instead, so restrict the
# frame to numeric dtypes explicitly. select_dtypes works on both old and new
# pandas and gives the same matrix the old implicit behaviour produced.
numeric_df = df.select_dtypes(include="number")
corr = numeric_df.corr(method='pearson')
fig = px.imshow(corr)
fig.show()

In [None]:
# Scatter of temperature against month name, with each point coloured by rainfall.
pio.templates.default = "plotly_dark"
fig = px.scatter(
    data_frame=df,
    x="month",
    y="tem",
    color="rain",
    title="Temperature vs Rain per month",
)
fig.show()

In [None]:
# Scatter of temperature against season label, with each point coloured by rainfall.
pio.templates.default = "plotly_dark"
season_rain_opts = dict(x="Season", y="tem", color="rain",
                        title="Temperature vs Rain per Season")
fig = px.scatter(df, **season_rain_opts)
fig.show()

In [None]:
# Bar chart of temperature per month name, with bars coloured by rainfall.
pio.templates.default = "plotly_dark"
fig = px.bar(
    data_frame=df,
    x='month',
    y='tem',
    color="rain",
    title="Temperature vs Rain per Month (Barchart)",
)
fig.show()

In [None]:
# Scatter of rainfall against temperature, one colour per month name.
tem_rain_opts = dict(x='tem', y='rain', color="month",
                     title="Relation between Temperature and Rain")
fig = px.scatter(df, **tem_rain_opts)
fig.show()

In [None]:
# Animated scatter of rainfall against temperature with one frame per year;
# marker colour follows temperature and marker size follows rainfall (capped at 55).
fig = px.scatter(
    data_frame=df,
    x='tem',
    y='rain',
    color='tem',
    animation_frame='Year',
    size="rain",
    size_max=55,
    title="Relation between Temperature and Rain per Year",
)
fig.show()