### Import libraries.

In [None]:
import pandas as pd
import numpy as np
from datetime import date
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Limit DataFrame output to at most 10 rows; longer frames are shown truncated.
pd.set_option('display.max_rows', 10)

### Prepare data for analysis.

In [None]:
# Read the Seattle weather records from the dataset CSV file.
csv_path = '../input/did-it-rain-in-seattle-19482017/seattleWeather_1948-2017.csv'
weather = pd.read_csv(csv_path)
# Leave the frame as the last expression so the cell renders a preview of it.
weather

In [None]:
# Basic information about the data: column names, non-null counts and dtypes.
weather.info()

There are 3 missing values in the "PRCP" and "RAIN" columns. Let's locate the affected rows and fill them in.

In [None]:
# Build a mask of the rows whose "RAIN" value is missing, then show those rows.
rain_missing = weather['RAIN'].isna()
display(weather[rain_missing])

In [None]:
# Show the rows at these fixed positions.
# NOTE(review): presumably the rows just before and after the missing-value
# rows found above - confirm against that cell's output.
nearby_positions = [18413, 18414, 18417, 18418, 21065, 21066, 21068, 21069]
weather.iloc[nearby_positions]

In [None]:
# Fill missing "PRCP" values with 0.0 and missing "RAIN" values with False.
# The filled columns are assigned back explicitly. Calling
# fillna(..., inplace=True) on a column selection is chained assignment:
# pandas warns about it, and with Copy-on-Write enabled it only modifies a
# temporary object and leaves `weather` untouched.
weather['PRCP'] = weather['PRCP'].fillna(0.0)
weather['RAIN'] = weather['RAIN'].fillna(False)
# Check the result: this frame is empty when neither column has missing values left.
display(weather[weather[['PRCP', 'RAIN']].isnull().any(axis=1)])

Now all the missing values have been filled. 

In [None]:
# Parse the 'DATE' column into datetime values and store it back on the frame.
parsed_dates = pd.to_datetime(weather['DATE'])
weather['DATE'] = parsed_dates
# Print the frame summary again so the new dtype of 'DATE' can be checked.
weather.info()

### Select June weather data for analysis.

In [None]:
# Keep only the rows whose date falls in June (month number 6).
is_june = weather['DATE'].dt.month.eq(6)
weather_june = weather[is_june]
weather_june

In [None]:
# Descriptive statistics for the June data, followed by the relative
# frequency of each "RAIN" value.
june_stats = weather_june.describe()
rain_share = weather_june['RAIN'].value_counts(normalize=True)
display(june_stats, rain_share)

Across all June days from 1948 to 2017, about 30.1% were rainy.  
### Plotting
What about each individual calendar day? Taking June 1st as an example: over the years, how many June 1sts were rainy?

In [None]:
# Plot rainy days by date in June from 1948 to 2017.
# The day labels are stored in `june_day` rather than `date`, so the
# `datetime.date` class imported at the top of the notebook is not shadowed.
june_day = weather_june['DATE'].dt.strftime("%m-%d")
fig, ax = plt.subplots(figsize=(15, 8))
# One pair of bars per calendar day: how many years it was dry vs. rainy.
sns.countplot(data=weather_june, x=june_day, hue='RAIN', ax=ax)
ax.set(
    xlabel='Day (MM-DD)',
    ylabel='Number of years',
    title='Rainy vs. dry days for each date in June',
)
plt.xticks(rotation=90)
plt.show()

Next, how many rainy days were there in June of each year?

In [None]:
# Count the June days of every year, split by the "RAIN" flag, and draw the
# two counts side by side for each year.
june_year = weather_june['DATE'].dt.year
plt.figure(figsize=(15, 8))
ax = sns.countplot(x=june_year, hue='RAIN', data=weather_june)
ax.set_xlabel('YEAR')
plt.xticks(rotation=90)
plt.show()

In [None]:
# Histogram of the number of rainy days per June.
# Sum the rain flag per year, so a June without any rain is kept as 0 instead
# of being absent from the distribution.
is_rainy = weather_june['RAIN'].eq(True)
rainydays = is_rainy.groupby(weather_june['DATE'].dt.year).sum()
# discrete=True gives every day count its own unit-wide bar centred on the
# value. Fixed edges such as bins=range(2, 17) would merge the counts 15 and
# 16 into the closed last bin and silently drop any count outside 2-16.
ax = sns.histplot(rainydays, discrete=True)
ax.set(
    xlabel='Rainy days',
    ylabel='Number of years',
    title='Distribution of rainy days in June',
)
plt.show()

The histogram shows that June normally has at least 2 rainy days and no more than 15.   
Most years have between 4 and 14 rainy days in June.

Now, let's plot how the average daily "PRCP" in June changes over the years.

In [None]:
# Calculate average daily "PRCP" values for each June.
prcp_year = weather_june['PRCP'].groupby(weather_june['DATE'].dt.year).mean()
# Line plot the result.
# The x values come from the grouped result's own index, so every year is
# paired with its own mean. A separately computed list of years (for example
# via unique(), which keeps appearance order) only lines up with the sorted
# groupby output if the rows happen to be in chronological order.
year = prcp_year.index
fig, ax = plt.subplots(figsize=(15, 8))
sns.lineplot(x=year, y=prcp_year.to_numpy(), marker="o", ax=ax)
ax.set(
    xlabel="Year",
    ylabel="Average daily PRCP",
    title="Average daily precipitation in June",
)
# Label every year explicitly; rotate the labels so they do not overlap.
plt.xticks(ticks=year, labels=year, rotation=90)
plt.show()

### Future work

**Rainfall intensity** is classified according to the rate of **precipitation**, which depends on the time period considered.   
The following categories are used to classify rainfall intensity:

*Light rain* — when the precipitation rate is < 2.5 mm (0.098 in) per hour  
*Moderate rain* — when the precipitation rate is between 2.5 mm (0.098 in) - 7.6 mm (0.30 in) or 10 mm (0.39 in) per hour  
*Heavy rain* — when the precipitation rate is > 7.6 mm (0.30 in) per hour, or between 10 mm (0.39 in) and 50 mm (2.0 in) per hour  
*Violent rain* — when the precipitation rate is > 50 mm (2.0 in) per hour  

Finally, let's take a look at "TMAX" & "TMIN" changes over the years.

In [None]:
# Average "TMAX" and "TMIN" for each June.
june_year = weather_june['DATE'].dt.year
june_tmax = weather_june['TMAX'].groupby(june_year).mean()
june_tmin = weather_june['TMIN'].groupby(june_year).mean()
# Both series are grouped by the same key, so they share one index of years.
# Taking the x values from that index keeps the plot valid for whatever years
# the data contains, instead of relying on a hard-coded year range.
year = june_tmax.index
fig, ax = plt.subplots(figsize=(15, 8))
# Attach each legend label to its own line rather than assigning labels by
# position afterwards.
sns.lineplot(x=year, y=june_tmax.to_numpy(), ax=ax, color='darkred', marker='o', label='TMAX')
sns.lineplot(x=year, y=june_tmin.to_numpy(), ax=ax, color='darkblue', marker='o', label='TMIN')
ax.set(xlabel='Year', ylabel='Temperature')
ax.legend(labelcolor=['darkred', 'darkblue'])
plt.xticks(ticks=year, labels=year, rotation=90)
plt.show()