In [None]:
# Import the packages used throughout the notebook
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import warnings
# NOTE(review): this filter hides every FutureWarning, which presumably includes deprecation
# notices raised by the libraries used in later cells — consider removing it before upgrading
# dependencies so those notices are visible.
warnings.simplefilter(action='ignore', category=FutureWarning)  # suppress FutureWarning messages

In [None]:
# Read 'weatherHistory.csv' into a DataFrame, then preview its first five rows
df_wehter_Data = pd.read_csv(filepath_or_buffer='weatherHistory.csv')
df_wehter_Data.head(n=5)

In [None]:
# Count the distinct values in every column (nunique() ignores NaN by default)
df_wehter_Data.nunique()

In [None]:
# Count the missing values in each column
df_wehter_Data.isna().sum()

In [None]:
# Fill missing 'Precip Type' entries with "rain".
# The filled column is assigned back to the DataFrame instead of calling
# fillna(inplace=True) on the selected column: the in-place form is chained assignment,
# which is deprecated and leaves the DataFrame unchanged under pandas Copy-on-Write.
df_wehter_Data['Precip Type'] = df_wehter_Data['Precip Type'].fillna("rain")

# Re-check the per-column missing counts; 'Precip Type' should now report 0
df_wehter_Data.isna().sum()

In [None]:
# Show the data type of each column
df_wehter_Data.dtypes

In [None]:
# Convert the 'FormattedDate' column from object to timezone-aware (UTC) datetime.
# errors='coerce' turns any value that does not match the format into NaT instead of raising.
df_wehter_Data['FormattedDate'] = pd.to_datetime(
    df_wehter_Data['FormattedDate'],
    format='%Y-%m-%d %H:%M:%S',
    errors='coerce',
    utc=True,
)

# Confirm the new dtype of the converted column
df_wehter_Data.dtypes

In [None]:
# Extract the calendar year from the parsed timestamps
df_wehter_Data['Year'] = df_wehter_Data['FormattedDate'].dt.year

# Average every numeric column per year.
# numeric_only=True is required on pandas >= 2.0: without it mean() tries to average the
# text columns as well and raises a TypeError.
grp_weather_data = df_wehter_Data.groupby('Year', as_index=False).mean(numeric_only=True)
grp_weather_data.head(20)

In [None]:
# Plot the yearly averages of temperature, apparent temperature and humidity
fig, ax = plt.subplots(figsize=(6, 6))
for column, label in (
    ('TemperatureC', 'Temperature (C)'),
    ('ApparentTemperatureC', 'Apparent Temperature (C)'),
    ('Humidity', 'Humidity'),
):
    ax.plot(grp_weather_data['Year'], grp_weather_data[column], label=label)
ax.legend()
ax.set_title('The past 10 years, the average for temperature, humidity, and apparent temperature')
ax.set_xlabel('Years')
ax.set_ylabel('Average Temperature (C)')

# Save BEFORE showing: plt.show() releases the current figure, so a savefig() issued
# afterwards would write an empty image.
fig.savefig('1_average_tempreture.jpg')
plt.show()

In [None]:
# Keep only the rows whose year lies in 2007..2010 (both ends inclusive)
specific_year_weather_type = df_wehter_Data[df_wehter_Data['Year'].between(2007, 2010)]

# Tally each weather summary and expose the result as a two-column frame:
# the summary text as 'Type of Weather' and its number of occurrences as 'Count'
count_weather_Type = (
    specific_year_weather_type['Summary']
    .value_counts()
    .rename_axis('Type of Weather')
    .reset_index(name='Count')
)
count_weather_Type.head(10)

In [None]:
# Pie chart of the five most frequent weather types.
# Note: the percentages are shares of these five wedges only, not of all weather types.
top_weather_types = count_weather_Type.head(5)

fig, ax = plt.subplots(figsize=(7, 7))
fig.set_facecolor('white')
ax.pie(
    top_weather_types['Count'],
    labels=top_weather_types['Type of Weather'],
    startangle=90,
    autopct='%1.1f%%',
    textprops={'fontweight': 'normal', 'fontsize': 12},
)
ax.legend(loc='upper left', bbox_to_anchor=(1, 0, 0.4, 1))
ax.set_title('Different weather conditions between 2007 and 2010')
ax.set_xlabel('Summary/Type of Weather')

# Save BEFORE showing: plt.show() releases the current figure, so a savefig() issued
# afterwards would write an empty image. bbox_inches='tight' keeps the legend, which is
# anchored outside the axes, inside the saved file.
fig.savefig('2_Different_condition.jpg', bbox_inches='tight')
plt.show()

In [None]:
# Select the rows for 2012. The explicit .copy() makes this an independent DataFrame, so
# adding a column below neither triggers SettingWithCopyWarning nor depends on whether
# query() handed back a view or a copy.
df_year_2012_db = df_wehter_Data.query('Year == 2012').copy()

# Derive the month name (e.g. 'January') from each timestamp
df_year_2012_db['Months'] = df_year_2012_db['FormattedDate'].dt.month_name()
df_year_2012_db.head()

In [None]:
# Average every numeric column per month of 2012.
# numeric_only=True is required on pandas >= 2.0: without it mean() tries to average the
# text columns as well and raises a TypeError.
# groupby() sorts its keys, which for month names means alphabetical order (April, August,
# ...); the rows are re-sorted into calendar order so downstream plots read January..December.
calendar_rank = {
    month: rank
    for rank, month in enumerate(pd.date_range('2012-01-01', periods=12, freq='MS').month_name())
}
grp_by_Months = (
    df_year_2012_db.groupby('Months', as_index=False)
    .mean(numeric_only=True)
    .sort_values('Months', key=lambda names: names.map(calendar_rank))
    .reset_index(drop=True)
)
grp_by_Months.head(12)

In [None]:
# Pull the month labels, average wind speed and average visibility out of the monthly
# summary as NumPy arrays (one array per column, in the summary's row order)
months_arr, windSpeed_arr, visiblity_arr = (
    grp_by_Months[column_name].to_numpy()
    for column_name in ('Months', 'WindSpeed(km/h)', 'Visibility(km)')
)

In [None]:
# Grouped bar chart: average wind speed and average visibility for each month
width = 0.35              # width of a single bar; the two bars of a month sit side by side
n = len(months_arr)
x = np.arange(n)          # left-bar position for each month

fig, ax = plt.subplots(figsize=(10, 7.5))
ax.bar(x, windSpeed_arr, width, label='WindSpeed (km/h)')
ax.bar(x + width, visiblity_arr, width, label='Visiblity (Km)')

# Centre each tick between the two bars of its month and rotate the labels for readability
ax.set_xticks(x + width / 2)
ax.set_xticklabels(months_arr, rotation=45, horizontalalignment='right')

ax.set_title('Windspeed and visibility averages by month in 2012.')
ax.set_ylabel('Average speed')
ax.set_xlabel('Months of 2012')
ax.legend()

# Save BEFORE showing: plt.show() releases the current figure, so a savefig() issued
# afterwards would write an empty image. bbox_inches='tight' keeps the rotated tick labels
# inside the saved file.
fig.savefig('3_average_wind_speed.jpg', bbox_inches='tight')
plt.show()

In [None]:
# Take a copy of the numeric measurement columns that will be drawn as histograms
histogram_columns = [
    'TemperatureC',
    'ApparentTemperatureC',
    'Humidity',
    'WindSpeed(km/h)',
    'Wind Bearing(degrees)',
    'Visibility(km)',
    'Pressure(millibars)',
]
df_wehter_Data_hist = df_wehter_Data.loc[:, histogram_columns]
df_wehter_Data_hist.head()

In [None]:
# Draw one density histogram per selected column.
# Every column gets its own subplot: the grid is sized from the number of columns, so the
# first column is no longer skipped and no subplot index can run past the grid.
column_list = list(df_wehter_Data_hist.columns)
n_cols = 2
n_rows = -(-len(column_list) // n_cols)  # ceiling division

fig, axes = plt.subplots(n_rows, n_cols, figsize=(11, 11), squeeze=False)
flat_axes = axes.ravel()

for ax, column in zip(flat_axes, column_list):
    # label must be a short string (the column name), not the data itself
    ax.hist(df_wehter_Data_hist[column], density=True, bins=200, label=column)
    ax.set_title(column)
    # density=True normalises each histogram to unit area, so the y axis is a density
    ax.set_ylabel('Density')
    ax.legend()

# Hide grid cells left over when the column count does not fill the last row
for unused_ax in flat_axes[len(column_list):]:
    unused_ax.set_visible(False)

fig.suptitle('Histogram for Weather condition between 2005 and 2016')
fig.tight_layout(rect=(0, 0, 1, 0.96))  # leave head-room for the figure-level title
fig.savefig('4_Histogram_weather.jpg')