# Task04: Analysis on Out_task00 data

__Goal__: Which station reported the highest and which the lowest temperature in each year?

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

__Read and clean data from task00__

In [None]:
# Load the task00 output. Fields are split on either a tab or a comma; a
# regex separator like this requires the python parsing engine.
# NOTE(review): with read_csv's default header handling the first line of the
# file is consumed as column names, and skipfooter=1 discards the last line.
# Confirm both are intended for this input.
df = pd.read_csv(
    "./outputs/out_task00",
    sep="\t|,",
    engine="python",
    skipfooter=1,
)

In [None]:
df.columns = ['id', 'station_id', 'year', 'month', 'temp']
col_names = df.columns


def _strip_brackets(value):
    """Return *value* as text without leading '[' or trailing ']' characters."""
    return str(value).lstrip('[').rstrip(']')


# Every column is first cleaned of bracket characters and parsed as float.
for col in col_names:
    df[col] = df[col].map(_strip_brackets).astype(float)

# All columns except 'temp' are then narrowed to integers.
integer_columns = ['id', 'station_id', 'year', 'month']
df = df.astype(dict.fromkeys(integer_columns, int))

In [None]:
# Keep only the columns used by the per-year max/min analysis below.
selected_columns = ['id', 'year', 'temp']
new_df = df[selected_columns]
new_df.head()

__Check for any missing values__

In [None]:
# True if at least one cell of new_df is missing (NaN/None), else False.
new_df.isna().to_numpy().any()

## Maximum temp

In [None]:
# Highest temperature per (id, year): one row per id, one column per year.
# Combinations with no data are left as NaN instead of being filled with 0:
# a synthetic 0 would beat every real reading in a year whose temperatures are
# all negative and would then be reported as that year's maximum. NaN cells
# are skipped by the max()/idxmax() calls applied to this table.
# The aggregation is named by string ('max'); passing np.max is deprecated in
# recent pandas versions and resolves to the same operation.
table_max = pd.pivot_table(new_df, values='temp', index=['id'],
                           columns=['year'], aggfunc='max')
table_max

In [None]:
# For each year column, pick the row label (id) that holds the largest value.
max_index = table_max.idxmax(axis=0)
# One-column frame so it can be joined side by side with the temperatures.
max_index_df = max_index.to_frame()

In [None]:
# Largest value found in each year column of the pivot table.
max_temp = table_max.max(axis=0)
# One-column frame so it can be joined side by side with the ids.
max_temp_df = max_temp.to_frame()

In [None]:
# Place the winning id and its temperature next to each other, one row per
# year, then tag every row as belonging to the 'Max' group.
result_max = pd.concat([max_index_df, max_temp_df], axis=1)
# Both inputs carry the same default column label, so name them first.
result_max.columns = ['id', 'temp']
result_max['Group'] = 'Max'
result_max

## Minimum temp

In [None]:
# Lowest temperature per (id, year): one row per id, one column per year.
# Combinations with no data are left as NaN instead of being filled with 0:
# a synthetic 0 would undercut every real reading in a year whose temperatures
# are all positive and would then be reported as that year's minimum. NaN
# cells are skipped by the min()/idxmin() calls applied to this table.
# The aggregation is named by string ('min'); passing np.min is deprecated in
# recent pandas versions and resolves to the same operation.
table_min = pd.pivot_table(new_df, values='temp', index=['id'],
                           columns=['year'], aggfunc='min')
table_min

In [None]:
# For each year column, pick the row label (id) that holds the smallest value.
min_index = table_min.idxmin(axis=0)
# One-column frame so it can be joined side by side with the temperatures.
min_index_df = min_index.to_frame()

In [None]:
# Smallest value found in each year column of the pivot table.
min_temp = table_min.min(axis=0)
# One-column frame so it can be joined side by side with the ids.
min_temp_df = min_temp.to_frame()

In [None]:
# Place the winning id and its temperature next to each other, one row per
# year, then tag every row as belonging to the 'Min' group.
result_min = pd.concat([min_index_df, min_temp_df], axis=1)
# Both inputs carry the same default column label, so name them first.
result_min.columns = ['id', 'temp']
result_min['Group'] = 'Min'
result_min

In [None]:
# Move the index of each summary into an ordinary column so the plotting
# call can refer to it by name.
# NOTE: these two lines rebind result_max/result_min, so re-running this cell
# without re-running the cells that build them resets the index a second time.
result_max = result_max.reset_index()
result_min = result_min.reset_index()

frames = [result_max, result_min]

# Stack the two summaries into one long table with a fresh 0..n-1 index.
# keys=['year', 'Group'] was dropped: `keys` does not name columns, it labels
# each input frame, so the Max rows were tagged 'year' and the Min rows
# 'Group' in an extra, misleading outer index level.
final_result = pd.concat(frames, ignore_index=True)
final_result


In [None]:
# Show the dtype of each column of the combined table before plotting.
final_result.dtypes

In [None]:
# One bar panel per year (wrapped four per row) showing the Max and Min
# temperature, with bars coloured by id.
# catplot is a figure-level function that creates its own figure, sized via
# `height`; a preceding plt.figure(...) call only leaves an unused empty
# figure behind, so it has been removed.
g = sns.catplot(x='Group', y='temp', col='year',
                kind='bar', hue='id',
                col_wrap=4, data=final_result, height=5, palette="Paired", dodge=False)

(g.set_axis_labels("", "Temp")
  .set_xticklabels(["Max", "Min"])
  .despine(left=True))

# Save through the grid object and BEFORE plt.show(): saving with
# plt.savefig() after show() targets a new, empty current figure and writes
# a blank image. FacetGrid.savefig forwards its arguments to the figure's
# savefig and uses a tight bounding box by default.
g.savefig('task04.png', dpi=300)

plt.show()
