## Global Historical Climatology Network Dataset
Variables are stored in both rows and columns.
This dataset represents the daily weather records for a weather station (MX17004) in Mexico for five months in 2010.

In [None]:
import pandas as pd

In [None]:
# Load the raw weather CSV (path is relative to the current working directory)
weather_df = pd.read_csv(filepath_or_buffer='../weather-raw.csv')

weather_df

In [None]:
# Melt days (values) into column 'day' (variable).
# Every column not listed in id_vars is unpivoted: its label is written to
# 'day' and its cell value to 'temp'.
# NOTE: var_name is passed as a scalar. For a frame with flat (non-MultiIndex)
# columns pandas documents a scalar here, and recent releases raise
# ValueError when given a list such as ['day'].

weather_df = weather_df.melt(
    id_vars=['id', 'year', 'month', 'element'],
    var_name='day',
    value_name='temp',
)

weather_df.head()

In [None]:
# Spread the values found in 'element' (tmax, tmin) into columns of their own,
# keyed by station id and year / month / day; cells are filled from 'temp'.

weather_df = pd.pivot_table(
    weather_df,
    values='temp',
    index=['id', 'year', 'month', 'day'],
    columns='element',
)

weather_df.head(n=5)

In [None]:
# Move the index levels back into regular columns, leaving a default integer index

weather_df = weather_df.reset_index(drop=False)

weather_df.head(n=5)

In [None]:
# Relabel the columns axis (its name was 'element') as 'id', then
# drop the weather station 'id' column, since it is always the same anyway

weather_df = weather_df.rename_axis(columns='id').drop(columns='id')

weather_df.head(n=5)

In [None]:
# Merging year, month, day into a single year-month-day 'date' column

# Strip the leading character ('d') from every day label, e.g. 'd1' -> '1'.
# A vectorised string slice is used instead of DataFrame.update() with a list
# of per-row dicts: that form looped in Python and only lined up with the
# right rows because the frame carried a fresh 0..n-1 index.
weather_df['day'] = weather_df['day'].str[1:]

# Build zero-padded 'YYYY-MM-DD' strings from the three parts
date_parts = weather_df[['year', 'month', 'day']].astype('string')
date_text = (
    date_parts['year']
    + '-' + date_parts['month'].str.zfill(2)
    + '-' + date_parts['day'].str.zfill(2)
)

# Drop old columns year, month, day
weather_df = weather_df.drop(['year', 'month', 'day'], axis=1)

# Insert 'date' as the first column, holding datetime.date objects.
# The format is given explicitly so pandas does not have to infer it.
weather_df.insert(0, 'date', pd.to_datetime(date_text, format='%Y-%m-%d').dt.date)

# Sort data frame by date and renumber the rows from 0
weather_df = weather_df.sort_values(by=['date']).reset_index(drop=True)

weather_df