In [None]:
# load libraries
import os
import json
import pandas as pd
import numpy as np
import plotly.express as px
from pycountry_convert import country_alpha2_to_country_name, country_name_to_country_alpha3

import warnings
warnings.filterwarnings('ignore')

In [None]:
# define a function to clean daily temperature data
def clean_data(df):
    """Return a cleaned copy of a daily temperature table.

    The input frame is left untouched, so calling this function repeatedly on
    the same frame always gives the same result (the previous in-place version
    rescaled ``TG`` again on every call).

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose column labels, once blanks are removed, include ``TG``
        and ``Q_TG``.

    Returns
    -------
    pandas.DataFrame
        A new frame with blank-free column labels, ``TG`` multiplied by 0.1,
        and only the rows where ``Q_TG`` equals 0.

    Raises
    ------
    KeyError
        If ``TG`` or ``Q_TG`` is missing after the column labels are cleaned.
    """
    # work on a copy so the caller's frame is never mutated
    cleaned = df.copy()
    # rename the columns to get rid of unnecessary spaces in naming
    cleaned.columns = cleaned.columns.str.replace(' ', '')
    # rescale the TG data
    # NOTE(review): presumably the raw values are stored in tenths of a degree — confirm against the data file header
    cleaned['TG'] = cleaned['TG'] * 0.1
    # get rid of suspect and missing values (only quality flag 0 is kept)
    cleaned = cleaned[cleaned['Q_TG'] == 0]

    return cleaned

In [None]:
# read the station table (first column becomes the index) and strip blanks from its column labels
stations = (
    pd.read_csv('../data/stations.txt', sep=',', header=13, index_col=0)
      .rename(columns=lambda label: label.replace(' ', ''))
)

In [None]:
# get the daily temperature data file names
# Due to the size of the files, they were not uploaded to GitHub. You can download them from https://knmi-ecad-assets-prd.s3.amazonaws.com/download/ECA_blend_tg.zip
# os.listdir returns entries in arbitrary order; sorting makes the processing order (and the saved csv) reproducible
files = sorted(os.listdir('../data/ECA_blend_tg/'))

In [None]:
# extract the yearly temperature data from all the stations and put them in a new dataframe (it takes some minutes)
# NOTE: DataFrame.append was removed in pandas 2.0 and re-copies the whole table on every call,
# so the per-station tables are collected in a list and concatenated once after the loop.
temp_columns = ['DATE','STAID', 'TEMP','STANAME', 'CN', 'LAT', 'LON', 'HGHT']
station_frames = []
for file in files:
    # skip the folder-metadata file that may sit next to the data files
    if file == '.DS_Store':
        continue
    df = pd.read_csv(f'../data/ECA_blend_tg/{file}',
                     header=15, sep=',', index_col=2, parse_dates=True)
    df = clean_data(df)
    # a file with no valid rows left after cleaning contributes nothing
    if df.empty:
        continue

    # one groupby per file: yearly means of the station id and of TG only
    yearly = df.groupby(df.index.year)[['STAID', 'TG']].mean()
    # reset_index turns the year index (named after the date column) into a regular column
    temp_dum = yearly.rename(columns={'TG': 'TEMP'}).reset_index()
    temp_dum = pd.merge(temp_dum, stations, on='STAID')
    # the date column label may still carry blanks from the raw header
    temp_dum.columns = temp_dum.columns.str.replace(' ','')
    station_frames.append(temp_dum)

if station_frames:
    temp = pd.concat(station_frames)
    # keep the expected columns first (created empty if absent), followed by any extra ones
    extra_columns = [col for col in temp.columns if col not in temp_columns]
    temp = temp.reindex(columns=temp_columns + extra_columns)
else:
    # nothing was read: fall back to an empty table with the expected layout
    temp = pd.DataFrame(columns=temp_columns)
temp = temp.rename(columns={'DATE': 'YEAR'})

In [None]:
# show the first two rows of temp as a quick check
temp.head(2)

In [None]:
# save the yearly temperature data into a csv file (the frame's index is written as the first column)
temp.to_csv('../data/temp.csv')

In [None]:
# load the yearly temperature data from the csv file, using its first column as the index
temp = pd.read_csv('../data/temp.csv',index_col=0)
# show the first two rows as a quick check
temp.head(2)

In [None]:
# keep only the rows whose YEAR is strictly greater than 1950
after_1950 = temp['YEAR'].gt(1950)
temp = temp.loc[after_1950]

In [None]:
# take average with respect to the countries and YEAR
# TEMP is selected before aggregating: averaging every column raises a TypeError on
# pandas >= 2.0 when the frame still holds non-numeric columns (such as the station name)
temp = temp.groupby(['YEAR','CN'])['TEMP'].mean().reset_index()

In [None]:
# translate each alpha-2 country code in CN to its alpha-3 form and store it in the CN3 column
def _alpha2_to_alpha3(code):
    """Convert an alpha-2 country code to alpha-3 by way of the country name."""
    country_name = country_alpha2_to_country_name(code)
    return country_name_to_country_alpha3(country_name)

temp['CN3'] = temp['CN'].apply(_alpha2_to_alpha3)

In [None]:
# build the animated choropleth: one frame per YEAR, countries located by CN3 and coloured by TEMP
fig = px.choropleth(
    temp,
    # data mapping
    locations="CN3",
    color="TEMP",
    # animation
    animation_frame="YEAR",
    animation_group='CN3',
    # map appearance
    scope='world',
    projection='orthographic',
    color_continuous_scale='sunset',
    range_color=(-10, 20),
    # layout
    title='mean temperature in european countries 1951-2022',
    height=600,
)

In [None]:
# save the choropleth map to an html file; include_plotlyjs='cdn' is passed so the plotly.js library is referenced rather than embedded
fig.write_html("../figures/temp_europe_1951-2022.html", include_plotlyjs='cdn')