In [1]:
import pandas as pd
import numpy as np
import json
import geojson
from translate import Translator

In [2]:
#################################################################################################################
#Clean all country climate data 1961 - 2019

In [17]:
#columns of the raw export that are not used in this notebook
unused_columns = [
    'Domain Code', 'Area Code', 'Domain', 'Element Code', 'Element',
    'Months Code', 'Months', 'Year Code', 'Unit', 'Flag', 'Flag Description',
]

#load the raw climate change data and discard the unused columns
df = pd.read_csv('./raw/climate_change_orig.csv').drop(columns=unused_columns)

#the remaining columns are renamed by position
df.columns = ['country_eng', 'year', 'value']

df.head()

Unnamed: 0,country_eng,year,value
0,Afghanistan,1961,-0.08
1,Afghanistan,1962,-0.12
2,Afghanistan,1963,0.882
3,Afghanistan,1964,-0.727
4,Afghanistan,1965,-0.2


In [18]:
#load the country code table (column 5 of the file becomes the index)
#and keep only the columns needed for the mapping
#NOTE(review): read_csv treats the text "NA" as a missing value by default -
#presumably the reason Namibia is hard-coded in get_country_code; confirm
df_country_code = (
    pd.read_csv('./raw/country_codes.CSV', index_col=5)
    .drop(columns=['LANG', 'LANG_NAME', 'COUNTRY_ALPHA3_CODE', 'COUNTRY_NUMERIC_CODE'])
)

#nested dict: {<column name>: {<index value>: <cell value>}}
#get_country_code reads the 'COUNTRY_ALPHA2_CODE' entry: (<country name>: <country code>)
country_dict = df_country_code.to_dict(orient='dict')


In [19]:
#country names that are mapped to a code by hand instead of via country_dict
_COUNTRY_CODE_OVERRIDES = {
    "Norway": "no",
    "France": "fx",
    "Democratic People's Republic of Korea": "kp",
    "Republic of Korea": "kr",
    "French Southern and Antarctic Territories": "tf",
    "Ethiopia PDR": "et",
    "Midway Island": "mi",
    "Netherlands Antilles (former)": "an",
    "Pacific Islands Trust Territory": "pc",
    "Pitcairn Islands": "pn",
    "Republic of Moldova": "md",
    "Serbia and Montenegro": "cs",
    "Sudan (former)": "sd",
    "Svalbard and Jan Mayen Islands": "sj",
    "United Republic of Tanzania": "tz",
    "United States Virgin Islands": "vi",
    "Wake Island": "wk",
    #misspelled alias, kept so that existing callers get the same result as before
    "Wallis and Futuna Islandss": "wf",
    "Yugoslav SFR": "yu",
    "Belgium-Luxembourg": "be",
    #NOTE(review): "ky" is the ISO alpha-2 code of the Cayman Islands - confirm this mapping is intended
    "Channel Islands": "ky",
    "China, Hong Kong SAR": "hk",
    "China, Macao SAR": "mo",
    "China, mainland": "cn",
    "China, Taiwan Province of": "tw",
    "Wallis and Futuna Islands": "wf",
    "Kosovo": "xk",
    "Namibia": "na",
    "Somaliland": "so",
}


def get_country_code(country_name, country_dict):
    """returns the lower-case country code of a specific country

        Parameters:
        country_name: Name of the country in english (String)
        country_dict: Dictionary with country - country code mapping (Dictionary).
                      The mapping is read from its 'COUNTRY_ALPHA2_CODE' entry,
                      which maps <country name> to <country code>.

        Returns: hand-maintained code if the name is listed in
                 _COUNTRY_CODE_OVERRIDES, otherwise the lower-cased code from
                 country_dict, otherwise the string "NO_CC" (String)

       """

    #handling countries with different names (takes precedence over country_dict)
    override = _COUNTRY_CODE_OVERRIDES.get(country_name)
    if override is not None:
        return override

    #direct lookup instead of scanning every entry; a missing
    #'COUNTRY_ALPHA2_CODE' entry is treated like an empty mapping
    alpha2_by_name = country_dict.get('COUNTRY_ALPHA2_CODE') or {}
    if country_name in alpha2_by_name:
        return str(alpha2_by_name[country_name]).lower()

    return "NO_CC"

In [20]:
#look up the country code of every row (country -> country code) and store it as a new column
df['country_code'] = [get_country_code(name, country_dict) for name in df['country_eng']]

In [21]:
#remove rows that are not consistent and would lead to a strange/wrong linegraph line

#"China" and "China, mainland" have the same country code -> "China, mainland" is not used
china_mainland_rows = df.loc[df['country_eng'] == "China, mainland"]
df = df.drop(index=china_mainland_rows.index)

#duplicated entry for "Sudan (former)" in the year 2011
former_sudan_2011_rows = df.loc[(df['country_eng'] == "Sudan (former)") & (df['year'] == 2011)]
df = df.drop(index=former_sudan_2011_rows.index)

#(a disabled snippet that padded South Sudan with 50 NaN rows starting in 1961 was removed here)

In [22]:
#countries which do not exist since 1961 -> add the data of the former country

def _copy_rows_for_successor(source, predecessor_name, successor_name, successor_code):
    """returns the rows of a former country relabelled as one of its successors

        Parameters:
        source: dataframe with the columns 'country_eng' and 'country_code' (Dataframe)
        predecessor_name: value of 'country_eng' of the former country (String)
        successor_name: value written to 'country_eng' of the copied rows (String)
        successor_code: value written to 'country_code' of the copied rows (String)

        Returns: independent copy of the predecessor's rows; source is not modified (Dataframe)

       """
    #.copy() so that the assignments below never write into a slice of source
    successor_rows = source[source['country_eng'] == predecessor_name].copy()
    successor_rows['country_code'] = successor_code
    successor_rows['country_eng'] = successor_name
    return successor_rows


#(former country, present-day country, country code of the present-day country)
SUCCESSOR_STATES = [
    #south sudan -> independent from sudan since 2011
    ('Sudan (former)', 'South Sudan', 'ss'),
    #Czech Republic and Slovakia -> old czechoslovakia until 1993
    ('Czechoslovakia', 'Slovakia', 'sk'),
    ('Czechoslovakia', 'Czechia', 'cz'),
]

#old soviet union countries -> independent from ussr since 1992
#dict with present-day countries and countrycode
df_old_ussr_dict = {
    "Russian Federation": "ru",
    "Georgia": "ge",
    "Ukraine": "ua",
    #NOTE(review): get_country_code maps the name "Republic of Moldova" to "md";
    #confirm whether the copied rows should carry that name instead of "Moldova"
    "Moldova": "md",
    "Belarus": "by",
    "Armenia": "am",
    "Azerbaijan": "az",
    "Kazakhstan": "kz",
    "Uzbekistan": "uz",
    "Turkmenistan": "tm",
    "Kyrgyzstan": "kg",
    #Tajikistan was missing from this list and therefore got no data from the ussr
    "Tajikistan": "tj",
    "Estonia": "ee",
    "Latvia": "lv",
    "Lithuania": "lt"
}

SUCCESSOR_STATES += [('USSR', name, code) for name, code in df_old_ussr_dict.items()]

#DataFrame.append was removed in pandas 2.0 -> collect all copies and concatenate once
#NOTE: running this cell twice adds the copied rows twice
df = pd.concat(
    [df] + [_copy_rows_for_successor(df, *successor) for successor in SUCCESSOR_STATES],
    ignore_index=True,
)


In [23]:
#order the rows by country code and year and renumber the index from 0
df = df.sort_values(by=['country_code', 'year']).reset_index(drop=True)

In [34]:
#write the cleaned data to a new csv file (the index is written as the first, unnamed column)
cleaned_csv_path = "./climate_change_cleaned.csv"
df.to_csv(cleaned_csv_path)

In [17]:
#################################################################################################################
#Add temperature change value to geojson
#Output: geojson_temp_translations.geojson
#################################################################################################################

In [7]:
#open geojson default db
#GeoJSON is UTF-8 encoded, so the encoding is set explicitly instead of
#relying on the platform default (which can mis-decode non-ASCII names)
with open('./raw/ne_110m_admin_0_countries.geojson', 'r', encoding='utf-8') as geojson_file:
    geodata = geojson.load(geojson_file)

In [8]:
#set the missing country code of norway & france
missing_iso_codes = {"France": "fx", "Norway": "no"}

for feature in geodata['features']:
    properties = feature['properties']
    admin_name = properties['ADMIN']
    if admin_name in missing_iso_codes:
        properties['ISO_A2'] = missing_iso_codes[admin_name]

In [9]:
def get_temperature(country_code, df, year=2019):
    """returns the temperature value of a specific country in a specific year

        Parameters:
        country_code: ISO alpha 2 Country Code, compared case-insensitively (String)
        df: dataframe with the columns 'country_code', 'year' and 'value' (Dataframe)
        year: year whose value is returned, 2019 by default (Int)

        Returns: value of the first matching row as string if found,
                 otherwise the string "NO_DATA" (Str)

       """

    #a code that is not a string cannot match any country
    if not isinstance(country_code, str):
        return "NO_DATA"

    #exact, case-insensitive comparison; a substring test would let a row code
    #such as "n" or "" match an unrelated country
    wanted_code = country_code.lower()
    row_codes = df['country_code'].astype(str).str.lower()
    matches = df.loc[(row_codes == wanted_code) & (df['year'] == year), 'value']

    if matches.empty:
        return "NO_DATA"

    #convert numpy scalars to plain python numbers before formatting
    first_value = matches.iloc[0]
    if hasattr(first_value, 'item'):
        first_value = first_value.item()
    return str(first_value)

In [10]:
#store the temperature value of every country in its geojson properties
for feature in geodata['features']:
    properties = feature['properties']
    properties['TEMP'] = get_temperature(properties['ISO_A2'], df)

In [13]:
#No climate data: Kosovo, Burundi, Falkland Islands, Rwanda, Yemen

In [14]:
#one translator instance per target language: de, fr & it
translator_de, translator_fr, translator_it = (
    Translator(to_lang=language) for language in ('de', 'fr', 'it')
)


In [15]:
def get_translation(country_name, translator):
    """returns the translated country name

        Parameters:
        country_name: Country to be translated in ENGLISH
        translator: instance of translator in specific language

        Returns: translated country (Str)

       """
    #italian names that are fixed by hand; they only apply when the given
    #translator equals the module-level translator_it
    italian_overrides = {
        "United States of America": "Stati Uniti d'America",
        "Iran": "Iran",
        "Czechia": "Cechia",
    }

    if country_name in italian_overrides and translator == translator_it:
        return italian_overrides[country_name]

    #everything else is handed to the translator
    return translator.translate(country_name)

In [16]:
#geojson property that receives the translation of each translator
translators_by_property = (
    ('ADMIN_DE', translator_de),
    ('ADMIN_FR', translator_fr),
    ('ADMIN_IT', translator_it),
)

#add translations to geojson properties
for feature in geodata['features']:
    properties = feature['properties']
    for property_name, language_translator in translators_by_property:
        properties[property_name] = get_translation(properties['ADMIN'], language_translator)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176


In [18]:
#save geojson with the added temperature values and translations
geojson_output_path = './geojson_temp_translations.geojson'
with open(geojson_output_path, 'w') as output_file:
    geojson.dump(geodata, output_file)

In [None]:
#################################################################################################################
    #Clean global temperature median 1961 - 2019
    #Output: climate_change_global_cleaned.csv
#################################################################################################################

In [6]:
#import only used data
#usecols does not guarantee the order of the returned columns, so the columns
#are renamed by name (not by position) and then put into a fixed order
df_global = (
    pd.read_csv('./raw/climate_change_global.csv', usecols=["Year", "Value"])
    .rename(columns={"Year": "year", "Value": "value"})
    [['year', 'value']]
)

#save df_global to csv
df_global.to_csv('./climate_change_global_cleaned.csv')

#preview (only the last expression of a cell is displayed)
df_global.head()