In [116]:
import numpy as np
import pandas as pd

## Load and format the temperature anomaly data

In [117]:
# Relative directory the raw temperature files are read from.
path_raw_temp_data = "data/raw/temperature/"
# Relative directory the processed temperature CSV files are read from and written to.
path_processed_temp_data = "data/processed/temperature/"

In [118]:
# Sub-directory (below the raw temperature directory) with one text file per country.
path_countries_land_files = "countries-land/"
# File name of the country list stored in the processed temperature directory.
path_countries_list_file = "temp-countries-list.csv"

In [119]:
# Todo: remove loading temp_countries from processed data when combining jupyter notebooks
# Country list with the columns 'Country' and 'ISO-alpha3'.
temp_countries: pd.DataFrame = pd.read_csv(
    path_processed_temp_data + path_countries_list_file,
    sep=',',
)
print(temp_countries)

            Country ISO-alpha3
0       Afghanistan        AFG
1     Åland Islands        ALA
2           Albania        ALB
3           Algeria        DZA
4    American Samoa        ASM
..              ...        ...
228  Virgin Islands        VIR
229  Western Sahara        ESH
230           Yemen        YEM
231          Zambia        ZMB
232        Zimbabwe        ZWE

[233 rows x 2 columns]


### Load temp-land-country data

In [120]:
# Column layout of the combined country table: two identifying columns
# followed by the twelve value columns each raw country file is expected to provide.
column_names = [
    "country_code",
    "country_name",
    "year",
    "month",
    "monthly_anomaly",
    "monthly_unc",
    "annual_anomaly",
    "annual_unc",
    "five_year_anomaly",
    "five_year_unc",
    "ten_year_anomaly",
    "ten_year_unc",
    "twenty_year_anomaly",
    "twenty_year_unc",
]

# Starts out empty; the loading loop further down in this cell fills it.
temp_land_country = pd.DataFrame(columns=column_names)

# Names of all countries in the country list.
country_names = temp_countries['Country']

# Todo when combining use the dictionary in the data-integration notebook
# Maps the name used for a raw country file (key) to the name used in the
# country list (value). Keys must match the raw file names exactly.
new_country_names = {
    "Denmark (Europe)": "Denmark",
    "France (Europe)": "France",
    "Netherlands (Europe)": "Netherlands",
    "United Kingdom (Europe)": "United Kingdom",
    "Åland": "Åland Islands",
    "Czech Republic": "Czechia",
    "Turkey": "Türkiye",
    "Svalbard and Jan Mayen": "Svalbard and Jan Mayen Islands",
    "Cape Verde": "Cabo Verde",
    "Turks and Caicas Islands": "Turks and Caicos Islands",
    "Swaziland": "Eswatini",
    "Macedonia": "North Macedonia",
    "Côte d'Ivoire": "Côte d’Ivoire",
    "Federated States of Micronesia": "Micronesia (Federated States of)",
    "South Georgia and the South Sandwich Isla": "South Georgia and the South Sandwich Islands",
    "Bonaire, Saint Eustatius and Saba": "Bonaire, Sint Eustatius and Saba",
    "Congo (Democratic Republic of the)": "Democratic Republic of the Congo",
    "South Korea": "Korea, South",
    "North Korea": "Korea, North",
    "Palestina": "State of Palestine"
}

def swap_keys_values(d):
    """Return a new dict in which every value of ``d`` maps back to its key.

    If several keys share the same value, the key that comes last in
    ``d``'s iteration order wins. Values must be hashable.
    """
    inverted = {}
    for original_key, original_value in d.items():
        inverted[original_value] = original_key
    return inverted

# Inverse lookup: name used in the country list -> name used for the raw file.
map_country_to_filename = swap_keys_values(new_country_names)

# Todo shorten list of countries during development, change later before running the final script
country_names = country_names[0:10]

# Collect the per-country frames and concatenate them once after the loop.
# Calling pd.concat inside the loop copies all previously loaded rows again
# on every iteration and concatenates onto an empty placeholder frame.
country_frames = []
for country_name in country_names:
    # Fall back to the country-list name when no separate file name is registered.
    country_file_name = map_country_to_filename.get(country_name, country_name)
    path = path_raw_temp_data + path_countries_land_files + country_file_name + '.txt'
    # sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
    one_country = pd.read_csv(path, comment="%", header=None, sep=r'\s+')
    # Single .loc call (row mask + column label) instead of chained indexing.
    iso_name = temp_countries.loc[temp_countries['Country'] == country_name, 'ISO-alpha3'].iloc[0]

    one_country.insert(0, 'country_code', iso_name)
    one_country.insert(1, 'country_name', country_name)

    one_country.columns = column_names

    country_frames.append(one_country)

# With no frames loaded, temp_land_country keeps the value assigned earlier in
# this cell. ignore_index=True gives the combined table one unique row index
# instead of repeating every file's own 0..n labels.
if country_frames:
    temp_land_country = pd.concat(country_frames, ignore_index=True)

temp_land_country.head()

Unnamed: 0,country_code,country_name,year,month,monthly_anomaly,monthly_unc,annual_anomaly,annual_unc,five_year_anomaly,five_year_unc,ten_year_anomaly,ten_year_unc,twenty_year_anomaly,twenty_year_unc
0,AFG,Afghanistan,1848,5,-0.297,2.037,,,,,,,,
1,AFG,Afghanistan,1848,6,-0.796,2.136,,,,,,,,
2,AFG,Afghanistan,1848,7,-0.113,1.937,-0.777,0.639,,,,,,
3,AFG,Afghanistan,1848,8,-0.462,1.937,-0.743,0.644,,,,,,
4,AFG,Afghanistan,1848,9,-1.272,1.865,-0.676,0.669,,,,,,


### Load temp-land-region data

The region datasets are derived from the temp-land-country dataset.

In [121]:
# Todo: Calculate regional anomalies

# Todo: compare to the original regional data

### Load temp-land-ocean-global data

In [122]:
# File name of the global land-ocean data inside the raw temperature directory.
path_global_land_ocean_file = "global-land-ocean.txt"

In [123]:
# Read the whitespace-separated global land-ocean file; lines starting with '%'
# are comments and the file has no header row.
# sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
temp_land_ocean_global = pd.read_csv(
    path_raw_temp_data + path_global_land_ocean_file,
    comment="%",
    header=None,
    sep=r'\s+',
)
# NOTE(review): the last four labels repeat the four labels after "year", so the
# frame has duplicate column names. Presumably the file carries two variants of
# the same series — confirm against the raw file header. They are left unchanged
# here because renaming would alter the header of the saved CSV.
temp_land_ocean_global.columns = [
    "year",
    "annual_anomaly", "annual_unc", "five_year_anomaly", "five_year_unc",
    "annual_anomaly", "annual_unc", "five_year_anomaly", "five_year_unc",
]

In [124]:
# Preview the first three rows of the global table.
temp_land_ocean_global.head(n=3)

Unnamed: 0,year,annual_anomaly,annual_unc,five_year_anomaly,five_year_unc,annual_anomaly.1,annual_unc.1,five_year_anomaly.1,five_year_unc.1
0,1850,-0.441,0.179,,,-0.423,0.162,,
1,1851,-0.327,0.169,,,-0.324,0.154,,
2,1852,-0.3,0.17,-0.334,0.149,-0.286,0.154,-0.323,0.134


### Format the data

In [125]:
def cut_before_1900(temp_data):
    """Return the rows of ``temp_data`` whose ``year`` value is 1900 or later.

    ``temp_data`` must have a ``year`` column. The input is not modified;
    the selected rows keep their original index labels.
    """
    is_1900_or_later = temp_data['year'] >= 1900
    return temp_data[is_1900_or_later]

In [126]:
# Keep only the rows from 1900 onwards in both loaded tables.
temp_land_country = cut_before_1900(temp_data=temp_land_country)
# The region table is not built yet, so its filter stays disabled.
#temp_land_region = cut_before_1900(temp_land_region)
temp_land_ocean_global = cut_before_1900(temp_data=temp_land_ocean_global)

### Save the data as csv files

In [127]:
# Write each table to the processed temperature directory; index=False keeps
# the row index out of the files.
for frame, csv_name in (
    (temp_land_country, 'temp-land-country.csv'),
    # The region table is not built yet, so its export stays disabled:
    # (temp_land_region, 'temp-land-region.csv'),
    (temp_land_ocean_global, 'temp-land-ocean-global.csv'),
):
    frame.to_csv(path_processed_temp_data + csv_name, index=False)