In [36]:
import requests
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from data_management import load_tsv
from data_utils import string_encoding_to_datetime, select_by_dict, get_df_with_split_time, print_unique_values
import datetime

In [23]:
# Names from the ISO3 table that are replaced before being used as lookup keys
# (presumably so they match the `geo` labels compared against this mapping
# later in the notebook -- confirm against the dataset).
iso3_name_overrides = {
    'United Kingdom of Great Britain and Northern Ireland (the)': "United Kingdom",
    "Germany": "Germany (until 1990 former territory of the FRG)",
    'Netherlands (the)': "Netherlands",
    "Republic of North Macedonia": "North Macedonia",
}

# Two-way lookup between ISO alpha-3 codes and (possibly overridden) country names.
iso3_code_to_country = {}
iso3_country_to_code = {}
iso3_table = pd.read_csv("data_store/ISO3.csv")
for raw_name, code in zip(iso3_table["Country"], iso3_table['Alpha-3 code']):
    # Fall back to the table's own spelling when no override is registered.
    country = iso3_name_overrides.get(raw_name, raw_name)
    iso3_code_to_country[code] = country
    iso3_country_to_code[country] = code
iso3_country_to_code

{'Afghanistan': 'AFG',
 'Albania': 'ALB',
 'Algeria': 'DZA',
 'American Samoa': 'ASM',
 'Andorra': 'AND',
 'Angola': 'AGO',
 'Anguilla': 'AIA',
 'Antarctica': 'ATA',
 'Antigua and Barbuda': 'ATG',
 'Argentina': 'ARG',
 'Armenia': 'ARM',
 'Aruba': 'ABW',
 'Australia': 'AUS',
 'Austria': 'AUT',
 'Azerbaijan': 'AZE',
 'Bahamas (the)': 'BHS',
 'Bahrain': 'BHR',
 'Bangladesh': 'BGD',
 'Barbados': 'BRB',
 'Belarus': 'BLR',
 'Belgium': 'BEL',
 'Belize': 'BLZ',
 'Benin': 'BEN',
 'Bermuda': 'BMU',
 'Bhutan': 'BTN',
 'Bolivia (Plurinational State of)': 'BOL',
 'Bonaire, Sint Eustatius and Saba': 'BES',
 'Bosnia and Herzegovina': 'BIH',
 'Botswana': 'BWA',
 'Bouvet Island': 'BVT',
 'Brazil': 'BRA',
 'British Indian Ocean Territory (the)': 'IOT',
 'Brunei Darussalam': 'BRN',
 'Bulgaria': 'BGR',
 'Burkina Faso': 'BFA',
 'Burundi': 'BDI',
 'Cabo Verde': 'CPV',
 'Cambodia': 'KHM',
 'Cameroon': 'CMR',
 'Canada': 'CAN',
 'Cayman Islands (the)': 'CYM',
 'Central African Republic (the)': 'CAF',
 'Chad': 

In [27]:
nights_spent = load_tsv("tour_occ_nim.tsv", decode=True)

# Keep only the `geo` labels that have an ISO3 code; report every label dropped.
geo_labels = nights_spent["geo"].unique()
countries = []
for country in geo_labels:
    if country in iso3_country_to_code:
        countries.append(country)
    else:
        print("Warning: {0} discarded".format(country))



In [85]:
# Reference https://datahelpdesk.worldbank.org/knowledgebase/articles/902061-climate-data-api
# Format: http://climatedataapi.worldbank.org/climateweb/rest/v1/country/type/var/start/end/ISO3[.ext]
def get_data(field, timeout=30):
    """Download 1980-1999 monthly averages of `field` for each entry of `countries`.

    One request is issued per country (module-level `countries`, translated to
    an ISO3 code through `iso3_country_to_code`). Every element of the JSON
    response is expected to carry a 'monthVals' list; those lists are averaged
    element-wise across the response (presumably one element per climate
    model -- confirm against the API reference).

    Parameters
    ----------
    field : str
        API variable code inserted into the URL (e.g. "tas" or "pr").
    timeout : float, optional
        Seconds to wait for each HTTP request before giving up, so a stalled
        server cannot hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        Long-format frame with columns "month" (1-12), "country" and `field`.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    country_to_data = {}
    for country in countries:
        url = "http://climatedataapi.worldbank.org/climateweb/rest/v1/country/mavg/{0}/1980/1999/{1}".format(
            field,
            iso3_country_to_code[country]
        )
        response = requests.get(url, timeout=timeout)
        # Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
        response.raise_for_status()

        month_vals = pd.DataFrame([el['monthVals'] for el in response.json()])
        if month_vals.empty:
            # An empty answer would otherwise yield a zero-length column and make
            # the 12-row frame construction below fail with a length mismatch.
            print("Warning: no {0} data for {1}, skipped".format(field, country))
            continue

        # Column sums divided by the number of response elements (rows).
        country_to_data[country] = month_vals.sum().values / month_vals.shape[0]

    country_to_data_df = pd.DataFrame(country_to_data, index = range(1, 12+1))
    # Wide (one column per country) -> long (month, country, value).
    country_to_data_df = country_to_data_df.rename_axis('month').reset_index().melt(id_vars="month", var_name="country", value_name=field)
    return country_to_data_df

# Fetch one long-format frame per API variable code. Each call issues one HTTP
# request per entry of `countries`. ("tas"/"pr" look like the API's temperature
# and precipitation codes -- confirm against the World Bank reference.)
tas_df = get_data("tas")
pr_df = get_data("pr")

In [86]:
# Display the frame returned by get_data("tas") (columns: month, country, tas).
tas_df

Unnamed: 0,month,country,tas
0,1,Albania,4.737906
1,2,Albania,5.296609
2,3,Albania,7.577846
3,4,Albania,10.959544
4,5,Albania,15.309108
...,...,...,...
427,8,United Kingdom,14.483837
428,9,United Kingdom,12.569213
429,10,United Kingdom,9.829463
430,11,United Kingdom,7.253223


In [87]:
# Combine both variables into one frame, matching rows on (month, country);
# the inner join keeps only pairs present in both inputs.
full_df = tas_df.merge(
    pr_df,
    how="inner",
    on=["month", "country"],
)
full_df

Unnamed: 0,month,country,tas,pr
0,1,Albania,4.737906,82.500861
1,2,Albania,5.296609,65.376423
2,3,Albania,7.577846,64.389188
3,4,Albania,10.959544,56.989191
4,5,Albania,15.309108,51.310747
...,...,...,...,...
427,8,United Kingdom,14.483837,66.207411
428,9,United Kingdom,12.569213,78.247843
429,10,United Kingdom,9.829463,104.415156
430,11,United Kingdom,7.253223,110.589771


In [88]:
# Spot-check: show only the rows whose country is Spain.
full_df.loc[full_df["country"].eq("Spain")]

Unnamed: 0,month,country,tas,pr
132,1,Spain,9.765195,68.057598
133,2,Spain,10.102067,53.024258
134,3,Spain,11.476593,54.440702
135,4,Spain,12.942443,48.518018
136,5,Spain,15.344139,41.818017
137,6,Spain,18.876467,29.969587
138,7,Spain,21.763574,21.633093
139,8,Spain,21.874154,22.739978
140,9,Spain,19.729503,30.99613
141,10,Spain,16.169937,50.474815


In [93]:
# One ("<year>M<two-digit month>", month number) pair for every month from
# January 1970 through December 2019, in chronological order.
all_months_to_month = [
    ("{0}M{1:02}".format(1970 + years_elapsed, month_index + 1), month_index + 1)
    for years_elapsed, month_index in (
        divmod(months_elapsed, 12) for months_elapsed in range((2020 - 1970) * 12)
    )
]
all_months_to_month

[('1970M01', 1),
 ('1970M02', 2),
 ('1970M03', 3),
 ('1970M04', 4),
 ('1970M05', 5),
 ('1970M06', 6),
 ('1970M07', 7),
 ('1970M08', 8),
 ('1970M09', 9),
 ('1970M10', 10),
 ('1970M11', 11),
 ('1970M12', 12),
 ('1971M01', 1),
 ('1971M02', 2),
 ('1971M03', 3),
 ('1971M04', 4),
 ('1971M05', 5),
 ('1971M06', 6),
 ('1971M07', 7),
 ('1971M08', 8),
 ('1971M09', 9),
 ('1971M10', 10),
 ('1971M11', 11),
 ('1971M12', 12),
 ('1972M01', 1),
 ('1972M02', 2),
 ('1972M03', 3),
 ('1972M04', 4),
 ('1972M05', 5),
 ('1972M06', 6),
 ('1972M07', 7),
 ('1972M08', 8),
 ('1972M09', 9),
 ('1972M10', 10),
 ('1972M11', 11),
 ('1972M12', 12),
 ('1973M01', 1),
 ('1973M02', 2),
 ('1973M03', 3),
 ('1973M04', 4),
 ('1973M05', 5),
 ('1973M06', 6),
 ('1973M07', 7),
 ('1973M08', 8),
 ('1973M09', 9),
 ('1973M10', 10),
 ('1973M11', 11),
 ('1973M12', 12),
 ('1974M01', 1),
 ('1974M02', 2),
 ('1974M03', 3),
 ('1974M04', 4),
 ('1974M05', 5),
 ('1974M06', 6),
 ('1974M07', 7),
 ('1974M08', 8),
 ('1974M09', 9),
 ('1974M10', 10),
 

Unnamed: 0,time,month
0,1970M01,1
1,1970M02,2
2,1970M03,3
3,1970M04,4
4,1970M05,5
...,...,...
595,2019M08,8
596,2019M09,9
597,2019M10,10
598,2019M11,11


In [101]:
# Repeat each (country, month) row of `full_df` once for every "time" label
# that falls in that calendar month, then order the result by country and time.
extrapolated_df = (
    pd.DataFrame(all_months_to_month, columns=["time", "month"])
    .merge(full_df, how="inner", on="month")
    .sort_values(by=["country", "time"])
)
extrapolated_df

Unnamed: 0,time,month,country,tas,pr
0,1970M01,1,Albania,4.737906,82.500861
1800,1970M02,2,Albania,5.296609,65.376423
3600,1970M03,3,Albania,7.577846,64.389188
5400,1970M04,4,Albania,10.959544,56.989191
7200,1970M05,5,Albania,15.309108,51.310747
...,...,...,...,...,...
14399,2019M08,8,United Kingdom,14.483837,66.207411
16199,2019M09,9,United Kingdom,12.569213,78.247843
17999,2019M10,10,United Kingdom,9.829463,104.415156
19799,2019M11,11,United Kingdom,7.253223,110.589771


In [102]:
# Persist the expanded table as tab-separated text. With to_csv's default
# index=True the (non-contiguous, post-sort) row index is written as the
# first, unnamed column.
extrapolated_df.to_csv("data_store/climate.tsv", sep='\t')