In [1]:
#import packages needed
import itertools
import os
import time
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import quandl
import requests
from pandas.plotting import register_matplotlib_converters

In [2]:
#store the API_KEY
# Read the key from the QUANDL_API_KEY environment variable so that a real
# credential never has to be saved inside the notebook. When the variable is
# not set, the previous placeholder value "*" is used unchanged.
quandl.ApiConfig.api_key = os.environ.get("QUANDL_API_KEY", "*")

In [3]:
# Module-level copy of the API key, taken from the QUANDL_API_KEY environment
# variable with the previous placeholder "*" as fallback, so no real credential
# is committed with the notebook.
# NOTE(review): presumably this is the same Quandl key as the one configured on
# quandl.ApiConfig — it is not used in the visible cells, confirm before relying on it.
API_KEY = os.environ.get("QUANDL_API_KEY", "*")

### Define Functions needed

In [4]:
#define the function to get the data series we want
def get_econ_data(start_date, end_date, series_id):
    """Download one series from the Quandl 'EDIA/ECD' table, indexed by date.

    Parameters
    ----------
    start_date, end_date
        Inclusive bounds, forwarded as the 'gte' / 'lte' entries of the
        table's ``date`` filter.
    series_id
        Forwarded as the table's ``series_id`` filter.

    Returns
    -------
    pandas.DataFrame
        The columns 'country_code', 'indicator_code' and 'value', with the
        'date' column moved into the index.
    """
    data = quandl.get_table(
        'EDIA/ECD',
        date={'gte': start_date, 'lte': end_date},
        series_id=series_id,
        # Without paginate=True a single get_table call is capped at 10,000
        # rows, which would silently truncate long, high-frequency series.
        paginate=True,
    )
    #drop the columns not needed and set the date as index
    data_reshaped = data[['date', 'country_code', 'indicator_code', 'value']].set_index(['date'])
    return data_reshaped

### Import the Data available for each country

In [5]:
#read in the dataframe with all the different indicators
# By default read_csv treats the literal string "NA" as missing, which would turn
# the ISO country code "NA" into NaN; the groupby below then yields NaN for those
# rows and the filter silently drops them. Only empty cells are treated as
# missing here.
overview = pd.read_csv(
    'Data/Documentation/EDIA_ECD.csv',
    parse_dates=['from_date', 'to_date'],
    keep_default_na=False,
    na_values=[''],
)

#calculate the timespan of the dataset
overview['timespan'] = overview['to_date'] - overview['from_date']

#flag the rows whose timespan equals the biggest timespan of their
#(country_code, indicator_code) group; the string 'max' selects the built-in
#groupby reduction (passing the Python builtin max is deprecated)
idx = overview.groupby(['country_code', 'indicator_code'])['timespan'].transform('max') == overview['timespan']

#filter the dataframe to only show the data series with the biggest timespan per indicator
overview = overview[idx]

#drop duplicates if country_code, indicator_code and timespan are the same
overview = overview.drop_duplicates(['country_code', 'indicator_code', 'timespan'])

#test if the filter was executed correctly
overview.head()

Unnamed: 0,country_code,indicator_code,identifier,series_id,description,from_date,to_date,timespan
0,AD,43,AD.043.A.01,718,Andorra: Corporate Tax Rate [AD: Corporate Tax...,2017-01-01,2020-01-01,1095 days
1,AD,66,AD.066.A.01,719,Andorra: Export Prices [Andorra: Export unit v...,2000-01-01,2018-01-01,6575 days
2,AD,67,AD.067.A.01,720,Andorra: Exports [Andorra: Merchandise exports],2014-01-01,2014-01-01,0 days
3,AD,81,AD.081.1.A.01,721,Andorra: GDP (USD) [Andorra: National accounts...,1970-01-01,2018-01-01,17532 days
5,AD,82,AD.082.A.01,723,Andorra: GDP Annual Growth Rate [Andorra: Annu...,1971-01-01,2018-01-01,17167 days


In [7]:
#write the filtered indicator table to disk (without the index) so that it can
#be reused in the analysis
overview.to_csv(
    path_or_buf='Data/Documentation/selected_indicators.csv',
    index=False,
)

In [6]:
#create a dictionary with the country_code as key and the list of series_ids as value
countries_indicator = overview[['country_code', 'series_id']]

#store the unique countries
countries = countries_indicator['country_code'].unique()

#group once instead of re-filtering the whole frame for every country.
#sort=False keeps the countries in order of first appearance (the same order as
#`countries`), which matters because the download cell slices this dict by position.
#Rows with a missing country_code are skipped by groupby, so no NaN key with an
#empty list is created.
countries_data = {
    country: series_ids.tolist()
    for country, series_ids in countries_indicator.groupby('country_code', sort=False)['series_id']
}

In [8]:
#dictionary to store one dataframe per country, accessible by the country key
dataframes = {}

#positions (start inclusive, stop exclusive) of the countries downloaded in this batch
# NOTE(review): a start of 1 skips the first country in countries_data — confirm
# that position 0 is covered by another batch run.
BATCH_START, BATCH_STOP = 1, 10

#slice the dictionary in order to allow for batch request of the data
for key, value in itertools.islice(countries_data.items(), BATCH_START, BATCH_STOP):
    #collect the per-series frames and concatenate once per country
    #(DataFrame.append was removed in pandas 2.0 and copies the whole frame on every call)
    series_frames = []
    for series_id in value:
        #print the country key and the series_id to check until where the program was executed
        print('key: ', key, 'value: ', series_id)
        #import the economic data from a country in the requested time frame
        data = get_econ_data('1950-01-01', '2020-04-29', series_id)
        series_frames.append(data)
        #take a random rest (0-9 s) to avoid API request rejection; draw a plain
        #scalar because converting a size-1 array to a number is deprecated in NumPy
        time.sleep(int(np.random.randint(10)))
    #pd.concat raises on an empty list, so a country without series keeps an empty frame
    econ_data = pd.concat(series_frames) if series_frames else pd.DataFrame()
    #assign the economic data under the respective country
    dataframes[key] = econ_data
    #store it as a csv to avoid redownloading it every time
    dataframes[key].to_csv('Data/Economic_Data/' + key + '.csv')
    #take a random rest (0-24 s) to avoid API request rejection
    time.sleep(int(np.random.randint(25)))

key:  ZA value:  37487
key:  ZA value:  37489
key:  ZA value:  37491
key:  ZA value:  37496
key:  ZA value:  37499
key:  ZA value:  37501
key:  ZA value:  37502
key:  ZA value:  37503
key:  ZA value:  37504
key:  ZA value:  37508
key:  ZA value:  37510
key:  ZA value:  37513
key:  ZA value:  37517
key:  ZA value:  37518
key:  ZA value:  37520
key:  ZA value:  37522
key:  ZA value:  37525
key:  ZA value:  37527
key:  ZA value:  37529
key:  ZA value:  37535
key:  ZA value:  37536
key:  ZA value:  37537
key:  ZA value:  37539
key:  ZA value:  37541
key:  ZA value:  37542
key:  ZA value:  37544
key:  ZA value:  37545
key:  ZA value:  37548
key:  ZA value:  37551
key:  ZA value:  37553
key:  ZA value:  37554
key:  ZA value:  37555
key:  ZA value:  37556
key:  ZA value:  37558
key:  ZA value:  37560
key:  ZA value:  37565
key:  ZA value:  37568
key:  ZA value:  37571
key:  ZA value:  37573
key:  ZA value:  37574
key:  ZA value:  37576
key:  ZA value:  37580
key:  ZA value:  37582
key:  ZA va

key:  ZW value:  45809
key:  ZW value:  45810
key:  ZW value:  45811
key:  ZW value:  45814
key:  ZW value:  45815
key:  ZW value:  45816
key:  ZW value:  45817
key:  ZW value:  45819
key:  ZW value:  45820
key:  ZW value:  45822
key:  ZW value:  45823
key:  ZW value:  45824
key:  ZW value:  45825
key:  ZW value:  45827
key:  ZW value:  45828
key:  ZW value:  45831
key:  ZW value:  45832
key:  ZW value:  45834
key:  ZW value:  45837
key:  ZW value:  45839
key:  ZW value:  45841
