## Unemployment Rate

In [5]:
import pandas as pd
import numpy as np

- **path_ue**: path of the unemployment rate file for an individual country (the data can only be downloaded one country at a time)
- **country_code**: country code used throughout (refer to README)
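- **fred_code**: alphanumeric series code that the FRED website assigns to each dataset for each country (e.g. `LRHUTTTTUSM156S` for `USD`); it is also the header of the value column in the downloaded file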

In [6]:
def clean_data(path_ue, country_code, fred_code):
    """
    Load one country's unemployment-rate CSV and prepare it for merging.

    Parameters
    ----------
    path_ue : path or file-like object passed straight to ``pd.read_csv``.
    country_code : prefix used to name the rate column ``'<country_code>_UE'``.
    fred_code : header of the rate column in the raw file.

    Returns
    -------
    DataFrame in which 'DATE' has been renamed to 'index' and parsed as
    datetimes, 'year' and 'month' have been derived from it, and the rate
    column has been divided by 100.
    """
    rate_col = f'{country_code}_UE'

    cleaned = pd.read_csv(path_ue).rename(columns={fred_code: rate_col, 'DATE': 'index'})

    # Parse the dates once and reuse the parsed series for the calendar columns.
    dates = pd.to_datetime(cleaned['index'])
    cleaned['index'] = dates
    cleaned['year'] = dates.dt.year
    cleaned['month'] = dates.dt.month

    # Scale the raw values down by 100 (e.g. 4.0 becomes 0.040).
    cleaned[rate_col] = cleaned[rate_col] / 100

    return cleaned

In [60]:
def if_quarterly(df, reference_path='/Users/zumiis/final_not4git/interest rate/switzerland from fred.csv'):
    """
    Put an unemployment frame onto the dates of a reference file and fill the gaps.

    For some countries the data frequency is not consistent (the function name
    suggests quarterly rather than monthly data); in that case use this
    function after ``clean_data``.

    Parameters
    ----------
    df : DataFrame with a datetime 'index' column and the rate column(s).
        Its own 'month' and 'year' columns, if present, are ignored in favour
        of the ones derived from the reference dates.
    reference_path : path or file-like object for a CSV with a 'DATE' column.
        Only that column is read. The default is the file this notebook
        originally hard-coded; pass your own path when running elsewhere.

    Returns
    -------
    DataFrame with columns 'index', 'month', 'year' followed by the remaining
    columns of ``df``, outer-joined on 'index' and passed through
    ``DataFrame.interpolate()`` with its default settings.
    The input ``df`` is not modified.
    """
    # Only the dates of the reference file are needed, so its value column is
    # never loaded (no throwaway column to rename and drop afterwards).
    calendar = pd.read_csv(reference_path, usecols=['DATE']).rename(columns={'DATE': 'index'})
    calendar['index'] = pd.to_datetime(calendar['index'])
    calendar['month'] = calendar['index'].dt.month
    calendar['year'] = calendar['index'].dt.year

    # Drop df's own calendar columns up front so the merge yields a single
    # 'month'/'year' pair taken from the reference dates.
    values = df.drop(columns=['month', 'year'], errors='ignore')

    # The outer join keeps every date from both frames; dates missing from df
    # come out as NaN and are then filled by the interpolation.
    merged_df = pd.merge(calendar, values, on='index', how='outer').interpolate()

    return merged_df


In [8]:
def data_combine(path, df_ue):
    """
    Attach the unemployment columns to the dataset stored at ``path``.

    The CSV at ``path`` is read and its shape printed, then it is joined to
    ``df_ue`` on matching ('month', 'year') pairs. The 'index' column is
    removed from the merged result before it is returned.
    """
    join_keys = ['month', 'year']

    main_df = pd.read_csv(path)
    print(main_df.shape)  # shape before the merge; the original run noted "still 4997 rows"

    combined = main_df.merge(df_ue, left_on=join_keys, right_on=join_keys)

    return combined.drop(columns='index')

In [9]:
def append_df(df, df_ue):
    """
    Merge one country's unemployment column(s) into the combined frame.

    Parameters
    ----------
    df : combined DataFrame built so far; must contain 'month' and 'year'.
    df_ue : DataFrame for the country being added; must contain 'month' and
        'year'. Its 'index' column, if present, is left out of the merge.

    Returns
    -------
    New DataFrame: ``df`` joined to ``df_ue`` (minus 'index') on matching
    ('month', 'year') pairs. Neither input is modified.
    """
    # Drop on a copy rather than in place: the caller's df_ue keeps its
    # 'index' column, so the calling cell can be re-run without a KeyError.
    ue_values = df_ue.drop(columns='index', errors='ignore')

    return pd.merge(df, ue_values, on=['month', 'year'])

In [56]:
# Per-country inputs for clean_data: replace path_ue, country_code and fred_code
# with the values for the chosen unemployment file before running the cells below.

path_ue = '<path to unemployment file>'  # placeholder: CSV for a single country
country_code = 'USD'  # used as the column prefix, e.g. 'USD_UE'
fred_code = 'LRHUTTTTUSM156S'  # header of the rate column in the downloaded CSV

In [57]:
# Clean the chosen country's file, then display it to check the row count.
df_ue = clean_data(path_ue, country_code, fred_code)
df_ue #if there are fewer than 240 rows, run the if_quarterly cell below as well

Unnamed: 0,index,USD_UE,year,month
0,2000-01-01,0.040,2000,1
1,2000-02-01,0.041,2000,2
2,2000-03-01,0.040,2000,3
3,2000-04-01,0.038,2000,4
4,2000-05-01,0.040,2000,5
...,...,...,...,...
235,2019-08-01,0.037,2019,8
236,2019-09-01,0.035,2019,9
237,2019-10-01,0.036,2019,10
238,2019-11-01,0.035,2019,11


In [25]:
# Only needed when the cleaned frame above has fewer than 240 rows.
# NOTE(review): df_ue is reassigned from itself, so re-running this cell feeds
# the already-expanded frame back into if_quarterly.
df_ue = if_quarterly(df_ue)
df_ue #expected: 240 rows

Unnamed: 0,index,month,year,BRL_UE
0,2000-01-01,1,2000,0.137662
1,2000-02-01,2,2000,0.143241
2,2000-03-01,3,2000,0.134971
3,2000-04-01,4,2000,0.132854
4,2000-05-01,5,2000,0.132453
...,...,...,...,...
235,2019-08-01,8,2019,0.080067
236,2019-09-01,9,2019,0.080067
237,2019-10-01,10,2019,0.080067
238,2019-11-01,11,2019,0.080067


As the data can only be downloaded one country at a time, we have to manually merge the unemployment rate values into one dataframe. To do so, a copy of the first 'df_ue' is made and set aside as the dataframe ('df') to which the other countries' unemployment rate values will be appended. From the second 'df_ue' onward, the values are appended to 'df', giving a dataset that contains only the unemployment rate values.

In [12]:
# Run this only for the first country: it seeds the combined frame `df` with a copy of df_ue.
# Comment it out afterwards; re-running it replaces `df` with the current df_ue.
df = df_ue.copy()

In [58]:
# From the second country onward: merge the current df_ue into the combined frame.
# Run once per country; `df` is overwritten with the merged result each time.
df = append_df(df, df_ue)

In [59]:
# Display the combined unemployment frame built so far.
df

Unnamed: 0,index,AUD_UE,year,month,NZD_UE,GBP_UE,BRL_UE,CND_UE,KRW_UE,MXN_UE,DKK_UE,JPY_UE,NOK_UE,SEK_UE,USD_UE
0,2000-01-01,0.067678,2000,1,0.065000,0.057,0.137662,0.068,0.051,0.023000,0.050,0.047,0.034,0.062,0.040
1,2000-02-01,0.066192,2000,2,0.064333,0.057,0.143241,0.069,0.048,0.021000,0.046,0.049,0.034,0.061,0.041
2,2000-03-01,0.065651,2000,3,0.063667,0.056,0.134971,0.069,0.045,0.022000,0.044,0.049,0.033,0.059,0.040
3,2000-04-01,0.063764,2000,4,0.063000,0.056,0.132854,0.067,0.044,0.027921,0.044,0.048,0.032,0.059,0.038
4,2000-05-01,0.064146,2000,5,0.062000,0.055,0.132453,0.066,0.043,0.027226,0.045,0.046,0.030,0.056,0.040
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,2019-08-01,0.052471,2019,8,0.041667,0.038,0.080067,0.057,0.031,0.035217,0.049,0.023,0.037,0.072,0.037
236,2019-09-01,0.052005,2019,9,0.041333,0.037,0.080067,0.055,0.034,0.035361,0.052,0.024,0.039,0.067,0.035
237,2019-10-01,0.052979,2019,10,0.041000,0.037,0.080067,0.056,0.035,0.035525,0.052,0.024,0.038,0.067,0.036
238,2019-11-01,0.051451,2019,11,0.041000,0.037,0.080067,0.059,0.036,0.034918,0.051,0.022,0.040,0.073,0.035


In [61]:
# Save the combined unemployment frame; index=False leaves the row index out of the file.
# Replace the placeholder with a real output path before running.
df.to_csv('<path to save unemployment rate dataset>', index=False)

- **new_path**: path of new main dataset 3, created from the 'load_data_trade' notebook

In [62]:
# Placeholder: replace with the location of new main dataset 3 (read by data_combine).
new_path = '<path of new main dataset 3>'

In [63]:
# Merge the unemployment columns into the main dataset, then check its size and missing values.
# data_combine prints the shape of the main dataset it read; the first print below shows the shape after the merge.
df_with_ue = data_combine(new_path, df)
print(df_with_ue.shape) #4997 rows expected
print(df_with_ue.isna().sum())  # number of missing values per column

(4997, 69)
(4997, 81)
Time Series    0
AUD_USD        0
NZD_USD        0
GBP_USD        0
BRL_USD        0
              ..
DKK_UE         0
JPY_UE         0
NOK_UE         0
SEK_UE         0
USD_UE         0
Length: 81, dtype: int64


In [65]:
# Save the final merged dataset; index=False leaves the row index out of the file.
# Replace the placeholder with a real output path before running.
df_with_ue.to_csv('<path to save final dataset>', index=False)

In [64]:
# Display the final dataset for reference: this is what it should look like.
df_with_ue

Unnamed: 0,Time Series,AUD_USD,NZD_USD,GBP_USD,BRL_USD,CND_USD,CNY_USD,IDR_USD,KRW_USD,MXN_USD,...,GBP_UE,BRL_UE,CND_UE,KRW_UE,MXN_UE,DKK_UE,JPY_UE,NOK_UE,SEK_UE,USD_UE
0,2000-01-03,1.5172,1.9033,0.6146,1.8050,1.4465,8.2798,43.55,1128.00,9.4015,...,0.057,0.137662,0.068,0.051,0.023000,0.050,0.047,0.034,0.062,0.040
1,2000-01-04,1.5239,1.9238,0.6109,1.8405,1.4518,8.2799,43.55,1122.50,9.4570,...,0.057,0.137662,0.068,0.051,0.023000,0.050,0.047,0.034,0.062,0.040
2,2000-01-05,1.5267,1.9339,0.6092,1.8560,1.4518,8.2798,43.55,1135.00,9.5350,...,0.057,0.137662,0.068,0.051,0.023000,0.050,0.047,0.034,0.062,0.040
3,2000-01-06,1.5291,1.9436,0.6070,1.8400,1.4571,8.2797,43.55,1146.50,9.5670,...,0.057,0.137662,0.068,0.051,0.023000,0.050,0.047,0.034,0.062,0.040
4,2000-01-07,1.5272,1.9380,0.6104,1.8310,1.4505,8.2794,43.55,1138.00,9.5200,...,0.057,0.137662,0.068,0.051,0.023000,0.050,0.047,0.034,0.062,0.040
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4992,2019-12-20,1.4489,1.5135,0.7671,4.0859,1.3170,7.0063,71.07,1160.30,18.9140,...,0.037,0.080067,0.056,0.037,0.032259,0.049,0.022,0.039,0.067,0.035
4993,2019-12-23,1.4457,1.5088,0.7742,4.0621,1.3167,7.0124,71.24,1163.64,18.9310,...,0.037,0.080067,0.056,0.037,0.032259,0.049,0.022,0.039,0.067,0.035
4994,2019-12-26,1.4411,1.5002,0.7688,4.0602,1.3124,6.9949,71.28,1161.18,18.9440,...,0.037,0.080067,0.056,0.037,0.032259,0.049,0.022,0.039,0.067,0.035
4995,2019-12-27,1.4331,1.4919,0.7639,4.0507,1.3073,6.9954,71.45,1160.87,18.8190,...,0.037,0.080067,0.056,0.037,0.032259,0.049,0.022,0.039,0.067,0.035
