# Basic Model

This notebook walks through the initial data transformations and then runs the data through a baseline model.

In [19]:
import pandas as pd
import numpy as np

In [35]:
# Load the raw exchange-rate table. The first CSV column becomes the row
# index, and the literal string 'ND' is read as a missing value (NaN).
df = pd.read_csv(
    'data/Foreign_Exchange_Rates.csv',
    na_values=['ND'],
    index_col=0,
)
df.head()

Unnamed: 0,Time Serie,AUSTRALIA - AUSTRALIAN DOLLAR/US$,EURO AREA - EURO/US$,NEW ZEALAND - NEW ZELAND DOLLAR/US$,UNITED KINGDOM - UNITED KINGDOM POUND/US$,BRAZIL - REAL/US$,CANADA - CANADIAN DOLLAR/US$,CHINA - YUAN/US$,HONG KONG - HONG KONG DOLLAR/US$,INDIA - INDIAN RUPEE/US$,...,SINGAPORE - SINGAPORE DOLLAR/US$,DENMARK - DANISH KRONE/US$,JAPAN - YEN/US$,MALAYSIA - RINGGIT/US$,NORWAY - NORWEGIAN KRONE/US$,SWEDEN - KRONA/US$,SRI LANKA - SRI LANKAN RUPEE/US$,SWITZERLAND - FRANC/US$,TAIWAN - NEW TAIWAN DOLLAR/US$,THAILAND - BAHT/US$
0,2000-01-03,1.5172,0.9847,1.9033,0.6146,1.805,1.4465,8.2798,7.7765,43.55,...,1.6563,7.329,101.7,3.8,7.964,8.443,72.3,1.5808,31.38,36.97
1,2000-01-04,1.5239,0.97,1.9238,0.6109,1.8405,1.4518,8.2799,7.7775,43.55,...,1.6535,7.218,103.09,3.8,7.934,8.36,72.65,1.5565,30.6,37.13
2,2000-01-05,1.5267,0.9676,1.9339,0.6092,1.856,1.4518,8.2798,7.778,43.55,...,1.656,7.208,103.77,3.8,7.935,8.353,72.95,1.5526,30.8,37.1
3,2000-01-06,1.5291,0.9686,1.9436,0.607,1.84,1.4571,8.2797,7.7785,43.55,...,1.6655,7.2125,105.19,3.8,7.94,8.3675,72.95,1.554,31.75,37.62
4,2000-01-07,1.5272,0.9714,1.938,0.6104,1.831,1.4505,8.2794,7.7783,43.55,...,1.6625,7.2285,105.17,3.8,7.966,8.415,73.15,1.5623,30.85,37.3


In [36]:
# Keep only the date column plus the EUR and GBP rates, give them short names,
# and parse the date strings into datetimes.
#
# Everything is done in one chain that returns a brand-new frame: assigning
# `df['day'] = ...` on a column-sliced frame triggers pandas'
# SettingWithCopyWarning, and renaming by explicit mapping (rather than by
# position) keeps each short name tied to the column it came from.
#
# NOTE: this cell rebinds `df`, so it cannot be re-run without first re-running
# the load cell (the long column names no longer exist afterwards).
df = (
    df[['Time Serie', 'EURO AREA - EURO/US$', 'UNITED KINGDOM - UNITED KINGDOM POUND/US$']]
    .rename(columns={
        'Time Serie': 'day',
        'EURO AREA - EURO/US$': 'EURO',
        'UNITED KINGDOM - UNITED KINGDOM POUND/US$': 'POUND',
    })
    .assign(day=lambda frame: pd.to_datetime(frame['day']))
)
df.head()

Unnamed: 0,day,EURO,POUND
0,2000-01-03,0.9847,0.6146
1,2000-01-04,0.97,0.6109
2,2000-01-05,0.9676,0.6092
3,2000-01-06,0.9686,0.607
4,2000-01-07,0.9714,0.6104


In [40]:
# Index the rates by date and fill missing observations.
# `interpolate()` with its default method ('linear') fills NaNs between known
# values treating rows as equally spaced; it does not use the dates themselves.
# Built as a single expression (no `inplace=True`) so the cell simply rebinds
# `work_df` from `df` each time it runs.
work_df = df.set_index('day').interpolate()
work_df.head()

Unnamed: 0_level_0,EURO,POUND
day,Unnamed: 1_level_1,Unnamed: 2_level_1
2000-01-03,0.9847,0.6146
2000-01-04,0.97,0.6109
2000-01-05,0.9676,0.6092
2000-01-06,0.9686,0.607
2000-01-07,0.9714,0.6104


In [72]:
# Peek at the most recent EURO observations (rows ordered newest-first).
euro_only = work_df[['EURO']]
euro_only.sort_values(by='day', ascending=False).head()

Unnamed: 0_level_0,EURO
day,Unnamed: 1_level_1
2019-12-31,0.8907
2019-12-30,0.8915
2019-12-27,0.8949
2019-12-26,0.9007
2019-12-25,0.90145


In [78]:
# Every calendar day (weekends included) between the two given dates.
pd.date_range(start='2000-01-03', end='2019-12-31', freq='D')

DatetimeIndex(['2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06',
               '2000-01-07', '2000-01-08', '2000-01-09', '2000-01-10',
               '2000-01-11', '2000-01-12',
               ...
               '2019-12-22', '2019-12-23', '2019-12-24', '2019-12-25',
               '2019-12-26', '2019-12-27', '2019-12-28', '2019-12-29',
               '2019-12-30', '2019-12-31'],
              dtype='datetime64[ns]', length=7303, freq='D')

In [80]:
# (rows, columns) of the trimmed frame; its columns are day, EURO and POUND.
df.shape

(5217, 3)

In [79]:
# (rows, columns) of the day-indexed working frame built from `df`.
work_df.shape

(5217, 2)

In [90]:
def rolling_func(x, min_obs=3):
    """Label one rolling window: is its last value below its first value?

    Parameters
    ----------
    x : array-like
        The values of a single window, in window order. This is a
        ``pandas.Series`` when pandas calls the function with ``raw=False``
        and a NumPy array with ``raw=True``; plain sequences work as well.
    min_obs : int, default 3
        Minimum number of observations the window must contain to be labelled.

    Returns
    -------
    float
        ``-1.0`` when the window holds fewer than ``min_obs`` observations,
        otherwise ``1.0`` if the last value is strictly less than the first
        and ``0.0`` if it is not (NaN comparisons evaluate to ``0.0``).
    """
    # Index a plain array so [0] and [-1] are always positional. On a Series,
    # integer keys are label-based; the positional fallback that applies to
    # non-integer indexes (e.g. a DatetimeIndex) is deprecated in pandas 2.x.
    values = np.asarray(x)
    if len(values) < min_obs:
        return -1.0
    return float(values[-1] < values[0])


# Build the `target` column from the EURO series.
#
# The series is ordered newest-first before rolling, so each 3-calendar-day
# window (both ends included) ends on the current row and begins at the latest
# later-dated observation inside the window; `rolling_func` then compares the
# current value (last) with that later value (first).
#
# NOTE(review): the window is measured in calendar days, so rows with fewer
# than three observations in range (e.g. just before a weekend gap) receive
# the -1 sentinel from `rolling_func` - confirm this is intended.
euro_newest_first = work_df[['EURO']].sort_values(by='day', ascending=False)
window_labels = euro_newest_first.rolling('3d', closed='both').agg(rolling_func)

# Back to chronological order; the assignment aligns on the `day` index.
work_df['target'] = window_labels.sort_values(by='day', ascending=True)
work_df.head(n=10)

Unnamed: 0_level_0,EURO,POUND,target
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-03,0.9847,0.6146,0.0
2000-01-04,0.97,0.6109,0.0
2000-01-05,0.9676,0.6092,1.0
2000-01-06,0.9686,0.607,-1.0
2000-01-07,0.9714,0.6104,-1.0
2000-01-10,0.9754,0.6107,0.0
2000-01-11,0.9688,0.6068,1.0
2000-01-12,0.9727,0.6073,1.0
2000-01-13,0.9737,0.6067,-1.0
2000-01-14,0.9874,0.6115,-1.0


Unnamed: 0_level_0,EURO,POUND
day,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-12-31,0.89070,0.75360
2019-12-30,0.89150,0.76100
2019-12-27,0.89490,0.76390
2019-12-26,0.90070,0.76880
2019-12-25,0.90145,0.77035
...,...,...
2000-01-07,0.97140,0.61040
2000-01-06,0.96860,0.60700
2000-01-05,0.96760,0.60920
2000-01-04,0.97000,0.61090
