# Part 1 - Raw Data

1. Preparation
2. Download the necessary data from Refinitiv/Eikon
3. Create SQLite database and insert the raw data into it

## 1. Preparation

In [1]:
#import of the relevant libraries and connect to the eikon database

import os
import itertools as it
import sqlite3
from sqlite3 import Error

import eikon as ek
import pandas as pd
import numpy as np
import scipy
from statsmodels.tsa.stattools import adfuller
from statsmodels.regression.rolling import RollingOLS
import statsmodels.api as sm
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import matplotlib.dates as mdates

#the Eikon app key is a personal credential and must not be stored in the notebook;
#every user exports their own key as EIKON_APP_KEY before starting Jupyter
eikon_app_key = os.environ.get("EIKON_APP_KEY")
if not eikon_app_key:
    raise RuntimeError("Set the EIKON_APP_KEY environment variable to your Refinitiv/Eikon app key before running this notebook.")
ek.set_app_key(eikon_app_key)

#show complete dataframes when they are displayed
pd.set_option("display.max_rows", None, "display.max_columns", None)

#### 1.1 - Generate multiple lists for the data under investigation

In [2]:
#RICs (Refinitiv Instrument Codes) of the cryptocurrencies
rics_crypto = [
    'BTC=',  #Bitcoin
    'ETH=',  #Ethereum
    'XRP=',  #Ripple
    'LTC=',  #Litecoin
    'BCH=',  #Bitcoin Cash
]

#RICs of the fiat currencies
rics_currency = [
    'EUR=',  #Euro
    'GBP=',  #Pound Sterling
    'JPY=',  #Japanese Yen
    'CHF=',  #Swiss Francs
    'CAD=',  #Canadian Dollar
]

#RICs of the commodities
rics_commodities = [
    'LCOc1',           #Crude Oil (ICE Europe Brent Crude Electronic Energy Future)
    'WTC-',            #Crude Oil (WTI Cushing US FOB)
    'XAU=',            #Gold
    'XAG=',            #Silver
    '.BCOM',           #Bloomberg Commodity Index
    '.dMIWO0EN00PUS',  #MSCI World Energy Index USD (End of Day)
]

#RICs of the stock indices
rics_stockindex = [
    '.NDX',            #Nasdaq 100 Index
    '.SPX',            #S&P 500 Index
    '.FTSE',           #FTSE 100 Index
    '.CSI300',         #China Securities Index 300
    '.dMIWO00000PUS',  #MSCI World Price Index USD (End of Day)
    '.dMIEF00000PUS',  #MSCI Emerging Markets Price Index USD (End of Day)
    '.dMIEU00000PUS',  #MSCI Europe Price Index USD (End of Day)
]

#### 1.2 - Generate some further lists which will be helpful later on

In [3]:
#dependent variables (y): cryptocurrencies followed by fiat currencies
rics_dependent_variables = [*rics_crypto, *rics_currency]

#independent variables (x): commodities followed by stock indices
rics_independent_variables = [*rics_commodities, *rics_stockindex]

#all rics, dependent variables first and independent variables afterwards
rics = [*rics_dependent_variables, *rics_independent_variables]

#desired column names of the dependent variables (crypto and other currencies)
columns_dependent_variables = ['Bitcoin',
                               'Ethereum',
                               'Ripple',
                               'Litecoin',
                               'Bitcoin Cash',
                               'EUR',
                               'GBP',
                               'JPY',
                               'CHF',
                               'CAD']

#desired column names of the independent variables (commodities & indices)
columns_independent_variables = ['Crude Oil ICE',
                                 'Crude Oil WTI',
                                 'Gold',
                                 'Silver',
                                 'Bloomberg Commodity Index',
                                 'MSCI World Energy',
                                 'Nasdaq 100',
                                 'S&P 500',
                                 'FTSE 100',
                                 'CSI 300',
                                 'MSCI World',
                                 'MSCI Emerging Markets',
                                 'MSCI Europe']

#desired names of all columns: built from the two lists above so that every label
#is written down exactly once and the three lists cannot drift apart
columns = columns_dependent_variables + columns_independent_variables

#desired start and end date of the analysis (ISO formatted strings)
startdate = '2012-01-04'
enddate = '2022-03-31'

## 2. Download the necessary data from Refinitiv/Eikon

#### 2.1 - We create loops to download all data for the last 10 years and merge all four dataframes into one

In [4]:
#Download the daily adjusted close prices of all four asset groups and merge them

def download_close_history(ric_list):
    """Download the daily adjusted CLOSE series of every RIC and stack them in long format.

    One request is sent per RIC (this helps to deal with the datapoint limit per
    request). Each returned frame gets a 'RIC' column holding the instrument code.
    The frames are collected in a list and concatenated once, because
    DataFrame.append is deprecated and was removed in pandas 2.0.

    Parameters
    ----------
    ric_list : list of str
        Instrument codes to download.

    Returns
    -------
    pandas.DataFrame
        The frames returned by ek.get_timeseries stacked on top of each other;
        an empty DataFrame if ric_list is empty.
    """
    frames = []
    for ric in ric_list:
        data_input = ek.get_timeseries(ric,
                                       start_date=startdate,
                                       end_date=enddate,
                                       fields='CLOSE',
                                       interval='daily',
                                       corax='adjusted')
        data_input['RIC'] = ric
        frames.append(data_input)
    #pd.concat raises on an empty list, so keep the empty-frame result for that case
    return pd.concat(frames) if frames else pd.DataFrame()


#time series for cryptos
crypto_histo = download_close_history(rics_crypto)

#time series for fiat currencies
currency_histo = download_close_history(rics_currency)

#time series for commodities
commodities_histo = download_close_history(rics_commodities)

#time series for stockindices
stockindex_histo = download_close_history(rics_stockindex)

#merge all four time series into one dataframe
list_data_parameters = [crypto_histo, currency_histo, commodities_histo, stockindex_histo]
all_data_merged = pd.concat(list_data_parameters)
print(all_data_merged.shape)
all_data_merged.head()

(55055, 2)


Unnamed: 0_level_0,CLOSE,RIC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-07-17,623.01,BTC=
2014-07-18,629.7,BTC=
2014-07-19,627.53,BTC=
2014-07-20,624.66,BTC=
2014-07-21,620.0,BTC=



#### 2.2 - Verify the correctness of the data from the API by comparing it with the data which we retrieved via Excel

In [5]:
#convert the dataframe "all_data_merged" into wide format to make it comparable:
#one row per date, one column per RIC (ordered like `rics`), values are the close prices
raw_data_API_index = all_data_merged.index.strftime('%Y-%m-%d')
raw_data_API = (
    all_data_merged
    .assign(Date=raw_data_API_index)
    .pivot(index='Date', columns='RIC', values='CLOSE')
    [rics]
)
#replace the RICs by the readable column names and restore a datetime index
raw_data_API.columns = columns
raw_data_API.index = pd.to_datetime(raw_data_API.index)

In [6]:
#load the excel file (with the data from Refinitiv/Eikon) to python and prepare it for the comparison
#the first column of the sheet is discarded and rows with any missing value are removed
excel_long = pd.read_excel("Raw_data_excel/raw_data_excel.xlsx").iloc[:, 1:]
excel_long = excel_long.dropna(axis=0, how="any")
excel_long.columns = ['RIC','Date','CLOSE']
print(excel_long.isnull().sum())

#wide format: one row per date, one column per RIC (ordered like `rics`)
#NOTE(review): aggfunc="sum" adds up duplicate (Date, RIC) rows if there are any - confirm the sheet has none
raw_data_excel = excel_long.pivot_table(index='Date',
                                        columns='RIC',
                                        values='CLOSE',
                                        aggfunc="sum")[rics]
raw_data_excel.columns = columns
raw_data_excel.index = pd.to_datetime(raw_data_excel.index)

RIC      0
Date     0
CLOSE    0
dtype: int64


In [7]:
#compare the last rows of the two dataframes to verify the correctness of the dataset
#NaN's/<NA> indicate that both datasets(API and excel) do not show a value (this will be cleaned in a next step)
api_tail = raw_data_API.tail()
excel_tail = raw_data_excel.tail()
api_tail == excel_tail

Unnamed: 0_level_0,Bitcoin,Ethereum,Ripple,Litecoin,Bitcoin Cash,EUR,GBP,JPY,CHF,CAD,Crude Oil ICE,Crude Oil WTI,Gold,Silver,Bloomberg Commodity Index,MSCI World Energy,Nasdaq 100,S&P 500,FTSE 100,CSI 300,MSCI World,MSCI Emerging Markets,MSCI Europe
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2022-03-27,True,True,True,True,True,,,,,,,,,,,,,,,,,,
2022-03-28,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
2022-03-29,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
2022-03-30,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
2022-03-31,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True


## 3. Create SQLite database and insert the raw data into it

In [8]:
#create a SQLite database
conn = sqlite3.connect('AQM_Project_Aziz_Oeggerli_Schmid.db')
c = conn.cursor()

#drop a table left over from a previous run: without this the CREATE TABLE below
#fails on every re-execution of the notebook because the table already exists
c.execute('DROP TABLE IF EXISTS raw_data')

#CLOSE holds prices and RIC holds instrument codes, so they are declared REAL and TEXT
#(with an integer column type SQLite stores prices such as 620.0 as the integer 620)
c.execute('''CREATE TABLE raw_data
         (Date TIMESTAMP,
         CLOSE           REAL   NOT NULL,
         RIC            TEXT    NOT NULL)''')

print("Table created successfully")


#create a new column with the date
#this will later be used to re-generate the datetime index of the dataframe after pulling the data from our SQLite database
index = all_data_merged.index.strftime('%Y-%m-%d')
all_data_merged['Date1'] = index


#transform the dataframe into a list
#this needs to be done so that we can insert the data into the before created SQLite database
#the columns are selected by name so that they always match the order of the INSERT statement
all_data_raw_to_insert = all_data_merged[['CLOSE', 'RIC', 'Date1']].values.tolist()


#upload the data to our SQLite database
c.executemany("INSERT INTO raw_data(CLOSE, RIC, Date) VALUES (?,?,?)", all_data_raw_to_insert)
conn.commit()

print("Upload to SQL-database successful")

Table created successfully
Upload to SQL-database successful
