In [1]:
import pandas as pd

# Load the cleaned, gap-filled market data CSV into a DataFrame.
# The 'Date' column becomes the index and is parsed as dates.
# NOTE(review): absolute, user-specific path; this cell will only run on a
# machine with the same directory layout. Consider a configurable data directory.
file_path = r"c:\Users\rober\Documents\Python\rg_project\data\processed\market_data_filled_clean.csv"
df_market_filled_clean = pd.read_csv(
    file_path,
    index_col='Date',
    parse_dates=True,
)

# Preview the first five rows as a quick sanity check of the load.
print(df_market_filled_clean.iloc[:5])

                   BTC        DXY         Gold        SP500  Yield_10Y  \
Date                                                                     
2016-01-01  434.334015  98.870003  1075.099976  2012.660034      2.245   
2016-01-02  433.437988  98.870003  1075.099976  2012.660034      2.245   
2016-01-03  430.010986  98.870003  1075.099976  2012.660034      2.245   
2016-01-04  433.091003  98.870003  1075.099976  2012.660034      2.245   
2016-01-05  431.959991  99.400002  1078.400024  2016.709961      2.248   

                  VIX  
Date                   
2016-01-01  20.700001  
2016-01-02  20.700001  
2016-01-03  20.700001  
2016-01-04  20.700001  
2016-01-05  19.340000  


In [3]:
# Base-100 normalization: each series is rebased so that its first
# observation equals 100, i.e. (value / first value) * 100.
# The result is stored alongside the raw data in a new '<name>_norm' column.
# VIX is intentionally excluded and stays as an absolute level.

for col in ('BTC', 'DXY', 'Gold', 'SP500', 'Yield_10Y'):
    series = df_market_filled_clean[col]
    # The first row of the frame is the common base for the whole series.
    df_market_filled_clean[f'{col}_norm'] = series.div(series.iat[0]).mul(100)

# Preview the first five rows to confirm every *_norm column starts at 100.
print(df_market_filled_clean.iloc[:5])

                   BTC        DXY         Gold        SP500  Yield_10Y  \
Date                                                                     
2016-01-01  434.334015  98.870003  1075.099976  2012.660034      2.245   
2016-01-02  433.437988  98.870003  1075.099976  2012.660034      2.245   
2016-01-03  430.010986  98.870003  1075.099976  2012.660034      2.245   
2016-01-04  433.091003  98.870003  1075.099976  2012.660034      2.245   
2016-01-05  431.959991  99.400002  1078.400024  2016.709961      2.248   

                  VIX    BTC_norm    DXY_norm   Gold_norm  SP500_norm  \
Date                                                                    
2016-01-01  20.700001  100.000000  100.000000  100.000000  100.000000   
2016-01-02  20.700001   99.793701  100.000000  100.000000  100.000000   
2016-01-03  20.700001   99.004676  100.000000  100.000000  100.000000   
2016-01-04  20.700001   99.713812  100.000000  100.000000  100.000000   
2016-01-05  19.340000   99.453411  100.5360

In [5]:
# Creating period-over-period change columns for the correlation analysis.
# Price-like series (BTC, DXY, Gold, SP500) use the simple percentage change.
# Yield_10Y is already a %, so it uses first differencing instead
# (a change in percentage points rather than a percent of a percent).

# 'BTC', 'DXY', 'Gold', 'SP500' % change columns
price_cols = ['BTC', 'DXY', 'Gold', 'SP500']
for col in price_cols:
    df_market_filled_clean[f'{col}_pct_chg'] = df_market_filled_clean[col].pct_change()

# Yield change column.
# NOTE: despite the '_pct_chg' suffix (kept so the column name written to the
# output CSV does not change), this is a first difference, not a percent change.
df_market_filled_clean['Yield_10Y_pct_chg'] = df_market_filled_clean['Yield_10Y'].diff()

# Clean up the first row: pct_change()/diff() have no previous observation
# there and return NaN. Only the change columns are filled; a frame-wide
# fillna(0) would also overwrite any missing price/VIX/normalised value with
# a fake 0 and hide a data-quality problem.
change_cols = [f'{col}_pct_chg' for col in price_cols] + ['Yield_10Y_pct_chg']
df_market_filled_clean[change_cols] = df_market_filled_clean[change_cols].fillna(0)

# Checking results
print(df_market_filled_clean.head())

                   BTC        DXY         Gold        SP500  Yield_10Y  \
Date                                                                     
2016-01-01  434.334015  98.870003  1075.099976  2012.660034      2.245   
2016-01-02  433.437988  98.870003  1075.099976  2012.660034      2.245   
2016-01-03  430.010986  98.870003  1075.099976  2012.660034      2.245   
2016-01-04  433.091003  98.870003  1075.099976  2012.660034      2.245   
2016-01-05  431.959991  99.400002  1078.400024  2016.709961      2.248   

                  VIX    BTC_norm    DXY_norm   Gold_norm  SP500_norm  \
Date                                                                    
2016-01-01  20.700001  100.000000  100.000000  100.000000  100.000000   
2016-01-02  20.700001   99.793701  100.000000  100.000000  100.000000   
2016-01-03  20.700001   99.004676  100.000000  100.000000  100.000000   
2016-01-04  20.700001   99.713812  100.000000  100.000000  100.000000   
2016-01-05  19.340000   99.453411  100.5360

In [None]:
# Persist the enriched frame (raw levels, *_norm and *_pct_chg columns) as CSV.
# The path is relative, so the file is written to the current working directory.
output_csv = "market_data_filled_final.csv"
df_market_filled_clean.to_csv(output_csv)