In [2]:
import pandas as pd

# Load the processed market CSV into a DataFrame, using the 'Date' column
# as the index and asking pandas to parse that index as dates.
# NOTE(review): this is an absolute, machine-specific path — consider a
# configurable data directory so the notebook runs elsewhere.
file_path = r"c:\Users\rober\Documents\Python\rg_project\data\processed\market_data_synced_clean.csv"
df_market_synced_clean = pd.read_csv(
    file_path,
    parse_dates=True,
    index_col='Date',
)

# Preview the first rows to confirm the load
print(df_market_synced_clean.head())

                   BTC        DXY         Gold        SP500  Yield_10Y  \
Date                                                                     
2016-01-04  433.091003  98.870003  1075.099976  2012.660034      2.245   
2016-01-05  431.959991  99.400002  1078.400024  2016.709961      2.248   
2016-01-06  429.105011  99.180000  1091.900024  1990.260010      2.177   
2016-01-07  458.048004  98.220001  1107.699951  1943.089966      2.153   
2016-01-08  453.230011  98.540001  1097.800049  1922.030029      2.130   

                  VIX  
Date                   
2016-01-04  20.700001  
2016-01-05  19.340000  
2016-01-06  20.590000  
2016-01-07  24.990000  
2016-01-08  27.010000  


In [3]:
# Add a Base-100 version of each series, rebased on the first row:
#     <col>_norm = (value / value in the first row) * 100
# VIX is intentionally not in the list and stays as an absolute level.
columns_to_rebase = ['BTC', 'DXY', 'Gold', 'SP500', 'Yield_10Y']

for name in columns_to_rebase:
    series = df_market_synced_clean[name]
    base_value = series.iloc[0]  # first row's value is the 100 reference point
    df_market_synced_clean[f'{name}_norm'] = series.div(base_value).mul(100)

# Preview the frame with the new *_norm columns
print(df_market_synced_clean.head())

                   BTC        DXY         Gold        SP500  Yield_10Y  \
Date                                                                     
2016-01-04  433.091003  98.870003  1075.099976  2012.660034      2.245   
2016-01-05  431.959991  99.400002  1078.400024  2016.709961      2.248   
2016-01-06  429.105011  99.180000  1091.900024  1990.260010      2.177   
2016-01-07  458.048004  98.220001  1107.699951  1943.089966      2.153   
2016-01-08  453.230011  98.540001  1097.800049  1922.030029      2.130   

                  VIX    BTC_norm    DXY_norm   Gold_norm  SP500_norm  \
Date                                                                    
2016-01-04  20.700001  100.000000  100.000000  100.000000  100.000000   
2016-01-05  19.340000   99.738851  100.536056  100.306953  100.201223   
2016-01-06  20.590000   99.079641  100.313541  101.562650   98.887044   
2016-01-07  24.990000  105.762530   99.342570  103.032274   96.543377   
2016-01-08  27.010000  104.650064   99.6662

In [4]:
# Build row-over-row change columns for the correlation analysis.
#
# - BTC, DXY, Gold, SP500: fractional change versus the previous row,
#   computed with pct_change().
# - Yield_10Y: the original note treats it as already being a percentage,
#   so the first difference (diff()) is used instead.
#   NOTE: the column keeps the '_pct_chg' suffix for naming consistency,
#   but it holds diff() output, not a percent change.

# 'BTC', 'DXY', 'Gold', 'SP500' % change columns
pct_change_cols = ['BTC', 'DXY', 'Gold', 'SP500']
for col in pct_change_cols:
    df_market_synced_clean[f'{col}_pct_chg'] = df_market_synced_clean[col].pct_change()

# Yield first-difference column
df_market_synced_clean['Yield_10Y_pct_chg'] = df_market_synced_clean['Yield_10Y'].diff()

# pct_change()/diff() leave NaN in the first row because there is no
# previous row to compare against. Zero-fill ONLY the change columns
# created above: a frame-wide fillna(0) would also overwrite any missing
# value in the price, level, or *_norm columns with 0.
change_cols = [f'{name}_pct_chg' for name in pct_change_cols] + ['Yield_10Y_pct_chg']
df_market_synced_clean[change_cols] = df_market_synced_clean[change_cols].fillna(0)

# Checking results
print(df_market_synced_clean.head())

                   BTC        DXY         Gold        SP500  Yield_10Y  \
Date                                                                     
2016-01-04  433.091003  98.870003  1075.099976  2012.660034      2.245   
2016-01-05  431.959991  99.400002  1078.400024  2016.709961      2.248   
2016-01-06  429.105011  99.180000  1091.900024  1990.260010      2.177   
2016-01-07  458.048004  98.220001  1107.699951  1943.089966      2.153   
2016-01-08  453.230011  98.540001  1097.800049  1922.030029      2.130   

                  VIX    BTC_norm    DXY_norm   Gold_norm  SP500_norm  \
Date                                                                    
2016-01-04  20.700001  100.000000  100.000000  100.000000  100.000000   
2016-01-05  19.340000   99.738851  100.536056  100.306953  100.201223   
2016-01-06  20.590000   99.079641  100.313541  101.562650   98.887044   
2016-01-07  24.990000  105.762530   99.342570  103.032274   96.543377   
2016-01-08  27.010000  104.650064   99.6662

In [5]:
# Write the enriched frame (index included) to a CSV at the relative path
# below.
# NOTE(review): 'sycned' in the filename looks like a typo for 'synced';
# left unchanged here because other code may read this exact name.
df_market_synced_clean.to_csv(path_or_buf="market_data_sycned_final.csv")