In [1]:
import pandas as pd
import numpy as np
from pandas.tseries.offsets import BDay
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
pd.set_option('display.max_rows', None)

path2 = # FSI_S_P_500_Segments_Data

dates_series = pd.date_range('2005-01-01', '2022-05-28', freq=BDay()).to_series()   # PLACEHOLDER FOR ALL DATES
dates_frame = dates_series.to_frame(name="Date")


##################################    LOAD DATA

features = ['Real_Estate_XLRE', 'Comm_XLC', 'Cons_Stap_XLP', 'Cons_Discr_XLY', 'Energy_XLE', 'Financials_XLF', 'Healthcare_XLV', \
            'Industrials_XLI', 'IT_XLK', 'Utilities_XLU', 'Materials_XLB'] 

segment_list =[dates_frame]
for idx,val in enumerate(features):
    segment = pd.read_csv(path2 + val + '.csv').dropna()
    segment = segment[['Date','Price']].rename(columns={"Price": val})
    segment['Date'] = pd.to_datetime(segment['Date'])
    segment = segment.sort_values(by=['Date'], ascending=False)
    segment = segment.set_index('Date')
    segment_list.append(segment)
    

df = pd.concat(segment_list, axis=1, join="outer")
df = df.dropna(subset=['Date'])
df = df.reset_index(drop=True)

###############   Simulate the data point for the 2 short tame series
XLRE_mean = df['Real_Estate_XLRE'][2810:3010].interpolate(method='linear', limit_direction='both').mean()
XLRE_std = df['Real_Estate_XLRE'][2810:3010].interpolate(method='linear', limit_direction='both').std()
df['Real_Estate_XLRE'][0:2810] = df['Real_Estate_XLRE'][0:2810].apply(lambda x: np.random.normal(XLRE_mean, XLRE_std))
################
XLC_mean = df['Comm_XLC'][3520:3720].interpolate(method='linear', limit_direction='both').mean()
XLC_std = df['Comm_XLC'][3520:3720].interpolate(method='linear', limit_direction='both').std()
df['Comm_XLC'][0:3520] = df['Comm_XLC'][0:3520].apply(lambda x: np.random.normal(XLC_mean, XLC_std))

############### Interpolate empty data
df = df.interpolate(method='linear', limit_direction='both')
df = df.reset_index(drop=True)


##################################################################### STANDARDIZE DATA
df_stand = df.copy()

features = [ 'Real_Estate_XLRE', 'Comm_XLC','Cons_Stap_XLP', 'Cons_Discr_XLY', 'Energy_XLE', 'Financials_XLF', 'Healthcare_XLV', \
            'Industrials_XLI', 'IT_XLK', 'Utilities_XLU', 'Materials_XLB'] 


for col in features:
    col_lst=[]
    print(col)
    for i in range(0,len(df)):
            st_val = (df[col][i] - df[col].mean())/df[col].std()
            col_lst.append(st_val)
        
    df_stand[col] = col_lst

df_stand = df_stand.reset_index(drop=True)

##################################################################### STANDARDIZE DATA IN A ROLLING WINDOW OF 250 DAYS


lag = 250
df_stand_roll = df[lag:].copy()


for col in features:
    col_lst=[]
    print(col)
    for i in range(0,len(df)):
        j=i+lag
        if j<len(df):
            #print(j)
            st_val = (df[col][j] - df[col][i:j].mean())/df[col][i:j].std()
            col_lst.append(st_val)
        
    df_stand_roll[col] = col_lst

df_stand_roll = df_stand_roll.reset_index(drop=True)


################################################################ CHANGE SIGN FOR ALL INDICATORS
df_stand_sign = df_stand_roll.copy()

for i in features:
    df_stand_sign[i] = -df_stand_sign[i]

################################################################ RUN PCA

lag2=10

dfa = df_stand_sign.copy()
weights =[]
pdf =[]
for i in range(0,len(dfa)):
    j=i+lag2
    if j<= len(dfa):
    
    
        x = dfa.loc[0:j, features].values

        #x = StandardScaler().fit_transform(x)       
        pca = PCA(n_components=1)
        principalComponents = pca.fit_transform(x)

        pdf.append(principalComponents[0])
        weights.append(pca.components_[0].tolist())
        #print(i,j,pca.components_[0])
    
PCA_DF = pd.DataFrame(weights, columns = features)



PCA_DF['total_square'] = PCA_DF.Real_Estate_XLRE**2  + PCA_DF.Comm_XLC**2 +\
PCA_DF.Cons_Stap_XLP**2 + PCA_DF.Cons_Discr_XLY**2 + PCA_DF.Energy_XLE**2 + \
PCA_DF.Financials_XLF**2 + PCA_DF.Healthcare_XLV**2 + PCA_DF.Industrials_XLI**2 + PCA_DF.IT_XLK**2  + \
PCA_DF.Utilities_XLU**2 +  PCA_DF.Materials_XLB**2 

PCA_DF['total'] =  PCA_DF.Real_Estate_XLRE  + PCA_DF.Comm_XLC + PCA_DF.Cons_Stap_XLP + \
PCA_DF.Cons_Discr_XLY + PCA_DF.Energy_XLE + \
PCA_DF.Financials_XLF + PCA_DF.Healthcare_XLV+ PCA_DF.Industrials_XLI + PCA_DF.IT_XLK  + \
PCA_DF.Utilities_XLU +  PCA_DF.Materials_XLB 

PCA_DF['Date'] = dfa['Date'][(lag2-1):].reset_index(drop=True)


##################### Multiply Weights with Standardize data ( Get PCAs)

df_stand_short = df_stand_sign[(len(df_stand_sign) - len(PCA_DF)):].reset_index(drop=True)
df_mult = df_stand_short[features] * PCA_DF[features]
df_mult['Date']=PCA_DF['Date']
df_mult['total'] = df_mult[df_mult.columns[:-1]].sum(axis=1)

###################


%matplotlib qt
dfg = df_mult.copy()
plt.figure(figsize=(20,10))
plt.stackplot(dfg.index, 
              [dfg['Comm_XLC'], dfg['Real_Estate_XLRE'], 
               dfg['Cons_Stap_XLP'], \
               dfg['Cons_Discr_XLY'], dfg['Energy_XLE'], \
               dfg['Financials_XLF'], dfg['Healthcare_XLV'],\
               dfg['Industrials_XLI'], dfg['IT_XLK'], \
               dfg['Materials_XLB'],dfg['Utilities_XLU'] ], 
              labels=features,
              alpha=0.8)


w = list(range(0, len(dfg), 90))
labels = list(dfg['Date'].dt.strftime('%Y-%m-%d').iloc[w])
plt.xticks(ticks=w, labels = labels, rotation=90,fontsize=12 )
plt.title ('S&P 500 Segments - Financial Crisis 2008/2009', fontsize=14 )
plt.yticks(fontsize=12 )
plt.ylabel('FSI', fontsize=12)
plt.legend(fontsize=10, loc=1)
plt.show()


######################## AREA PLOTS


%matplotlib qt
dfg = df_mult.copy()

plt.figure(figsize=(20,10))

x1 = list(range(0,len(dfg.Date)))

Comm_XLC = dfg.Comm_XLC.to_list()
Cons_Stap_XLP = dfg.Cons_Stap_XLP.to_list()

Cons_Discr_XLY = dfg.Cons_Discr_XLY.to_list()
Energy_XLE = dfg.Energy_XLE.to_list()

Financials_XLF = dfg.Financials_XLF.to_list()
Healthcare_XLV = dfg.Healthcare_XLV.to_list()

Industrials_XLI = dfg.Industrials_XLI.to_list()
IT_XLK = dfg.IT_XLK.to_list()

Real_Estate_XLRE = dfg.Real_Estate_XLRE.to_list()
Utilities_XLU = dfg.Utilities_XLU.to_list()
Materials_XLB = dfg.Materials_XLB.to_list()

################### LINE PLOTS

#plt.plot(x1, Comm_XLC, label = "Comm_XLC")
plt.plot(x1, Cons_Stap_XLP, label = "Cons_Stap_XLP", linewidth =5)

plt.plot(x1, Cons_Discr_XLY, label = "Cons_Discr_XLY", linewidth =5)
plt.plot(x1, Energy_XLE, label = "Energy_XLE", linewidth =5)

plt.plot(x1, Financials_XLF, label = "Financials_XLF", linewidth =5)
plt.plot(x1, Healthcare_XLV, label = "Healthcare_XLV", linewidth =5)

plt.plot(x1, Industrials_XLI, label = "Industrials_XLI", linewidth =5)
plt.plot(x1, IT_XLK, label = "IT_XLK", linewidth =5)

#plt.plot(x1, Real_Estate_XLRE, label = "Real_Estate_XLRE")

plt.plot(x1, Utilities_XLU, label = "Utilities_XLU", linewidth =5)
plt.plot(x1, Materials_XLB, label = "Materials_XLB", linewidth =5)



w = list(range(0, len(dfg), 10))
labels = list(dfg['Date'].dt.strftime('%Y-%m-%d').iloc[w])
plt.xticks(ticks=w, labels = labels, rotation=90 , fontsize=14 )
plt.title ('S&P 500 Segments - Covid Pandemic', fontsize=20 )
plt.yticks(fontsize=14 )
plt.ylabel('FSI', fontsize=14)

plt.legend(fontsize=14, loc=1)
plt.show()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Real_Estate_XLRE
Comm_XLC
Cons_Stap_XLP
Cons_Discr_XLY
Energy_XLE
Financials_XLF
Healthcare_XLV
Industrials_XLI
IT_XLK
Utilities_XLU
Materials_XLB
Real_Estate_XLRE
Comm_XLC
Cons_Stap_XLP
Cons_Discr_XLY
Energy_XLE
Financials_XLF
Healthcare_XLV
Industrials_XLI
IT_XLK
Utilities_XLU
Materials_XLB
