## **This code aims to generate PCA dataframes**

Remember that **PCA dataframes already contain the return column**

In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from factor_analyzer import Rotator

In [2]:
project_dir = "/home/jupyter-tfg2425paula/prediction_project_v3"
os.chdir(project_dir)

clean_data_dir = os.path.join(project_dir, "00_data/clean")
pca_data_dir = os.path.join(project_dir, "00_data/pca")

In [3]:
types_securities = ["options", "technical"]
stocks = ['AAPL', 'MSFT', 'AMZN', 'NVDA', 'SPX']
years = ["15y", "10y", "5y", "2y"]

In [4]:
def generate_rotated_pca_df(features_df, target_df):
    
    pca = PCA(n_components=0.90)
    pca_data = pca.fit_transform(features_df)
    num_components = pca.n_components_
    explained_variance = pca.explained_variance_ratio_

    print(f'Number of selected components: {num_components}')
    print(f'Explained variance by component: {explained_variance}')

    original_features = features_df.columns
    pca_components = pd.DataFrame(
        pca.components_, columns=original_features, index=[f'PC{i+1}' for i in range(num_components)])

    pca_loadings = pca_components.values

    # Rotation
    rotator = Rotator(method='oblimin')
    rotated_loadings = rotator.fit_transform(pca_loadings)
    rotated_pca_components = pd.DataFrame(
        rotated_loadings,
        columns=pca_components.columns,
        index=pca_components.index
    )

    rotated_pca_data = np.dot(features_df, rotated_loadings.T)

    rotated_pca_features_df = pd.DataFrame(
        rotated_pca_data,
        columns=[f'Rotated_PC{i+1}' for i in range(rotated_pca_data.shape[1])]
    )

    rotated_final_df = pd.concat([rotated_pca_features_df, target_df.reset_index(drop=True)], axis=1)
    
    return rotated_final_df

def scale_data(df):
    """
    Scales specified columns in a DataFrame using the specified scaling method.
    
    Args:
        df (pd.DataFrame): The input DataFrame.
        selected_cols (list): A list of column names to scale.
        scaling_method (str): The scaling method to use ("standard" or "minmax"). Default is "standard".
    
    Returns:
        pd.DataFrame: The DataFrame with specified columns scaled.
    """
    selected_scale_cols = [col for col in df.columns if col != "Target"]
    
    scaler = StandardScaler()
    df[selected_scale_cols] = df[selected_scale_cols].apply(pd.to_numeric, errors="coerce")
    df_scaled = df.copy()
    df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
    df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
    df_scaled[selected_scale_cols] = scaler.fit_transform(df[selected_scale_cols])
        
    return df_scaled


In [5]:
for security_type in types_securities:
    for stock in stocks:
        for period in years:
            clean_filepath = f"{security_type}/{stock}/{period}_data.csv"
            intial_df = pd.read_csv(os.path.join(clean_data_dir, clean_filepath))

            print(clean_filepath)
            scaled_df = scale_data(intial_df)
            target_df = scaled_df['Target']
            features_df = scaled_df.drop(columns = ["Date", "Target"])
            pca_df = generate_rotated_pca_df(features_df, target_df)
            
            output_folder = os.path.join(pca_data_dir, f"{security_type}/{stock}")
            os.makedirs(output_folder, exist_ok=True)
            
            pca_df.to_csv(os.path.join(pca_data_dir, clean_filepath), index=False)


options/AAPL/15y_data.csv
Number of selected components: 6
Explained variance by component: [0.31242667 0.232207   0.13005256 0.11050965 0.09119593 0.05821267]
options/AAPL/10y_data.csv
Number of selected components: 6
Explained variance by component: [0.31112435 0.25478101 0.11878127 0.0977161  0.0890532  0.0570205 ]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=T

options/AAPL/5y_data.csv
Number of selected components: 6
Explained variance by component: [0.33518157 0.21447856 0.12933665 0.10219766 0.09113943 0.0563259 ]
options/AAPL/2y_data.csv
Number of selected components: 5
Explained variance by component: [0.38886097 0.18543289 0.13256468 0.12164312 0.08707735]
options/MSFT/15y_data.csv
Number of selected components: 5
Explained variance by component: [0.41526508 0.20370144 0.11840808 0.08886373 0.07949863]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=T

options/MSFT/10y_data.csv
Number of selected components: 5
Explained variance by component: [0.50382051 0.1618703  0.10403398 0.08734723 0.07897683]
options/MSFT/5y_data.csv
Number of selected components: 5
Explained variance by component: [0.37680799 0.1919581  0.15024945 0.12340558 0.08547733]
options/MSFT/2y_data.csv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=T

Number of selected components: 5
Explained variance by component: [0.33721687 0.19297451 0.18921912 0.12932264 0.08211326]
options/AMZN/15y_data.csv
Number of selected components: 5
Explained variance by component: [0.3595095  0.24474122 0.16205949 0.09009303 0.0453326 ]
options/AMZN/10y_data.csv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=T

Number of selected components: 6
Explained variance by component: [0.39990523 0.27222971 0.09748031 0.08619385 0.04211764 0.03877468]
options/AMZN/5y_data.csv
Number of selected components: 6
Explained variance by component: [0.39524667 0.28441947 0.09142384 0.07452278 0.04498086 0.03810488]
options/AMZN/2y_data.csv
Number of selected components: 5
Explained variance by component: [0.40451734 0.20554515 0.12015095 0.10013037 0.07249021]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=T

options/NVDA/15y_data.csv
Number of selected components: 4
Explained variance by component: [0.44778663 0.2371563  0.14673567 0.0887768 ]
options/NVDA/10y_data.csv
Number of selected components: 4
Explained variance by component: [0.45649455 0.24683882 0.12955228 0.0870808 ]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=T

options/NVDA/5y_data.csv
Number of selected components: 4
Explained variance by component: [0.40387889 0.2519453  0.16030076 0.08786868]
options/NVDA/2y_data.csv
Number of selected components: 4
Explained variance by component: [0.48180068 0.21324914 0.13463066 0.0873475 ]
options/SPX/15y_data.csv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=T

Number of selected components: 5
Explained variance by component: [0.40887753 0.24733901 0.109139   0.09924223 0.08951225]
options/SPX/10y_data.csv
Number of selected components: 4
Explained variance by component: [0.46102815 0.23529644 0.12170139 0.09002143]
options/SPX/5y_data.csv


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=T

Number of selected components: 5
Explained variance by component: [0.44698313 0.2529506  0.094028   0.08966524 0.06803213]
options/SPX/2y_data.csv
Number of selected components: 5
Explained variance by component: [0.40892945 0.21347435 0.14253888 0.10309772 0.0874945 ]
technical/AAPL/15y_data.csv
Number of selected components: 14
Explained variance by component: [0.34552015 0.26553605 0.08536069 0.04483062 0.03873971 0.02240427
 0.02077963 0.0155846  0.01290356 0.01127917 0.01052375 0.0099992
 0.00961535 0.00922641]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/AAPL/10y_data.csv
Number of selected components: 13
Explained variance by component: [0.35377256 0.27094494 0.08882837 0.04303461 0.03923784 0.02309838
 0.01756611 0.01554381 0.01158891 0.01093511 0.01045194 0.00964965
 0.00934086]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/AAPL/5y_data.csv
Number of selected components: 12
Explained variance by component: [0.35941526 0.27249076 0.09443148 0.04500407 0.04035892 0.0220564
 0.01643639 0.01336057 0.01095616 0.01059593 0.01016389 0.0094767 ]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/AAPL/2y_data.csv
Number of selected components: 11
Explained variance by component: [0.3466668  0.2769457  0.0909589  0.04402454 0.04239471 0.0292693
 0.02287633 0.01646711 0.01197115 0.01102541 0.01002685]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/MSFT/15y_data.csv
Number of selected components: 14
Explained variance by component: [0.35663533 0.2526166  0.08884874 0.04299596 0.04015931 0.02337229
 0.02189681 0.01441268 0.01286374 0.01075717 0.0100564  0.00985731
 0.00906736 0.00877339]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/MSFT/10y_data.csv
Number of selected components: 13
Explained variance by component: [0.35415644 0.25941691 0.09405063 0.04540922 0.04064525 0.02266814
 0.01742986 0.01564672 0.01135969 0.01089807 0.01033728 0.01000615
 0.00907379]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/MSFT/5y_data.csv
Number of selected components: 12
Explained variance by component: [0.34807836 0.27864824 0.10207507 0.0445652  0.0402575  0.02085079
 0.01661531 0.01238857 0.01136975 0.0101054  0.00983869 0.00973307]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/MSFT/2y_data.csv
Number of selected components: 11
Explained variance by component: [0.36172625 0.26317562 0.1043589  0.04820074 0.04269826 0.02293741
 0.01594246 0.01325949 0.0112068  0.01069528 0.00982528]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/AMZN/15y_data.csv
Number of selected components: 14
Explained variance by component: [0.35167478 0.25271217 0.08937092 0.04436927 0.04022761 0.02436133
 0.02335539 0.01571469 0.01224232 0.01130451 0.01061055 0.00960617
 0.00949251 0.00885674]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/AMZN/10y_data.csv
Number of selected components: 13
Explained variance by component: [0.35869228 0.2523537  0.09241678 0.04729129 0.04089817 0.02505703
 0.01812899 0.01590493 0.01211237 0.0113155  0.01057191 0.0096254
 0.00890155]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/AMZN/5y_data.csv
Number of selected components: 13
Explained variance by component: [0.33156826 0.27475554 0.09783    0.04730326 0.042406   0.0251374
 0.01792957 0.01390174 0.01216974 0.01119869 0.01088998 0.01002322
 0.00977373]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/AMZN/2y_data.csv
Number of selected components: 12
Explained variance by component: [0.36035746 0.24974106 0.10043372 0.05042527 0.04231582 0.02573482
 0.01652224 0.01441271 0.01358876 0.01091904 0.01018435 0.00993239]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/NVDA/15y_data.csv
Number of selected components: 14
Explained variance by component: [0.36316227 0.23306906 0.07662401 0.04940831 0.03746351 0.0323068
 0.02324306 0.01782349 0.01390368 0.01211692 0.01122985 0.01058017
 0.00992455 0.00948756]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/NVDA/10y_data.csv
Number of selected components: 14
Explained variance by component: [0.35633912 0.24427358 0.08069147 0.0496026  0.03815364 0.02867367
 0.02357447 0.01729037 0.013176   0.01128833 0.01102225 0.01050389
 0.00969194 0.00936963]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/NVDA/5y_data.csv
Number of selected components: 13
Explained variance by component: [0.36187955 0.25447787 0.08554767 0.05250271 0.03858315 0.02438954
 0.01963276 0.01391534 0.01177727 0.01105571 0.01062487 0.00965337
 0.00910409]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/NVDA/2y_data.csv
Number of selected components: 11
Explained variance by component: [0.3598135  0.26434543 0.09878091 0.0476084  0.04166764 0.0234203
 0.01717962 0.01611039 0.01270529 0.01211693 0.01044617]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/SPX/15y_data.csv
Number of selected components: 24
Explained variance by component: [0.28312099 0.17180064 0.06732297 0.05901701 0.04384314 0.03197546
 0.0308134  0.02391456 0.02051036 0.01855089 0.01648289 0.01530589
 0.01357366 0.01252324 0.01207964 0.01141543 0.01055633 0.01038711
 0.00972809 0.00919773 0.00863867 0.00822349 0.0076945  0.00735969]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/SPX/10y_data.csv
Number of selected components: 24
Explained variance by component: [0.28312099 0.17180064 0.06732297 0.05901701 0.04384314 0.03197546
 0.0308134  0.02391456 0.02051036 0.01855089 0.01648289 0.01530589
 0.01357366 0.01252324 0.01207964 0.01141543 0.01055633 0.01038711
 0.00972809 0.00919773 0.00863867 0.00822349 0.0076945  0.00735969]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/SPX/5y_data.csv
Number of selected components: 22
Explained variance by component: [0.28119194 0.19030746 0.08275816 0.06277883 0.03675481 0.03196126
 0.03005609 0.02102131 0.01979548 0.01706286 0.01453111 0.01375637
 0.01245787 0.01176595 0.01130722 0.01079985 0.01000923 0.00983648
 0.00904339 0.00889447 0.0082289  0.00785315]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


technical/SPX/2y_data.csv
Number of selected components: 21
Explained variance by component: [0.28603993 0.15856845 0.08960154 0.0800908  0.04868594 0.03080102
 0.02692267 0.02318216 0.01921273 0.01735148 0.01605376 0.01526851
 0.0135191  0.01296614 0.01121076 0.01073362 0.0103494  0.00931483
 0.00911052 0.0084809  0.00823079]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].replace([np.inf, -np.inf], np.nan, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[selected_scale_cols].fillna(df[selected_scale_cols].mean(), inplace=True)
  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


In [9]:
period

'2y'