In [20]:
# Import the necessary libraries
import numpy as np
import pandas as pd
import hvplot.pandas
from pathlib import Path

import warnings
# Suppress every warning category for the rest of the session.
# Note that this also hides deprecation notices raised by pandas / scikit-learn.
warnings.filterwarnings('ignore')

# Lift the pandas display limits so that all rows and all columns are rendered
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

**GOOGLE**

In [21]:
# Read in the CSV files that contain SMA, EMA, and BB/RSI indicators based on ratio data.
# Each file is indexed by its "Date" column, which is parsed into datetimes.
# `infer_datetime_format` is intentionally not passed: pandas deprecated it in 2.0,
# where consistent-format inference became the default behaviour of date parsing.
googl_df = pd.read_csv("googl_signals.csv", index_col="Date", parse_dates=True)
nvda_df = pd.read_csv("nvda_signals.csv", index_col="Date", parse_dates=True)
mmm_df = pd.read_csv("mmm_signals.csv", index_col="Date", parse_dates=True)
pg_df = pd.read_csv("pg_signals.csv", index_col="Date", parse_dates=True)

In [22]:
# Import necessary libraries for machine learning, scaling, resampling and classification reports
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from imblearn.combine import SMOTEENN
from imblearn.over_sampling import SMOTE
from imblearn.over_sampling import RandomOverSampler
import matplotlib.pyplot as plt
import pdfkit
import pandas as pd

In [23]:
# Display the column labels of the GOOGL frame to see which columns are available
googl_df.columns

Index(['GOOGL Adj. Close', 'GOOGL P/S (LTM)', 'GOOGL P/FCF (LTM)',
       'GOOGL P/E (LTM)', 'GOOGL Debt/Equity (LTM)', 'actual_returns',
       'ps_sma_fast', 'ps_sma_fast30', 'ps_sma_slow', 'ps_ema_fast',
       ...
       'ps_Portfolio Cumulative Returns_sma30',
       'ps_Portfolio Cumulative Returns_ema30',
       'pfcf_Portfolio Cumulative Returns_sma',
       'pfcf_Portfolio Cumulative Returns_ema',
       'pfcf_Portfolio Cumulative Returns_sma30',
       'pfcf_Portfolio Cumulative Returns_ema30',
       'pe_Portfolio Cumulative Returns_sma',
       'pe_Portfolio Cumulative Returns_ema',
       'pe_Portfolio Cumulative Returns_sma30',
       'pe_Portfolio Cumulative Returns_ema30'],
      dtype='object', length=132)

In [24]:
# Feature columns (X): the three GOOGL ratio columns used as model inputs
features = [f"GOOGL {ratio} (LTM)" for ratio in ("P/S", "P/FCF", "P/E")]


In [25]:
# Target columns (y): one Entry/Exit column per ratio prefix and per "30" moving-average variant
y_variables = [f"{ratio}_Entry/Exit_{average}"
               for ratio in ("ps", "pfcf", "pe")
               for average in ("sma30", "ema30")]



In [26]:
# Define models
# NOTE(review): `multi_class` is deprecated in scikit-learn >= 1.5. It is kept here so the
# fitted model is unchanged; confirm against the installed version before removing it.
logistic_regression = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
models = {'Naive Bayes': GaussianNB(),
          'Random Forest Classifier': RandomForestClassifier(random_state=42),
          'Logistic Regression': logistic_regression}

# Collect one report frame per (target, model) pair and concatenate once at the end,
# rather than re-copying a growing DataFrame on every iteration.
report_frames_test = []
report_frames_train = []

# Loop through each y variable
for y_var in y_variables:
    print(f"\n--- Classification reports for '{y_var}' ---\n")

    # Drop NaN values from X and y for the current y variable
    data_cleaned = googl_df.dropna(subset=features + [y_var])
    X = data_cleaned[features]
    y = data_cleaned[y_var]

    # Split BEFORE resampling. Oversampling first duplicates minority rows, and those
    # duplicates would then land on both sides of the split, leaking training rows into
    # the test set and inflating the test metrics.
    # Stratify only when every class has at least two rows (train_test_split requires it).
    stratify_labels = y if y.value_counts().min() >= 2 else None
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=stratify_labels)

    # Apply Random Oversampling to the training split only; the test split keeps the
    # original class distribution.
    oversampler = RandomOverSampler(sampling_strategy='auto', random_state=42)
    X_train, y_train = oversampler.fit_resample(X_train, y_train)

    # Scale the features using StandardScaler (fitted on the training split only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Loop through each model
    for model_name, model in models.items():
        # Train the model on the (resampled, scaled) training data
        model.fit(X_train_scaled, y_train)

        # Compute and store classification report for testing data
        y_pred_test = model.predict(X_test_scaled)
        report_test = classification_report(y_test, y_pred_test, output_dict=True)
        report_df_test = pd.DataFrame(report_test).transpose()
        report_df_test['y_variable'] = y_var
        report_df_test['model'] = model_name
        report_frames_test.append(report_df_test)

        # Compute and store classification report for training data
        y_pred_train = model.predict(X_train_scaled)
        report_train = classification_report(y_train, y_pred_train, output_dict=True)
        report_df_train = pd.DataFrame(report_train).transpose()
        report_df_train['y_variable'] = y_var
        report_df_train['model'] = model_name
        report_frames_train.append(report_df_train)

# Build the combined report tables (an empty frame if no target was processed)
reports_df_test = pd.concat(report_frames_test) if report_frames_test else pd.DataFrame()
reports_df_train = pd.concat(report_frames_train) if report_frames_train else pd.DataFrame()

# Export the DataFrames containing classification reports to separate CSV files
reports_df_test.to_csv('testing_classification_reports_GOOGLE_30.csv', index=True)
reports_df_train.to_csv('training_classification_reports_GOOGLE_30.csv', index=True)


--- Classification reports for 'ps_Entry/Exit_sma30' ---


--- Classification reports for 'ps_Entry/Exit_ema30' ---


--- Classification reports for 'pfcf_Entry/Exit_sma30' ---


--- Classification reports for 'pfcf_Entry/Exit_ema30' ---


--- Classification reports for 'pe_Entry/Exit_sma30' ---


--- Classification reports for 'pe_Entry/Exit_ema30' ---



**NVDA**

In [27]:
# Feature columns (X): the three NVDA ratio columns used as model inputs
features = [f"NVDA {ratio} (LTM)" for ratio in ("P/S", "P/FCF", "P/E")]

In [28]:
# Target columns (y): one Entry/Exit column per ratio prefix and per moving-average variant.
# NOTE(review): the NVDA reports are exported to files suffixed `_30`, and the GOOGL section
# targets the `*_sma30` / `*_ema30` columns. Confirm the non-30 columns are intended here.
y_variables = [f"{ratio}_Entry/Exit_{average}"
               for ratio in ("ps", "pfcf", "pe")
               for average in ("sma", "ema")]

In [29]:
# Define models
# NOTE(review): `multi_class` is deprecated in scikit-learn >= 1.5. It is kept here so the
# fitted model is unchanged; confirm against the installed version before removing it.
logistic_regression = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
models = {'Naive Bayes': GaussianNB(),
          'Random Forest Classifier': RandomForestClassifier(random_state=42),
          'Logistic Regression': logistic_regression}

# Collect one report frame per (target, model) pair and concatenate once at the end,
# rather than re-copying a growing DataFrame on every iteration.
report_frames_test = []
report_frames_train = []

# Loop through each y variable
for y_var in y_variables:
    print(f"\n--- Classification reports for '{y_var}' ---\n")

    # Drop NaN values from X and y for the current y variable
    data_cleaned = nvda_df.dropna(subset=features + [y_var])
    X = data_cleaned[features]
    y = data_cleaned[y_var]

    # Split BEFORE resampling. Oversampling first duplicates minority rows, and those
    # duplicates would then land on both sides of the split, leaking training rows into
    # the test set and inflating the test metrics.
    # Stratify only when every class has at least two rows (train_test_split requires it).
    stratify_labels = y if y.value_counts().min() >= 2 else None
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=stratify_labels)

    # Apply Random Oversampling to the training split only; the test split keeps the
    # original class distribution.
    oversampler = RandomOverSampler(sampling_strategy='auto', random_state=42)
    X_train, y_train = oversampler.fit_resample(X_train, y_train)

    # Scale the features using StandardScaler (fitted on the training split only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Loop through each model
    for model_name, model in models.items():
        # Train the model on the (resampled, scaled) training data
        model.fit(X_train_scaled, y_train)

        # Compute and store classification report for testing data
        y_pred_test = model.predict(X_test_scaled)
        report_test = classification_report(y_test, y_pred_test, output_dict=True)
        report_df_test = pd.DataFrame(report_test).transpose()
        report_df_test['y_variable'] = y_var
        report_df_test['model'] = model_name
        report_frames_test.append(report_df_test)

        # Compute and store classification report for training data
        y_pred_train = model.predict(X_train_scaled)
        report_train = classification_report(y_train, y_pred_train, output_dict=True)
        report_df_train = pd.DataFrame(report_train).transpose()
        report_df_train['y_variable'] = y_var
        report_df_train['model'] = model_name
        report_frames_train.append(report_df_train)

# Build the combined report tables (an empty frame if no target was processed)
reports_df_test = pd.concat(report_frames_test) if report_frames_test else pd.DataFrame()
reports_df_train = pd.concat(report_frames_train) if report_frames_train else pd.DataFrame()

# Export the DataFrames containing classification reports to separate CSV files
# NOTE(review): the file names carry a `_30` suffix; confirm it matches the target
# columns listed in `y_variables`.
reports_df_test.to_csv('testing_classification_reports_NVDA_30.csv', index=True)
reports_df_train.to_csv('training_classification_reports_NVDA_30.csv', index=True)


--- Classification reports for 'ps_Entry/Exit_sma' ---


--- Classification reports for 'ps_Entry/Exit_ema' ---


--- Classification reports for 'pfcf_Entry/Exit_sma' ---


--- Classification reports for 'pfcf_Entry/Exit_ema' ---


--- Classification reports for 'pe_Entry/Exit_sma' ---


--- Classification reports for 'pe_Entry/Exit_ema' ---



**MMM**

In [30]:
# Feature columns (X): the three MMM ratio columns used as model inputs
features = [f"MMM {ratio} (LTM)" for ratio in ("P/S", "P/FCF", "P/E")]

In [31]:
# Target columns (y): one Entry/Exit column per ratio prefix and per moving-average variant.
# NOTE(review): the MMM reports are exported to files suffixed `_30`, and the GOOGL section
# targets the `*_sma30` / `*_ema30` columns. Confirm the non-30 columns are intended here.
y_variables = [f"{ratio}_Entry/Exit_{average}"
               for ratio in ("ps", "pfcf", "pe")
               for average in ("sma", "ema")]

In [32]:
# Define models
# NOTE(review): `multi_class` is deprecated in scikit-learn >= 1.5. It is kept here so the
# fitted model is unchanged; confirm against the installed version before removing it.
logistic_regression = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
models = {'Naive Bayes': GaussianNB(),
          'Random Forest Classifier': RandomForestClassifier(random_state=42),
          'Logistic Regression': logistic_regression}

# Collect one report frame per (target, model) pair and concatenate once at the end,
# rather than re-copying a growing DataFrame on every iteration.
report_frames_test = []
report_frames_train = []

# Loop through each y variable
for y_var in y_variables:
    print(f"\n--- Classification reports for '{y_var}' ---\n")

    # Drop NaN values from X and y for the current y variable
    data_cleaned = mmm_df.dropna(subset=features + [y_var])
    X = data_cleaned[features]
    y = data_cleaned[y_var]

    # Split BEFORE resampling. Oversampling first duplicates minority rows, and those
    # duplicates would then land on both sides of the split, leaking training rows into
    # the test set and inflating the test metrics.
    # Stratify only when every class has at least two rows (train_test_split requires it).
    stratify_labels = y if y.value_counts().min() >= 2 else None
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=stratify_labels)

    # Apply Random Oversampling to the training split only; the test split keeps the
    # original class distribution.
    oversampler = RandomOverSampler(sampling_strategy='auto', random_state=42)
    X_train, y_train = oversampler.fit_resample(X_train, y_train)

    # Scale the features using StandardScaler (fitted on the training split only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Loop through each model
    for model_name, model in models.items():
        # Train the model on the (resampled, scaled) training data
        model.fit(X_train_scaled, y_train)

        # Compute and store classification report for testing data
        y_pred_test = model.predict(X_test_scaled)
        report_test = classification_report(y_test, y_pred_test, output_dict=True)
        report_df_test = pd.DataFrame(report_test).transpose()
        report_df_test['y_variable'] = y_var
        report_df_test['model'] = model_name
        report_frames_test.append(report_df_test)

        # Compute and store classification report for training data
        y_pred_train = model.predict(X_train_scaled)
        report_train = classification_report(y_train, y_pred_train, output_dict=True)
        report_df_train = pd.DataFrame(report_train).transpose()
        report_df_train['y_variable'] = y_var
        report_df_train['model'] = model_name
        report_frames_train.append(report_df_train)

# Build the combined report tables (an empty frame if no target was processed)
reports_df_test = pd.concat(report_frames_test) if report_frames_test else pd.DataFrame()
reports_df_train = pd.concat(report_frames_train) if report_frames_train else pd.DataFrame()

# Export the DataFrames containing classification reports to separate CSV files
# NOTE(review): the file names carry a `_30` suffix; confirm it matches the target
# columns listed in `y_variables`.
reports_df_test.to_csv('testing_classification_reports_MMM_30.csv', index=True)
reports_df_train.to_csv('training_classification_reports_MMM_30.csv', index=True)


--- Classification reports for 'ps_Entry/Exit_sma' ---


--- Classification reports for 'ps_Entry/Exit_ema' ---


--- Classification reports for 'pfcf_Entry/Exit_sma' ---


--- Classification reports for 'pfcf_Entry/Exit_ema' ---


--- Classification reports for 'pe_Entry/Exit_sma' ---


--- Classification reports for 'pe_Entry/Exit_ema' ---



**PG**

In [33]:
# Feature columns (X): the three PG ratio columns used as model inputs
features = [f"PG {ratio} (LTM)" for ratio in ("P/S", "P/FCF", "P/E")]

In [34]:
# Target columns (y): one Entry/Exit column per ratio prefix and per moving-average variant.
# NOTE(review): the PG reports are exported to files suffixed `_30`, and the GOOGL section
# targets the `*_sma30` / `*_ema30` columns. Confirm the non-30 columns are intended here.
y_variables = [f"{ratio}_Entry/Exit_{average}"
               for ratio in ("ps", "pfcf", "pe")
               for average in ("sma", "ema")]

In [35]:
# Define models
# NOTE(review): `multi_class` is deprecated in scikit-learn >= 1.5. It is kept here so the
# fitted model is unchanged; confirm against the installed version before removing it.
logistic_regression = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
models = {'Naive Bayes': GaussianNB(),
          'Random Forest Classifier': RandomForestClassifier(random_state=42),
          'Logistic Regression': logistic_regression}

# Collect one report frame per (target, model) pair and concatenate once at the end,
# rather than re-copying a growing DataFrame on every iteration.
report_frames_test = []
report_frames_train = []

# Loop through each y variable
for y_var in y_variables:
    print(f"\n--- Classification reports for '{y_var}' ---\n")

    # Drop NaN values from X and y for the current y variable
    data_cleaned = pg_df.dropna(subset=features + [y_var])
    X = data_cleaned[features]
    y = data_cleaned[y_var]

    # Split BEFORE resampling. Oversampling first duplicates minority rows, and those
    # duplicates would then land on both sides of the split, leaking training rows into
    # the test set and inflating the test metrics.
    # Stratify only when every class has at least two rows (train_test_split requires it).
    stratify_labels = y if y.value_counts().min() >= 2 else None
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=stratify_labels)

    # Apply Random Oversampling to the training split only; the test split keeps the
    # original class distribution.
    oversampler = RandomOverSampler(sampling_strategy='auto', random_state=42)
    X_train, y_train = oversampler.fit_resample(X_train, y_train)

    # Scale the features using StandardScaler (fitted on the training split only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Loop through each model
    for model_name, model in models.items():
        # Train the model on the (resampled, scaled) training data
        model.fit(X_train_scaled, y_train)

        # Compute and store classification report for testing data
        y_pred_test = model.predict(X_test_scaled)
        report_test = classification_report(y_test, y_pred_test, output_dict=True)
        report_df_test = pd.DataFrame(report_test).transpose()
        report_df_test['y_variable'] = y_var
        report_df_test['model'] = model_name
        report_frames_test.append(report_df_test)

        # Compute and store classification report for training data
        y_pred_train = model.predict(X_train_scaled)
        report_train = classification_report(y_train, y_pred_train, output_dict=True)
        report_df_train = pd.DataFrame(report_train).transpose()
        report_df_train['y_variable'] = y_var
        report_df_train['model'] = model_name
        report_frames_train.append(report_df_train)

# Build the combined report tables (an empty frame if no target was processed)
reports_df_test = pd.concat(report_frames_test) if report_frames_test else pd.DataFrame()
reports_df_train = pd.concat(report_frames_train) if report_frames_train else pd.DataFrame()

# Export the DataFrames containing classification reports to separate CSV files
# NOTE(review): the file names carry a `_30` suffix; confirm it matches the target
# columns listed in `y_variables`.
reports_df_test.to_csv('testing_classification_reports_PG_30.csv', index=True)
reports_df_train.to_csv('training_classification_reports_PG_30.csv', index=True)


--- Classification reports for 'ps_Entry/Exit_sma' ---


--- Classification reports for 'ps_Entry/Exit_ema' ---


--- Classification reports for 'pfcf_Entry/Exit_sma' ---


--- Classification reports for 'pfcf_Entry/Exit_ema' ---


--- Classification reports for 'pe_Entry/Exit_sma' ---


--- Classification reports for 'pe_Entry/Exit_ema' ---

