In [1]:
# Import the necessary libraries
import numpy as np
import pandas as pd
import hvplot.pandas
from pathlib import Path

import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_rows', None)  # To show all rows
pd.set_option('display.max_columns', None)  # To show all columns

**GOOGLE**

In [2]:
# Read in the CSV files that contain SMA, EMA, and BB/RSI inidcators based on ratio data
googl_df = pd.read_csv("Resources/googl_signals.csv", infer_datetime_format=True, index_col="Date", parse_dates=True)
nvda_df = pd.read_csv("Resources/nvda_signals.csv", infer_datetime_format=True, index_col="Date", parse_dates=True)
mmm_df = pd.read_csv("Resources/mmm_signals.csv", infer_datetime_format=True, index_col="Date", parse_dates=True)
pg_df = pd.read_csv("Resources/pg_signals.csv", infer_datetime_format=True, index_col="Date", parse_dates=True)

In [33]:
# Import necessary libraries for machine learning, scaling, resampling and classification reports 
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from imblearn.combine import SMOTEENN
import pandas as pd

In [34]:
# Define features (X) 
features = ['GOOGL P/S (LTM)', 'GOOGL P/FCF (LTM)', 'GOOGL P/E (LTM)']


In [35]:
# Define y variables
y_variables = ['ps_Entry/Exit_sma', 'ps_Entry/Exit_ema', 'pfcf_Entry/Exit_sma',
               'pfcf_Entry/Exit_ema', 'pe_Entry/Exit_sma', 'pe_Entry/Exit_ema']



In [36]:
# Define models
logistic_regression = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
models = {'Naive Bayes': GaussianNB(),
          'Random Forest Classifier': RandomForestClassifier(random_state=42),
          'Logistic Regression': logistic_regression}

# Create an empty DataFrame to store the classification reports
reports_df_test = pd.DataFrame()
reports_df_train = pd.DataFrame()

# Loop through each y variable
for y_var in y_variables:
    print(f"\n--- Classification reports for '{y_var}' ---\n")
    
    # Drop NaN values from X and y for the current y variable
    data_cleaned = googl_df.dropna(subset=features + [y_var])
    X = data_cleaned[features]
    y = data_cleaned[y_var]

    # Apply SMOTEENN to address class imbalance
    smoteenn = SMOTEENN(random_state=42)
    X_resampled, y_resampled = smoteenn.fit_resample(X, y)

    # Split the resampled data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)
    
    # Scale the features using StandardScaler
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Loop through each model
    for model_name, model in models.items():
        # Train the model on the training data
        model.fit(X_train_scaled, y_train)

        # Make predictions on the testing data
        y_pred_test = model.predict(X_test_scaled)

        # Print label for the model
        print(f"\n--- {model_name} --- (Testing Data)")

        # Print classification report for testing data
        print(classification_report(y_test, y_pred_test))
        
        # Store classification report for testing data in the DataFrame
        report_test = classification_report(y_test, y_pred_test, output_dict=True)
        report_df_test = pd.DataFrame(report_test).transpose()
        report_df_test['y_variable'] = y_var
        report_df_test['model'] = model_name
        reports_df_test = pd.concat([reports_df_test, report_df_test])

        # Make predictions on the training data
        y_pred_train = model.predict(X_train_scaled)

        # Print label for the model
        print(f"\n--- {model_name} --- (Training Data)")

        # Print classification report for training data
        print(classification_report(y_train, y_pred_train))

        # Store classification report for training data in the DataFrame
        report_train = classification_report(y_train, y_pred_train, output_dict=True)
        report_df_train = pd.DataFrame(report_train).transpose()
        report_df_train['y_variable'] = y_var
        report_df_train['model'] = model_name
        reports_df_train = pd.concat([reports_df_train, report_df_train])

# Export the DataFrames containing classification reports to separate CSV files
reports_df_test.to_csv('testing_classification_reports_GOOGLE_10.csv', index=True)
reports_df_train.to_csv('training_classification_reports_GOOGLE_10.csv', index=True)


--- Classification reports for 'ps_Entry/Exit_sma' ---


--- Naive Bayes --- (Testing Data)
              precision    recall  f1-score   support

        -1.0       0.34      0.10      0.15       242
         0.0       0.52      0.48      0.50       221
         1.0       0.39      0.75      0.51       205

    accuracy                           0.42       668
   macro avg       0.42      0.44      0.39       668
weighted avg       0.41      0.42      0.37       668


--- Naive Bayes --- (Training Data)
              precision    recall  f1-score   support

        -1.0       0.33      0.10      0.16       863
         0.0       0.46      0.44      0.45       887
         1.0       0.41      0.70      0.52       921

    accuracy                           0.42      2671
   macro avg       0.40      0.41      0.37      2671
weighted avg       0.40      0.42      0.38      2671


--- Random Forest Classifier --- (Testing Data)
              precision    recall  f1-score   support

    

**NVDA**

In [37]:
# Define features (X)
features = ['NVDA P/S (LTM)', 'NVDA P/FCF (LTM)', 'NVDA P/E (LTM)']

In [38]:
# Define y variables
y_variables = ['ps_Entry/Exit_sma', 'ps_Entry/Exit_ema', 'pfcf_Entry/Exit_sma',
               'pfcf_Entry/Exit_ema', 'pe_Entry/Exit_sma', 'pe_Entry/Exit_ema']

In [39]:
# Define models
logistic_regression = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
models = {'Naive Bayes': GaussianNB(),
          'Random Forest Classifier': RandomForestClassifier(random_state=42),
          'Logistic Regression': logistic_regression}

# Create an empty DataFrame to store the classification reports
reports_df_test = pd.DataFrame()
reports_df_train = pd.DataFrame()

# Loop through each y variable
for y_var in y_variables:
    print(f"\n--- Classification reports for '{y_var}' ---\n")
    
    # Drop NaN values from X and y for the current y variable
    data_cleaned = nvda_df.dropna(subset=features + [y_var])
    X = data_cleaned[features]
    y = data_cleaned[y_var]

    # Apply SMOTEENN to address class imbalance
    smoteenn = SMOTEENN(random_state=42)
    X_resampled, y_resampled = smoteenn.fit_resample(X, y)

    # Split the resampled data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)
    
    # Scale the features using StandardScaler
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Loop through each model
    for model_name, model in models.items():
        # Train the model on the training data
        model.fit(X_train_scaled, y_train)

        # Make predictions on the testing data
        y_pred_test = model.predict(X_test_scaled)

        # Print label for the model
        print(f"\n--- {model_name} --- (Testing Data)")

        # Print classification report for testing data
        print(classification_report(y_test, y_pred_test))
        
        # Store classification report for testing data in the DataFrame
        report_test = classification_report(y_test, y_pred_test, output_dict=True)
        report_df_test = pd.DataFrame(report_test).transpose()
        report_df_test['y_variable'] = y_var
        report_df_test['model'] = model_name
        reports_df_test = pd.concat([reports_df_test, report_df_test])

        # Make predictions on the training data
        y_pred_train = model.predict(X_train_scaled)

        # Print label for the model
        print(f"\n--- {model_name} --- (Training Data)")

        # Print classification report for training data
        print(classification_report(y_train, y_pred_train))

        # Store classification report for training data in the DataFrame
        report_train = classification_report(y_train, y_pred_train, output_dict=True)
        report_df_train = pd.DataFrame(report_train).transpose()
        report_df_train['y_variable'] = y_var
        report_df_train['model'] = model_name
        reports_df_train = pd.concat([reports_df_train, report_df_train])

# Export the DataFrames containing classification reports to separate CSV files
reports_df_test.to_csv('testing_classification_reports_NVDA_10.csv', index=True)
reports_df_train.to_csv('training_classification_reports_NVDA_10.csv', index=True)


--- Classification reports for 'ps_Entry/Exit_sma' ---


--- Naive Bayes --- (Testing Data)
              precision    recall  f1-score   support

        -1.0       0.49      0.84      0.62       238
         0.0       0.52      0.39      0.45       201
         1.0       0.11      0.04      0.06       193

    accuracy                           0.45       632
   macro avg       0.37      0.42      0.37       632
weighted avg       0.38      0.45      0.39       632


--- Naive Bayes --- (Training Data)
              precision    recall  f1-score   support

        -1.0       0.44      0.83      0.57       851
         0.0       0.52      0.38      0.44       852
         1.0       0.16      0.06      0.09       824

    accuracy                           0.43      2527
   macro avg       0.37      0.42      0.37      2527
weighted avg       0.38      0.43      0.37      2527


--- Random Forest Classifier --- (Testing Data)
              precision    recall  f1-score   support

    

**MMM**

In [40]:
# Define features (X) 
features = ['MMM P/S (LTM)', 'MMM P/FCF (LTM)', 'MMM P/E (LTM)']

In [41]:
# Define y variables
y_variables = ['ps_Entry/Exit_sma', 'ps_Entry/Exit_ema', 'pfcf_Entry/Exit_sma',
               'pfcf_Entry/Exit_ema', 'pe_Entry/Exit_sma', 'pe_Entry/Exit_ema']

In [42]:
# Define models
logistic_regression = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
models = {'Naive Bayes': GaussianNB(),
          'Random Forest Classifier': RandomForestClassifier(random_state=42),
          'Logistic Regression': logistic_regression}

# Create an empty DataFrame to store the classification reports
reports_df_test = pd.DataFrame()
reports_df_train = pd.DataFrame()

# Loop through each y variable
for y_var in y_variables:
    print(f"\n--- Classification reports for '{y_var}' ---\n")
    
    # Drop NaN values from X and y for the current y variable
    data_cleaned = mmm_df.dropna(subset=features + [y_var])
    X = data_cleaned[features]
    y = data_cleaned[y_var]

    # Apply SMOTEENN to address class imbalance
    smoteenn = SMOTEENN(random_state=42)
    X_resampled, y_resampled = smoteenn.fit_resample(X, y)

    # Split the resampled data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)
    
    # Scale the features using StandardScaler
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Loop through each model
    for model_name, model in models.items():
        # Train the model on the training data
        model.fit(X_train_scaled, y_train)

        # Make predictions on the testing data
        y_pred_test = model.predict(X_test_scaled)

        # Print label for the model
        print(f"\n--- {model_name} --- (Testing Data)")

        # Print classification report for testing data
        print(classification_report(y_test, y_pred_test))
        
        # Store classification report for testing data in the DataFrame
        report_test = classification_report(y_test, y_pred_test, output_dict=True)
        report_df_test = pd.DataFrame(report_test).transpose()
        report_df_test['y_variable'] = y_var
        report_df_test['model'] = model_name
        reports_df_test = pd.concat([reports_df_test, report_df_test])

        # Make predictions on the training data
        y_pred_train = model.predict(X_train_scaled)

        # Print label for the model
        print(f"\n--- {model_name} --- (Training Data)")

        # Print classification report for training data
        print(classification_report(y_train, y_pred_train))

        # Store classification report for training data in the DataFrame
        report_train = classification_report(y_train, y_pred_train, output_dict=True)
        report_df_train = pd.DataFrame(report_train).transpose()
        report_df_train['y_variable'] = y_var
        report_df_train['model'] = model_name
        reports_df_train = pd.concat([reports_df_train, report_df_train])

# Export the DataFrames containing classification reports to separate CSV files
reports_df_test.to_csv('testing_classification_reports_MMM_10.csv', index=True)
reports_df_train.to_csv('training_classification_reports_MMM_10.csv', index=True)


--- Classification reports for 'ps_Entry/Exit_sma' ---


--- Naive Bayes --- (Testing Data)
              precision    recall  f1-score   support

        -1.0       0.59      0.32      0.42       196
         0.0       0.56      0.39      0.46       183
         1.0       0.49      0.86      0.62       191

    accuracy                           0.52       570
   macro avg       0.55      0.52      0.50       570
weighted avg       0.55      0.52      0.50       570


--- Naive Bayes --- (Training Data)
              precision    recall  f1-score   support

        -1.0       0.60      0.36      0.45       743
         0.0       0.62      0.44      0.52       749
         1.0       0.51      0.85      0.64       786

    accuracy                           0.55      2278
   macro avg       0.58      0.55      0.53      2278
weighted avg       0.58      0.55      0.54      2278


--- Random Forest Classifier --- (Testing Data)
              precision    recall  f1-score   support

    

**PG**

In [43]:
# Define features (X) 
features = ['PG P/S (LTM)', 'PG P/FCF (LTM)', 'PG P/E (LTM)']

In [44]:
# Define y variables
y_variables = ['ps_Entry/Exit_sma', 'ps_Entry/Exit_ema', 'pfcf_Entry/Exit_sma',
               'pfcf_Entry/Exit_ema', 'pe_Entry/Exit_sma', 'pe_Entry/Exit_ema']

In [45]:
# Define models
logistic_regression = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
models = {'Naive Bayes': GaussianNB(),
          'Random Forest Classifier': RandomForestClassifier(random_state=42),
          'Logistic Regression': logistic_regression}

# Create an empty DataFrame to store the classification reports
reports_df_test = pd.DataFrame()
reports_df_train = pd.DataFrame()

# Loop through each y variable
for y_var in y_variables:
    print(f"\n--- Classification reports for '{y_var}' ---\n")
    
    # Drop NaN values from X and y for the current y variable
    data_cleaned = pg_df.dropna(subset=features + [y_var])
    X = data_cleaned[features]
    y = data_cleaned[y_var]

    # Apply SMOTEENN to address class imbalance
    smoteenn = SMOTEENN(random_state=42)
    X_resampled, y_resampled = smoteenn.fit_resample(X, y)

    # Split the resampled data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)
    
    # Scale the features using StandardScaler
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Loop through each model
    for model_name, model in models.items():
        # Train the model on the training data
        model.fit(X_train_scaled, y_train)

        # Make predictions on the testing data
        y_pred_test = model.predict(X_test_scaled)

        # Print label for the model
        print(f"\n--- {model_name} --- (Testing Data)")

        # Print classification report for testing data
        print(classification_report(y_test, y_pred_test))
        
        # Store classification report for testing data in the DataFrame
        report_test = classification_report(y_test, y_pred_test, output_dict=True)
        report_df_test = pd.DataFrame(report_test).transpose()
        report_df_test['y_variable'] = y_var
        report_df_test['model'] = model_name
        reports_df_test = pd.concat([reports_df_test, report_df_test])

        # Make predictions on the training data
        y_pred_train = model.predict(X_train_scaled)

        # Print label for the model
        print(f"\n--- {model_name} --- (Training Data)")

        # Print classification report for training data
        print(classification_report(y_train, y_pred_train))

        # Store classification report for training data in the DataFrame
        report_train = classification_report(y_train, y_pred_train, output_dict=True)
        report_df_train = pd.DataFrame(report_train).transpose()
        report_df_train['y_variable'] = y_var
        report_df_train['model'] = model_name
        reports_df_train = pd.concat([reports_df_train, report_df_train])

# Export the DataFrames containing classification reports to separate CSV files
reports_df_test.to_csv('testing_classification_reports_PG_10.csv', index=True)
reports_df_train.to_csv('training_classification_reports_PG_10.csv', index=True)


--- Classification reports for 'ps_Entry/Exit_sma' ---


--- Naive Bayes --- (Testing Data)
              precision    recall  f1-score   support

        -1.0       0.50      0.63      0.56       174
         0.0       0.71      0.47      0.57       189
         1.0       0.38      0.42      0.40       163

    accuracy                           0.51       526
   macro avg       0.53      0.51      0.51       526
weighted avg       0.54      0.51      0.51       526


--- Naive Bayes --- (Training Data)
              precision    recall  f1-score   support

        -1.0       0.51      0.65      0.57       671
         0.0       0.75      0.50      0.60       768
         1.0       0.45      0.50      0.48       663

    accuracy                           0.55      2102
   macro avg       0.57      0.55      0.55      2102
weighted avg       0.58      0.55      0.55      2102


--- Random Forest Classifier --- (Testing Data)
              precision    recall  f1-score   support

    