In [5]:
# Import necessary libraries
import pandas as pd
import Common_Functions as cmn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [6]:
# Read the CSV of year-over-year (YoY) changes and discard the "Unnamed: 0"
# column (presumably a row index written by an earlier to_csv call -- confirm).
df_change = (
    pd.read_csv(cmn.PATH_df_no_NAN_99p_YoY)
    .drop(columns=["Unnamed: 0"])
)

In [7]:
# Binary-classify a metric: 1 where it is strictly positive, 0 otherwise
def prediction_int(df, feature):
    """Flag each row of ``df[feature]`` as 1 when strictly positive, else 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to classify.
    feature : str
        Name of the column that is compared against zero.

    Returns
    -------
    pandas.Series
        Integer series sharing the index of ``df``. Zero and negative values
        map to 0; NaN does not compare greater than zero, so it also maps to 0.
    """
    is_positive = df[feature].gt(0)
    return is_positive.astype(int)

In [8]:
# Pick every column whose name mentions "Return" or "YoY"
feature_cols = list(df_change.filter(regex='Return|YoY').columns)

# Build one 0/1 "<name> Classified" column per selected feature and attach
# them all at once to a new frame, leaving df_change itself untouched
df_change_binary = df_change.assign(
    **{
        f"{name} Classified": prediction_int(df_change, name)
        for name in feature_cols
    }
)

In [9]:
# Features X: each ESG score level, followed by the YoY change of each score.
# NOTE(review): there is no 'Environmental Pillar Score' entry -- confirm that
# the omission is intentional.
ESG_List = [
    f"{score}{suffix}"
    for suffix in ("", " YoY change")
    for score in (
        'ESG Score',
        'ESG Combined Score',
        'ESG Controversies Score',
        'Social Pillar Score',
        'Governance Pillar Score',
    )
]

# Targets y: the binary "... Classified" version of each financial metric
Financials_List = [
    f"{metric} Classified"
    for metric in (
        'Total Return',
        'Return On Equity - Mean',
        'Revenue - Mean YoY change',
        'Earnings Per Share - Mean YoY change',
        'Company Market Cap YoY change',
        'EBITDA - Mean YoY change',
    )
]

In [10]:
# Train one RandomForest per target column and report its hold-out accuracy
def CustomClassifier(X, y_list, test_size=0.20, split_random_state=101,
                     n_estimators=100, model_random_state=42):
    """Fit a separate RandomForestClassifier for every target and score it.

    For each key of ``y_list`` the data is split into train and test parts,
    a forest is fitted on the training part, and the accuracy of its
    predictions on the test part is recorded.

    Parameters
    ----------
    X : pandas.DataFrame or array-like
        Feature matrix shared by all targets.
    y_list : pandas.DataFrame or mapping
        Targets keyed by name. It is iterated for its keys and then indexed
        with each key, so a DataFrame (one target per column) or a dict of
        label vectors works; a plain Python list does not, despite the name.
    test_size : float, default 0.20
        Passed to ``train_test_split`` as the share of rows held out.
    split_random_state : int, default 101
        Seed for the train/test split. The same seed is used for every target.
    n_estimators : int, default 100
        Number of trees in each forest.
    model_random_state : int, default 42
        Seed for the forest itself.

    Returns
    -------
    dict
        Maps each target name to the accuracy on the held-out rows.

    Notes
    -----
    Only accuracy is reported. When a target is dominated by one class,
    accuracy alone can look high without the model being informative, so
    compare it against the majority-class share before interpreting it.
    """
    result = {}
    for target_name in y_list:
        # Hold out a test part; a fixed seed keeps the split reproducible
        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y_list[target_name],
            test_size=test_size,
            random_state=split_random_state,
        )

        # A fresh forest per target so that no state is shared between fits
        rfc = RandomForestClassifier(
            n_estimators=n_estimators,
            random_state=model_random_state,
        )
        rfc.fit(X_train, y_train)

        # Score the predictions on rows the forest has not seen
        y_pred = rfc.predict(X_test)
        result[target_name] = accuracy_score(y_test, y_pred)

    return result

In [11]:
# Feature matrix: the ESG columns; targets: the binary financial columns
X = df_change_binary.loc[:, ESG_List]
y_list = df_change_binary.loc[:, Financials_List]

# One hold-out accuracy per financial target, keyed by target column name
results = CustomClassifier(X=X, y_list=y_list)
print(results)

{'Total Return Classified': 0.5966690803765388, 'Return On Equity - Mean Classified': 0.929640357229061, 'Revenue - Mean YoY change Classified': 0.6932174752594739, 'Earnings Per Share - Mean YoY change Classified': 0.5696355298093169, 'Company Market Cap YoY change Classified': 0.5627564566739078, 'EBITDA - Mean YoY change Classified': 0.6466328747284577}
