In [10]:
# Import necessary libraries
import pandas as pd
import Common_Functions as cmn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [11]:
# Load the dataset containing YoY changes in the variables.
# The "Unnamed: 0" column is dropped right after reading.
df_change = (
    pd.read_csv(cmn.PATH_df_no_NAN_99p_YoY)
    .drop(columns=["Unnamed: 0"])
)

In [12]:
# Function to binary classify rising and falling metrics
def prediction_int(df, feature):
    """Return a 0/1 integer Series marking where ``df[feature]`` is positive.

    Args:
        df: DataFrame holding the column to classify.
        feature: Name of the column in ``df``.

    Returns:
        Series aligned with ``df``: 1 where the value is strictly greater
        than 0, otherwise 0 (a value of exactly 0 is therefore labelled 0).
    """
    is_rising = df[feature].gt(0)
    return is_rising.astype(int)

In [13]:
# Columns to binarise: every column whose name matches "Return" or "YoY"
feature_cols = list(df_change.filter(regex='Return|YoY').columns)

# New frame = the imported frame plus one "<column> Classified" 0/1 column
# per selected feature, appended in the order of feature_cols.
# The imported frame itself is left untouched.
df_change_binary = df_change.assign(
    **{
        f"{feature} Classified": prediction_int(df_change, feature)
        for feature in feature_cols
    }
)

In [18]:
# Show all column names, including the newly added "... Classified" ones
list(df_change_binary.columns)

['ISIN',
 'Year',
 'Total Return',
 'ESG Score',
 'ESG Combined Score',
 'ESG Controversies Score',
 'Social Pillar Score',
 'Governance Pillar Score',
 'Environmental Pillar Score',
 'Revenue - Mean',
 'Earnings Per Share - Mean',
 'Return On Equity - Mean',
 'Volume',
 'Company Market Cap',
 'EBITDA - Mean',
 'Country of Headquarters',
 'Region of Headquarters',
 'TRBC Industry Name',
 'TRBC Industry Group Name',
 'TRBC Business Sector Name',
 'TRBC Economic Sector Name',
 'ESG Score YoY change',
 'ESG Combined Score YoY change',
 'ESG Controversies Score YoY change',
 'Social Pillar Score YoY change',
 'Governance Pillar Score YoY change',
 'Revenue - Mean YoY change',
 'Earnings Per Share - Mean YoY change',
 'Company Market Cap YoY change',
 'EBITDA - Mean YoY change',
 'Total Return Classified',
 'Return On Equity - Mean Classified',
 'ESG Score YoY change Classified',
 'ESG Combined Score YoY change Classified',
 'ESG Controversies Score YoY change Classified',
 'Social Pillar S

In [14]:
# Defining the features X:
# the five score levels first, followed by their "YoY change" counterparts
ESG_List = [
    f"{score}{suffix}"
    for suffix in ("", " YoY change")
    for score in (
        'ESG Score',
        'ESG Combined Score',
        'ESG Controversies Score',
        'Social Pillar Score',
        'Governance Pillar Score',
    )
]

# Defining the targets y:
# the "Classified" (0/1) version of each financial metric
Financials_List = [
    f"{metric} Classified"
    for metric in (
        'Total Return',
        'Return On Equity - Mean',
        'Revenue - Mean YoY change',
        'Earnings Per Share - Mean YoY change',
        'Company Market Cap YoY change',
        'EBITDA - Mean YoY change',
    )
]

In [21]:
# Function to run RandomForest on the new binary dataframe
def CustomClassifier(X, y_list, test_size=0.20, n_estimators=100, random_state=42):
    """Fit and evaluate one RandomForestClassifier per target.

    For every key obtained by iterating ``y_list``, the data is split into a
    training and a test part, a random forest is fitted on the training part
    and its predictions on the test part are scored.

    Args:
        X: Feature matrix passed to ``train_test_split``.
        y_list: Collection of targets; iterating it must yield keys and
            ``y_list[key]`` must return the target values for that key
            (the notebook passes a DataFrame, so keys are column names).
            The metric functions are called with their default settings,
            so targets are presumably expected to be binary.
        test_size: Fraction of the rows held out for testing.
        n_estimators: Number of trees in each forest.
        random_state: Seed used for both the train/test split and the
            forest, so that repeated runs give identical metrics. Pass
            ``None`` to get the previous unseeded behaviour.

    Returns:
        dict mapping each target key to a dict with the entries
        'Precision', 'Recall', 'F1', 'Accuracy' and 'Confusion'
        (the confusion matrix), all computed on the held-out test part.
    """
    all_results = {}
    for target_name in y_list:
        # Splitting training and testing data; the seed makes the split
        # repeatable (and identical across targets for a given X).
        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y_list[target_name],
            test_size=test_size,
            random_state=random_state,
        )

        # fit random forest classifier to training data
        rfc = RandomForestClassifier(
            n_estimators=n_estimators,
            random_state=random_state,
        )
        rfc.fit(X_train, y_train)

        # make predictions on test data
        y_pred = rfc.predict(X_test)

        # calculate different metrics to assess the quality of the predictions
        all_results[target_name] = {
            'Precision': precision_score(y_true=y_test, y_pred=y_pred),
            'Recall': recall_score(y_true=y_test, y_pred=y_pred),
            'F1': f1_score(y_true=y_test, y_pred=y_pred),
            'Accuracy': accuracy_score(y_test, y_pred),
            'Confusion': confusion_matrix(y_true=y_test, y_pred=y_pred),
        }

    return all_results

In [24]:
# Features: ESG columns; targets: all classified financial columns
X = df_change_binary[ESG_List]
y_list = df_change_binary[Financials_List]

results = CustomClassifier(X, y_list)

# Print one report per target, in the order of the target columns
for y in y_list:
    scores = results[y]
    print(f'{y}')
    # Scalar metrics share the same fixed-point format
    for template, metric in (
        ('    Precision    : {0:1.5f}', 'Precision'),
        ('    Recall       : {0:1.5f}', 'Recall'),
        ('    F1           : {0:1.5f}', 'F1'),
        ('    Accuracy     : {0:1.5f}', 'Accuracy'),
    ):
        print(template.format(scores[metric]))
    print('    Confusion    :')
    print(f"                  {scores['Confusion']}")

Total Return Classified
    Precision    : 0.63
    Recall       : 0.87
    F1           : 0.73
    Accuracy     : 0.60
    Confusion    :
                  [[ 431 2677]
 [ 665 4513]]
Return On Equity - Mean Classified
    Precision    : 0.93
    Recall       : 1.00
    F1           : 0.96
    Accuracy     : 0.93
    Confusion    :
                  [[   3  580]
 [   3 7700]]
Revenue - Mean YoY change Classified
    Precision    : 0.71
    Recall       : 0.96
    F1           : 0.82
    Accuracy     : 0.69
    Confusion    :
                  [[ 117 2337]
 [ 205 5627]]
Earnings Per Share - Mean YoY change Classified
    Precision    : 0.60
    Recall       : 0.84
    F1           : 0.70
    Accuracy     : 0.57
    Confusion    :
                  [[ 510 2787]
 [ 779 4210]]
Company Market Cap YoY change Classified
    Precision    : 0.60
    Recall       : 0.81
    F1           : 0.69
    Accuracy     : 0.56
    Confusion    :
                  [[ 693 2668]
 [ 955 3970]]
EBITDA - Mean Y

In [None]:
# Re-run the classifier on a subset of the targets only.
# Add further names from Financials_List here to test them as well.
Financials_List_To_Test = ['Return On Equity - Mean Classified']

X = df_change_binary[ESG_List]
y_list = df_change_binary[Financials_List_To_Test]

results = CustomClassifier(X, y_list)

# Print one report per selected target
for y in y_list:
    scores = results[y]
    print(f'{y}')
    # Scalar metrics share the same fixed-point format
    for template, metric in (
        ('    Precision    : {0:1.5f}', 'Precision'),
        ('    Recall       : {0:1.5f}', 'Recall'),
        ('    F1           : {0:1.5f}', 'F1'),
        ('    Accuracy     : {0:1.5f}', 'Accuracy'),
    ):
        print(template.format(scores[metric]))
    print('    Confusion    :')
    print(f"                  {scores['Confusion']}")