# 7. Target prediction

In [1]:
import numpy as np
import pandas as pd
import joblib

### Feature groups used as model inputs

In [2]:
# Governance-related columns.
governance = [
    "ceo_is_female",
    "unequal_voting",
    "ceo_tenure",
    "board_size",
    "classified_board_system",
    "poison_pill",
    "buyback_yield",
    "dividend_payout_ratio",
    "cf_to_total_compensation_to_executives",
    "cf_to_total_compensation_to_board_members",
]

# Operating metrics (all "*_industry_peers_percentile" columns).
operation = [
    "cf_to_capex_industry_peers_percentile",
    "net_debt_to_ebitda_industry_peers_percentile",
    "current_ratio_industry_peers_percentile",
    "ebitda_margin_industry_peers_percentile",
    "sales_to_total_assets_industry_peers_percentile",
    "employee_growth_rate_industry_peers_percentile",
    "fcf_yield_industry_peers_percentile",
    "sales_growth_rate_industry_peers_percentile",
    "cash_conversion_cycle_industry_peers_percentile",
    "interest_coverage_ratio_industry_peers_percentile",
]

# Ownership-structure columns.
ownership = [
    "free_float_percentage",
    "institution_ownership_percentage",
    "insider_shares_percentage",
]

# Technical indicators: RSI, volatility and relative trading volume.
technical = [
    "rsi_14d",
    "rsi_30d",
    "volatility_30d",
    "volatility_90d",
    "volatility_180d",
    "volume_30d_average_to_outstanding",
]

# Total-return columns, longest horizon first.
returns = [
    "total_return_5y",
    "total_return_4y",
    "total_return_3y",
    "total_return_2y",
    "total_return_1y",
    "total_return_6m",
    "total_return_3m",
]

# Valuation metrics (all "*_industry_peers_percentile" columns).
valuation = [
    "roe_industry_peers_percentile",
    "operating_roic_industry_peers_percentile",
    "pe_ratio_industry_peers_percentile",
    "eps_industry_peers_percentile",
    "ev_to_sales_industry_peers_percentile",
    "tobin_q_ratio_industry_peers_percentile",
    "pb_ratio_industry_peers_percentile",
    "asset_to_equity_industry_peers_percentile",
    "ev_ebitda_industry_peers_percentile",
    "ev_to_asset_industry_peers_percentile",
]

# Columns listed separately under `binary`; all three already appear in
# `governance`, so they are not appended to `features` a second time.
binary = [
    "unequal_voting",
    "classified_board_system",
    "poison_pill",
]

# Complete, ordered column list: the groups are concatenated in this fixed order.
features = [
    *governance,
    *operation,
    *ownership,
    *technical,
    *returns,
    *valuation,
]

### Load the models

In [3]:
# Deserialize the four saved logistic-regression models, one per sub-directory
# of ../models (KNN, Median, GAIN, MiceForest).
knn_borderlinesmote_LR = joblib.load(
    '../models/KNN/BorderlineSmote/KNN_BorderlineSmote_LogisticRegression.joblib'
)
median_borderlinesmote_LR = joblib.load(
    '../models/Median/BorderlineSmote/Median_BorderlineSmote_LogisticRegression.joblib'
)
gain_borderlinesmote_LR = joblib.load(
    '../models/GAIN/BorderlineSmote/GAIN_BorderlineSmote_LogisticRegression.joblib'
)
miceforest_borderlinesmote_LR = joblib.load(
    '../models/MiceForest/BorderlineSmote/MiceForest_BorderlineSmote_LogisticRegression.joblib'
)

# Label -> model for the models that take part in the ranking. Only the KNN
# entry is enabled; uncomment a line to include that model as well.
_active_models = {
    "KNN_BorderlineSmote_LR_0.782": knn_borderlinesmote_LR,
    # "Median_BorderlineSmote_LR_0.770": median_borderlinesmote_LR,
    # "GAIN_BorderlineSmote_LR_0.768": gain_borderlinesmote_LR,
    # "MiceForest_BorderlineSmote_LR_0.752": miceforest_borderlinesmote_LR,
}

# Parallel lists derived from the mapping (dicts keep insertion order), so the
# i-th name always belongs to the i-th model.
models = list(_active_models.values())
model_names = list(_active_models)


### Load the data

In [4]:
# Load the median-imputed company data.
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk, which avoids mixed-type columns and the
# DtypeWarning they trigger on load.
# NOTE(review): the only active model is loaded from ../models/KNN/..., while
# this file lives under imputation/median -- confirm this pairing is intended.
df = pd.read_csv(
    '../database/companies/imputation/median/median_original.csv',
    low_memory=False,
)

  df = pd.read_csv('../database/companies/imputation/median/median_original.csv')


### Run the model

In [5]:
# Score one year's rows with every active model, convert each model's scores
# to ranks (1 = highest score), and order the companies by their mean rank.

# Year whose rows are scored.
TARGET_YEAR = 2023

# zip() stops at the shorter of its inputs, so a mismatch between the two
# hand-maintained lists would silently drop a model (or surface later as a
# KeyError when mean_rank is computed). Fail early with a clear message instead.
if len(models) != len(model_names):
    raise ValueError(
        f"models ({len(models)}) and model_names ({len(model_names)}) must have the same length"
    )

# Keep only the rows of the target year
df_2023 = df[df['year'] == TARGET_YEAR]
if df_2023.empty:
    raise ValueError(f"No rows found for year {TARGET_YEAR}; nothing to predict")

# Specify features for the prediction
X = df_2023[features]

# Identifier columns; one rank column per model is appended below
df_results = pd.DataFrame({
    'ID': df_2023['ID'],
    'company_name': df_2023['company_name']
})

# Predict and rank for each model
for model, model_name in zip(models, model_names):
    # X is passed unscaled: per the original author's note, scaling is handled
    # inside the model pipelines.
    # Column 1 of predict_proba corresponds to model.classes_[1]
    # (presumably the positive class of a 0/1 target -- TODO confirm).
    prediction_scores = model.predict_proba(X)[:, 1]

    # Highest score gets rank 1; tied scores share the lowest rank of the tie
    ranks = pd.Series(prediction_scores).rank(method='min', ascending=False)

    # Assign positionally (.values) so the ranks line up with df_results' rows
    # regardless of its index labels
    df_results[model_name] = ranks.values

# Compute mean rank across all model columns
df_results['mean_rank'] = df_results[model_names].mean(axis=1)

# Sort ascending so the best (lowest) mean rank comes first
df_results = df_results.sort_values(by='mean_rank', ascending=True)

# Replace the index with 1..n so it reads as the final position in the ranking
# (this also discards the original row labels, so no separate reset is needed)
df_results.index = np.arange(1, len(df_results) + 1)


In [6]:
# Preview the first 30 rows of the ranked table (sorted by ascending mean_rank).
df_results.head(30)

Unnamed: 0,ID,company_name,KNN_BorderlineSmote_LR_0.782,mean_rank
1,AXSM UQ Equity,Axsome Therapeutics Inc,1.0,1.0
2,INGN UW Equity,Inogen Inc,2.0,2.0
3,NUS UN Equity,Nu Skin Enterprises Inc,3.0,3.0
4,BCOV UW Equity,Brightcove Inc,4.0,4.0
5,SILK UW Equity,Silk Road Medical Inc,5.0,5.0
6,KVHI UW Equity,KVH Industries Inc,6.0,6.0
7,OM UW Equity,Outset Medical Inc,7.0,7.0
8,FOSL UW Equity,Fossil Group Inc,8.0,8.0
9,BHR UN Equity,Braemar Hotels & Resorts Inc,9.0,9.0
10,UGI UN Equity,UGI Corp,10.0,10.0


In [7]:
# Write the ranked table to CSV. With to_csv's default index=True, the 1-based
# row index is written as the first (unnamed) column.
df_results.to_csv('../MinwuKim_2023_Target_Prediction.csv')