### Binary Classification model results

In [19]:
# Standard library imports
import os
import random
import pickle
import importlib

from pathlib import Path
# Third-party library imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import openpyxl
import geopandas as gpd

# Scikit-learn imports
from sklearn.model_selection import (
    StratifiedKFold,
    GridSearchCV,
    RandomizedSearchCV,
    KFold,
)
from sklearn.metrics import (
    f1_score,
    precision_score,
    recall_score,
    mean_squared_error,
    mean_absolute_error,
    confusion_matrix,
    make_scorer,
)
from sklearn.feature_selection import (
    RFECV,
    SelectKBest,
    SequentialFeatureSelector,
    RFE,
    mutual_info_regression,
    mutual_info_classif,
    f_regression,
)
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.inspection import permutation_importance

# XGBoost imports
import xgboost as xgb
from xgboost.sklearn import XGBClassifier, XGBRegressor
import seaborn as sns


In [6]:
def unweighted_random(y_train, y_test):
    """Baseline predictor that picks every label uniformly at random.

    The candidate labels are the distinct values found in ``y_train``; each
    one is equally likely to be drawn, regardless of how often it occurs.

    Parameters
    ----------
    y_train : pandas.Series
        Training labels; only its set of distinct values is used.
    y_test : sized collection
        Only its length is used, to decide how many labels to draw.

    Returns
    -------
    list
        ``len(y_test)`` labels drawn with replacement.
    """
    class_labels = list(y_train.value_counts(normalize=True).index)
    n_predictions = len(y_test)
    return random.choices(population=class_labels, k=n_predictions)

def weighted_random(y_train, y_test):
    """Baseline predictor that draws labels in proportion to their training frequency.

    Parameters
    ----------
    y_train : pandas.Series
        Training labels; the count of each distinct value is its sampling weight.
    y_test : sized collection
        Only its length is used, to decide how many labels to draw.

    Returns
    -------
    list
        ``len(y_test)`` labels drawn with replacement.
    """
    class_counts = y_train.value_counts()
    labels = list(class_counts.index)
    frequencies = list(class_counts.values)
    return random.choices(population=labels, weights=frequencies, k=len(y_test))

In [9]:



# Move the working directory two levels up from where the notebook was
# started, so that the relative "models/output/..." paths used further down
# resolve (presumably this is the project root - confirm).
#
# The starting directory is captured only the first time the cell runs:
# reading Path.cwd() again after the chdir below would make every re-run of
# this cell climb two more levels.
if "current_dir" not in globals():
    current_dir = Path.cwd()

wor_dir = current_dir / Path('../../')

os.chdir(wor_dir)



In [None]:
# Load the stored predictions of each model. All paths are relative to the
# current working directory.
prediction_paths = (
    "./models/output/v1/df_predicted_xgb_binary.csv",
    "./models/output/v1/df_predicted_rf_binary.csv",
    "./models/output/v1/df_predicted_random_weighted.csv",
    "models/output/v1/df_predicted_random.csv",
)

loaded_predictions = []
for file_name in prediction_paths:
    loaded_predictions.append(pd.read_csv(file_name))

# Unpack in the same order as prediction_paths.
(
    df_predicted_xgb_binary,
    df_predicted_rf_binary,
    df_predicted_random_weighted,
    df_predicted_random,
) = loaded_predictions

In [None]:

# Overall binary-classification scores per model, computed over all rows of
# each prediction table (columns "actual" and "predicted").
models = {
    "Random Forest": df_predicted_rf_binary,
    "XGBoost": df_predicted_xgb_binary,
    "Random": df_predicted_random,
    "Weighted Random": df_predicted_random_weighted,
}

f1 = []
precision = []
recall = []

for df_temp in models.values():
    # Convert once per model; the metrics receive plain Python lists.
    y_true = list(df_temp["actual"])
    y_pred = list(df_temp["predicted"])
    f1.append(f1_score(y_true, y_pred))
    precision.append(precision_score(y_true, y_pred))
    recall.append(recall_score(y_true, y_pred))

# One row per model, in the insertion order of `models`.
df_results_binary = pd.DataFrame(
    {"Models": list(models.keys()), "F1 score": f1, "Recall": recall, "Precision": precision}
)
display(df_results_binary)

In [None]:
# NOTE(review): this cell repeats the overall-score computation of the cell
# before it; one of the two can probably be deleted.
#
# Overall binary-classification scores per model, computed over all rows of
# each prediction table (columns "actual" and "predicted").
models = {
    "Random Forest": df_predicted_rf_binary,
    "XGBoost": df_predicted_xgb_binary,
    "Random": df_predicted_random,
    "Weighted Random": df_predicted_random_weighted,
}

f1 = []
precision = []
recall = []

for df_temp in models.values():
    # Convert once per model; the metrics receive plain Python lists.
    y_true = list(df_temp["actual"])
    y_pred = list(df_temp["predicted"])
    f1.append(f1_score(y_true, y_pred))
    precision.append(precision_score(y_true, y_pred))
    recall.append(recall_score(y_true, y_pred))

# One row per model, in the insertion order of `models`.
df_results_binary = pd.DataFrame(
    {"Models": list(models.keys()), "F1 score": f1, "Recall": recall, "Precision": precision}
)
display(df_results_binary)

In [None]:
### Results

# Per-typhoon scores for every model. Each of the three result dictionaries
# has the shape {model label: {typhoon: score}}.
models = {
    "Random Forest": df_predicted_rf_binary,
    "XGBoost": df_predicted_xgb_binary,
    "Random": df_predicted_random,
    "Weighted Random": df_predicted_random_weighted,
}

f1dic = {}
precisiondic = {}
recalldic = {}

for keys, df_temp in models.items():
    f1 = {}
    precision = {}
    recall = {}
    # Group by the column name itself rather than a one-element list:
    # with a length-1 list grouper pandas >= 2.0 yields 1-tuples as keys,
    # which would end up as tuple labels in the tables built from these dicts.
    for key, groups in df_temp.groupby('typhoon'):
        y_true = list(groups["actual"])
        y_pred = list(groups["predicted"])
        f1[key] = f1_score(y_true, y_pred)
        precision[key] = precision_score(y_true, y_pred)
        recall[key] = recall_score(y_true, y_pred)

    f1dic[keys] = f1
    precisiondic[keys] = precision
    recalldic[keys] = recall

In [None]:
# Quick inspection of the per-typhoon score dictionaries. These are bare
# expressions; a notebook renders only the value of a cell's last expression.
f1dic 
precisiondic 
recalldic.keys()
# NOTE(review): the list below looks like pasted cell output (the model
# labels) rather than code - confirm whether it belongs in this cell.
['Random Fores', 'XGBoost', 'Random', 'Weighted Random']

In [None]:
# Reshape {model: {typhoon: recall}} into one long table with a row per
# (model, typhoon) pair and the columns typhoon / Recall / Models.
list_df = [
    pd.DataFrame.from_dict(per_typhoon, orient='index')
    .reset_index()
    .rename(columns={'index': 'typhoon', 0: 'Recall'})
    .assign(Models=model_label)
    for model_label, per_typhoon in recalldic.items()
]
df_recall = pd.concat(list_df)

plt.rcParams.update({'font.size': 18})
sns.set_theme(style="ticks")

# Single-panel figure, 11 x 6 inches.
f, ax = plt.subplots(figsize=(11, 6))

# Distribution of per-typhoon recall: one horizontal box per model.
sns.boxplot(
    data=df_recall, x='Recall', y='Models',
    whis=[0, 99], width=.4, palette="vlag", ax=ax,
)

# Overlay the individual per-typhoon values as small dots.
sns.stripplot(
    data=df_recall, x='Recall', y='Models',
    size=2, color=".3", linewidth=0, ax=ax,
)

# Vertical grid lines, no y-axis label, trimmed spines.
ax.xaxis.grid(True)
ax.set(ylabel="")
sns.despine(left=True, trim=True)
f.savefig('clas_algorithms_comparsion_recall.jpg', format='jpg', dpi=600)

In [None]:
# Display the list of per-model frames currently stored in list_df.
list_df


In [None]:
# Reshape {model: {typhoon: F1}} into one long table with a row per
# (model, typhoon) pair and the columns typhoon / F1 score / Models.
list_df = [
    pd.DataFrame.from_dict(per_typhoon, orient='index')
    .reset_index()
    .rename(columns={'index': 'typhoon', 0: 'F1 score'})
    .assign(Models=model_label)
    for model_label, per_typhoon in f1dic.items()
]
df_F1 = pd.concat(list_df)

plt.rcParams.update({'font.size': 18})
sns.set_theme(style="ticks")

# Single-panel figure, 11 x 6 inches.
f, ax = plt.subplots(figsize=(11, 6))

# Distribution of per-typhoon F1 score: one horizontal box per model.
sns.boxplot(
    data=df_F1, x='F1 score', y='Models',
    whis=[0, 99], width=.4, palette="vlag", ax=ax,
)

# Overlay the individual per-typhoon values as small dots.
sns.stripplot(
    data=df_F1, x='F1 score', y='Models',
    size=2, color=".3", linewidth=0, ax=ax,
)

# Vertical grid lines, no y-axis label, trimmed spines.
ax.xaxis.grid(True)
ax.set(ylabel="")
sns.despine(left=True, trim=True)
f.savefig('clas_algorithms_comparsion_F1.jpg', format='jpg', dpi=600)

In [None]:
# Reshape {model: {typhoon: precision}} into one long table with a row per
# (model, typhoon) pair and the columns typhoon / precision / Models.
# The values must come from precisiondic; reading f1dic here would plot
# F1 scores under a "precision" label.
list_df = [
    pd.DataFrame.from_dict(per_typhoon, orient='index')
    .reset_index()
    .rename(columns={'index': 'typhoon', 0: 'precision'})
    .assign(Models=model_label)
    for model_label, per_typhoon in precisiondic.items()
]

# Stored under its own name so the recall table (df_recall) is not overwritten.
df_precision = pd.concat(list_df)

plt.rcParams.update({'font.size': 18})
sns.set_theme(style="ticks")

# Single-panel figure, 11 x 6 inches.
f, ax = plt.subplots(figsize=(11, 6))

# Distribution of per-typhoon precision: one horizontal box per model.
sns.boxplot(y='Models', x='precision', data=df_precision,
            whis=[0, 99], width=.4, palette="vlag", ax=ax)

# Overlay the individual per-typhoon values as small dots.
sns.stripplot(y='Models', x='precision', data=df_precision,
              size=2, color=".3", linewidth=0, ax=ax)

# Vertical grid lines, no y-axis label, trimmed spines.
ax.xaxis.grid(True)
ax.set(ylabel="")
sns.despine(trim=True, left=True)
f.savefig('clas_algorithms_comparsion_precision.jpg', format='jpg', dpi=600)

In [None]:
# Inspect the typhoon labels of the long-format F1 table.
df_F1['typhoon']

In [36]:
# Add a cleaned copy of the typhoon labels with every "(", ")" and ","
# character removed; the original 'typhoon' column is left as it is.
df_F1['typhoon1'] = df_F1['typhoon'].str.replace(pat=r'[(),]', repl='', regex=True)

In [None]:
 
# Scatter plot of the per-typhoon F1 score, coloured by model, saved as a JPG.
plt.rcParams.update({'font.size': 16}) 


g=sns.relplot(
    data=df_F1,#.sort_values(by='F1',ascending=False),
    # NOTE(review): the x axis uses the raw 'typhoon' column; the cleaned
    # 'typhoon1' column added to df_F1 in an earlier cell is not used here -
    # confirm which one is intended.
    x="typhoon", y="F1 score", hue="Models", palette="rocket", marker="o", s=100)
g.set_axis_labels("Typhoon Name/Year", "F1 score", labelpad=20)
# Rename the legend title from the hue column name to "Model".
g.legend.set_title("Model")
g.figure.set_size_inches(16.5, 9.5)
# Rotate the typhoon tick labels to vertical.
g.set_xticklabels(rotation=90)
g.ax.margins(.05)
g.despine(trim=True) 
# NOTE(review): g.legend is already used above, so a legend presumably exists
# at this point; calling add_legend again may draw a second one and replace
# the object whose title was just set - confirm on the rendered figure.
g.add_legend(frameon=True)
# Position the legend via its bounding-box anchor.
g.legend.set_bbox_to_anchor((1.05, 1.06))
#sns.move_legend("lower center", bbox_to_anchor=(.5, 1), ncol=3, title=None, frameon=False)
g.savefig('classfication_algorithms_comparsion.jpg', format='jpg', dpi=400)
