In [1]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd

In [2]:
df = pd.read_csv("../Train_knight.csv")

for col in df:
	try:
		df[col] = (df[col] - df[col].min()) / (df[col].max() - df[col].min())
	except:
		continue

df['knight'] = [1 if x == 'Jedi' else 0 for x in df['knight']]

In [3]:
msk = np.random.rand(len(df)) <= 0.8
train = df[msk]
test = df[~msk]

X_train = train.loc[:, train.columns != 'knight']
y_train = train.loc[:, 'knight'].values
X_test = test.loc[:, test.columns != 'knight']
y_test = test.loc[:, 'knight'].values

In [4]:
clf1 = LogisticRegression(multi_class='multinomial', random_state=1)
clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
clf3 = KNeighborsClassifier(n_neighbors=11)

eclf1 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')
eclf1 = eclf1.fit(X_train, y_train)

In [5]:
predictions = eclf1.predict(X_test)

conf_matrix = np.zeros((2, 2))
for a, p in zip(y_test, predictions):
    conf_matrix[a][p] += 1

precision_jedi = (conf_matrix[1][1]) / (conf_matrix[1][1] + conf_matrix[0][1])
precision_sith = (conf_matrix[0][0]) / (conf_matrix[0][0] + conf_matrix[1][0])

recall_jedi = conf_matrix[1][1] / (conf_matrix[1][1] + conf_matrix[1][0])
recall_sith = conf_matrix[0][0] / (conf_matrix[0][0] + conf_matrix[0][1])

f1score_jedi = 2 * (precision_jedi * recall_jedi) / (precision_jedi + recall_jedi)
f1score_sith = 2 * (precision_sith * recall_sith) / (precision_sith + recall_sith)

unique, counts = np.unique(y_test, return_counts=True)

df = pd.DataFrame({
    "precision": [precision_jedi, precision_sith],
    "recall": [recall_jedi, recall_sith],
    "f1-score": [f1score_jedi, f1score_sith],
    "total": [counts[0], counts[1]]
}, index=['Jedi', 'Sith'])

df.loc['accuracy'] = ['', '', df['f1-score'].mean(), df['total'].sum()]
df.round(2)

Unnamed: 0,precision,recall,f1-score,total
Jedi,0.96,0.923077,0.94,49
Sith,0.96,0.979592,0.97,26
accuracy,,,0.96,75
