# Load Data

In [1]:
import matplotlib.pyplot as plt
import pandas as pd

data = pd.read_csv("interviewTrain.csv")
test = pd.read_csv("interviewTestEven.csv")

In [2]:
print(data.head())

   height  weight  handson  rich  experience  y
0      42      19       75    60          13  0
1      93      19       96    47          59  1
2      50      61       75    78          83  1
3      11       9       11    77          94  0
4      21      13       69    34          32  0


In [3]:
X = data[["height", "weight", "handson", "rich", "experience"]]
Y = data["y"]

# Load Model

In [4]:
from sklearn.linear_model import LogisticRegression as LR

model_LR = LR()
model_LR.fit(X, Y)
data["y_preLR"] = model_LR.predict(X)

In [5]:
data

Unnamed: 0,height,weight,handson,rich,experience,y,y_preLR
0,42,19,75,60,13,0,0
1,93,19,96,47,59,1,1
2,50,61,75,78,83,1,1
3,11,9,11,77,94,0,0
4,21,13,69,34,32,0,0
...,...,...,...,...,...,...,...
191,77,38,21,50,24,0,0
192,59,84,79,23,11,1,1
193,26,49,5,85,82,0,0
194,67,61,6,95,61,1,1


# Metrics report

In [6]:
from sklearn.metrics import classification_report

target_names = ["reject", "accept"]
print(classification_report(data["y"], data["y_preLR"], target_names=target_names))

              precision    recall  f1-score   support

      reject       0.86      0.87      0.86        99
      accept       0.86      0.86      0.86        97

    accuracy                           0.86       196
   macro avg       0.86      0.86      0.86       196
weighted avg       0.86      0.86      0.86       196



# Manual Report

In [7]:
h_threshold = 40
w_threshold = 30
h_threshold = 60
r_threshold = 60
e_threshold = 50
num_of_pass = 2

In [8]:
data["h"] = data["height"] > h_threshold
data["w"] = data["weight"] > w_threshold
data["h"] = data["handson"] > h_threshold
data["r"] = data["rich"] > r_threshold
data["e"] = data["experience"] > e_threshold
data["count"] = data[["h", "w", "h", "r", "e"]].sum(axis=1)
data["y_manual"] = (data["count"] > num_of_pass).astype(int)

In [9]:
data

Unnamed: 0,height,weight,handson,rich,experience,y,y_preLR,h,w,r,e,count,y_manual
0,42,19,75,60,13,0,0,True,False,False,False,2,0
1,93,19,96,47,59,1,1,True,False,False,True,3,1
2,50,61,75,78,83,1,1,True,True,True,True,5,1
3,11,9,11,77,94,0,0,False,False,True,True,2,0
4,21,13,69,34,32,0,0,True,False,False,False,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
191,77,38,21,50,24,0,0,False,True,False,False,1,0
192,59,84,79,23,11,1,1,True,True,False,False,3,1
193,26,49,5,85,82,0,0,False,True,True,True,3,1
194,67,61,6,95,61,1,1,False,True,True,True,3,1


In [10]:
target_names = ["reject", "accept"]
print(classification_report(data["y_manual"], data["y_preLR"], target_names=target_names))

              precision    recall  f1-score   support

      reject       0.83      0.78      0.80       107
      accept       0.75      0.81      0.78        89

    accuracy                           0.79       196
   macro avg       0.79      0.79      0.79       196
weighted avg       0.79      0.79      0.79       196

