In [None]:
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

import warnings
warnings.filterwarnings(action = 'ignore')

In [None]:
data = pd.read_csv('../input/factors-affecting-campus-placement/Placement_Data_Full_Class.csv')

In [None]:
data

In [None]:
data.isna().sum()

In [None]:
data[data['status'] == 'Placed'].isna().sum()

In [None]:
data['salary'].fillna(0, inplace = True)

In [None]:
data.drop('sl_no', axis = 1, inplace = True)

In [None]:
data.isna().sum()
data

In [None]:
# Frequency of each distinct value in 'ssc_b'.
data['ssc_b'].value_counts()

In [None]:
# Frequency of each distinct value in 'hsc_b'.
data['hsc_b'].value_counts()

In [None]:
# Frequency of each distinct value in 'hsc_s' (one-hot encoded later).
data['hsc_s'].value_counts()

In [None]:
# Frequency of each distinct value in 'degree_t' (one-hot encoded later).
data['degree_t'].value_counts()

In [None]:
# Frequency of each distinct value in 'specialisation'.
data['specialisation'].value_counts()

In [None]:
def preprocess_inputs(df):
    """Encode the two-valued categorical columns of the placement data as 0/1.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the 'workex', 'ssc_b', 'hsc_b' and
        'specialisation' columns. It is copied first, so the caller's frame
        is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame in which:
        * every 'M'/'F' value (in any column) becomes 1/0,
        * every 'Placed'/'Not Placed' value (in any column) becomes 1/0,
        * 'workex' Yes/No becomes 1/0,
        * 'ssc_b' and 'hsc_b' Central/Others become 1/0,
        * 'specialisation' Mkt&Fin/Mkt&HR becomes 1/0.
        Values not listed above are left unchanged.
    """
    df = df.copy()

    # Frame-wide replacements: these labels are rewritten wherever they occur.
    df = df.replace({'M' : 1, 'F' : 0, 'Placed' : 1, 'Not Placed' : 0})

    # Column-scoped replacements. 'Others' is only rewritten inside the board
    # columns, so the same label in any other column is left alone.
    board_codes = {'Central' : 1, 'Others' : 0}
    df['workex'] = df['workex'].replace({'Yes' : 1, 'No' : 0})
    df['ssc_b'] = df['ssc_b'].replace(board_codes)
    # Encode hsc_b from its own values; it was previously overwritten with the
    # encoded ssc_b column, which discarded the higher-secondary board feature.
    df['hsc_b'] = df['hsc_b'].replace(board_codes)
    df['specialisation'] = df['specialisation'].replace({'Mkt&Fin' : 1, 'Mkt&HR' : 0})

    return df

In [None]:
# Apply the 0/1 encodings. preprocess_inputs works on a copy, so `data` itself
# keeps its original string labels.
data1 = preprocess_inputs(data)
data1

In [None]:
# Categorical columns that are expanded into indicator columns by onehot_encode.
onehot = ['degree_t', 'hsc_s']

In [None]:
def onehot_encode(df, columns):
    """Replace each listed column with its indicator (dummy) columns.

    For every name in `columns`, the indicator columns produced by
    `pd.get_dummies` are appended on the right of the frame and the source
    column is dropped. The indicator columns are named after the category
    values themselves (no prefix is added).

    The input frame is not modified. When `columns` is empty the same frame
    object is returned.
    """
    encoded = df
    for name in columns:
        indicators = pd.get_dummies(encoded[name])
        encoded = pd.concat([encoded, indicators], axis = 1).drop(name, axis = 1)

    return encoded

In [None]:
data2 = onehot_encode(data1, onehot)
data2

In [None]:
figure = plt.figure(figsize=(10,10))
corr = data2.corr()

sns.heatmap(corr, cmap = 'Blues', fmt = '.2g', annot = True, cbar=True)

In [None]:
X = data2.drop('status', axis = 1)
Y = data2['status']

In [None]:
X

In [None]:
Y

In [None]:
scaler = StandardScaler()
scaler.fit(X)
X = pd.DataFrame(scaler.transform(X), columns = X.columns)  

In [None]:
#removing salary as it is directly proportional to geetting Placed
X.drop('salary', axis = 1, inplace = True)
X

In [None]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size = 0.8, random_state = 1)

In [None]:
print(X.shape, X_train.shape,X_test.shape)

In [None]:
# Seed every estimator so that a fresh "Restart & Run All" reproduces the same
# scores; the train/test split is already seeded with the same value.
SEED = 1

models = {
    'Logistic Regression' : LogisticRegression(random_state = SEED),
    'Decision Tree' : DecisionTreeClassifier(random_state = SEED),
    'Random Forest' : RandomForestClassifier(random_state = SEED),
    'Neural Network' : MLPClassifier(random_state = SEED)
}

# Fit each model on the training split.
for name, model in models.items():
    model.fit(X_train, Y_train)
    print(name + ' trained!')

In [None]:
# Report each fitted model's accuracy on the held-out test split.
for label, estimator in models.items():
    accuracy_pct = estimator.score(X_test, Y_test) * 100
    print(f'{label} Accuracy is: {accuracy_pct:.2f}%')
    
    