In [3]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.metrics import roc_auc_score, roc_curve, precision_recall_curve

import warnings
warnings.filterwarnings('ignore')

import pickle
import json

In [4]:
df=pd.read_csv("Job_Placement_Data.csv")
df

Unnamed: 0,gender,ssc_percentage,ssc_board,hsc_percentage,hsc_board,hsc_subject,degree_percentage,undergrad_degree,work_experience,emp_test_percentage,specialisation,mba_percent,status
0,M,67.00,Others,91.00,Others,Commerce,58.00,Sci&Tech,No,55.0,Mkt&HR,58.80,Placed
1,M,79.33,Central,78.33,Others,Science,77.48,Sci&Tech,Yes,86.5,Mkt&Fin,66.28,Placed
2,M,65.00,Central,68.00,Central,Arts,64.00,Comm&Mgmt,No,75.0,Mkt&Fin,57.80,Placed
3,M,56.00,Central,52.00,Central,Science,52.00,Sci&Tech,No,66.0,Mkt&HR,59.43,Not Placed
4,M,85.80,Central,73.60,Central,Commerce,73.30,Comm&Mgmt,No,96.8,Mkt&Fin,55.50,Placed
...,...,...,...,...,...,...,...,...,...,...,...,...,...
210,M,80.60,Others,82.00,Others,Commerce,77.60,Comm&Mgmt,No,91.0,Mkt&Fin,74.49,Placed
211,M,58.00,Others,60.00,Others,Science,72.00,Sci&Tech,No,74.0,Mkt&Fin,53.62,Placed
212,M,67.00,Others,67.00,Others,Commerce,73.00,Comm&Mgmt,Yes,59.0,Mkt&Fin,69.72,Placed
213,F,74.00,Others,66.00,Others,Commerce,58.00,Comm&Mgmt,No,70.0,Mkt&HR,60.23,Placed


In [5]:
df['gender'].replace({'M':0,'F':1},inplace=True)

In [6]:
df['ssc_board'].replace({'Central':0,'Others':1},inplace=True)

In [7]:
df['hsc_board'].replace({'Central':0,'Others':1},inplace=True)

In [8]:
df['hsc_subject'].replace({'Science':0,'Commerce':1,'Arts':2},inplace=True)

In [9]:
df['undergrad_degree'].replace({'Comm&Mgmt':0,'Sci&Tech':1,'Others':2},inplace=True)

In [10]:
df['work_experience'].replace({'No':0,'Yes':1},inplace=True)

In [11]:
df['specialisation'].replace({'Mkt&Fin':0,'Mkt&HR':1},inplace=True)

### Train_Test_split

In [12]:
x=df.drop('status',axis=1)
y=df['status']

x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=2)

In [13]:
log_clf = LogisticRegression()
log_clf.fit(x_train, y_train)

### Model Evaluation

In [14]:
def evaluate_model(model, ind_var, act):
    pred = model.predict(ind_var)
    
    acc_score = accuracy_score(act,pred)
    print('Accuracy Score:', acc_score)
    print('**'*20)
    
    cnf_matrix = confusion_matrix(act, pred)
    print('Confision Matrix: \n', cnf_matrix)
    print('**'*20)
    
    clf_report = classification_report(act, pred)
    print('Classification Report :\n', clf_report)
    
    return pred
    
print('Testing Data Evaluation'.center(60, '*'))   
y_pred = evaluate_model(log_clf, x_test, y_test)

******************Testing Data Evaluation*******************
Accuracy Score: 0.813953488372093
****************************************
Confision Matrix: 
 [[ 7  6]
 [ 2 28]]
****************************************
Classification Report :
               precision    recall  f1-score   support

  Not Placed       0.78      0.54      0.64        13
      Placed       0.82      0.93      0.87        30

    accuracy                           0.81        43
   macro avg       0.80      0.74      0.76        43
weighted avg       0.81      0.81      0.80        43



In [15]:
print('Training Data Evaluation'.center(60, '*'))   
evaluate_model(log_clf, x_train, y_train)

******************Training Data Evaluation******************
Accuracy Score: 0.8953488372093024
****************************************
Confision Matrix: 
 [[ 44  10]
 [  8 110]]
****************************************
Classification Report :
               precision    recall  f1-score   support

  Not Placed       0.85      0.81      0.83        54
      Placed       0.92      0.93      0.92       118

    accuracy                           0.90       172
   macro avg       0.88      0.87      0.88       172
weighted avg       0.89      0.90      0.89       172



array(['Placed', 'Not Placed', 'Not Placed', 'Placed', 'Not Placed',
       'Placed', 'Placed', 'Placed', 'Placed', 'Placed', 'Placed',
       'Placed', 'Placed', 'Placed', 'Placed', 'Placed', 'Not Placed',
       'Not Placed', 'Placed', 'Not Placed', 'Placed', 'Placed',
       'Not Placed', 'Placed', 'Placed', 'Placed', 'Placed', 'Not Placed',
       'Placed', 'Not Placed', 'Placed', 'Placed', 'Not Placed', 'Placed',
       'Placed', 'Placed', 'Placed', 'Placed', 'Not Placed', 'Placed',
       'Placed', 'Placed', 'Not Placed', 'Placed', 'Placed', 'Not Placed',
       'Placed', 'Placed', 'Placed', 'Placed', 'Placed', 'Placed',
       'Placed', 'Placed', 'Not Placed', 'Not Placed', 'Placed',
       'Not Placed', 'Placed', 'Not Placed', 'Not Placed', 'Placed',
       'Placed', 'Not Placed', 'Placed', 'Placed', 'Not Placed',
       'Not Placed', 'Placed', 'Not Placed', 'Placed', 'Not Placed',
       'Not Placed', 'Placed', 'Placed', 'Placed', 'Not Placed', 'Placed',
       'Placed', 'Plac

In [None]:
['gender', 'ssc_percentage', 'ssc_board', 'hsc_percentage', 'hsc_board','hsc_subject', 'degree_percentage', 'undergrad_degree',
       'work_experience', 'emp_test_percentage', 'specialisation',
       'mba_percent'],

In [16]:
test_array = np.array([0,78,1,70,1,1,72,1,1,60,1,60], ndmin = 2)
test_array

array([[ 0, 78,  1, 70,  1,  1, 72,  1,  1, 60,  1, 60]])

In [23]:
result=log_clf .predict(test_array)
print(result)

['Placed']


In [18]:
project_data = {'gender': {'M':0, 'F':1},
               'ssc_board': {'Central':0, 'Others':1},
                'hsc_subject':{'Science':0,'Commerce':1,'Arts':2},
                'hsc_board': {'Central':0, 'Others':1},
                'undergrad_degree':{'Comm&Mgmt':0,'Sci&Tech':1,'Others':2},
                'work_experience':{'No':0,'Yes':1},
                'specialisation':{'Mkt&Fin':0,'Mkt&HR':1},
               'columns': list(x.columns)}
project_data

{'gender': {'M': 0, 'F': 1},
 'ssc_board': {'Central': 0, 'Others': 1},
 'hsc_subject': {'Science': 0, 'Commerce': 1, 'Arts': 2},
 'hsc_board': {'Central': 0, 'Others': 1},
 'undergrad_degree': {'Comm&Mgmt': 0, 'Sci&Tech': 1, 'Others': 2},
 'work_experience': {'No': 0, 'Yes': 1},
 'specialisation': {'Mkt&Fin': 0, 'Mkt&HR': 1},
 'columns': ['gender',
  'ssc_percentage',
  'ssc_board',
  'hsc_percentage',
  'hsc_board',
  'hsc_subject',
  'degree_percentage',
  'undergrad_degree',
  'work_experience',
  'emp_test_percentage',
  'specialisation',
  'mba_percent']}

In [20]:
with open('Logistic_Model.pkl', 'wb') as f:
    pickle.dump(log_clf, f)

In [21]:
with open('project_data.json', 'w') as f:
    json.dump(project_data, f)

In [22]:
x.columns

Index(['gender', 'ssc_percentage', 'ssc_board', 'hsc_percentage', 'hsc_board',
       'hsc_subject', 'degree_percentage', 'undergrad_degree',
       'work_experience', 'emp_test_percentage', 'specialisation',
       'mba_percent'],
      dtype='object')