**Data Retrival**

In [20]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'

In [21]:
dataFrame = pd.read_csv("/content/student_records.csv")

In [22]:
dataFrame.head()

Unnamed: 0,Name,OverallGrade,Obedient,ResearchScore,ProjectScore,Recommend
0,Henry,A,Y,90,85,Yes
1,John,C,N,85,51,Yes
2,David,F,N,10,17,No
3,Holmes,B,Y,75,71,No
4,Marvin,E,N,20,30,No


***Data  Preparation***

Feature extraction and engineering


In [23]:
#features and their outcomes
feature_names = ['OverallGrade', 'Obedient','ResearchScore','ProjectScore']
training_features = dataFrame[feature_names]

outcome_name = ['Recommend']
outcome_labels = dataFrame[outcome_name]

In [24]:
#view the features
training_features

Unnamed: 0,OverallGrade,Obedient,ResearchScore,ProjectScore
0,A,Y,90,85
1,C,N,85,51
2,F,N,10,17
3,B,Y,75,71
4,E,N,20,30
5,A,Y,92,79
6,B,Y,60,59
7,C,Y,75,33


In [25]:
#outcome labes
outcome_labels

Unnamed: 0,Recommend
0,Yes
1,Yes
2,No
3,No
4,No
5,Yes
6,No
7,No


In [26]:
#list down values based on type
numeric_feature_names = ['ResearchScore','ProjectScore']
categorical_feature_names = ['OverallGrade','Obedient']

Numeric scalling

In [27]:
from sklearn.preprocessing import StandardScaler
standardscaler = StandardScaler()

#fit scaler on numeric features
standardscaler.fit(training_features[numeric_feature_names])

#scaler feature names 
training_features[numeric_feature_names] = standardscaler.transform(training_features[numeric_feature_names])
training_features

Unnamed: 0,OverallGrade,Obedient,ResearchScore,ProjectScore
0,A,Y,0.899583,1.37665
1,C,N,0.730648,-0.091777
2,F,N,-1.80339,-1.560203
3,B,Y,0.392776,0.772004
4,E,N,-1.465519,-0.998746
5,A,Y,0.967158,1.117516
6,B,Y,-0.114032,0.253735
7,C,Y,0.392776,-0.869179


Engenering categorical values

In [28]:
training_features = pd.get_dummies(training_features, columns=categorical_feature_names)
#view new generated values
training_features

Unnamed: 0,ResearchScore,ProjectScore,OverallGrade_A,OverallGrade_B,OverallGrade_C,OverallGrade_E,OverallGrade_F,Obedient_N,Obedient_Y
0,0.899583,1.37665,1,0,0,0,0,0,1
1,0.730648,-0.091777,0,0,1,0,0,1,0
2,-1.80339,-1.560203,0,0,0,0,1,1,0
3,0.392776,0.772004,0,1,0,0,0,0,1
4,-1.465519,-0.998746,0,0,0,1,0,1,0
5,0.967158,1.117516,1,0,0,0,0,0,1
6,-0.114032,0.253735,0,1,0,0,0,0,1
7,0.392776,-0.869179,0,0,1,0,0,0,1


In [29]:
#get list of new categorical features
categorical_engineered_features = list(set(training_features.columns) - set(numeric_feature_names))
# categorical_engineered_features

**Modelling**

In [30]:
from sklearn.linear_model import LogisticRegression
import numpy as np

lr = LogisticRegression()
#fit model
model = lr.fit(training_features,np.array(outcome_labels['Recommend']))

#view paratemers
model


**Model evaluation**

In [31]:
#simple evaluation on the tradining data
predicted_labels = model.predict(training_features)
actual_labels = np.array(outcome_labels["Recommend"])



In [32]:
#evaluate the model performance
from sklearn.metrics import accuracy_score,classification_report

print("Accuarcy of the model : ",float(accuracy_score(actual_labels,predicted_labels))*100 ,'%')

print("classification stastics")
print(classification_report(actual_labels,predicted_labels))




Accuarcy of the model :  100.0 %
classification stastics
              precision    recall  f1-score   support

          No       1.00      1.00      1.00         5
         Yes       1.00      1.00      1.00         3

    accuracy                           1.00         8
   macro avg       1.00      1.00      1.00         8
weighted avg       1.00      1.00      1.00         8



**Model deployement**

In [33]:
# import sklearn.external.joblib as extjoblib
import joblib
# from sklearn.externals import joblib
import os
# save models to be deployed on your server
if not os.path.exists('Model'):
    os.mkdir('Model')
if not os.path.exists('Scaler'):
    os.mkdir('Scaler') 
    
joblib.dump(model, r'Model/model.pickle') 
joblib.dump(standardscaler, r'Scaler/scaler.pickle') 

['Scaler/scaler.pickle']

**Prediction and action**

In [34]:
model = joblib.load(r'Model/model.pickle')
scaler = joblib.load(r'Scaler/scaler.pickle')

In [35]:
new_data = pd.DataFrame([{'Name': 'Nathan','OverallGrade': 'F','Obedient':'N','ResearchScore' : 30, 'ProjectScore' : 20},
                         {'Name':'Thomas','OverallGrade':'A','Obedient':'Y', 'ResearchScore': 78,'ProjectScore': 80}])

new_data = new_data[['OverallGrade', 'Obedient','ResearchScore','ProjectScore']]
new_data

Unnamed: 0,OverallGrade,Obedient,ResearchScore,ProjectScore
0,F,N,30,20
1,A,Y,78,80


In [36]:
#data preparation
prediction_features = new_data[feature_names]

#scaling
prediction_features[numeric_feature_names] = scaler.transform(prediction_features[numeric_feature_names])

#engenering categorical values
prediction_features = pd.get_dummies(prediction_features,columns=categorical_feature_names)

#view feature data
prediction_features

Unnamed: 0,ResearchScore,ProjectScore,OverallGrade_A,OverallGrade_F,Obedient_N,Obedient_Y
0,-1.127647,-1.430636,0,1,1,0
1,0.494137,1.160705,1,0,0,1


In [39]:
#add missing values to the categoriacl features columns
current_categorical_engineered_features = set(prediction_features.columns) - set(numeric_feature_names)
missing_features = set(categorical_engineered_features) - current_categorical_engineered_features

for feature in missing_features:
  #add zero since the feature is absent
  prediction_features[feature] = [0] * len(prediction_features)

#view final features
prediction_features

Unnamed: 0,ResearchScore,ProjectScore,OverallGrade_A,OverallGrade_F,Obedient_N,Obedient_Y,OverallGrade_C,OverallGrade_E,OverallGrade_B
0,-1.127647,-1.430636,0,1,1,0,0,0,0
1,0.494137,1.160705,1,0,0,1,0,0,0


In [44]:
feature_names = model.coef_
prediction_features = prediction_features.values

In [45]:
#predict using the model
predictions = model.predict(prediction_features)
#display results
new_data['Recommend'] = predictions
new_data



Unnamed: 0,OverallGrade,Obedient,ResearchScore,ProjectScore,Recommend
0,F,N,30,20,No
1,A,Y,78,80,Yes
