In [1]:
import pandas as pd
import numpy as np


In [2]:
# Load the employee performance data and preview the first rows.
# Column legend:
#   empid     - Employee ID
#   jpi       - Job Profile Index
#   aptitude  - Aptitude Score
#   tol       - (no description given in the original notes)
#   technical - Technical Aptitude
#   general   - General Aptitude
data = pd.read_csv('Performance Index.csv')
data.head()


Unnamed: 0,empid,jpi,aptitude,tol,technical,general
0,1,45.52,43.83,55.92,51.82,43.58
1,2,40.1,32.71,32.56,51.49,51.03
2,3,50.61,56.64,54.84,52.29,52.47
3,4,38.97,51.53,59.69,47.48,47.69
4,5,41.87,51.35,51.5,47.59,45.77


In [30]:
# Check for missing values: evaluates to True if any cell in the frame is null.
data.isna().values.any()

False

In [31]:
# Set features and label.
# The predictors are selected by column name rather than by position, so the
# model keeps using the intended columns even if the CSV's column order changes
# (the original positional indices 2..5 map to exactly these four columns).
# empid is left out because an employee identifier is not relevant to the
# linear model.
feature_columns = ['aptitude', 'tol', 'technical', 'general']
features = data[feature_columns].values
# Target variable: Job Profile Index.
label = data['jpi'].values

In [4]:
# Fit a linear regression of the label (jpi) on the selected features.
from sklearn.linear_model import LinearRegression

modelS = LinearRegression()
# Kept as the last expression so the fitted estimator's repr is displayed.
modelS.fit(X=features, y=label)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [5]:
# Model (R2) score. It is evaluated on the same features/label the model was
# fitted on, so this is an in-sample score.
modelS.score(X=features, y=label)

0.8767885708295509

In [6]:
# Intercept (constant term) of the fitted model equation.
modelS.intercept_

-54.28224697945714

In [7]:
# Coefficients of the fitted model equation, one per feature column in the
# order the features were passed to fit().
# These can be cross-checked against a linear model built in R; the author
# notes that a separate R linear-regression file lives in the same folder.
modelS.coef_

array([0.32356183, 0.03337241, 1.09546675, 0.53683395])

In [8]:
# This check could have been run before fitting the model; it was done afterwards
# to see how the results would change once 'not so important' features were removed.
# As it turned out, every feature was significant in this dataset.
# Test the relationship between each independent variable and the dependent
# variable (jpi) using Pearson's correlation coefficient.
# Feature under test: Aptitude
from scipy.stats import pearsonr

correlation, pvalue = pearsonr(data.aptitude, data.jpi)
for statistic in (correlation, pvalue):
    print(statistic)
# NOTE(review): the figure below is simply (1 - p) * 100. It is labelled
# "Confidence Level", but 1 - p is not a confidence level in the strict
# statistical sense; read it only as a restatement of the p-value.
print("Confidence Level : {} %".format((1 - pvalue) * 100))
# A p-value at or below 0.05 is treated as evidence of a linear relationship.
verdict = (
    "Alternate Hypothesis (H1) - Aptitude and JPI has linear relationship"
    if pvalue <= 0.05
    else "Null Hypothesis (H0)- Aptitude and JPI has no linear relationship"
)
print(verdict)

0.409322548968902
0.01801057489801154
Confidence Level : 98.19894251019885 %
Alternate Hypothesis (H1) - Aptitude and JPI has linear relationship


In [11]:
# Test the relationship between an independent variable and the dependent
# variable (jpi) using Pearson's correlation coefficient.
# Feature under test: TOL
from scipy.stats import pearsonr

correlation, pvalue = pearsonr(data.tol, data.jpi)
for statistic in (correlation, pvalue):
    print(statistic)
# NOTE(review): the figure below is simply (1 - p) * 100. It is labelled
# "Confidence Level", but 1 - p is not a confidence level in the strict
# statistical sense; read it only as a restatement of the p-value.
print("Confidence Level : {} %".format((1 - pvalue) * 100))
# A p-value at or below 0.05 is treated as evidence of a linear relationship.
verdict = (
    "Alternate Hypothesis (H1) - TOL and JPI has linear relationship"
    if pvalue <= 0.05
    else "Null Hypothesis (H0)- TOL and JPI has no linear relationship"
)
print(verdict)

0.4803521125368063
0.004665312624540699
Confidence Level : 99.53346873754593 %
Alternate Hypothesis (H1) - TOL and JPI has linear relationship


In [10]:
# Test the relationship between an independent variable and the dependent
# variable (jpi) using Pearson's correlation coefficient.
# Feature under test: Technical
from scipy.stats import pearsonr

correlation, pvalue = pearsonr(data.technical, data.jpi)
for statistic in (correlation, pvalue):
    print(statistic)
# NOTE(review): the figure below is simply (1 - p) * 100. It is labelled
# "Confidence Level", but 1 - p is not a confidence level in the strict
# statistical sense; read it only as a restatement of the p-value.
print("Confidence Level : {} %".format((1 - pvalue) * 100))
# A p-value at or below 0.05 is treated as evidence of a linear relationship.
verdict = (
    "Alternate Hypothesis (H1) - Technical and JPI has linear relationship"
    if pvalue <= 0.05
    else "Null Hypothesis (H0)- Technical and JPI has no linear relationship"
)
print(verdict)
# Technical and JPI have a strong positive correlation.

0.8077481453251263
1.3315243526897045e-08
Confidence Level : 99.99999866847564 %
Alternate Hypothesis (H1) - Technical and JPI has linear relationship


In [12]:
# Test the relationship between an independent variable and the dependent
# variable (jpi) using Pearson's correlation coefficient.
# Feature under test: General
from scipy.stats import pearsonr

correlation, pvalue = pearsonr(data.general, data.jpi)
for statistic in (correlation, pvalue):
    print(statistic)
# NOTE(review): the figure below is simply (1 - p) * 100. It is labelled
# "Confidence Level", but 1 - p is not a confidence level in the strict
# statistical sense; read it only as a restatement of the p-value.
print("Confidence Level : {} %".format((1 - pvalue) * 100))
# A p-value at or below 0.05 is treated as evidence of a linear relationship.
verdict = (
    "Alternate Hypothesis (H1) - General and JPI has linear relationship"
    if pvalue <= 0.05
    else "Null Hypothesis (H0)- General and JPI has no linear relationship"
)
print(verdict)


0.7900898894674637
4.555631088120382e-08
Confidence Level : 99.99999544436892 %
Alternate Hypothesis (H1) - General and JPI has linear relationship


In [13]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column;
# the min/max rows give the observed range of every feature.
data.describe()

Unnamed: 0,empid,jpi,aptitude,tol,technical,general
count,33.0,33.0,33.0,33.0,33.0,33.0
mean,17.0,47.865758,52.660606,53.990606,52.01697,49.036364
std,9.66954,9.458342,10.054991,10.149654,4.98145,5.636432
min,1.0,31.64,32.71,32.56,41.25,37.0
25%,9.0,41.19,45.59,44.89,48.34,45.07
50%,17.0,49.45,53.38,57.04,51.64,50.53
75%,25.0,53.92,56.75,61.28,54.68,53.5
max,33.0,66.39,75.03,68.53,67.27,58.9


In [26]:
# Every feature has a linear relationship with the label, so none of them is
# removed from the model; the fitted model is now used for predictions.
# The min/max values of the features are available from the summary statistics
# (consider only the features that are part of the model).
aptitude = float(input("Enter Aptitude Score :"))
tol = float(input("Enter TOL Score :"))
technical = float(input("Enter Technical Score :"))
general = float(input("Enter General Score :"))
# predict() receives one row whose columns follow the order used when fitting.
employee_scores = np.array([[aptitude, tol, technical, general]])
jpi = modelS.predict(employee_scores)
print("Job Profile Index(JPI) of the employee is {}".format(jpi))


Enter Aptitude Score :52.66
Enter TOL Score :54
Enter Technical Score :52
Enter General Score :49
Job Profile Index(JPI) of the employee is [47.827764]
