In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from word2number import w2n

In [3]:
# Read the raw hiring records from the CSV file in the working directory.
csv_path = 'hiring.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)\t
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [4]:
# Missing experience entries are filled with the word 'zero' so that every
# value in the column is a number word that can be converted later on.
df['experience'] = df['experience'].fillna('zero')
df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)\t
0,zero,8.0,9,50000
1,zero,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [5]:
# Median of the test-score column (column position 1). The column is sliced
# as a one-column frame, so the result is a one-element Series keyed by the
# column name, which can be handed directly to fillna.
test_scores = df.iloc[:, 1:2]
score_median = test_scores.median()
score_median

test_score(out of 10)    8.0
dtype: float64

In [6]:
# Replace missing test scores with the median stored in score_median.
filled_scores = df.iloc[:, 1:2].fillna(score_median)
df.iloc[:, 1:2] = filled_scores
df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)\t
0,zero,8.0,9,50000
1,zero,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,8.0,7,72000
7,eleven,7.0,8,80000


In [7]:
# Convert the experience column from number words (e.g. 'five') to numeric
# values (5) using word2number.
# NOTE(review): the column is overwritten in place, so this cell is presumably
# not safe to run twice without reloading df - confirm.
df['experience'] = df['experience'].map(w2n.word_to_num)

In [8]:
# Display the frame to check the result of the experience conversion.
df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)\t
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,8.0,7,72000
7,11,7.0,8,80000


In [9]:
# Features: the first three columns (experience, test score, interview score).
X = df.iloc[:, 0:3]

In [10]:
# Target: the fourth column (salary). A slice is used instead of a single
# index so that y stays a one-column DataFrame, which is why predictions
# come back as a 2-D array.
y = df.iloc[:, 3:4]

In [11]:
# Fit a linear regression of the target on the feature columns.
# fit() returns the estimator itself, so `model` and `lr` name the same
# fitted object.
lr = linear_model.LinearRegression()
lr.fit(X, y)
model = lr

In [12]:
# Predict the salary for a candidate with experience 6, test score 10 and
# interview score 7.
# The model was fitted on a DataFrame, so the query is wrapped in a frame
# carrying the same column names; passing a bare list makes recent
# scikit-learn versions warn that X does not have valid feature names.
candidate = pd.DataFrame([[6, 10, 7]], columns=X.columns)
prediction = model.predict(candidate)
prediction

array([[68508.73362445]])

In [13]:
# saving the model
import pickle

In [14]:
# Serialize the fitted model to a file in the working directory.
# The file is opened in binary mode because pickle writes bytes.
with open('model_salary_precition', 'wb') as model_file:
    pickle.dump(model, model_file)

In [15]:
# Read the pickled model back from disk into a separate variable.
# Only unpickle files you trust: pickle.load can execute arbitrary code
# embedded in the file.
with open('model_salary_precition', 'rb') as model_file:
    m_salary_predict = pickle.load(model_file)

In [16]:
# Predict the salary with the reloaded model for the same candidate
# (experience 6, test score 10, interview score 7).
# When the estimator recorded column names at fit time, reuse them so the
# query frame matches and scikit-learn does not warn about missing feature
# names; otherwise columns=None falls back to default integer labels.
feature_names = getattr(m_salary_predict, 'feature_names_in_', None)
candidate = pd.DataFrame([[6, 10, 7]], columns=feature_names)
m_salary_predict.predict(candidate)

array([[68508.73362445]])