#### Load Libraries

In [5]:
import pandas as pd
import numpy as np
from sklearn import linear_model
from word2number import w2n

In [3]:
# Load data from git hub
hiring_df = pd.read_csv("https://raw.githubusercontent.com/josephjaiyeola/linear_reg_multivariate/main/data/hiring.csv")
hiring_df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [7]:
#replace the NAN in experience to zero
hiring_df.experience = hiring_df.experience.fillna("zero")
hiring_df.head()


Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,zero,8.0,9,50000
1,zero,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000


In [12]:
#The experience column contains words. So words need to be converted to number, else python will read it as a categorical 

hiring_df.experience = hiring_df.experience.apply(w2n.word_to_num)
hiring_df


Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,,7,72000
7,11,7.0,8,80000


In [16]:
#replace the NAN in the test_score column to the median value of the column
#we need to use math.floor because of the column is in float
import math
median_test_score = math.floor(hiring_df['test_score(out of 10)'].median())
median_test_score


8

In [19]:
#replace the NA with the median value
hiring_df["test_score(out of 10)"] = hiring_df['test_score(out of 10)'].fillna(median_test_score)
hiring_df

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,0,8.0,9,50000
1,0,8.0,6,45000
2,5,6.0,7,60000
3,2,10.0,10,65000
4,7,9.0,6,70000
5,3,7.0,10,62000
6,10,8.0,7,72000
7,11,7.0,8,80000


In [24]:
#regression model 
reg = linear_model.LinearRegression()
reg.fit(hiring_df[['experience', 'test_score(out of 10)', 'interview_score(out of 10)']], hiring_df['salary($)'])


LinearRegression()

In [25]:
#get the reg coef(a)
reg.coef_

array([2812.95487627, 1845.70596798, 2205.24017467])

In [26]:
#get the reg intercept(b)
reg.intercept_

17737.263464337695

In [27]:
#what will be the salary when experience is 2 years, test score is 9 and interview score is 6

reg.predict([[2, 6, 9]])



array([54284.5705968])

In [28]:
#what will be the salary when experience is 12 years, test score is 10 and interview score is 10

reg.predict([[12, 10,10]])

.



array([92002.18340611])