# Linear Regression

In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
from sklearn import linear_model

In [2]:
# Load the yearly per-capita income data and preview its first rows
income_csv = "linearregressionex.csv"
df = pd.read_csv(income_csv)
df.head()

Unnamed: 0,year,per capita income (US$)
0,1970,3399.299037
1,1971,3768.297935
2,1972,4251.175484
3,1973,4804.463248
4,1974,5576.514583


In [None]:
# Scatter plot of per-capita income against year
fig, ax = plt.subplots()
ax.set_xlabel("Year", size=15)
ax.set_ylabel("Income", size=15)
ax.scatter(df["year"], df["per capita income (US$)"], color="black", marker="p")

In [None]:
# Fit a linear model of per-capita income as a function of year
year_features = df[["year"]]  # 2-D frame: the estimator expects a feature matrix
income_target = df["per capita income (US$)"]
reg = linear_model.LinearRegression().fit(year_features, income_target)
reg

In [None]:
# Model Prediction: estimate per-capita income for the year 2020.
# The model was fitted on a DataFrame, so the query is passed as a DataFrame
# with the same "year" column. A bare nested list makes scikit-learn warn that
# "X does not have valid feature names".
reg.predict(pd.DataFrame({"year": [2020]}))

# Multivariate Regression

### Handling missing data

In [48]:
# Load the multivariate example data (experience, scores, salary) and display it
multivariate_csv = "multivariateEx.csv"
me = pd.read_csv(multivariate_csv)
me

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


In [49]:
# Inspect dtypes and non-null counts to spot columns with missing values
# (this cell only checks; the filling happens in the following cells)
me.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 4 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   experience                  6 non-null      object 
 1   test_score(out of 10)       7 non-null      float64
 2   interview_score(out of 10)  8 non-null      int64  
 3   salary($)                   8 non-null      int64  
dtypes: float64(1), int64(2), object(1)
memory usage: 388.0+ bytes


In [50]:
# Compute the value used to fill the null test score:
# the mean of the observed scores, rounded down to a whole number
import math
observed_mean = me["test_score(out of 10)"].mean()  # .mean() skips NaN by default
t = math.floor(observed_mean)


In [51]:
# Replace the missing test score with the fill value `t`
score_col = "test_score(out of 10)"
me[score_col] = me[score_col].fillna(t)
me

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,7.0,7,72000
7,eleven,7.0,8,80000


In [52]:
# Fill null values in experience with the word "zero"
# NOTE(review): assumes a missing experience means no experience — confirm against the data source
me["experience"] = me["experience"].fillna("zero")
me

Unnamed: 0,experience,test_score(out of 10),interview_score(out of 10),salary($)
0,zero,8.0,9,50000
1,zero,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,7.0,7,72000
7,eleven,7.0,8,80000


In [53]:
# Change experience from number words (e.g. "five") to numbers (5).
# Only string cells are converted: w2n.word_to_num raises on non-string input,
# so guarding on str makes re-running this cell after the column is already
# numeric a no-op instead of an error.
from word2number import w2n
me["experience"] = me["experience"].apply(
    lambda value: w2n.word_to_num(value) if isinstance(value, str) else value
)

In [54]:
# Fit salary as a linear function of experience and the two scores
feature_cols = ["experience", "test_score(out of 10)", "interview_score(out of 10)"]
target_col = "salary($)"
reg = linear_model.LinearRegression()
reg.fit(me[feature_cols], me[target_col])

In [56]:
# Predict salary for experience 9, test score 5, interview score 7.
# The model was fitted on a DataFrame, so the query is passed as a DataFrame
# with the same column names in the same order. A bare nested list makes
# scikit-learn warn that "X does not have valid feature names" and leaves the
# column order implicit.
reg.predict(
    pd.DataFrame(
        {
            "experience": [9],
            "test_score(out of 10)": [5],
            "interview_score(out of 10)": [7],
        }
    )
)



array([67431.9960444])

In [59]:
# Learned coefficients, one per feature, in the order the columns were passed to fit
reg.coef_

array([2922.26901502, 2221.30909959, 2147.48256637])

In [60]:
# Learned intercept (constant term) of the fitted linear model
reg.intercept_

14992.651446693133