# Linear Regression

In [21]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
from sklearn import linear_model
import math

In [None]:
# Load the per-capita income dataset from CSV into a DataFrame
# and preview its first rows
df = pd.read_csv("linearregressionex.csv")
df.head()

In [None]:
# Scatter plot of per-capita income against year, to check visually
# whether the relationship looks roughly linear before fitting a line
plt.xlabel("Year", size = 15)
plt.ylabel("Income", size = 15)
plt.scatter(df.year, df["per capita income (US$)"], color = "black", marker = "p")

In [None]:
# Model Fitting
# Single-feature linear regression: year is the only input column
# (passed as a one-column DataFrame) and per-capita income is the target
reg = linear_model.LinearRegression()
reg.fit(df[["year"]], df["per capita income (US$)"])

In [None]:
# Model Prediction
# Predict per-capita income for the year 2020. The model was fitted on a
# DataFrame with a "year" column, so the query is passed as a DataFrame with
# the same column name; a bare nested list makes recent scikit-learn versions
# warn that the input has no valid feature names.
reg.predict(pd.DataFrame({"year": [2020]}))

# Multivariate Regression

### Handling missing data

In [None]:
# Load the multivariate example dataset from CSV and display it in full
me = pd.read_csv("multivariateEx.csv")
me 

In [None]:
# Checking for null values: info() lists each column's dtype and non-null
# count, which shows which columns need filling (done in the cells below)
me.info()

In [50]:
# Fill value for the null entries in the test score column:
# the mean of the existing scores (pandas skips NaN by default),
# rounded down to a whole number
import math
t = math.floor(me["test_score(out of 10)"].mean())


In [None]:
# Replace the missing test scores with the fill value t, then display the frame
me["test_score(out of 10)"] = me["test_score(out of 10)"].fillna(t)
me

In [None]:
# Filling null values in experience with the word "zero"; the column is
# converted from number words to numbers in a later cell, so the
# placeholder is a word rather than the number 0
me.experience = me.experience.fillna("zero")
me

In [53]:
# Changing experience from number words to numeric values by applying
# word2number's word_to_num to every entry of the column
from word2number import w2n
me.experience = me.experience.apply(w2n.word_to_num)

In [None]:
# Fit a linear regression with three input features (experience, test score,
# interview score) and salary as the target.
# NOTE: this rebinds `reg`, replacing the single-feature model fitted earlier.
reg = linear_model.LinearRegression()
reg.fit(me[["experience", "test_score(out of 10)", "interview_score(out of 10)"]], me["salary($)"])

In [None]:
# Predict salary for experience 9, test score 5 and interview score 7.
# The query is a one-row DataFrame whose column names and order match the
# frame the model was fitted on; a bare nested list makes recent
# scikit-learn versions warn that the input has no valid feature names.
reg.predict(
    pd.DataFrame(
        [[9, 5, 7]],
        columns=["experience", "test_score(out of 10)", "interview_score(out of 10)"],
    )
)

In [None]:
# Fitted coefficients, one per feature column passed to fit
reg.coef_

In [None]:
# Fitted intercept term of the model
reg.intercept_

### Gradient Descent and Cost Function

In [None]:
def predict_using_sklearn():
    """Fit scikit-learn's LinearRegression on the math/cs score dataset.

    Reads "GradientDescent Ex.csv", uses the "math" column as the single
    feature and the "cs" column as the target.

    Returns:
        A tuple ``(coef_, intercept_)`` taken from the fitted model.
    """
    scores = pd.read_csv("GradientDescent Ex.csv")
    features = scores[["math"]]
    target = scores["cs"]

    model = linear_model.LinearRegression()
    model.fit(features, target)
    return model.coef_, model.intercept_


def gradient_descent(x, y, *, iterations=1000000, learning_rate=0.0002,
                     rel_tol=1e-20, verbose=True):
    """Fit ``y ~= m * x + b`` by batch gradient descent on the mean squared error.

    Args:
        x: 1-D sequence of feature values (list, NumPy array, pandas Series).
        y: 1-D sequence of target values, same length as ``x``.
        iterations: Maximum number of gradient steps.
        learning_rate: Step size applied to both gradients.
        rel_tol: Relative tolerance passed to ``math.isclose`` when comparing
            the cost of consecutive iterations; the loop stops early once
            they are that close. The default (1e-20) is below double
            precision, so by default the loop stops only when the cost has
            effectively stopped changing.
        verbose: When true, print m, b, cost and the iteration index after
            every step that does not trigger the early stop.

    Returns:
        A tuple ``(m, b)`` with the final slope and intercept.

    Raises:
        ValueError: If ``x`` and ``y`` are empty, not 1-D, or differ in shape.
    """
    # Coerce to float arrays so plain lists work too: with a list,
    # ``0 * x`` would be list repetition rather than element-wise math.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.ndim != 1 or x.shape != y.shape or x.size == 0:
        raise ValueError(
            "x and y must be non-empty 1-D sequences of equal length"
        )

    n = x.size
    m_curr = 0.0
    b_curr = 0.0
    cost_previous = 0.0

    for i in range(iterations):
        # Residuals for the current line; reused by the cost and both gradients.
        residual = y - (m_curr * x + b_curr)
        cost = np.mean(residual ** 2)

        # Partial derivatives of the mean squared error w.r.t. m and b.
        md = -(2 / n) * np.dot(x, residual)
        bd = -(2 / n) * residual.sum()

        m_curr = m_curr - learning_rate * md
        b_curr = b_curr - learning_rate * bd

        # Stop once the cost no longer changes between consecutive steps.
        if math.isclose(cost, cost_previous, rel_tol=rel_tol):
            break
        cost_previous = cost

        if verbose:
            print("m {}, b {}, cost {}, iteration {}".format(m_curr, b_curr, cost, i))

    return m_curr, b_curr


# Script entry point: fit the same math -> cs relationship twice, once with
# the hand-written gradient descent and once with scikit-learn, and print
# both results so they can be compared.
if __name__ == "__main__":
    gd = pd.read_csv("GradientDescent Ex.csv")
    # Plain NumPy arrays so gradient_descent does element-wise arithmetic
    x = np.array(gd["math"])
    y = np.array(gd["cs"])


    m, b = gradient_descent(x, y)
    print("Using gradient decent function: Coef{} Intercept {}".format(m, b))

    # predict_using_sklearn reads the same CSV file again on its own
    m_sklearn, b_sklearn = predict_using_sklearn()
    print("using sklearn: Coef {} Intercept {}".format(m_sklearn, b_sklearn))