# Linear Regression
## with one dependent and one independent variable

#### Mandatory Imports :)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Loading and visualizing single variable dataset

In [None]:
dataset = pd.read_csv('SalaryData.csv')

In [None]:
x = np.array(dataset.YearsExperience)
y = np.array(dataset.Salary)

In [None]:
#converting into dataframes
x_df = pd.DataFrame(x)
y_df = pd.DataFrame(y)

In [None]:
plt.scatter(x,y,color="orange")
plt.grid()
plt.show()

## Linear Regression_1 : using SkLearn

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
regressor = LinearRegression()

In [None]:
x.shape,y.shape

In [None]:
regressor.fit(x_df,y_df)

In [None]:
y_pred = regressor.predict(x_df)

In [None]:
# Training points with the scikit-learn regression line drawn over them.
plt.scatter(x, y, label='y', color='orange')
plt.plot(x, y_pred, label='y_pred')
plt.xlabel("x")
plt.ylabel('y')
plt.title("Linear Regression using SkLearn")
# The labels set above are only displayed once legend() is called; this also
# makes the figure consistent with the other two regression plots.
plt.legend()
plt.grid(color='silver')
plt.show()

## Cost by SkLearn

In [None]:
cost_sk = [0.5*((i-j)**2) for i,j in zip(y_pred,np.array(y))]

In [None]:
round(sum(cost_sk)[0],4)

## Linear Regression_2 : using Formula

### Standard equation of a line: y = mx + c

###          m = Σ (x - x̅) * (y - y̅) ∕ Σ (x - x̅)²
###          c = y̅ - m*x̅

In [None]:
x = np.array(x)
y = np.array(y)

In [None]:
x_minus_x_mean = [i-np.mean(x) for i in x]

In [None]:
y_minus_y_mean = [j-np.mean(y) for j in y]

In [None]:
x_minus_x_mean_square = [k**2 for k in x_minus_x_mean]

In [None]:
m = sum([i*j for i,j in zip(x_minus_x_mean, y_minus_y_mean)])/(sum(x_minus_x_mean_square))

In [None]:
c = np.mean(y) - m*np.mean(x)

In [None]:
y_hat = [m*i + c for i in x]

In [None]:
# Data points first, then the closed-form regression line on top of them.
plt.scatter(x, y, label='y', color='orange')
plt.plot(x, y_hat, label='y_hat')
# Title and axis labels.
plt.title("Linear Regression using formula")
plt.xlabel("x")
plt.ylabel('y')
# Grid and legend, then render the figure.
plt.grid(color='silver')
plt.legend()
plt.show()

## Cost using Formula method

In [None]:
cost_fml = [0.5*((i-j)**2) for i,j in zip(y_hat,y)]

In [None]:
round(sum(cost_fml),4)

## Linear Regression_3 : using Gradient Descent

In [None]:
def gradient_descent(alpha, no_of_iterations, x, y, tolerance=0.001):
    """Fit the line ``theta_0 + theta_1 * x`` to ``y`` by batch gradient descent.

    Both parameters start from independent standard-normal draws
    (``np.random.randn``), so results vary between runs unless the caller
    seeds NumPy's global random state first.

    The cost being minimised is ``J = 1 / (2 * n) * sum((h(x) - y) ** 2)``
    with ``h(x) = theta_0 + theta_1 * x`` and ``n`` the number of samples.
    Both parameters are updated simultaneously from the same residuals.

    Args:
        alpha: Learning rate (step size).
        no_of_iterations: Maximum number of update steps.
        x: Sequence of inputs; flattened to 1-D.
        y: Sequence of targets with as many elements as ``x``.
        tolerance: Stop as soon as one step lowers the cost by no more than
            this amount. A step that raises the cost (for example because
            ``alpha`` is too large) also stops the loop.

    Returns:
        ``[theta_0, theta_1]``, the intercept and slope after the last step.
        The parameters are returned even when the iteration budget runs out
        before the tolerance is reached.

    Raises:
        ValueError: If ``x`` and ``y`` differ in size or are empty.
    """
    x = np.asarray(x, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    if x.size != y.size:
        raise ValueError("x and y must contain the same number of elements")
    n_samples = x.size
    if n_samples == 0:
        raise ValueError("x and y must not be empty")

    theta_0 = np.random.randn()
    theta_1 = np.random.randn()

    residuals = theta_0 + theta_1 * x - y
    # Note the parentheses: the factor is 1 / (2 * n), not (1 / 2) * n.
    cost = np.sum(residuals ** 2) / (2 * n_samples)

    for _ in range(no_of_iterations):
        # Both gradients use the residuals from before this step, which is
        # what makes the update simultaneous.
        theta_0 = theta_0 - alpha * np.sum(residuals) / n_samples
        theta_1 = theta_1 - alpha * np.sum(residuals * x) / n_samples

        residuals = theta_0 + theta_1 * x - y
        new_cost = np.sum(residuals ** 2) / (2 * n_samples)
        if cost - new_cost <= tolerance:
            break
        cost = new_cost

    return [theta_0, theta_1]

In [None]:
optimal_thetas = gradient_descent(alpha = 0.01,no_of_iterations = 1000000, x = x, y = y)

In [None]:
h_theta_x = [optimal_thetas[0] + optimal_thetas[1]*i for i in x]

In [None]:
# Data points first, then the gradient-descent regression line on top of them.
plt.scatter(x, y, label='y', color='orange')
plt.plot(x, h_theta_x, label='h_theta_x')
# Title and axis labels.
plt.title("Linear Regression using Gradient Descent")
plt.xlabel("x")
plt.ylabel('y')
# Grid and legend, then render the figure.
plt.grid(color='silver')
plt.legend()
plt.show()

## Cost by Gradient Descent

In [None]:
cost_gd = [0.5*((i-j)**2) for i,j in zip(h_theta_x,y)]

In [None]:
round(sum(cost_gd),4)