<img src="https://rhyme.com/assets/img/logo-dark.png" align="center">

<h2 align="center"> Linear Regression </h2>

### Task 2: Load the Data and Libraries
---

In [None]:
import matplotlib.pyplot as plt 
import seaborn as sns
import pandas as pd
import numpy as np
import sklearn as sk 

## Load and Visualize the dataset
- "bmi_and_life_expectancy.csv" file
* Country – The country the person was born in.
* Life expectancy – The average life expectancy at birth for a person in that country.
* BMI – The mean BMI of males in that country.

In [None]:
# Load the BMI / life-expectancy table from the CSV file in the working directory.
# index_col=False tells pandas not to use the first column as the row index.
dataset = pd.read_csv('bmi_and_life_expectancy.csv', index_col=False)

In [None]:
# Preview the first rows to confirm the load and see the column names.
dataset.head()

In [None]:
# Drop every row that contains a missing value. This rebinds `dataset`, so all
# later cells see only the complete rows.
dataset = dataset.dropna()

In [None]:
from sklearn.preprocessing import normalize


In [None]:
# Feature (BMI) and target (life expectancy) columns for the matplotlib scatter plot.
X_train, y_train = dataset['BMI'], dataset['Life_Expectancy']

In [None]:
# Quick pandas scatter plot of life expectancy against BMI.
dataset.plot.scatter(x="BMI", y="Life_Expectancy", figsize=(8, 6))

In [None]:
# Same scatter drawn with matplotlib directly, via the explicit Axes interface.
fig, ax = plt.subplots()
ax.scatter(X_train, y_train, color='blue', edgecolor='k')
ax.set_xlabel('BMI')
ax.set_ylabel('Life expectancy')
plt.show()


### Task 4: Compute the Cost $J(\theta)$
---

The objective of linear regression is to minimize the cost function

$$J(\theta) = \frac{1}{2m} \sum_{i=1}^m (h_\theta(x^{(i)}) - y^{(i)} )^2$$

where $h_{\theta}(x)$ is the hypothesis and given by the linear model

$$h_{\theta}(x) = \theta^Tx = \theta_0 + \theta_1x_1$$

In [None]:
def CostFunction(X, y, theta):
    """Return the squared-error cost J(theta) of a linear model.

    J(theta) = 1/(2m) * sum_i (x_i . theta - y_i)^2, with m = len(y).

    Parameters
    ----------
    X : array-like, shape (m, n)
        Design matrix, one row per sample.
    y : array-like, shape (m,) or (m, 1)
        Observed targets.
    theta : array-like, shape (n,) or (n, 1)
        Model parameters.

    Returns
    -------
    float
        The cost for the given parameters.
    """
    m = len(y)
    # Flatten both sides before subtracting. With a 1-D theta the prediction is
    # (m,) while y may be (m, 1); subtracting those broadcasts to an (m, m)
    # matrix and sums m*m bogus residuals instead of m real ones.
    residual = np.ravel(np.dot(X, theta)) - np.ravel(y)
    return 1/(2*m)*np.sum(residual**2)

In [None]:
# Number of samples, taken from the BMI column.
m = dataset['BMI'].size
# Design matrix: a leading column of ones (intercept term) next to the BMI values.
X = np.column_stack((np.ones(m), dataset['BMI'].to_numpy()))

# Target as an (m, 1) column vector.
y = dataset['Life_Expectancy'].to_numpy().reshape(m, 1)



In [None]:
# Start gradient descent from the zero vector: one row per parameter (theta_0, theta_1).
theta = np.zeros(shape=(2, 1))


In [None]:
# Display the parameter vector (all zeros when the cells are run in order).
theta

In [None]:
# Scale each column to unit L2 norm. sklearn's normalize() defaults to axis=1,
# i.e. it rescales every row on its own; applied to the single-column y that
# turns every non-zero entry into +/-1 and wipes out the target, and it rescales
# each [1, BMI] row by a different factor. axis=0 scales per column instead.
X_tr = normalize(X, axis=0)
y_train2 = normalize(y, axis=0)

In [None]:
# Peek at the first few scaled rows instead of printing every sample.
X_tr[:5]

In [None]:
# Cost on the scaled data for the current theta (still the zero vector when the
# cells are run in order, so this is the starting cost before training).
print(CostFunction(X_tr, y_train2, theta))

### Task 5: Gradient Descent
---

Minimize the cost function $J(\theta)$ by repeatedly applying the update rule below until convergence:
        
$\theta_j := \theta_j - \alpha \frac{1}{m} \sum_{i=1}^m (h_{\theta}(x^{(i)}) - y^{(i)})x_j^{(i)}$ (simultaneously update $\theta_j$ for all $j$).

In [None]:
def GradientDescent(X, y, theta, alpha, iteractions):
    """Fit linear-regression parameters with batch gradient descent.

    Each step applies theta := theta - alpha * (1/m) * X^T (X theta - y)
    and records the cost of the updated parameters.

    Parameters
    ----------
    X : ndarray, shape (m, n)
        Design matrix.
    y : ndarray, shape (m, 1)
        Targets.
    theta : array-like, shape (n, 1)
        Starting parameters. The caller's array is left untouched.
    alpha : float
        Learning rate.
    iteractions : int
        Number of update steps (parameter name kept as-is for existing callers).

    Returns
    -------
    theta : ndarray
        Parameters after the final step.
    costs : list
        CostFunction value after each step, in order.
    """
    m = len(y)
    # Work on a private float copy. Updating the argument in place silently
    # changed the caller's array (so re-running a cell started from already
    # trained values) and raised a casting error for an integer-typed theta.
    theta = np.array(theta, dtype=float)
    costs = []
    for _ in range(iteractions):
        yPred = X.dot(theta)
        error = np.dot(X.transpose(), (yPred - y))
        theta -= alpha*(1/m)*error
        costs.append(CostFunction(X, y, theta))
    return theta, costs

In [None]:
# Run 20 gradient-descent steps on the scaled data, starting from the current theta.
theta, costs = GradientDescent(X_tr, y_train2, theta, alpha = 0.01, iteractions = 20)

# Report the fitted hypothesis with both coefficients rounded to two decimals.
intercept = str(round(theta[0, 0], 2))
slope = str(round(theta[1, 0], 2))
print('h(x) = {} + {}x1'.format(intercept, slope))

In [None]:
# Cost recorded after the last gradient-descent step.
costs[-1]

### Task 6: Visualising the Cost Function $J(\theta)$
---

In [None]:
from mpl_toolkits.mplot3d import Axes3D

In [None]:
# Evaluate J(theta) on a 100 x 100 grid of (theta_0, theta_1) candidates.
theta0 = np.linspace(-10, 10, 100)
theta1 = np.linspace(-1, 4, 100)

# costValues[i, j] holds the cost at (theta0[i], theta1[j]).
costValues = np.zeros((len(theta0),len(theta1)))

for i, t0 in enumerate(theta0):
    for j, t1 in enumerate(theta1):
        # Build a (2, 1) column vector so X.dot(t) has the same (m, 1) shape as
        # the target. A flat (2,) vector makes the residual broadcast to an
        # (m, m) matrix and the summed cost comes out wrong.
        t = np.array([[t0], [t1]])
        costValues[i, j] = CostFunction(X_tr, y_train2, t)

In [None]:
fig = plt.figure(figsize = (14, 9))
# fig.gca(projection=...) is no longer accepted by current matplotlib; create
# the 3D axes through add_subplot instead.
ax = fig.add_subplot(projection = '3d')

# plot_surface needs 2-D coordinate grids. Passing the two 1-D vectors makes
# both broadcast along the same axis, collapsing the surface onto a diagonal.
# indexing='ij' lines grid[i, j] up with costValues[i, j] = J(theta0[i], theta1[j]).
theta0Grid, theta1Grid = np.meshgrid(theta0, theta1, indexing = 'ij')
surf = ax.plot_surface(theta0Grid, theta1Grid, costValues, cmap = 'viridis')

fig.colorbar(surf, shrink = 1, aspect = 20)

# Raw strings keep the LaTeX backslash from being parsed as a string escape.
ax.set_xlabel(r'$\Theta_0$', color = 'b')
ax.set_ylabel(r'$\Theta_1$', color = 'b')
ax.set_zlabel(r'$J(\Theta)$', color = 'b')

#ax.view_init(35, 32)

plt.show()

### Task 7: Plotting the Convergence
---

Plot $J(\theta)$ against the number of iterations of gradient descent:

In [None]:
# Cost after each gradient-descent step; a falling curve indicates convergence.
plt.figure(figsize = (14, 9))
plt.plot(costs, 'g')
# alpha is an opacity and has to lie in [0, 1]; the previous value of 3 is out
# of range. 0.3 matches the grid styling used for the regression-fit figure.
plt.grid(color = 'black', alpha = 0.3)
plt.xlabel('$Iterations$', fontsize = 15)
# Raw string so the LaTeX backslash is not parsed as a string escape.
plt.ylabel(r'$J(\Theta)$', fontsize = 15)
plt.title('Values of the Cost Function over Iterations of Gradient Descent', fontsize = 18);

### Task 8: Training Data with Linear Regression Fit
---

In [None]:
# Check the shape of the fitted parameter array.
theta.shape

In [None]:
# Display the fitted parameters.
theta

In [None]:
plt.figure(figsize = (14, 9))

# Drop singleton dimensions so theta[0] and theta[1] are plain scalars.
theta = np.squeeze(theta)
# Scatter of the raw data together with seaborn's own least-squares fit.
sns.regplot(x = 'BMI', y = 'Life_Expectancy', color = 'g', data = dataset)

# Gradient-descent line, drawn across the BMI range actually present in the data.
# NOTE(review): theta was fitted on X_tr / y_train2, not on the raw columns; this
# line only matches the raw scatter if those arrays share its scale - confirm.
xValue = np.linspace(dataset['BMI'].min(), dataset['BMI'].max(), 50)
yValue = theta[0] + theta[1]*xValue
# seaborn expects the data vectors as keywords, not positionally.
sns.lineplot(x = xValue, y = yValue)

plt.grid(color = 'black', alpha = 0.3)
# Label the axes with the quantities that are actually plotted.
plt.xlabel('BMI', fontsize = 15)
plt.ylabel('Life expectancy', fontsize = 15)
plt.title('Linear Regression Fit', fontsize = 18);

### Task 9: Inference using the optimized $\theta$ values
---

$h_\theta(x) = \theta^Tx$

In [None]:
def Predict(x, theta):
    """Evaluate the linear hypothesis h_theta(x) = theta^T x.

    Parameters
    ----------
    x : ndarray
        Feature vector, including the leading 1 for the intercept term.
    theta : ndarray
        Model parameters.

    Returns
    -------
    The dot product of the transposed theta with x.
    """
    return np.dot(theta.T, x)

In [None]:
# Predict life expectancy at the lowest mean BMI in the data. The earlier
# message and the x10000 factor described population and profit, which have
# nothing to do with this data set.
# NOTE(review): theta was fitted on X_tr / y_train2; the feature vector here must
# be on the same scale as those arrays for the number to be meaningful - confirm.
bmi1 = dataset['BMI'].min()
yPred1 = float(np.squeeze(Predict(np.array([1, bmi1]), theta)))
print('For a mean BMI of ' + str(round(bmi1, 1)) + ', the model predicts a life expectancy of ' + str(round(yPred1, 2)))

In [None]:
# Predict life expectancy at the highest mean BMI in the data. The earlier
# message described population and profit and quoted a figure (40,000) that did
# not match its own input.
# NOTE(review): theta was fitted on X_tr / y_train2; the feature vector here must
# be on the same scale as those arrays for the number to be meaningful - confirm.
bmi2 = dataset['BMI'].max()
yPred2 = float(np.squeeze(Predict(np.array([1, bmi2]), theta)))
print('For a mean BMI of ' + str(round(bmi2, 1)) + ', the model predicts a life expectancy of ' + str(round(yPred2, 2)))