# Understanding Linear Regression

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [None]:
# Load the height/weight CSV into a DataFrame. The path is relative, so it is
# resolved against the notebook's current working directory.
df = pd.read_csv('../../data/csv/heightVsWeight/hw_25000.csv')

In [None]:
df

In [None]:
# Replace the column labels with short names used by the rest of the notebook.
# NOTE(review): assumes the CSV has exactly three columns — confirm against the file.
df.columns = ['Index', 'Height', 'Weight']

In [None]:
df.head()

In [None]:
# Scatter plot of the raw data: Height on the x-axis, Weight on the y-axis.
df.plot.scatter(x='Height', y='Weight', title='Weight and Height in adults')

In [None]:
# Same scatter plot as before, now with a hand-picked straight line on top.
df.plot.scatter(x='Height', y='Weight', title='Weight and Height in adults')
# The line joins the points (61, 85) and (74, 162).
plt.plot([61, 74], [85, 162], color='red', linewidth=4)

In [None]:
def line(X, weight=0, bias=0):
    """Evaluate the straight line ``X * weight + bias``.

    Args:
        X: Input value(s); multiplied by ``weight``.
        weight: Slope of the line. Defaults to 0.
        bias: Intercept added after scaling. Defaults to 0.

    Returns:
        The result of ``X * weight + bias``.
    """
    scaled = X * weight
    return scaled + bias

In [None]:
X = np.linspace(61, 74, 100)

In [None]:
X

In [None]:
yhat = line(X, weight=0, bias=0)

In [None]:
yhat

In [None]:
# Scatter plot of the data with the line values in yhat drawn over it.
df.plot.scatter(x='Height', y='Weight', title='Weight and Height in adults')
plt.plot(X, yhat, color='red', linewidth=4)

In [None]:
def error(y_gt, y_pred):
    """Return the sum of the signed differences ``y_gt - y_pred``.

    The differences are not squared or made absolute, so positive and
    negative differences cancel each other in the total.
    """
    return (y_gt - y_pred).sum()

In [None]:
def mse(y_gt, y_pred):
    """Return the mean of the squared differences between ``y_gt`` and ``y_pred``."""
    residual = y_gt - y_pred
    return (residual ** 2).mean()

In [None]:
# Double brackets select a one-column DataFrame, so .values yields a 2-D array
# with one row per sample and a single column.
# Note: this rebinds X, which earlier held the np.linspace grid.
X = df[['Height']].values
y_gt = df[['Weight']].values

In [None]:
y_pred = line(X)

In [None]:
error(y_gt, y_pred)

In [None]:
mse(y_gt, y_pred)

# We will look at the code below after understanding how to optimize the model


In [None]:
plt.figure(figsize=(10, 5))

# Left panel: the data together with one candidate line per bias value.
plt1 = plt.subplot(121)
df.plot(kind='scatter', x='Height', y='Weight', title='Weight and Height in adults', ax=plt1)

# Look at how the cost function behaves for a few values of the bias "b"
# between -150 and +200, with the weight held fixed at 2.
bs = np.array([-150, -100, -50, 0, 50, 100, 150, 200])
# Mean squared error obtained for each bias in bs, in the same order.
ms_error = []

for b in bs:
    y_pred = line(X, weight=2, bias=b)
    # Named `cost` (not `error`) so the error() function defined earlier in
    # the notebook is not overwritten by a number.
    cost = mse(y_gt, y_pred)
    ms_error.append(cost)
    plt1.plot(X, y_pred)

# Right panel: cost as a function of the bias.
plt2 = plt.subplot(122)
plt2.plot(bs, ms_error, 'o-')
plt2.set_title('Cost as func of bias=b')
plt2.set_xlabel('bias')
plt2.set_ylabel('mean squared error')

# Linear Regression Using Keras

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam, SGD

In [None]:
# Sequential: layers are added to the model one after another, in sequence.
model = Sequential()

In [None]:
# Add a Dense layer to the model, which computes output = dot(input, kernel) + bias.
# The first argument is the number of outputs, i.e. 1 in our case.
# The shape of our input variable (X) is 1, hence input_shape=(1,).
# With one input and one output this is just the line function (b + X*w).
model.add(Dense(1, input_shape=(1,)))

In [None]:
model.summary()

In [None]:
# Adam optimizer with a learning rate of 0.9, mean squared error as the loss.
# The learning rate is passed positionally because its keyword was renamed
# from `lr` to `learning_rate` across Keras releases; it is the first
# positional parameter in both, so this form works with either.
model.compile(optimizer=Adam(0.9), loss='mse')

In [None]:
model.fit(X, y_gt, epochs=45)

In [None]:
# Predict over the full dataset and draw the model's line on top of the scatter.
y_pred = model.predict(X)
df.plot.scatter(x='Height', y='Weight', title='Weight and Height in adults')
plt.plot(X, y_pred, color='red')

In [None]:
w, b = model.get_weights()

In [None]:
w

In [None]:
b

# Test and Train Split

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
len(X)

In [None]:
len(y_gt)

In [None]:
# Hold out 20% of the samples for testing. A fixed random_state makes the
# shuffle (and therefore every metric computed below) reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(X, y_gt, test_size=0.2, random_state=42)

In [None]:
len(x_train)

In [None]:
len(x_test)

In [None]:
# Zero-valued parameters used to reset the model before retraining.
# They are arrays, not scalars: the Dense(1) layer with a single input holds
# a (1, 1) kernel and a (1,) bias, and set_weights expects matching shapes.
# (Two separate statements: `w = 0.0, b = 0.0` on one line is a SyntaxError.)
w = np.zeros((1, 1))
b = np.zeros(1)

In [None]:
# Overwrite the model's current parameters with w and b.
model.set_weights((w, b))

In [None]:
model.fit(x_train, y_train, epochs=45, verbose=0)

In [None]:
# Predict on both splits; ravel() flattens each prediction array to 1-D.
y_tra_pred = model.predict(x_train).ravel()
y_tst_pred = model.predict(x_test).ravel()

In [None]:
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score as r2s

In [None]:
# Report the mean squared error on each split.
# `mse` here is sklearn's mean_squared_error (imported under that alias in the
# previous cell), not the mse() helper defined earlier in the notebook.
print(" The MSE on the train set (x_train) is:{:0.1f}".format(mse(y_train, y_tra_pred)))
print(" The MSE on the test set (x_test) is:{:0.1f}".format(mse(y_test, y_tst_pred)))

In [None]:
# Report the R2 score on each split.
train_r2 = r2s(y_train, y_tra_pred)
test_r2 = r2s(y_test, y_tst_pred)
print("R2 Score on the train set is:{:0.3f}".format(train_r2))
print("R2 score on the test set is:{:0.3f}".format(test_r2))