#  **Simple and Multiple Linear Regression**
Lab Exercises - Week 2

----------

In [None]:
import pandas as pd
import numpy as np
import scipy
import matplotlib.pyplot as plt
from sklearn import linear_model as ln
import os
# Show which data files are available under ../input before any of them is read.
input_files = os.listdir("../input")
print(input_files)

## 1. Introduction to Numpy:

In [None]:
# Build a 2-D array from two equal-length rows, then report its shape and contents.
first_row, second_row = [1, 2, 3], [4, 5, 6]
arr = np.array([first_row, second_row])

arr_shape = arr.shape  # (rows, columns)
print("Array dimensions:\n", arr_shape)
print("Array previous:\n", arr)

In [None]:
# np.ones generates a matrix with all values as 1.
# Note: this is NOT an identity matrix (an identity matrix has 1s only on the
# diagonal and 0s elsewhere; np.eye builds that), so it is named and labelled
# as a matrix of ones.
onesMatrix = np.ones((2,2))
print("Matrix of ones:\n", onesMatrix)

# np.hstack joins arrays side by side (column-wise). Both inputs must have the
# same number of rows: here the 2x2 block of ones and the 2x3 `arr` give a 2x5 result.
x = np.hstack((onesMatrix,arr))
print("Stacking Arrays:\n", x)

In [None]:
# Matrix product of two 2x2 integer matrices (for 2-D inputs, dot is matrix multiplication).
a = np.array([[7, 8], [9, 10]])
b = np.array([[11, 12], [13, 14]])
product = a.dot(b)
print(product)

In [None]:
# Transpose: rows become columns, so the 4x2 matrix below is shown as 2x4.
mat = np.array([[n, n + 1] for n in range(7, 14, 2)])

print("Original Matrix:\n", mat)
print("Transpose Matrix:\n", mat.T)

In [None]:
# Invert a square 2x2 matrix with np.linalg.inv and display the result.
mat = np.array([[7, 8], [9, 10]])
mat_inverse = np.linalg.inv(mat)
print("Matrix Inverse:\n", mat_inverse)

## 3. Simple Linear Regression using Numpy

In [None]:
# Sample data as 8x1 column vectors: y is the response, z the single predictor.
# Each value is wrapped in its own one-element row so the arrays are 2-D columns.
y = np.array([[value] for value in (1.55, 0.42, 1.29, 0.73, 0.76, -1.09, 1.41, -0.32)])
z = np.array([[value] for value in (1.13, -0.73, 0.12, 0.52, -0.54, -1.15, 0.20, -1.09)])

In [None]:
# Generating regression coefficients
# Design matrix: a leading column of ones (the intercept term) followed by the predictor z.
# The row count is read from z instead of being hard-coded, and the column is not
# called `id`, which would shadow Python's built-in id() for the rest of the notebook.
intercept_col = np.ones((z.shape[0], 1))
x = np.hstack((intercept_col, z))

# Least-squares coefficients from the normal equations (X^T X) beta = X^T y.
# Solving the linear system gives the same beta as inv(X^T X) . X^T . y, but without
# forming the explicit inverse, which costs more and accumulates more round-off.
xt = x.transpose()
beta = np.linalg.solve(np.dot(xt, x), np.dot(xt, y))
print(beta)

In [None]:
# Fitted values from the numpy coefficients: intercept + slope * z.
intercept, slope = beta[0], beta[1]
yp1 = intercept + slope * z
# One row per observation: predictor, observed response, fitted response.
comparison = np.hstack((z, y, yp1))
print(comparison)

## 4. Simple Linear Regression using Scikit-Learn:

In [None]:
# Put the predictor (z) and the response (y) side by side in a labelled DataFrame.
d = pd.DataFrame(np.hstack((z, y)), columns=["x1", "y"])
print(d)

In [None]:
# Fit a scikit-learn linear regression of y on the single predictor column z,
# then show the estimated intercept and slope.
model = ln.LinearRegression()
results = model.fit(X=z, y=y)
print(model.intercept_, model.coef_)

In [None]:
# Fitted values for the training predictor z, from the scikit-learn model.
yp2 = model.predict(X=z)
print(yp2)

In [None]:
# Linear Regression representation using scatter plot
# Observed points in the default colour, fitted regression line in red.
# Axis labels and a legend are set so the figure can be read on its own.
fig, ax = plt.subplots()
ax.set_title("Scatter Plot Representation", fontsize=16)
ax.scatter(z, y, label="Observed")
ax.plot(z, yp2, color="red", label="Fitted line")
ax.set_xlabel("x1")
ax.set_ylabel("y")
ax.legend()
plt.show()

In [None]:
# Prediction for new values
# New predictor values, one per row, in a single column named "x1".
x1new = pd.DataFrame({"x1": [1, 0, -0.12, 0.52]})
# The model in this section was fitted on the bare numpy array z, i.e. without
# column names. Passing the DataFrame itself makes scikit-learn >= 1.0 warn that
# X has feature names the estimator was not fitted with, so pass the underlying
# (4, 1) array of values instead. The predictions are the same either way.
yp2new = model.predict(x1new.values)
print(yp2new)

## 5. Multiple Linear Regression using Numpy functions:

In [None]:
# Input data as 8x1 column vectors: response y, continuous predictor x1,
# and a 0/1-valued second predictor x2.
y = np.array([[value] for value in (1.55, 0.42, 1.29, 0.73, 0.76, -1.09, 1.41, -0.32)])
x1 = np.array([[value] for value in (1.13, -0.73, 0.12, 0.52, -0.54, -1.15, 0.20, -1.09)])
x2 = np.array([[flag] for flag in (1, 0, 1, 1, 0, 1, 0, 1)])

In [None]:
# Design matrix for the two-predictor model: a column of ones (intercept term)
# followed by x1 and x2.
# The row count is read from x1 instead of being hard-coded, and the column is not
# called `id`, which would shadow Python's built-in id() for the rest of the notebook.
intercept_col = np.ones((x1.shape[0], 1))
x = np.hstack((intercept_col, x1, x2))
print(x)

In [None]:
# Calculating Regression Coefficients
# Least-squares coefficients from the normal equations (X^T X) beta = X^T y, where x
# is the design matrix (intercept column, x1, x2) and y the response column.
# Solving the linear system gives the same beta as inv(X^T X) . X^T . y, but without
# forming the explicit inverse, which costs more and accumulates more round-off.
xt = x.transpose()
beta = np.linalg.solve(np.dot(xt, x), np.dot(xt, y))
print(beta)

In [None]:
# Fitted values from the numpy coefficients: b0 + b1*x1 + b2*x2.
b0, b1, b2 = beta[0], beta[1], beta[2]
yp1 = b0 + b1 * x1 + b2 * x2
# One row per observation: design-matrix columns, observed response, fitted response.
comparison = np.hstack((x, y, yp1))
print(comparison)

## 6. Multiple Linear Regression using Scikit-Learn:

In [None]:
# Put both predictors and the response side by side in a labelled DataFrame.
d = pd.DataFrame(np.hstack((x1, x2, y)), columns=["x1", "x2", "y"])
print(d)

In [None]:
# Fit a scikit-learn linear regression of y on the two predictor columns,
# then show the estimated intercept and coefficients.
inputDF = d.loc[:, ["x1", "x2"]]
model = ln.LinearRegression()
result = model.fit(X=inputDF, y=y)

print(model.intercept_, model.coef_)

In [None]:
# Fitted values for the training rows, from the scikit-learn model.
# yp2 is left as the cell's last expression so the notebook displays it.
yp2 = model.predict(X=inputDF)
yp2

In [None]:
# Prediction for new values
# Four new observations; the x1 and x2 values are paired row by row.
new_x1 = np.array([[1], [0], [-0.12], [0.52]])
new_x2 = np.array([[1], [-1], [2], [0.7]])
x1new = pd.DataFrame(np.hstack((new_x1, new_x2)), columns=["x1", "x2"])
yp2new = model.predict(x1new)
# Show each new (x1, x2) pair next to its predicted y.
print(np.hstack((x1new, yp2new)))

**Voilà! This is the end of the lab session for week 2.** <br>
Do not forget to commit your notebook and set the access to private. Share the notebook with Prof. Karim (Kaggle id: karimshaikh) and Manish Varma (Kaggle id: manishvarma).

## 7. Exercise Questions:
Q1. Using **survey.csv**, build a simple linear regression model with "Height" as the dependent variable and "Wr.Hnd" as the independent variable.

In [None]:
# Load the survey data and keep only the two columns this exercise uses.
survey_columns = ['Wr.Hnd', 'Height']
df = pd.read_csv("../input/survey.csv")[survey_columns]
print(df.head())
print("------------------------------")
# Missing-value check: first whether any cell is null, then the null count per column.
null_mask = df.isnull()
print(null_mask.values.any())
print(null_mask.sum())

In [None]:
# Drop every row that has a missing value in either column, then re-run the
# null checks to confirm none remain.
df = df.dropna()
remaining_nulls = df.isnull()
print("Check for NaN/Null values:\n", remaining_nulls.values.any())
print("Number of NaN/Null values:\n", remaining_nulls.sum())

In [None]:
# Simple linear regression: "Height" (outcome) regressed on "Wr.Hnd" (input).
# Both are selected with a one-element column list so they stay 2-D DataFrames.
inputDF = df.loc[:, ["Wr.Hnd"]]
outcomeDF = df.loc[:, ["Height"]]
model = ln.LinearRegression()
results = model.fit(X=inputDF, y=outcomeDF)

print(model.intercept_, model.coef_)

Q2. Using **clock.csv**, build a multiple linear regression model with "Price" as the dependent variable and "Bidders" and "Age" as the independent variables.

In [None]:
# Load the clock data, preview the first rows, then check for missing values:
# first whether any cell is null, then the null count per column.
df = pd.read_csv("../input/clock.csv")
print(df.head())
print("------------------------------------------------------------------------------")
null_mask = df.isnull()
print("Check for NaN/Null values:\n", null_mask.values.any())
print("Number of NaN/Null values:\n", null_mask.sum())

In [None]:
# Multiple linear regression: "Price" (outcome) regressed on "Bidders" and "Age" (inputs).
inputDF = df.loc[:, ["Bidders", "Age"]]
outcomeDF = df.loc[:, ["Price"]]
model = ln.LinearRegression()
results = model.fit(X=inputDF, y=outcomeDF)

print(model.intercept_, model.coef_)

**Voilà! This is the end of the lab session for week 2.** <br>
Do not forget to commit your notebook and set the access to private. Share the notebook with Prof. Karim (Kaggle id: karimshaikh) and Manish Varma (Kaggle id: manishvarma).