# ➥ Linear Regression

## i.Simple Linear Regression

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score,mean_absolute_error,mean_squared_error,r2_score
from sklearn.model_selection import train_test_split

In [4]:
# Load the placement dataset from the working directory and preview its first rows.
placement = pd.read_csv('placement.csv')
placement.head()

Unnamed: 0,cgpa,package
0,6.89,3.26
1,5.12,1.98
2,7.82,3.25
3,7.42,3.67
4,6.94,3.57


In [5]:
# Count missing values in each column.
placement.isnull().sum()

cgpa       0
package    0
dtype: int64

In [6]:
# Separate the predictor from the target.
# The first column is kept as a one-column DataFrame (2-D), which is the shape
# the estimator is later fitted on; the last column is taken as a 1-D Series.
X = placement.iloc[:, [0]]
y = placement[placement.columns[-1]]

In [7]:
# Hold out 10% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42)

In [8]:
# Inspect the training features.
X_train

Unnamed: 0,cgpa
124,6.06
16,6.84
148,7.57
93,6.79
65,7.60
...,...
106,6.13
14,7.73
92,7.90
179,7.14


In [9]:
# Create the linear regression estimator with default settings.
lr = LinearRegression()

In [10]:
# Fit the model on the training split.
lr.fit(X_train,y_train)

In [None]:
# Inspect the held-out targets.
y_test

In [None]:
# Predict for the second test row. Selecting with a list (iloc[[1]]) keeps a
# one-row DataFrame that still carries the training column name, so
# scikit-learn does not warn about missing feature names as it does when
# handed a bare NumPy array.
y_pred = lr.predict(X_test.iloc[[1]])

In [None]:
# Show the single-row prediction.
y_pred

### Plotting

In [None]:
# Scatter the full dataset, then overlay the model's predictions for the
# training inputs as a red line.
plt.scatter(placement['cgpa'],placement['package'])
plt.plot(X_train,lr.predict(X_train),color='red')
plt.show()

### Find Error and R2 Score

In [None]:
# Predict for every row of the test split.
y_pred1 = lr.predict(X_test)

In [None]:
# Inspect the test-set predictions.
y_pred1

In [None]:
# Mean Absolute Error (MAE) between the test targets and the predictions.
print('MAE: ',mean_absolute_error(y_test,y_pred1))

In [None]:
# Test targets as a plain array (without the pandas index).
y_test.values

In [None]:
# Mean Squared Error (MSE) between the test targets and the predictions.
print('MSE: ',mean_squared_error(y_test.values,y_pred1))

In [None]:
# R2 Score

In [None]:
# R2 score on the test split; the label uses a colon like the MAE/MSE lines.
print('R2 Score: ',r2_score(y_test.values,y_pred1))

## ii.Multiple Linear Regression

In [None]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score,mean_absolute_error,mean_squared_error,r2_score
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Synthetic two-feature regression data. The fixed seed makes the dataset,
# and every metric computed from it, repeatable across runs.
X,y = make_regression(n_samples=200,n_features=2,n_informative=2,n_targets=1,noise=50,random_state=2)

In [None]:
# Put the two feature columns and the target into one DataFrame and preview it.
ds = pd.DataFrame({'feature1':X[:,0],'feature2':X[:,1],'target':y})
ds.head()

In [None]:
# 3-D scatter of the two features against the target.
fig = px.scatter_3d(ds, x='feature1', y='feature2', z='target')

fig.show()

In [None]:
# First two columns are the features, last column is the target; 10% is held out for testing.
X_train,X_test,y_train,y_test = train_test_split(ds.iloc[:,0:2],ds.iloc[:,-1],test_size=0.1,random_state=42)

In [None]:
# Inspect the training targets.
y_train

In [None]:
# Inspect the held-out targets.
y_test

In [None]:
# Fresh estimator for the two-feature problem.
lr = LinearRegression()

In [None]:
# Fit on the training split.
lr.fit(X_train,y_train)

In [None]:
# Predict for the test split.
y_pred = lr.predict(X_test)

In [None]:
# Test targets as a plain array, for side-by-side comparison with the predictions.
y_test.values

In [None]:
# Inspect the predictions.
y_pred

In [None]:
# Mean Absolute Error (MAE) on the test split.
mae = mean_absolute_error(y_test,y_pred)
mae

In [None]:
# Mean Squared Error (MSE) on the test split.
mse = mean_squared_error(y_test,y_pred)
mse

In [None]:
# Root Mean Squared Error (RMSE): square root of the MSE.
rmse = np.sqrt(mean_squared_error(y_test,y_pred))
rmse

In [None]:
# R2 score on the test split.
r2s = r2_score(y_test,y_pred)
r2s

In [None]:
# Overlay the fitted regression plane on the 3-D scatter of the data.
# The axis variables have their own names so the plotting grid does not
# overwrite the dataset variable `y`.
grid_points = 10
f1_axis = np.linspace(-5, 5, grid_points)
f2_axis = np.linspace(-5, 5, grid_points)

# With NumPy's default 'xy' indexing both grids have shape
# (len(f2_axis), len(f1_axis)): columns follow feature1 and rows follow
# feature2, matching the z[row][col] <-> (x[col], y[row]) layout of go.Surface.
f1_grid, f2_grid = np.meshgrid(f1_axis, f2_axis)

# Query the model with a DataFrame that uses the training column names, so
# scikit-learn does not warn about missing feature names.
grid_df = pd.DataFrame({'feature1': f1_grid.ravel(), 'feature2': f2_grid.ravel()})
z_plane = lr.predict(grid_df).reshape(f1_grid.shape)

fig = px.scatter_3d(ds, x='feature1', y='feature2', z='target')
fig.add_trace(go.Surface(x=f1_axis, y=f2_axis, z=z_plane))
fig.show()

In [None]:
# Fitted coefficients beta1 and beta2 (one per feature).
lr.coef_

In [None]:
# Fitted intercept beta0.
lr.intercept_

## iii.Gradient Descent

### Holding m constant and finding b
**slope = -2 * ∑ (yi - m·xi - b)**, then update **b = b - learning rate * slope**

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [None]:
# Generate a synthetic one-feature regression dataset (seeded, so it is repeatable).
X,y = make_regression(n_samples=200,n_features=1,n_informative=1,n_targets=1,noise=50,random_state=2)

In [None]:
# Put the generated feature and target into a DataFrame.
df = pd.DataFrame({'feature':X[:,0],'target':y})

In [None]:
# Preview the first rows.
df.head()

In [None]:
# Scatter plot of the generated feature against the target.
plt.scatter(X,y)
plt.show()

In [None]:
# Hold out 10% of the samples for testing, with a fixed seed.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42)

In [None]:
# Ordinary least squares model used as the reference solution.
lr = LinearRegression()

In [None]:
# Fit the reference model on the training split.
lr.fit(X_train,y_train)

In [None]:
# Fitted slope (m) of the reference model.
m =lr.coef_
m

In [None]:
# Fitted intercept (b) of the reference model.
lr.intercept_

In [None]:
# Predict for the test split.
y_pred = lr.predict(X_test)

In [None]:
# Scatter the training data and draw the model's predictions for the test inputs.
plt.scatter(X_train,y_train)
plt.plot(X_test,y_pred)
plt.show()

In [None]:
# R2 score of the reference model on the test split.
r2s = r2_score(y_test,y_pred)
r2s

In [None]:
# now find b value using gradient descent taking m constant

In [None]:
# Estimate the intercept b by gradient descent while the slope stays fixed.
epoch = 30
b = -120
# Learning rate. Named lr1 (as the later gradient-descent sections do) so the
# fitted LinearRegression stored in `lr` is not overwritten by a float.
lr1 = 0.001
# Slope held constant during the search.
# NOTE(review): presumably the OLS coefficient from the fit above - confirm against lr.coef_.
m_fixed = 39.8256
for i in range(epoch):
    # Derivative of sum((y - m*x - b)^2) with respect to b.
    slope = -2 * np.sum(y_train - m_fixed * X_train.ravel() - b)
    b = b - (lr1 * slope)
print('b:',b)

In [None]:
# make a function
def gd(b, epoch, lr, X=None, y=None, m=39.8256):
    """Estimate the intercept by gradient descent with the slope held fixed.

    Each step moves ``b`` against the derivative of the summed squared error
    ``sum((y - m*x - b)**2)`` with respect to ``b``.

    Args:
        b: Starting value of the intercept.
        epoch: Number of update steps to run.
        lr: Learning rate (step size).
        X: Feature values; flattened to 1-D. Defaults to the notebook's
            ``X_train`` when omitted.
        y: Target values aligned with ``X``. Defaults to the notebook's
            ``y_train`` when omitted.
        m: Slope that stays constant during the search.

    Returns:
        The intercept after ``epoch`` updates (also printed as ``b: <value>``).
    """
    # Fall back to the notebook-level training split so existing three-argument calls still work.
    if X is None:
        X = X_train
    if y is None:
        y = y_train
    x_flat = np.ravel(X)
    y_arr = np.asarray(y)
    for _ in range(epoch):
        slope = -2 * np.sum(y_arr - m * x_flat - b)
        b = b - (lr * slope)
    print('b:', b)
    return b

In [None]:
# Keep the result under its own name: assigning it back to `gd` would replace
# the function with its return value and make any later call to gd() fail.
b_gd = gd(150,100,0.001)

### Find m & b

* **slope_b = -2 * ∑ (yi - m·xi - b)** 
* **slope_m = -2 * ∑ (yi - m·xi - b) · xi**
* **b = b - learning rate * slope_b**
* **m = m - learning rate * slope_m**

In [None]:
# Scatter plot of the full generated dataset.
plt.scatter(X,y)
plt.show()

In [None]:
# Learn both the slope (m) and the intercept (b) with gradient descent.
epoch = 50
# Learning rate. Named lr1 (as the later gradient-descent sections do) so the
# fitted LinearRegression stored in `lr` is not overwritten by a float.
lr1 = 0.001
m = 120
b = -120

x_flat = X_train.ravel()
for i in range(epoch):
    # Both gradients use the residual from the parameters at the start of the
    # step, so m and b are updated simultaneously.
    residual = y_train - m * x_flat - b
    slope_b = -2 * np.sum(residual)
    slope_m = -2 * np.sum(residual * x_flat)
    m = m - (lr1 * slope_m)
    b = b - (lr1 * slope_b)
print('m:',m)
print('b:',b)


## iv.Batch Gradient Descent

In [None]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [None]:
# Load the diabetes dataset as a (features, target) pair.
X,y = load_diabetes(return_X_y=True)

In [None]:
# Hold out 20% of the samples for testing, with a fixed seed.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [None]:
# Check the dimensions of the training features and targets.
X_train.shape,y_train.shape

In [None]:
# Reference model to compare the hand-written gradient descent against.
lr = LinearRegression()

In [None]:
# Fit the reference model on the training split.
lr.fit(X_train,y_train)

In [None]:
# Reference coefficients.
lr.coef_

In [None]:
# Reference intercept.
lr.intercept_

In [None]:
# Batch gradient descent: every update uses the gradient averaged over the
# whole training set.
n_samples, n_features = X_train.shape
b0 = 0
bn = np.ones(n_features)
epochs = 2000
lr1 = 0.7
for _ in range(epochs):
    # Residuals come from the parameters at the start of the epoch, so the
    # intercept and the coefficients are updated from the same predictions.
    residual = y_train - (np.dot(X_train, bn) + b0)
    grad_b0 = -2 * np.mean(residual)
    grad_bn = -2 * np.dot(residual, X_train) / n_samples
    b0 = b0 - lr1 * grad_b0
    bn = bn - lr1 * grad_bn

print(b0,bn)


## v.Stochastic Gradient Descent

In [None]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [None]:
# Load the diabetes dataset as a (features, target) pair.
X,y = load_diabetes(return_X_y=True)

In [None]:
# Hold out 20% of the samples for testing, with a fixed seed.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=2)

In [None]:
# Reference model to compare the hand-written SGD against.
lr = LinearRegression()

In [None]:
# Fit the reference model on the training split.
lr.fit(X_train,y_train)

In [None]:
# Reference intercept and coefficients.
lr.intercept_,lr.coef_

In [None]:
# Stochastic gradient descent: each update uses one randomly drawn training row.
n_samples, n_features = X_train.shape
epochs = 50
b0 = 0
bn = np.ones(n_features)
lr1 = 0.01

for _ in range(epochs):
    for _ in range(n_samples):
        # Rows are drawn independently, so within one epoch a row may be
        # picked more than once or not at all.
        idx = np.random.randint(0, n_samples)
        row, target = X_train[idx], y_train[idx]
        error = target - (np.dot(row, bn) + b0)

        # Both gradients are taken from the same pre-update error.
        grad_b0 = -2 * error
        grad_bn = -2 * np.dot(error, row)
        b0 = b0 - lr1 * grad_b0
        bn = bn - lr1 * grad_bn

print(b0,bn)


## vi.sklearn built in Stochastic Gradient Descent

In [None]:
from sklearn.linear_model import SGDRegressor,LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.metrics import r2_score

In [None]:
# Load the diabetes dataset as a (features, target) pair.
X,y = load_diabetes(return_X_y=True)

In [None]:
# Hold out 20% of the samples for testing, with a fixed seed.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=2)

In [None]:
# Reference model to compare SGDRegressor against.
lr = LinearRegression()

In [None]:
# Fit the reference model on the training split.
lr.fit(X_train,y_train)

In [None]:
# Reference intercept and coefficients.
lr.intercept_,lr.coef_

In [None]:
# scikit-learn's SGD-based linear regressor: at most 200 passes over the data,
# with a constant learning-rate schedule.
sgd = SGDRegressor(max_iter=200,learning_rate='constant')

In [None]:
# Fit the SGD regressor on the training split.
sgd.fit(X_train,y_train)

In [None]:
# Learned intercept and coefficients, for comparison with the reference model.
sgd.intercept_,sgd.coef_

In [None]:
# Predict for the test split with the SGD regressor.
y_pred = sgd.predict(X_test)

In [None]:
# R2 score of the SGD regressor on the test split.
r2s = r2_score(y_test,y_pred)

In [None]:
# Show the score.
r2s

## vii.Mini Batch Gradient Descent

In [None]:
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
import random
from sklearn.metrics import r2_score

In [None]:
# using sklearn:

In [None]:
# Load the diabetes dataset as a (features, target) pair.
X,y = load_diabetes(return_X_y=True)

In [None]:
# Hold out 20% of the samples for testing, with a fixed seed.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=2)

In [None]:
# SGD regressor with a constant learning rate of 0.01; it is trained
# incrementally with partial_fit below rather than with fit.
sgd = SGDRegressor(learning_rate='constant',eta0=0.01)

In [None]:
# Mini-batch training: each iteration feeds one random batch of rows to
# partial_fit. Note that `epochs` counts batches here, not full passes
# over the training data.
batch_size = 40
epochs = 500
n_train = X_train.shape[0]
for _ in range(epochs):
    # random.sample picks distinct row indices, so a batch never repeats a row.
    idx = random.sample(range(n_train), batch_size)
    sgd.partial_fit(X_train[idx], y_train[idx])

In [None]:
# Coefficients learned from the mini-batch updates.
sgd.coef_

In [None]:
# Intercept learned from the mini-batch updates.
sgd.intercept_

In [None]:
# Predict for the test split.
y_pred = sgd.predict(X_test)

In [None]:
# R2 score of the mini-batch model on the test split.
r2s = r2_score(y_test,y_pred)

In [None]:
# Show the score.
r2s