In [None]:
from sklearn.linear_model import LinearRegression
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Ordinary least-squares estimator from scikit-learn, created with its default settings.
model = LinearRegression()

In [None]:
# Toy training set: five observations with two features and one target.
diameter = [6, 8, 10, 14, 18]
toppings = [2, 1, 0, 2, 0]
price = [7, 9, 13, 17.5, 18]

# Features and target are both kept as DataFrames; the column names are
# spelled as keyword arguments instead of dict-literal keys.
X_train = pd.DataFrame(dict(Diameter=diameter, Toppings=toppings))
y_train = pd.DataFrame(dict(Price=price))

# Trailing tuple so the notebook echoes both frames.
X_train, y_train

In [None]:
# Fit the estimator on the training frames defined above.
model.fit(X_train,y_train)
# Echo the learned feature weights and the intercept for comparison with the
# closed-form solution computed in the following cells.
model.coef_,model.intercept_


In [None]:
# Design matrix for the normal equation: a leading column of ones (bias term)
# followed by the feature columns of X_train. The row count is taken from
# X_train instead of being hard-coded, so the cell keeps working if the
# training set grows or shrinks.
x_b = np.column_stack((np.ones(len(X_train)), X_train))
x_b


In [None]:
# Closed-form least squares (normal equation): theta = (X^T X)^-1 X^T y.
# The computation is split into named steps; the last product is still
# `ndarray @ y_train`, so the result has the same type as before (the
# plotting cell further down indexes it with `.iloc`).
normal_matrix_inv = np.linalg.inv(x_b.T @ x_b)
theta_best = (normal_matrix_inv @ x_b.T) @ y_train
theta_best

In [None]:
# Re-display the training features for reference.
X_train

In [None]:
# Axes3D is not referenced by name; importing it registers the '3d'
# projection on Matplotlib releases older than 3.2.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Evaluate the fitted model along the diagonal where both features take the
# same value (0..19). This is a single line lying in the regression plane,
# not the full plane.
x_model = np.arange(20)
y_model = np.arange(20)
z_model = (theta_best.iloc[0, 0]
           + x_model * theta_best.iloc[1, 0]
           + y_model * theta_best.iloc[2, 0])

ax.plot(x_model, y_model, z_model, label='fitted model')
# Pass the 1-D Price column rather than the whole (5, 1) DataFrame as z.
ax.scatter3D(X_train.Diameter, X_train.Toppings, y_train.Price,
             label='training data')

# Label the axes so the figure can be read on its own.
ax.set_xlabel('Diameter')
ax.set_ylabel('Toppings')
ax.set_zlabel('Price')
ax.legend();

### Gradient Descent

In [None]:
# Batch gradient descent on synthetic data y = 4 + 3x + Gaussian noise.
m = 10000
rng = np.random.RandomState(0)
X = 2 * rng.rand(m, 1)

y = 4 + 3 * X + rng.randn(m, 1)

# Prepend a column of ones so theta[0] acts as the intercept.
x_b = np.column_stack((np.ones(m), X))

eta = 0.1               # learning rate
n_iterations = 1000     # upper bound on the number of updates
tolerance_gra = 0.00005  # stop once the gradient norm falls below this value

theta = rng.randn(2, 1)
count = 0  # number of updates actually performed

# The loop variable is named `step` so the builtin `iter` is no longer shadowed.
for step in range(1, n_iterations + 1):
    count = step
    # Gradient of the mean squared error with respect to theta.
    gra = 2 / m * x_b.T @ (x_b @ theta - y)
    theta = theta - eta * gra

    # Euclidean norm of the gradient, used as the convergence criterion.
    sqrt_comp_grad = np.sqrt((gra ** 2).sum())
    if sqrt_comp_grad < tolerance_gra:
        print("count", count,"sqrt_comp_grad",sqrt_comp_grad)
        break

theta

## Stochastic Gradient Descent

In [None]:
%%timeit
m = 100000
rng = np.random.RandomState(0)
X = 2 * rng.rand(m,1)
y = 4 + 3 * X + rng.randn(m,1)

x_b = np.column_stack((np.ones(m),X))

theta = rng.randn(2,1)

count = 0

n_epochs = 50
t0,t1 = 5,50

def learning_schedule(t):
    return t0/ (t +t1)

theta = rng.randn(2,1)

for epoch in range(n_epochs):
    for i in range(m):
        random_index = rng.randint(m)
        x_i = x_b[random_index:random_index +1]
        y_i = y[random_index:random_index +1]
        gradients = 2 * x_i.T @ (x_i @ theta - y_i)
        eta  = learning_schedule(epoch * m + i)
        theta = theta - eta * gradients

theta

### Mini-batch Gradient Descent

In [None]:
# Mini-batch gradient descent on synthetic data y = 4 + 3x + Gaussian noise.
m = 100000
rng = np.random.RandomState(0)
X = 2 * rng.rand(m, 1)
y = 4 + 3 * X + rng.randn(m, 1)

# Prepend a column of ones so theta[0] acts as the intercept.
x_b = np.column_stack((np.ones(m), X))

n_epochs = 50
t0, t1 = 5, 50  # learning-schedule hyper-parameters
batch_size = 10
# One epoch is one pass over roughly m samples, i.e. m // batch_size batches.
n_batches = m // batch_size


def learning_schedule(t):
    """Return the step size t0 / (t + t1) for update number t."""
    return t0 / (t + t1)


# Single random initialisation (the cell used to draw theta twice).
theta = rng.randn(2, 1)

for epoch in range(n_epochs):
    for i in range(n_batches):
        # Random row indexes already randomise the batch. x_b must NOT be
        # shuffled in place here: that reorders the features while y keeps
        # its order, so rows of x_b and y would no longer belong together.
        random_indexes = rng.randint(m, size=batch_size)
        x_idxes = x_b[random_indexes]
        y_idxes = y[random_indexes]
        # Average (not sum) the squared-error gradient over the batch so the
        # step size does not scale with batch_size.
        gradients = 2 / batch_size * x_idxes.T @ (x_idxes @ theta - y_idxes)
        eta = learning_schedule(epoch * n_batches + i)
        theta = theta - eta * gradients

theta



In [None]:
# Scratch check: pick five random row indexes, shuffle the design matrix in
# place, then look at which rows those indexes select afterwards.
m = 20
rng = np.random.RandomState(0)
idx = rng.randint(m, size=5)

X = 2 * rng.rand(m, 1)
x_b = np.column_stack((np.ones(m), X))

# Shuffle with the seeded generator rather than the global np.random state so
# the displayed rows are the same on every run.
rng.shuffle(x_b)

display(idx, x_b[idx])

## Polynomial regression
### Find the best polynomial fit to the function f(x) = sin(x) in the range [0, 10]

In [31]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
import numpy as np
from sklearn.linear_model import LinearRegression

# 50 samples of sin(x) at random points in [0, 10). A seeded generator is used
# so the grid-search result is reproducible across runs.
rng = np.random.RandomState(0)
x = 10 * rng.rand(50, 1)
y = np.sin(x)


def PolynominalRegression(degree = 2,**kwargs):
    """Build a pipeline of PolynomialFeatures(degree) followed by LinearRegression.

    Parameters
    ----------
    degree : int, default 2
        Degree passed to PolynomialFeatures.
    **kwargs
        Forwarded unchanged to the LinearRegression constructor.

    Returns
    -------
    The pipeline created by make_pipeline; its steps get the automatic names
    'polynomialfeatures' and 'linearregression' used as prefixes in param_grid.
    """
    return make_pipeline(PolynomialFeatures(degree),
                         LinearRegression(**kwargs))


param_grid = {'polynomialfeatures__degree': np.arange(30),
              'linearregression__fit_intercept': [True, False]}

# `normalize` was deprecated in scikit-learn 1.0 and removed in 1.2, where
# passing it makes the search fail with an invalid-parameter error. Search
# over it only when the installed LinearRegression still exposes it.
if 'normalize' in LinearRegression().get_params():
    param_grid['linearregression__normalize'] = [True, False]

grid = GridSearchCV(PolynominalRegression(), param_grid, cv=7)
grid.fit(x, y)



GridSearchCV(cv=7, error_score='raise-deprecating',
             estimator=Pipeline(memory=None,
                                steps=[('polynomialfeatures',
                                        PolynomialFeatures(degree=2,
                                                           include_bias=True,
                                                           interaction_only=False,
                                                           order='C')),
                                       ('linearregression',
                                        LinearRegression(copy_X=True,
                                                         fit_intercept=True,
                                                         n_jobs=None,
                                                         normalize=False))],
                                verbose=False),
             iid='warn', n_jobs=None,
             param_grid={'linearregression__fit_intercept': [True, False],
                        

In [32]:
# Parameter combination that achieved the best cross-validated score in the search above.
grid.best_params_

{'linearregression__fit_intercept': True,
 'linearregression__normalize': True,
 'polynomialfeatures__degree': 20}