In [1]:
# Put the directory two levels up on the import path so the project-local
# imports in the following cells can resolve (presumably `sample_data` and
# `fscoreai` live there -- confirm against the repository layout).
# The entry is relative, so it is resolved against the kernel's working directory.
import sys
sys.path.append('../../')

In [2]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model

ModuleNotFoundError: No module named 'sklearn'

In [None]:
from sample_data import sample_data
from fscoreai import linear
from fscoreai import loss

# Univariate Linear Data

In [None]:
# Load the simulated univariate sample; X and y are reused by the fitting
# cells that follow.
X, y = sample_data.simulate_linear_data()

# Explicit figure/axes with labels so the plot stands on its own when skimmed.
fig, ax = plt.subplots()
ax.scatter(X, y)
ax.set(title="Simulated linear data", xlabel="X", ylabel="y");

## Sklearn Model

In [None]:
# Reference fit with scikit-learn on the same (X, y); `fit` returns the
# estimator itself, so construction and fitting are chained.
model_skl = linear_model.LinearRegression().fit(X, y)
print(model_skl.intercept_, model_skl.coef_)

## Custom Class

In [None]:
# Closed form: fit the project's LinearRegression through fit_closed_form
# (presumably an analytic least-squares solve -- confirm in fscoreai.linear)
# and print the fitted parameters for comparison with the sklearn cell.
model = linear.LinearRegression()
model.fit_closed_form(X, y)
print(model.intercept_, model.coef_)

In [None]:
# Gradient descent: a fresh instance of the same class, fitted with lr=1e-2
# (presumably the learning rate -- confirm) for 1000 epochs.
# `model` is rebound here, so later cells that use `model` see this fit.
model = linear.LinearRegression()
model.fit(X, y, lr=1e-2, n_epochs=1000)
print(model.intercept_, model.coef_)

## Plotting

In [None]:
# Evaluate the most recently fitted `model` on an evenly spaced grid.
# linspace includes both endpoints, whereas arange(min, max, 0.1) drops the
# stop value and left the fitted line short of the rightmost data point.
X_test = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
y_pred = model.predict(X_test)

fig, ax = plt.subplots()
ax.scatter(X, y, label="data")
ax.plot(X_test, y_pred, c="red", label="model prediction")
ax.set(title="Fitted line over the data", xlabel="X", ylabel="y")
ax.legend();

# Multivariate Linear Data

In [None]:
X, y = sample_data.boston_house_data()

# One label per feature column, in column order. The first twelve come from
# the per-plot comments this cell originally carried; column 12 was labelled
# "black" a second time there -- "lstat" assumes the standard Boston housing
# column order (TODO confirm against sample_data.boston_house_data).
FEATURE_NAMES = [
    "crime", "zoning", "industry", "charles river", "nox", "number of rooms",
    "age", "dis", "rad", "tax", "ptratio", "black", "lstat",
]

# Taller than the original (10, 10) so seven titled rows do not overlap.
fig, ax = plt.subplots(7, 2, figsize=(10, 18))
panels = ax.ravel()  # row-major, so the order matches ax[0][0], ax[0][1], ax[1][0], ...

# One scatter of the target against each feature column.
for col, (panel, name) in enumerate(zip(panels, FEATURE_NAMES)):
    panel.scatter(X[:, col], y)
    panel.set_title(name)

# The 7x2 grid has 14 slots for 13 features; hide the unused one.
for panel in panels[len(FEATURE_NAMES):]:
    panel.set_visible(False)

fig.tight_layout()

In [None]:
# Keep only columns 0 and 5 (labelled crime and number of rooms in the
# scatter-grid cell) so the fits below use two features.
# NOTE(review): X is rebound in place, so running this cell a second time
# without re-running the loading cell indexes column 5 of a 2-column array
# and fails.
X = X[:, [0, 5]]

## Sklearn Model

In [None]:
# Reference fit with scikit-learn on the two selected features; `fit`
# returns the estimator itself, so construction and fitting are chained.
model_skl = linear_model.LinearRegression().fit(X, y)
print(model_skl.intercept_, model_skl.coef_)

## Custom Class

In [None]:
# Closed Form 
model = linear.LinearRegression()
model.fit_closed_form(X, y)
print(model.intercept_, model.coef_)

In [None]:
# Gradient Descent
model = linear.LinearRegression()
model.fit(X, y, lr=1e-2, n_epochs=100)
print(model.intercept_, model.coef_)

## Plotting

In [None]:
# Cost values stored on the most recently fitted `model`
# (presumably one entry per training epoch -- confirm in fscoreai.linear).
fig, ax = plt.subplots()
ax.plot(model.costs)
ax.set(title="Training cost history", xlabel="position in model.costs", ylabel="cost");

In [None]:
# Predictions of the most recently fitted `model` on the training features.
y_pred = model.predict(X)

# X now holds the two columns selected earlier (0 and 5 of the full matrix);
# the names follow the labels used in the scatter-grid cell.
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
for col, (panel, name) in enumerate(zip(ax, ("crime", "number of rooms"))):
    panel.scatter(X[:, col], y, label="observed")
    panel.scatter(X[:, col], y_pred, c="red", label="predicted")
    panel.set(xlabel=name, ylabel="y")
    panel.legend()

# Lasso Regression

## Sklearn Model

In [None]:
# Reference Lasso fit (alpha=0.5) with scikit-learn; `fit` returns the
# estimator itself, so construction and fitting are chained.
model_skl = linear_model.Lasso(alpha=0.5).fit(X, y)
print(model_skl.intercept_, model_skl.coef_)

## Custom

In [None]:
# Custom Lasso fitted by gradient descent with alpha=0.
# NOTE(review): the sklearn Lasso cell uses alpha=0.5; with alpha=0 the L1
# penalty presumably vanishes, so the two printouts may not be comparable --
# confirm the intended value and how fscoreai scales the penalty.
# NOTE(review): verbose=True over 10000 epochs may print a lot -- verify.
model = linear.LassoRegression(alpha=0)
model.fit(X, y, lr=1e-2, n_epochs=10000, verbose=True)
print(model.intercept_, model.coef_)

# Ridge Regression

## Sklearn Model

In [None]:
# Reference Ridge fit (alpha=1) with scikit-learn; `fit` returns the
# estimator itself, so construction and fitting are chained.
model_skl = linear_model.Ridge(alpha=1).fit(X, y)
print(model_skl.intercept_, model_skl.coef_)

## Custom

In [None]:
# Custom Ridge fitted by gradient descent with alpha=0.1 for 1000 epochs.
# NOTE(review): the sklearn Ridge cell uses alpha=1; whether 0.1 here is an
# equivalent penalty depends on how fscoreai scales it -- confirm.
model = linear.RidgeRegression(alpha=0.1)
model.fit(X, y, lr=1e-2, n_epochs=1000, verbose=True)
print(model.intercept_, model.coef_)

# ElasticNet Regression

## Sklearn Model

In [None]:
# Reference ElasticNet fit (alpha=0.25, l1_ratio=0.5) with scikit-learn;
# `fit` returns the estimator itself, so construction and fitting are chained.
model_skl = linear_model.ElasticNet(alpha=0.25, l1_ratio=0.5).fit(X, y)
print(model_skl.intercept_, model_skl.coef_)

## Custom

In [None]:
# Custom ElasticNet fitted by gradient descent for 1000 epochs, using the
# same numeric settings as the sklearn cell (alpha=0.25, ratio 0.5).
# `alpha_ratio` presumably plays the role of sklearn's `l1_ratio` -- confirm
# in fscoreai.linear.
model = linear.ElasticNetRegression(alpha=0.25, alpha_ratio=0.5)
model.fit(X, y, lr=1e-2, n_epochs=1000, verbose=True)
print(model.intercept_, model.coef_)

# Visualizing Slope vs. Cost for Univariate Data

In [None]:
# Reload the simulated univariate sample for the cost-surface cells below
# (X and y were last bound to the two-feature housing data).
X, y = sample_data.simulate_linear_data()

# Explicit figure/axes with labels so the plot stands on its own when skimmed.
fig, ax = plt.subplots()
ax.scatter(X, y)
ax.set(title="Simulated linear data", xlabel="X", ylabel="y");

In [None]:
errors = []


def ssr(y, y_pred):
    """Return the sum of squared residuals between `y_pred` and `y`."""
    residuals = y_pred - y
    return np.sum(residuals * residuals)

# Both sweeps use the same grid, so the two cost curves share an x-axis.
coefs = np.arange(-5, 5, 0.2)
intercepts = np.arange(-5, 5, 0.2)

# Cost as a function of the slope, with the intercept pinned at 0.
intercept = 0
slope_errors = [ssr(y, np.dot(X, coef) + intercept) for coef in coefs]

# Cost as a function of the intercept, with the slope pinned at 0.
coef = 0
intercept_errors = [ssr(y, np.dot(X, coef) + b) for b in intercepts]

# Each curve has its own list and legend entry so the series can be told
# apart; the original overlaid two unlabeled scatters and reused `errors`.
fig, ax = plt.subplots()
ax.scatter(coefs, slope_errors, label="slope varied (intercept = 0)")
ax.scatter(intercepts, intercept_errors, label="intercept varied (slope = 0)")
ax.set(
    title="Cost as a function of a single parameter",
    xlabel="parameter value",
    ylabel="sum of squared residuals",
)
ax.legend();

# Hyperparameter Search

In [None]:
# `Generator.random` is an instance method: calling it on the class, as in
# np.random.Generator.random(10), raises TypeError. Build a Generator with
# default_rng first; the fixed seed keeps the draw reproducible across runs.
rng = np.random.default_rng(0)
samples = rng.random(10)  # 10 floats drawn uniformly from [0, 1)
samples

In [None]:
def RandomSearchCV(X, y, model, param_range, folds):
    """Unfinished random-search stub: draw candidate values and set up score tables.

    Only the candidate draw is implemented. `X`, `y`, `model` and `folds` are
    accepted but not used yet, and nothing is returned.

    Parameters
    ----------
    X, y : unused for now
    model : unused for now
    param_range : sequence of two ints
        `param_range[0]` (inclusive) and `param_range[1]` (exclusive) bound
        the integer candidates.
    folds : unused for now

    Returns
    -------
    None
    """
    # Local import: no visible cell imports `random`, so relying on a global
    # name would raise NameError on a fresh kernel.
    import random

    candidates = range(param_range[0], param_range[1])
    # random.sample raises ValueError when asked for more items than the
    # population holds, so cap the draw at the number of available values.
    param = random.sample(candidates, min(10, len(candidates)))
    train_scores = dict()
    test_scores = dict()
    # TODO: evaluate `model` for every value in `param` across `folds` splits,
    # fill train_scores / test_scores, and return the results.

In [None]:
# Grid-search setup over the gradient-descent settings of the custom model.
X, y = sample_data.simulate_linear_data()
model = linear.LinearRegression()
# Candidate values. Elsewhere in this notebook `lr` and `n_epochs` are passed
# to model.fit(...), not to the constructor.
params = {
    'lr': [1e-1, 1e-2, 1e-3, 1e-5, 1e-8, 1e-10],
    'n_epochs': [2,10,100,1000,10000,100000]
}

# NOTE(review): GridSearchCV is not imported or defined in any visible cell,
# so this line raises NameError on a fresh kernel. If sklearn's GridSearchCV
# is intended, it also expects an sklearn-compatible estimator whose
# constructor accepts the keys of `params` -- confirm before wiring this up.
clf = GridSearchCV(model, params)
clf.fit(X, y)

In [None]:
# NOTE(review): `score` is not assigned in any visible cell, so this raises
# NameError on a fresh kernel -- presumably it should hold the search
# results from the cell above; confirm and define it before plotting.
plt.plot(score)