In [1]:
import numpy as np

# Exercises

## Linear Regression

#### Test

In [13]:
X = np.random.randn(2,3)
X = np.c_[np.ones((X.shape[0], 1)), X]
X

array([[ 1.        , -1.2938477 ,  0.31060847, -0.56821196],
       [ 1.        ,  0.22553394,  0.12329867, -0.38258578]])

In [15]:
theta = np.random.rand(X.shape[1], 1)
theta

array([[0.17366279],
       [0.17262825],
       [0.47333071],
       [0.01260365]])

#### Class

In [16]:
class CustomLinearRegression:
    def __init__(self, X_data, y_target, learning_rate=0.01, num_epochs = 10000):
        self.num_samples = X_data.shape[0]
        self.X_data = np.c_[np.ones((self.num_samples, 1)), X_data]
        self.y_target = y_target
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        
        # Initial weights
        self.theta = np.random.randn(self.X_data.shape[1], 1)
        self.losses = []
        
    def compute_loss(self, y_pred, y_target):
        return np.mean((y_pred - y_target)**2)
        
    def predict(self, X_data):
        return np.dot(X_data, self.theta)
    
    def fit(self):
        for epoch in range(self.num_epochs):
            
            y_pred = self.predict(self.X_data)
            loss = self.compute_loss(self.y_pred, self.y_target)
            self.losses.append(loss)
            
            gradient = 2 * 1/self.num_samples * np.dot(self.X_data , (y_pred - self.y_target))
            
            self.theta -= self.learning_rate*gradient
            
            if (epoch % 50) == 0:
                print(f'Epoch: {epoch} - Loss: {loss}')
                
        return {
            'loss': sum(self.losses) / len(self.losses),
            'weight': self.theta
        }

#### Quiz

1. A
2. C [C - B - A - D]
3. D
4. B

In [18]:
def r2score ( y_pred , y ):
    rss = np . sum (( y_pred - y ) ** 2)
    tss = np . sum (( y - y . mean () ) ** 2)
    r2 = 1 - ( rss / tss )
    return r2

y_pred = np . array ([1 , 2 , 3 , 4 , 5])
y = np . array ([1 , 2 , 3 , 4 , 5])
print(r2score ( y_pred , y ))

y_pred = np . array ([1 , 2 , 3 , 4 , 5])
y = np . array ([3 , 5 , 5 , 2 , 4])
print(r2score ( y_pred , y ))

1.0
-2.235294117647059


## Polynomial Regression

#### Quiz

5. B
6. B #corrected
7. A
8. D
9. A
10. A

#### Polynomial Feature

In [22]:
def create_polynomial_features(X, degree=2):
    """ Creates the polynomial features
    Args:
    X : A array tensor for the data.
    degree : An integer for the degree of the generated polynomial function.
    """
    
    X_new = X
    for d in range(2, degree+1):
        X_new = np.c_[X_new, np.power(X, d)]
    return X_new

X = np.array([[1], [2], [3]])

In [None]:
# 8
def create_polynomial_features(X, degree=2):
    """ Creates the polynomial features
    Args:
    X : A array tensor for the data.
    degree : An integer for the degree of the generated polynomial function.
    """
    
    X_new = np.empty((X.shape[0], 0))
    for feature in X.T:
        feature_new = feature
        for d in range(2, degree+1):
            feature_new = np.c_[feature_new, np.power(feature, d)]
        X_new = np.c_[X_new, feature_new]

    return X_new

X = np.array([[1,2], [2,3], [3,4]])
print(create_polynomial_features(X, degree=2))

[[ 1.  1.  2.  4.]
 [ 2.  4.  3.  9.]
 [ 3.  9.  4. 16.]]


# Project: Sales Prediction

## Load dataset

In [3]:
import pandas as pd

df = pd.read_csv('SalesPrediction.csv')
df

Unnamed: 0,TV,Radio,Social Media,Influencer,Sales
0,16.0,6.566231,2.907983,Mega,54.732757
1,13.0,9.237765,2.409567,Mega,46.677897
2,41.0,15.886446,2.913410,Mega,150.177829
3,83.0,30.020028,6.922304,Mega,298.246340
4,15.0,8.437408,1.405998,Micro,56.594181
...,...,...,...,...,...
4567,26.0,4.472360,0.717090,Micro,94.685866
4568,71.0,20.610685,6.545573,Nano,249.101915
4569,44.0,19.800072,5.096192,Micro,163.631457
4570,71.0,17.534640,1.940873,Macro,253.610411


## Preprocessing

In [None]:
# b. Preprocessing 

### Encoding
df = pd.get_dummies(df)

### Impute
df = df.fillna(df.mean())

### Get Features
X = df[['TV', 'Radio', 'Social Media', 'Influencer_Macro', 'Influencer_Mega', 'Influencer_Micro', 'Influencer_Nano']]
y = df[['Sales']]

### Split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [11]:
# c. Feature Scaling

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_processed = scaler.fit_transform(X_train)
X_test_processed = scaler.fit_transform(X_test)
scaler.mean_[0]

53.971574344023324

In [12]:
# d. Polynomial Features

from sklearn.preprocessing import PolynomialFeatures

poly_features = PolynomialFeatures(degree=2)
X_train_poly = poly_features.fit_transform(X_train_processed)
X_test_poly = poly_features.fit_transform(X_test_processed)

In [18]:
# e. Training & Evaluation

from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

poly_model = LinearRegression()
poly_model.fit(X_train_poly, y_train)

preds = poly_model.predict(X_test_poly)
r2_score(y_test, preds)

-249452529497.46368