## Training vs Testing Errors

In [1]:
%config InlineBackend.figure_format = 'retina'
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

### Load the Iris dataset

In [2]:
# Fetch the Iris data bundle from scikit-learn.
iris = datasets.load_iris()

# Feature matrix: 150 data points with 4 features each -> shape (150, 4).
X = iris.data

# Collapse the 3 kinds of flowers into a binary target:
#     Y = 0 (or False):  Setosa (original value 0) / Versicolor (original value 1)
#     Y = 1 (or True):   Virginica (original value 2)
#
# Thresholding iris.target at 1.5 does exactly that. For every data point i:
#     Y[i] = True  (equivalent to 1) if iris.target[i]  > 1.5 (Virginica)
#     Y[i] = False (equivalent to 0) if iris.target[i] <= 1.5 (Setosa / Versicolor)
#
# The extra axis turns the flat target vector into a column of shape (150, 1),
# i.e. one target value per data point.
Y = (iris.target > 1.5)[:, np.newaxis]

# Put features and target side by side so that a single shuffle keeps every
# row's label attached to its features. The boolean target becomes 0. / 1.
# in the stacked array.
X_and_Y = np.hstack((X, Y))
np.random.seed(1)               # Fixed seed -> the same shuffle on every run.
np.random.shuffle(X_and_Y)      # Shuffles the rows of X_and_Y in place.

# The last printed value should always be: [ 5.8  4.   1.2  0.2  0. ]
print(X.shape, Y.shape, X_and_Y[0], sep="\n")

(150, 4)
(150, 1)
[ 5.8  4.   1.2  0.2  0. ]


In [3]:
# Undo the stacking: every column but the last holds features, the last
# column holds the binary target.
X_shuffled = X_and_Y[:, :-1]
Y_shuffled = X_and_Y[:, -1]

# The first 100 shuffled data points form the training set, the remaining
# 50 form the test set.
X_train, X_test = X_shuffled[:100], X_shuffled[100:]   # Shapes: (100,4) / (50,4)
Y_train, Y_test = Y_shuffled[:100], Y_shuffled[100:]   # Shapes: (100,)  / (50,)

# One shape per line: X_train, Y_train, X_test, Y_test.
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape, sep="\n")

(100, 4)
(100,)
(50, 4)
(50,)


In [5]:
from sklearn.linear_model import LinearRegression

# LinearRegression() is fitted on the training set to provide the
# pre-defined weights w and bias b used by the error functions.
pre_defined_weights = LinearRegression().fit(X_train, Y_train)
w = pre_defined_weights.coef_       # Fitted weights, one per feature.
b = pre_defined_weights.intercept_  # Fitted bias (intercept) term.

# Print the fitted parameters so that the output recorded for this cell is
# actually produced when the notebook is re-run from a fresh kernel.
print(w)
print(b)

[ 0.12975624  0.12249935 -0.11714156  0.67102651]
-1.16987680881


In [None]:
def regression_error(x, y, w, b):
    """Return the root-mean-square error of the linear model ``x . w + b``.

    Parameters
    ----------
    x : array-like, shape (n_samples, n_features)
        Input data points, one per row.
    y : array-like, shape (n_samples,) or (n_samples, 1)
        Target value of every data point.
    w : array-like, shape (n_features,)
        Weights of the linear model.
    b : float
        Bias (intercept) of the linear model.

    Returns
    -------
    float
        ``sqrt(mean((x_i . w + b - y_i) ** 2))`` taken over all data points.

    Raises
    ------
    ValueError
        If ``x`` is empty, or ``x`` and ``y`` hold a different number of
        data points.
    """
    features = np.asarray(x, dtype=float)
    # Flatten so that a column-vector target does not broadcast the residuals
    # into an (n, n) matrix.
    targets = np.asarray(y, dtype=float).reshape(-1)
    weights = np.asarray(w, dtype=float).reshape(-1)

    if len(features) == 0:
        raise ValueError("regression_error needs at least one data point")
    if len(features) != len(targets):
        raise ValueError("x and y must contain the same number of data points")

    # Prediction based on x.
    predictions = np.dot(features, weights) + b

    # Squared regression error of every data point.
    squared_residuals = (predictions - targets) ** 2

    # Mean over the data points, then the square root.
    return float(np.sqrt(np.mean(squared_residuals)))

def classification_error(x, y, w, b, threshold=0.5):
    """Return the fraction of data points misclassified by ``x . w + b``.

    The linear output is turned into a binary label by thresholding: a data
    point is assigned class 1 when ``x_i . w + b > threshold`` and class 0
    otherwise. The default threshold of 0.5 is the midpoint between the two
    label values 0 and 1.

    Parameters
    ----------
    x : array-like, shape (n_samples, n_features)
        Input data points, one per row.
    y : array-like, shape (n_samples,) or (n_samples, 1)
        True labels, encoded as 0/1 (or False/True).
    w : array-like, shape (n_features,)
        Weights of the linear model.
    b : float
        Bias (intercept) of the linear model.
    threshold : float, optional
        Decision boundary applied to the linear output. Defaults to 0.5.

    Returns
    -------
    float
        Misclassification rate in ``[0, 1]``.

    Raises
    ------
    ValueError
        If ``x`` is empty, or ``x`` and ``y`` hold a different number of
        data points.
    """
    features = np.asarray(x, dtype=float)
    # Flatten so that a column-vector label array compares element by element.
    labels = np.asarray(y, dtype=float).reshape(-1)
    weights = np.asarray(w, dtype=float).reshape(-1)

    if len(features) == 0:
        raise ValueError("classification_error needs at least one data point")
    if len(features) != len(labels):
        raise ValueError("x and y must contain the same number of data points")

    # Prediction based on x: linear score, then thresholded into a 0/1 label.
    scores = np.dot(features, weights) + b
    predicted_labels = (scores > threshold).astype(float)

    # Classification error: 1 where the predicted label is wrong, 0 otherwise;
    # the mean of those indicators is the error rate.
    return float(np.mean(predicted_labels != labels))
    
# Each entry is (headline, error function, inputs, targets). Looping over them
# prints the headline first and the corresponding error on the next line.
error_reports = (
    ('Training regression errors are:', regression_error, X_train, Y_train),
    ('Testing regression errors are:', regression_error, X_test, Y_test),
    ('Training classification errors are:', classification_error, X_train, Y_train),
    ('Testing classification errors are:', classification_error, X_test, Y_test),
)

for headline, error_fn, inputs, targets in error_reports:
    print(headline)
    print(error_fn(inputs, targets, w, b))