In [1]:
import numpy as np
from itertools import combinations_with_replacement as cwr
import matplotlib.pyplot as plt

def polynomialFeatures(X, degree):
    """Expand features into all polynomial terms of degree 1..``degree``.

    Terms are ordered by increasing degree; within one degree they follow the
    order of ``itertools.combinations_with_replacement`` over the feature
    positions.  For two features ``[a, b]`` and ``degree=2`` the result is
    ``[a, b, a*a, a*b, b*b]``.  No constant (bias) term is produced.

    Parameters
    ----------
    X : array-like
        Either a single 1-D feature vector of shape ``(n_features,)`` or a
        2-D array of shape ``(n_samples, n_features)``.
    degree : int
        Highest total degree of the generated terms.  Values below 1 yield
        no terms.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_terms,)`` for 1-D input, or
        ``(n_samples, n_terms)`` for 2-D input.

    Raises
    ------
    ValueError
        If ``X`` is neither 1-D nor 2-D.
    """
    values = np.asarray(X, dtype=float)
    if values.ndim not in (1, 2):
        raise ValueError(
            "X must be a 1-D feature vector or a 2-D (samples, features) array"
        )

    single_sample = values.ndim == 1
    samples = values.reshape(1, -1) if single_sample else values
    n_features = samples.shape[1]

    # Combine feature *positions* rather than the rows of X, so a 2-D input is
    # expanded per sample instead of multiplying whole rows together.
    columns = [
        np.prod(samples[:, list(combo)], axis=1)
        for power in range(1, degree + 1)
        for combo in cwr(range(n_features), power)
    ]

    if columns:
        # Each entry is one term evaluated for every sample -> transpose to
        # (n_samples, n_terms).
        features = np.array(columns).T
    else:
        features = np.empty((samples.shape[0], 0))

    return features[0] if single_sample else features

In [2]:
def mse(Y_true, Y_pred):
    """Return the mean squared error between targets and predictions.

    Parameters
    ----------
    Y_true : array-like
        Reference values.
    Y_pred : array-like
        Predicted values.  May have a different shape than ``Y_true`` as long
        as it holds the same number of elements (e.g. ``(n,)`` vs ``(n, 1)``);
        otherwise normal NumPy broadcasting applies.

    Returns
    -------
    numpy.floating
        Mean of the element-wise squared differences.
    """
    y_true = np.asarray(Y_true)
    y_pred = np.asarray(Y_pred)
    # A column vector (n, 1) minus a flat vector (n,) would broadcast into an
    # (n, n) matrix of all pairwise differences and silently report the wrong
    # error.  When both hold the same number of values, align them first.
    if y_true.shape != y_pred.shape and y_true.size == y_pred.size:
        y_pred = y_pred.reshape(y_true.shape)
    return np.mean((y_true - y_pred) ** 2)

In [3]:
def cross_validation(X,y,cv,model,**model_args):
    """Score ``model`` with shuffled k-fold cross-validation.

    The samples are shuffled once, split into ``cv`` folds with
    ``np.array_split``, and for every fold a fresh ``model()`` is fitted on
    the remaining folds and scored with ``mse`` on both the training part and
    the held-out fold.

    Parameters
    ----------
    X : array-like
        Samples, shape ``(n_samples, n_features)``.  A 1-D input is treated
        as a single feature column.  The caller's object is not modified.
    y : array-like
        Targets, one per sample.  The caller's object is not modified.
    cv : int
        Number of folds; must satisfy ``2 <= cv <= n_samples`` so that every
        fold and every training split is non-empty.
    model : callable
        Zero-argument factory (typically a class) returning an object with
        ``fit(X, y, **model_args)`` and ``predict(X)``.
    **model_args
        Extra keyword arguments forwarded to ``fit``.

    Returns
    -------
    tuple
        ``(mean training MSE, mean validation MSE)`` averaged over the folds.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` differ in length or ``cv`` is out of range.
    """
    data = np.asarray(X)
    labels = np.asarray(y)
    if data.ndim == 1:
        data = data.reshape(-1, 1)

    n_samples = data.shape[0]
    if len(labels) != n_samples:
        raise ValueError("X and y must contain the same number of samples")
    if cv < 2 or cv > n_samples:
        raise ValueError("cv must satisfy 2 <= cv <= number of samples")

    # One shared permutation keeps samples and targets aligned.  Indexing with
    # it allocates new arrays, so the caller's X and y stay untouched and the
    # global RNG is not re-seeded.
    order = np.random.permutation(n_samples)
    data = data[order]
    labels = labels[order]

    datafolds = np.array_split(data, cv)
    labelfolds = np.array_split(labels, cv)
    training_scores = []
    validation_scores = []
    for fold in range(len(datafolds)):
        trainingdata = np.concatenate(
            [part for k, part in enumerate(datafolds) if k != fold], axis=0
        )
        # The model is handed a column vector of targets.
        traininglabels = np.concatenate(
            [part for k, part in enumerate(labelfolds) if k != fold]
        ).reshape(-1, 1)

        validationdata = datafolds[fold]
        validationlabels = labelfolds[fold]

        calledmodel = model()
        calledmodel.fit(trainingdata, traininglabels, **model_args)
        train_preds = calledmodel.predict(trainingdata)
        validation_preds = calledmodel.predict(validationdata)

        # Flatten both sides before scoring so a (n, 1) target column and
        # (n,) predictions are compared element-wise, not broadcast to (n, n).
        training_scores.append(
            mse(traininglabels.reshape(-1), np.asarray(train_preds).reshape(-1))
        )
        validation_scores.append(
            mse(validationlabels.reshape(-1), np.asarray(validation_preds).reshape(-1))
        )

    return np.mean(training_scores), np.mean(validation_scores)


def learning_curve(model, X, y, cv, train_size=1, learning_rate=0.01, epochs=1000, tol=None, regularizer=None, lambd=0.0, **kwargs):
    """Compute cross-validated RMSE for growing prefixes of the data.

    Subsets ``X[:k], y[:k]`` are evaluated with ``cross_validation`` for
    ``k = step, 2*step, 3*step, ...`` while ``k < len(y)``.  Subsets with
    fewer than ``cv`` samples are skipped because they cannot be split into
    ``cv`` non-empty folds.

    Parameters
    ----------
    model : callable
        Model factory passed through to ``cross_validation``.
    X, y : array-like
        Samples and targets; only leading slices are taken.
    cv : int
        Number of folds for each evaluation.
    train_size : int or float
        ``>= 1``: absolute step (truncated to int) between subset sizes.
        ``0 < train_size < 1``: step as a fraction of ``len(y)``, rounded and
        at least 1.
    learning_rate, epochs, tol, regularizer, lambd, **kwargs
        Collected into keyword arguments and passed to ``cross_validation``
        as its ``**model_args``.

    Returns
    -------
    tuple of list
        ``(training_scores, validation_scores)``: one RMSE per evaluated
        subset size, each being the square root of the mean fold MSE that
        ``cross_validation`` returns.

    Raises
    ------
    ValueError
        If ``train_size`` is not positive.
    """
    if train_size <= 0:
        raise ValueError("train_size must be positive")

    n_samples = len(y)
    if train_size >= 1:
        step = int(train_size)
    else:
        step = max(1, int(round(n_samples * train_size)))

    # The explicit hyper-parameters can never also appear in kwargs, so this
    # merge cannot overwrite a caller-supplied value.
    model_args = dict(
        kwargs,
        learning_rate=learning_rate,
        epochs=epochs,
        tol=tol,
        regularizer=regularizer,
        lambd=lambd,
    )

    training_scores = []
    validation_scores = []
    for size in range(step, n_samples, step):
        if size < cv:
            continue
        # Hand over copies so any in-place reordering inside cross_validation
        # cannot disturb the caller's data between iterations.
        X_subset = np.array(X[:size])
        y_subset = np.array(y[:size])
        train, val = cross_validation(X_subset, y_subset, cv, model, **model_args)
        training_scores.append(train ** 0.5)
        validation_scores.append(val ** 0.5)

    return training_scores, validation_scores

In [4]:
def plot_polynomial_model_complexity(model, X, Y, cv, maxPolynomialDegree, learning_rate=0.01, epochs=1000, tol=None, regularized=None, lambd=0.0, **kwargs):
    """Plot train and validation RMSE against polynomial degree.

    For every degree from 1 to ``maxPolynomialDegree`` the original features
    are expanded with ``polynomialFeatures``, the model is evaluated once with
    ``cross_validation``, and the square roots of the returned mean MSE values
    are plotted.

    Parameters
    ----------
    model : callable
        Model factory passed through to ``cross_validation``.
    X : array-like
        Samples, shape ``(n_samples, n_features)``; a 1-D input is treated as
        a single feature column.
    Y : array-like
        Targets, one per sample.
    cv : int
        Number of cross-validation folds.
    maxPolynomialDegree : int
        Highest polynomial degree to evaluate.
    learning_rate, epochs, tol, lambd, **kwargs
        Passed to ``cross_validation`` as keyword arguments.
    regularized
        Passed on under the keyword ``regularizer``.  If it is ``None`` and
        ``kwargs`` already contains ``regularizer``, that value is kept.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    base_X = np.asarray(X)
    if base_X.ndim == 1:
        base_X = base_X.reshape(-1, 1)

    fit_args = dict(kwargs, learning_rate=learning_rate, epochs=epochs, tol=tol, lambd=lambd)
    if regularized is not None or "regularizer" not in fit_args:
        fit_args["regularizer"] = regularized

    degree_list = list(range(1, maxPolynomialDegree + 1))
    rmse_train, rmse_val = [], []
    for degree in degree_list:
        # Always expand the *original* features, one sample at a time, so
        # expansions of different degrees do not compound.
        X_poly = np.array([polynomialFeatures(sample, degree) for sample in base_X])
        # A single k-fold evaluation gives the one train/validation score per
        # degree that this plot needs.  Y is copied so any in-place reordering
        # inside cross_validation cannot misalign it with later expansions.
        train_mse, val_mse = cross_validation(X_poly, np.array(Y), cv, model, **fit_args)
        rmse_train.append(train_mse ** 0.5)
        rmse_val.append(val_mse ** 0.5)

    plt.figure(figsize=(10,6))
    plt.plot(degree_list, rmse_val, "ro-", alpha=1.0, linewidth=1.0, label="Test RMSE")
    plt.plot(degree_list, rmse_train, "bo-", alpha=1.0, linewidth=1.0, label="Train RMSE")
    plt.xlabel("Degree")
    plt.ylabel("RMSE")
    plt.title("RMSE for Varying Degree")
    plt.legend()
    plt.show()

In [5]:
class Linear_Regression:
    """Skeleton of a linear-regression estimator.

    Only the interface exists: ``fit`` and ``predict`` accept their arguments
    and return ``None`` without doing any work.
    """

    def __init__(self):
        """Create the estimator; no attributes are initialised."""

    def fit(self, X, Y, learning_rate=0.01, epochs=1000, tol=None, regularizer=None, lambd=0.0, **kwargs):
        """Placeholder for training.

        All arguments are accepted and ignored; nothing is stored on the
        instance.

        Returns
        -------
        None
        """
        return None

    def predict(self, X):
        """Placeholder for prediction; ignores ``X`` and returns ``None``."""
        return None