# MVP Prediction Models
### Loading data

In [148]:
# importing packages and loading data from pre-processing notebook
import pandas as pd
import numpy as np


y_train = pd.read_csv('y_train.csv').iloc[:,1:]
y_test = pd.read_csv('y_test.csv').iloc[:,1:]

x_train = pd.read_csv('x_train.csv').iloc[:,1:]
x_test = pd.read_csv('x_test.csv').iloc[:,1:]

x_train_pca = pd.read_csv('x_train_pca.csv').iloc[:,1:]
x_test_pca = pd.read_csv('x_test_pca.csv').iloc[:,1:]

### Linear Regressor Models

In [149]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge


# adding quadratic terms for models that will go through regularization
poly_term_adder = PolynomialFeatures(degree=2) 
x_train_poly = poly_term_adder.fit_transform(x_train)
x_test_poly = poly_term_adder.transform(x_test)

poly_term_adder_pca = PolynomialFeatures(degree=2)
x_train_pca_poly = poly_term_adder_pca.fit_transform(x_train_pca)
x_test_pca_poly = poly_term_adder_pca.transform(x_test_pca)


# Basic models
basic = LinearRegression()
basic.fit(x_train, y_train)

basic_pca = LinearRegression()
basic_pca.fit(x_train_pca, y_train)


# Lasso models
lasso = Lasso(alpha=0.1)
lasso.fit(x_train_poly, y_train)

lasso_pca = Lasso(alpha=0.1)
lasso_pca.fit(x_train_pca_poly, y_train)


# Ridge models
ridge = Ridge(alpha=0.1)
ridge.fit(x_train_poly, y_train)

ridge_pca = Ridge(alpha=0.1)
ridge_pca.fit(x_train_pca_poly, y_train)


# getting predictions
basic_pred = basic.predict(x_test)
basic_pca_pred = basic_pca.predict(x_test_pca)

lasso_pred = lasso.predict(x_test_poly)
lasso_pca_pred = lasso_pca.predict(x_test_pca_poly)

ridge_pred = ridge.predict(x_test_poly)
ridge_pca_pred = ridge_pca.predict(x_test_pca_poly)


# converting negative results to 0 and >1 results to 1
basic_pred[basic_pred < 0] = 0
basic_pca_pred[basic_pca_pred < 0] = 0

lasso_pred[lasso_pred < 0] = 0
lasso_pca_pred[lasso_pca_pred < 0] = 0

ridge_pred[ridge_pred < 0] = 0 
ridge_pca_pred[ridge_pca_pred < 0 ] = 0



basic_pred[basic_pred > 1] = 1
basic_pca_pred[basic_pca_pred > 1] = 1

lasso_pred[lasso_pred > 1] = 1
lasso_pca_pred[lasso_pca_pred > 1] = 1

ridge_pred[ridge_pred > 1] = 1
ridge_pca_pred[ridge_pca_pred > 1] = 1


# computing and displaying metrics
basic_mse = mean_squared_error(y_test, basic_pred)
basic_pca_mse = mean_squared_error(y_test, basic_pca_pred)

lasso_mse = mean_squared_error(y_test, lasso_pred)
lasso_pca_mse = mean_squared_error(y_test, lasso_pca_pred)

ridge_mse = mean_squared_error(y_test, ridge_pred)
ridge_pca_mse = mean_squared_error(y_test, ridge_pca_pred)

print()
print('Basic model MSE:', basic_mse)
print('Basic model (PCA) MSE:', basic_pca_mse)
print()
print('Lasso Regularization model MSE:', lasso_mse)
print('Lasso Regularization (PCA) model MSE:', lasso_pca_mse)
print()
print('Ridge Regularization model MSE:', ridge_mse)
print('Ridge Regularization (PCA) model MSE:', ridge_pca_mse)



  model = cd_fast.enet_coordinate_descent(



Basic model MSE: 0.032332017841986645
Basic model (PCA) MSE: 0.03861486577593903

Lasso Regularization model MSE: 0.027797635117050967
Lasso Regularization (PCA) model MSE: 0.04583684553920191

Ridge Regularization model MSE: 0.029200889524928823
Ridge Regularization (PCA) model MSE: 0.034898969315755576


### Decision Tree Regressor

In [152]:
from sklearn.tree import DecisionTreeRegressor, plot_tree
import matplotlib.pyplot as plt

# creating, fitting, and getting predictions from model
dtr = DecisionTreeRegressor()
dtr.fit(x_train, y_train)
dtr_pred = dtr.predict(x_test)

dtr_pca = DecisionTreeRegressor()
dtr_pca.fit(x_train_pca, y_train)
dtr_pca_pred = dtr_pca.predict(x_test_pca)

# converting negative results to 0 and >1 results to 1
dtr_pred[dtr_pred < 0 ] = 0
dtr_pca_pred[dtr_pca_pred < 0] = 0

dtr_pred[dtr_pred > 1] = 1
dtr_pca_pred[dtr_pca_pred > 1] = 1


# show tree structure
'''
plt.figure(figsize=(10, 8))
plot_tree(dtr, filled=True)
plot_tree(dtr_pca, filled=True)
plt.show()
'''

# metric
dtr_mse = mean_squared_error(y_test, dtr_pred)
dtr_pca_mse = mean_squared_error(y_test, dtr_pca_pred)
print('Decision Tree Regressor MSE:', dtr_mse)
print('Decision Tree Regressor (PCA) MSE:', dtr_pca_mse)


Decision Tree Regressor MSE: 0.05906257894736843
Decision Tree Regressor (PCA) MSE: 0.09438997894736845


### KNN Regressor

In [151]:
from sklearn.neighbors import KNeighborsRegressor

# creating, fitting, and getting predictions from model
knr = KNeighborsRegressor(n_neighbors=5, weights='distance')
knr.fit(x_train, y_train)
knr_pred = knr.predict(x_test)

knr_pca = KNeighborsRegressor()
knr_pca.fit(x_train_pca, y_train)
knr_pca_pred = knr_pca.predict(x_test_pca)

# converting negative results to 0 (negative share of votes not possible)
knr_pred[knr_pred < 0] = 0
knr_pca_pred[knr_pca_pred < 0] = 0

knr_pred[knr_pred > 1] = 1
knr_pca_pred[knr_pca_pred > 1] = 1

# metric
knr_mse = mean_squared_error(y_test, knr_pred)
knr_pca_mse = mean_squared_error(y_test, knr_pca_pred)
print('KNN Regressor MSE:', knr_mse)
print('KNN Regressor (PCA) MSE:', knr_pca_mse)


KNN Regressor MSE: 0.0387794999989323
KNN Regressor (PCA) MSE: 0.046628352421052635
