In [39]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

def import_dataset(path):
    """Load a CSV file and split it into a feature matrix and a target vector.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` holds every column except the last and ``y``
        holds the last column, both taken via ``.values``.
    """
    frame = pd.read_csv(path)
    feature_columns = frame.iloc[:, :-1]  # everything but the final column
    target_column = frame.iloc[:, -1]     # final column only
    return feature_columns.values, target_column.values

def splitting_dataset(X, y, size = 0.2, state = 0):
    """Split features and target into training and test subsets.

    Parameters
    ----------
    X, y : array-like
        Features and target, forwarded to ``train_test_split``.
    size : float, default 0.2
        Forwarded as ``test_size``.
    state : int, default 0
        Forwarded as ``random_state``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.
    """
    parts = train_test_split(X, y, test_size=size, random_state=state)
    # Hand the four pieces back as a tuple, in the order
    # X_train, X_test, y_train, y_test.
    return tuple(parts)

def training_multiple_linear_regression(X_train, y_train):
    """Fit an ordinary ``LinearRegression`` on the training data.

    Parameters
    ----------
    X_train, y_train : array-like
        Training features and target, passed to ``fit`` unchanged.

    Returns
    -------
    LinearRegression
        The fitted estimator.
    """
    # ``fit`` returns the estimator itself, so build and train in one step.
    return LinearRegression().fit(X_train, y_train)

def training_polynomial_regression(X_train, y_train, degree = 4):
    """Fit a linear regression on polynomial-expanded training features.

    Parameters
    ----------
    X_train, y_train : array-like
        Training features and target.
    degree : int, default 4
        Degree handed to ``PolynomialFeatures``.

    Returns
    -------
    tuple
        ``(regressor, poly_reg)``: the fitted ``LinearRegression`` and the
        fitted ``PolynomialFeatures`` transformer. New inputs must go through
        ``poly_reg.transform`` before being passed to ``regressor.predict``.
    """
    # Honour the caller's ``degree``; it used to be hard-coded to 4, so the
    # parameter had no effect.
    poly_reg = PolynomialFeatures(degree = degree)
    X_poly = poly_reg.fit_transform(X_train)
    regressor = LinearRegression()
    regressor.fit(X_poly, y_train)
    return regressor, poly_reg

def training_svr(X_train, y_train):
    """Fit an RBF-kernel ``SVR`` on the training data.

    Parameters
    ----------
    X_train : array-like
        Training features.
    y_train : array-like
        Training target. A single-column 2-D target (shape ``(n, 1)``) is
        flattened to 1-D before fitting; any other shape is passed through
        unchanged.

    Returns
    -------
    SVR
        The fitted estimator.
    """
    # A column-vector target makes scikit-learn emit a DataConversionWarning
    # (``y = column_or_1d(y, warn=True)``). Flatten that one case ourselves and
    # leave every other shape alone so genuinely invalid targets still fail.
    y_array = np.asarray(y_train)
    if y_array.ndim == 2 and y_array.shape[1] == 1:
        y_train = y_array.ravel()

    regressor = SVR(kernel = 'rbf')
    regressor.fit(X_train, y_train)
    return regressor

def training_decision_tree(X_train, y_train, state = 0):
    """Fit a ``DecisionTreeRegressor`` on the training data.

    Parameters
    ----------
    X_train, y_train : array-like
        Training features and target, passed to ``fit`` unchanged.
    state : int, default 0
        Forwarded as ``random_state``.

    Returns
    -------
    DecisionTreeRegressor
        The fitted estimator.
    """
    tree = DecisionTreeRegressor(random_state=state)
    # ``fit`` returns the estimator itself.
    return tree.fit(X_train, y_train)

def feature_scale(X_train, y_train):
    """Standardise training features and target with separate scalers.

    Parameters
    ----------
    X_train : array-like, 2-D
        Training features.
    y_train : array-like
        Training target. A 1-D target is promoted to a single-column 2-D
        array before scaling; a 2-D target is used as given.

    Returns
    -------
    tuple
        ``(sc_X, sc_y, X_train, y_train)``: the two fitted ``StandardScaler``
        objects followed by the scaled features and the scaled target. The
        scaled target is 2-D, so ``sc_y.inverse_transform`` expects 2-D input.
    """
    # StandardScaler only accepts 2-D input, so a plain 1-D target would raise.
    # Promote it to a column vector here instead of making every caller reshape.
    if np.ndim(y_train) == 1:
        y_train = np.asarray(y_train).reshape(-1, 1)

    sc_X = StandardScaler()
    sc_y = StandardScaler()
    X_train = sc_X.fit_transform(X_train)
    y_train = sc_y.fit_transform(y_train)
    return sc_X, sc_y, X_train, y_train

def predict(regressor, X_test):
    """Return the regressor's predictions for ``X_test``.

    Parameters
    ----------
    regressor : object
        Anything exposing a ``predict`` method.
    X_test : array-like
        Inputs handed to ``regressor.predict``.

    Returns
    -------
    The value returned by ``regressor.predict(X_test)``.

    Notes
    -----
    Side effect: sets NumPy's global print precision to 2 after predicting.
    """
    predictions = regressor.predict(X_test)
    # Global display setting; it changes how arrays print everywhere afterwards.
    np.set_printoptions(precision=2)
    return predictions

def evaluate_model_performance(y_test, y_pred):
    """Score predictions against the true targets with ``r2_score``.

    Parameters
    ----------
    y_test : array-like
        True target values.
    y_pred : array-like
        Predicted target values.

    Returns
    -------
    The value returned by ``r2_score`` for the pair.
    """
    score = r2_score(y_true=y_test, y_pred=y_pred)
    return score

In [40]:
# Init
# Load the data and split it once; every model below reuses this same split,
# so the raw X_train / X_test / y_train / y_test are never overwritten.
X, y = import_dataset('../../data/model_selection_data.csv')
X_train, X_test, y_train, y_test = splitting_dataset(X, y)

# Linear Regression

# Multiple Linear Regression
regressor = training_multiple_linear_regression(X_train, y_train)
y_pred = predict(regressor, X_test)
performance = evaluate_model_performance(y_test, y_pred)
print('Multiple Linear Regression:', performance)

# Polynomial Regression
regressor, poly_reg = training_polynomial_regression(X_train, y_train)
y_pred = predict(regressor, poly_reg.transform(X_test))
performance = evaluate_model_performance(y_test, y_pred)
print('Polynomial Regression:', performance)

# Support Vector Regression SVR
# The scaler needs a 2-D target, so scale a column-vector view of y_train and
# keep the scaled data under separate names instead of clobbering the raw split.
y_train_col = y_train.reshape(-1, 1)
sc_X, sc_y, X_train_scaled, y_train_scaled = feature_scale(X_train, y_train_col)
# SVR expects a 1-D target; passing the column vector triggers a
# DataConversionWarning, so flatten it here.
regressor = training_svr(X_train_scaled, y_train_scaled.ravel())
y_pred = predict(regressor, sc_X.transform(X_test))
# Undo the target scaling so the score is computed in the original units.
y_pred = sc_y.inverse_transform(y_pred.reshape(-1, 1)).ravel()
performance = evaluate_model_performance(y_test, y_pred)
print('SVR:', performance)

# Decision Tree
# The raw split is still intact, so there is no need to re-read the CSV.
regressor = training_decision_tree(X_train, y_train)
y_pred = predict(regressor, X_test)
performance = evaluate_model_performance(y_test, y_pred)
print('Decision Tree:', performance)

# Random Forest
# TODO: not implemented yet.



Multiple Linear Regression: 0.9325315554761302
Polynomial Regression: 0.9458192820534183


  y = column_or_1d(y, warn=True)


SVR: 0.9480784049986258
Decision Tree: 0.922905874177941
