# Regression Model Selection

## Constants

In [104]:
# Identifiers for the regression models this notebook can run.
# Each constant's value is its own name, so a normalised user answer
# can be compared against it directly.
MULTIPLE_LINEAR_REGRESSION = "MULTIPLE_LINEAR_REGRESSION"
POLYNOMIAL_REGRESSION = "POLYNOMIAL_REGRESSION"
SUPPORT_VECTOR_REGRESSION = "SUPPORT_VECTOR_REGRESSION"
DECISION_TREE_REGRESSION = "DECISION_TREE_REGRESSION"
RANDOM_FOREST_REGRESSION = "RANDOM_FOREST_REGRESSION"

# Every selectable model identifier.
ALL_MODELS = [
    MULTIPLE_LINEAR_REGRESSION,
    POLYNOMIAL_REGRESSION,
    SUPPORT_VECTOR_REGRESSION,
    DECISION_TREE_REGRESSION,
    RANDOM_FOREST_REGRESSION,
]

# Ask which model to run and keep asking until the answer matches an entry
# of ALL_MODELS. NOTE: this cell blocks on user input, so the notebook cannot
# run unattended (e.g. "Restart & Run All" stops here until answered).
MODEL_TYPE = None

while MODEL_TYPE not in ALL_MODELS:
    # split() + join trims leading/trailing whitespace and collapses runs of
    # whitespace, so "  random  forest regression " is accepted as well as
    # "RANDOM_FOREST_REGRESSION".
    MODEL_TYPE = "_".join(input("Enter the model type: ").split()).upper()

    # Tell the user what went wrong instead of silently re-prompting.
    if MODEL_TYPE not in ALL_MODELS:
        print(f"Unknown model type. Choose one of: {', '.join(ALL_MODELS)}")

print(f"Selected model: {MODEL_TYPE}")

Selected model: RANDOM_FOREST_REGRESSION


## Import Libraries

In [105]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

## Import Dataset

In [106]:
# Read the energy dataset from disk. Every column except the last is used as
# a feature; the last column is the regression target.
dataset = pd.read_csv("datasets/energy.csv")
X = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

## Split Dataset

In [107]:
from sklearn.model_selection import train_test_split

# Hold out a quarter of the rows for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Placeholder that the selected model's cell replaces with its predictions.
y_pred = []

## Model Selection

### Multiple Linear Regression

In [108]:
if MODEL_TYPE == MULTIPLE_LINEAR_REGRESSION:

    from sklearn.linear_model import LinearRegression

    # Fit an ordinary linear model on the training split
    # (fit() returns the fitted estimator itself).
    regressor = LinearRegression().fit(X_train, y_train)

    # Predict the target for the held-out rows.
    y_pred = regressor.predict(X_test)


### Polynomial Regression

In [109]:
if MODEL_TYPE == POLYNOMIAL_REGRESSION:

    from sklearn.linear_model import LinearRegression
    from sklearn.preprocessing import PolynomialFeatures

    # Expand the training features into polynomial terms up to degree 4.
    poly_reg = PolynomialFeatures(degree=4)
    X_poly = poly_reg.fit_transform(X_train)

    # Fit a linear model on the expanded features.
    lin_reg = LinearRegression().fit(X_poly, y_train)

    # Apply the same (already fitted) expansion to the test features, then predict.
    y_pred = lin_reg.predict(poly_reg.transform(X_test))

### Support Vector Regression

In [110]:
if MODEL_TYPE == SUPPORT_VECTOR_REGRESSION:

    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import SVR

    # Standardise features and target before fitting. The scaled arrays are
    # kept under cell-local names so the shared y / X_train / y_train / y_test
    # are not overwritten: re-running this cell, or switching to another
    # model afterwards, still sees the original, unscaled split.
    sc_X = StandardScaler()
    sc_y = StandardScaler()

    X_train_scaled = sc_X.fit_transform(X_train)
    # StandardScaler expects 2-D input, so the target is reshaped into a single
    # column only for scaling and flattened again for SVR.fit().
    y_train_scaled = sc_y.fit_transform(y_train.reshape(-1, 1)).ravel()

    # Fit an RBF-kernel support vector regressor on the scaled data.
    regressor = SVR(kernel="rbf")
    regressor.fit(X_train_scaled, y_train_scaled)

    # Predict on the test features (scaled with the training statistics) and
    # map the predictions back to the target's original units. ravel() keeps
    # y_pred 1-D, the same shape the other model cells produce.
    y_pred_scaled = regressor.predict(sc_X.transform(X_test))
    y_pred = sc_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()

### Decision Tree Regression

In [111]:
if MODEL_TYPE == DECISION_TREE_REGRESSION:

    from sklearn.tree import DecisionTreeRegressor

    # Fit a single regression tree; the fixed seed keeps the result reproducible.
    regressor = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

    # Predict the target for the held-out rows.
    y_pred = regressor.predict(X_test)

### Random Forest Regression

In [112]:
if MODEL_TYPE == RANDOM_FOREST_REGRESSION:

    from sklearn.ensemble import RandomForestRegressor

    # Fit a forest of 10 trees; the fixed seed keeps the result reproducible.
    regressor = RandomForestRegressor(
        n_estimators=10,
        random_state=42,
    ).fit(X_train, y_train)

    # Predict the target for the held-out rows.
    y_pred = regressor.predict(X_test)

In [113]:
# Show predictions (left column) next to the actual test targets (right
# column), rounded to two decimals. Note that set_printoptions changes NumPy's
# print precision for the rest of the session, not only for this cell.
np.set_printoptions(precision=2)
print(np.hstack((y_pred.reshape(-1, 1), y_test.reshape(-1, 1))))

[[454.44 455.27]
 [436.21 436.31]
 [436.13 440.68]
 ...
 [459.62 459.38]
 [473.7  475.24]
 [451.39 450.64]]


## Model Performance

In [114]:
from sklearn.metrics import r2_score

# R² of the selected model on the held-out test split; left as the last
# expression of the cell so the notebook displays the value.
r2_score(y_true=y_test, y_pred=y_pred)

0.9566637376476244