In [None]:
import pandas as pd
import os
# True when the WORKING_LOCALLY environment variable is set to any non-empty
# string (note: values such as "0" or "false" therefore also count as True).
WORKING_LOCALLY = bool(os.getenv('WORKING_LOCALLY'))

# Local runs read the CSV from the repository's data/ folder; otherwise the file
# is read from a mounted Google Drive path.
PROCESSED_DATASET_FILE_PATH = 'data/processed_dataset.csv' if WORKING_LOCALLY \
    else '/content/drive/My Drive/Projects/IRBoardGameComplexity/processed_dataset.csv'

if not WORKING_LOCALLY:
    # Imported lazily: the name `drive` must be defined before mounting, and the
    # google.colab package is only expected to exist inside a Colab runtime, so
    # local runs must not depend on it.
    from google.colab import drive
    drive.mount('/content/drive')

df_features = pd.read_csv(PROCESSED_DATASET_FILE_PATH)
df_features.head()

In [None]:
# Fraction of rows (taken from the top of the frame, in file order) used for training.
TRAIN_PERCENT = 0.75
DATASET_SIZE = len(df_features)
TRAIN_SET_SIZE = int(DATASET_SIZE * TRAIN_PERCENT)

# Positional, unshuffled split: column 0 is used as the target (y) and every
# remaining column as a feature (x).
x_train = df_features.iloc[:TRAIN_SET_SIZE, 1:]
y_train = df_features.iloc[:TRAIN_SET_SIZE, 0]
x_test = df_features.iloc[TRAIN_SET_SIZE:, 1:]
y_test = df_features.iloc[TRAIN_SET_SIZE:, 0]

class Model:
    """Bundle an estimator object with a human-readable label.

    Attributes are documented on ``__init__``.
    """

    def __init__(self, instance):
        """Store ``instance`` and derive ``name`` from its class name.

        Args:
            instance: Any object; its class's ``__name__`` becomes ``self.name``.
        """
        estimator_class = instance.__class__
        self.name = estimator_class.__name__
        self.instance = instance

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.metrics import r2_score

# Bounds used for both plot axes and for the reference diagonal.
MIN_COMPLEXITY, MAX_COMPLEXITY = 0, 5

def create_graph(index: int, title: str, y_test, predictions) -> None:
    """Draw a predicted-vs-actual scatter plot on matplotlib figure ``index``.

    Both axes are fixed to [MIN_COMPLEXITY, MAX_COMPLEXITY]. Points are drawn as
    red circles, and a straight line from (MIN, MIN) to (MAX, MAX) is overlaid
    as the "perfect prediction" reference.

    Args:
        index: Figure number passed to ``plt.figure``; an existing figure with
            the same number is reused.
        title: Text shown as the plot title.
        y_test: Actual target values (x axis).
        predictions: Predicted values, aligned with ``y_test`` (y axis).
    """
    fig = plt.figure(index)
    ax = fig.gca()
    ax.set_title(title)
    # Label the axes so the figure can be read on its own: x is the actual
    # value, y is the model's prediction.
    ax.set_xlabel('Actual complexity')
    ax.set_ylabel('Predicted complexity')
    ax.axis([MIN_COMPLEXITY, MAX_COMPLEXITY, MIN_COMPLEXITY, MAX_COMPLEXITY])
    ax.grid(True)
    diagonal = [MIN_COMPLEXITY, MAX_COMPLEXITY]
    ax.plot(y_test, predictions, 'ro', diagonal, diagonal)

# Candidate scikit-learn linear regressors, each wrapped so it carries a display name.
models = [
    Model(linear_model.LinearRegression()),
    Model(linear_model.LassoCV()),
    Model(linear_model.ElasticNetCV()),
    Model(linear_model.RidgeCV()),
]

# Fit every candidate on the training split, score it on the held-out split
# with R^2, and draw its predicted-vs-actual plot (figure number = position in
# `models`). Scores are collected in a plain list and turned into a DataFrame
# once, which avoids re-copying the frame on every iteration and gives the
# result a clean 0..n-1 index instead of repeated 0 labels.
score_records = []
for i, model in enumerate(models):
    model.instance.fit(x_train, y_train)
    predictions = model.instance.predict(x_test)
    score_records.append({'Model': model.name, 'Score': r2_score(y_test, predictions)})
    create_graph(i, model.name, y_test, predictions)

# `columns=` keeps the expected header even if `models` is empty.
results = pd.DataFrame(score_records, columns=['Model', 'Score'])
results