In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression, Lasso, Ridge
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import log_loss, confusion_matrix, mean_squared_error
import re


In [None]:
class BaseModel:
    """Shared state and preprocessing for the model wrappers in this notebook.

    Holds the input frame, the estimator (``None`` until a subclass builds one)
    and the train/test split produced by :meth:`preprocess`.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame. It is stored by reference, and :meth:`preprocess` may add
        a ``'target'`` column to it in place.
    regularization : str or None, optional
        Stored unchanged; subclasses decide how to interpret it.
    """

    # Fraction of rows held out as the test set.
    TEST_SIZE = 0.3
    # Seed for the split. scikit-learn accepts an int, a RandomState or None
    # here; a float such as 0.3 is rejected.
    RANDOM_STATE = 0
    # Encoding of the 'winner' label used when no target column exists yet.
    WINNER_TO_TARGET = {'model_a': 1, 'model_b': 0}

    def __init__(self, data, regularization=None):
        self.data = data
        self.model = None
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        # Fitted StandardScaler, kept so new data can be scaled the same way.
        self.scaler = None
        self.regularization = regularization

    def preprocess(self):
        """Build the target, split into train/test sets and standardize features.

        Steps:

        1. If the frame has no ``'target'`` column, derive one from
           ``'winner'`` (``'model_a'`` -> 1, ``'model_b'`` -> 0). An existing
           ``'target'`` is left untouched so a subclass can supply its own.
        2. Drop rows whose target is missing (for example a ``'winner'`` value
           outside the mapping).
        3. Split 70/30 with a fixed integer seed.
        4. Fit a ``StandardScaler`` on the training features and apply it to
           both splits; ``X_train`` / ``X_test`` become NumPy arrays.

        The ``'target'`` and ``'winner'`` columns are excluded from the
        features. All remaining columns must be numeric for the scaler to work.
        """
        if 'target' not in self.data.columns:
            self.data['target'] = self.data['winner'].map(self.WINNER_TO_TARGET)

        # Unmapped labels become NaN, which estimators refuse as a target.
        labelled = self.data.dropna(subset=['target'])

        # 'winner' is the raw label: keeping it would leak the answer, and its
        # string values cannot be standardized.
        features = labelled.drop(columns=['target', 'winner'], errors='ignore')

        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            features,
            labelled['target'],
            test_size=self.TEST_SIZE,
            random_state=self.RANDOM_STATE,
        )

        # Fit on the training split only so test statistics do not leak in.
        self.scaler = StandardScaler()
        self.X_train = self.scaler.fit_transform(self.X_train)
        self.X_test = self.scaler.transform(self.X_test)

In [None]:
class LogisticRegressionModel(BaseModel):
    """Binary logistic regression on the ``'winner'`` label.

    ``regularization`` selects the penalty: ``'l1'``, ``'l2'``, or anything
    else (including ``None``) for an unpenalized fit.
    """

    def __init__(self, data, regularization=None):
        super().__init__(data, regularization)

    def preprocess(self):
        """Encode ``'winner'`` as the binary target, then split and scale."""
        # Written explicitly here so the label never depends on a 'target'
        # column that may already be present on the frame.
        self.data['target'] = self.data['winner'].map({'model_a': 1, 'model_b': 0})
        super().preprocess()

    def train_model(self):
        """Build the estimator for the configured penalty and fit it on the training split."""
        if self.regularization == 'l1':
            # The default 'lbfgs' solver supports only L2 or no penalty and
            # raises for penalty='l1'; 'liblinear' supports L1 for binary
            # targets.
            self.model = LogisticRegression(penalty='l1', solver='liblinear')
        elif self.regularization == 'l2':
            self.model = LogisticRegression(penalty='l2')
        else:
            self.model = LogisticRegression(penalty=None)
        self.model.fit(self.X_train, self.y_train)

    def predict(self, test_data=None):
        """Return predicted class labels.

        Parameters
        ----------
        test_data : array-like, optional
            Features to predict on. They are passed to the model unchanged, so
            they must already be scaled the same way as the training data.
            When omitted, the held-out ``X_test`` split is used.
        """
        features = self.X_test if test_data is None else test_data
        return self.model.predict(features)

    def evaluate(self, y_pred):
        """Print log loss and the confusion matrix of ``y_pred`` against ``y_test``."""
        # NOTE(review): y_pred from predict() holds hard 0/1 labels, whereas
        # log_loss is defined on probabilities - consider passing
        # predict_proba output for a meaningful value.
        print(log_loss(self.y_test, y_pred))
        print(confusion_matrix(self.y_test, y_pred))

In [None]:
class MultiLinearRegressionModel(BaseModel):
    """Linear regression on the averaged hardness score.

    ``regularization`` selects the estimator: ``'l1'`` -> ``Lasso``,
    ``'l2'`` -> ``Ridge``, anything else -> plain ``LinearRegression``.
    """

    # Raw score columns that are averaged into 'hardness_score'.
    SCORE_COLUMNS = ["score_value_1", "score_value_2", "score_value_3"]

    def __init__(self, data, regularization=None):
        super().__init__(data, regularization)

    def preprocess(self):
        """Derive the hardness target, split into train/test sets and scale.

        The split is done here rather than through ``BaseModel.preprocess`` so
        that this class controls both the regression target and the columns
        kept out of the feature matrix.
        """
        self.create_avg_hardness()
        self.data['target'] = self.data['hardness_score']

        # Rows where no score could be parsed have a NaN target; regressors
        # reject NaN in y.
        labelled = self.data.dropna(subset=['target'])

        # The target is computed from the score columns, so they (and the
        # aggregate itself) must not be features. 'winner' holds string labels
        # that cannot be standardized.
        excluded = ['target', 'hardness_score', 'winner', *self.SCORE_COLUMNS]
        features = labelled.drop(columns=excluded, errors='ignore')

        # random_state must be an int (or RandomState / None), not a float.
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            features, labelled['target'], test_size=0.3, random_state=0
        )

        # Fit on the training split only; kept on the instance so new data can
        # be scaled the same way.
        self.scaler = StandardScaler()
        self.X_train = self.scaler.fit_transform(self.X_train)
        self.X_test = self.scaler.transform(self.X_test)

    def create_avg_hardness(self):
        """Parse the score columns and add a rounded mean as ``'hardness_score'``.

        Each score column is overwritten in place with the parsed value
        (missing when the cell contains no digit). The mean skips missing
        scores.
        """
        def extract_digit(value):
            # NOTE(review): only the first single digit is taken, so "10"
            # parses as 1 - confirm scores are always one digit.
            match = re.search(r"\d", str(value))
            return int(match.group()) if match else None

        for col in self.SCORE_COLUMNS:
            self.data[col] = self.data[col].apply(extract_digit)
        mean_score = self.data[self.SCORE_COLUMNS].mean(axis=1, skipna=True)
        self.data['hardness_score'] = mean_score.round()

    def train_model(self):
        """Build the estimator for the configured regularization and fit it on the training split."""
        if self.regularization == 'l1':
            self.model = Lasso()
        elif self.regularization == 'l2':
            self.model = Ridge()
        else:
            self.model = LinearRegression()
        self.model.fit(self.X_train, self.y_train)

    def predict(self, test_data=None):
        """Return predictions.

        Parameters
        ----------
        test_data : array-like, optional
            Features to predict on. They are passed to the model unchanged, so
            they must already be scaled the same way as the training data; the
            raw predictions are returned. When omitted, predictions for the
            held-out ``X_test`` split are returned rounded to whole scores.
        """
        # NOTE(review): only the X_test path rounds; confirm whether
        # caller-supplied data should be rounded as well.
        if test_data is not None:
            return self.model.predict(test_data)
        return np.array(self.model.predict(self.X_test)).round()

    def evaluate(self, y_pred):
        """Print the mean squared error of ``y_pred`` against ``y_test``."""
        # mean_squared_error comes from sklearn.metrics; NumPy has no such
        # function.
        print(mean_squared_error(self.y_test, y_pred))

In [None]:
class RandomForestModel(BaseModel):
    """Random-forest classifier on the ``'winner'`` label.

    ``regularization`` is accepted so the constructor matches the other model
    classes; this class does not use it.
    """

    # Seed for the forest, so a re-run of the notebook gives the same model.
    RANDOM_STATE = 0

    def __init__(self, data, regularization=None):
        super().__init__(data, regularization)

    def preprocess(self):
        """Encode ``'winner'`` as the binary target, then split and scale."""
        # Written explicitly here so the label never depends on a 'target'
        # column that may already be present on the frame.
        self.data['target'] = self.data['winner'].map({'model_a': 1, 'model_b': 0})
        super().preprocess()

    def train_model(self):
        """Fit a seeded ``RandomForestClassifier`` on the training split."""
        # Bootstrapping and feature sampling are random; without a seed every
        # run produces a different forest and different metrics.
        self.model = RandomForestClassifier(random_state=self.RANDOM_STATE)
        self.model.fit(self.X_train, self.y_train)

    def predict(self, test_data=None):
        """Return predicted class labels.

        Parameters
        ----------
        test_data : array-like, optional
            Features to predict on. They are passed to the model unchanged, so
            they must already be scaled the same way as the training data.
            When omitted, the held-out ``X_test`` split is used.
        """
        features = self.X_test if test_data is None else test_data
        return self.model.predict(features)

    def evaluate(self, y_pred):
        """Print log loss and the confusion matrix of ``y_pred`` against ``y_test``."""
        # NOTE(review): y_pred from predict() holds hard 0/1 labels, whereas
        # log_loss is defined on probabilities - consider passing
        # predict_proba output for a meaningful value.
        print(log_loss(self.y_test, y_pred))
        print(confusion_matrix(self.y_test, y_pred))