In [None]:
# this will autoreload your packages (i.e. if you make a change to one of your .py files, you will not need to restart your notebook - your change is dynamically imported)
%load_ext autoreload
%autoreload

In [None]:
import os
import django
# Configure Django before initialising it from the notebook:
#   - DJANGO_ALLOW_ASYNC_UNSAFE lets database calls run from the notebook's async context
#   - DJANGO_SETTINGS_MODULE selects the project's settings module
os.environ.update(
    {
        "DJANGO_ALLOW_ASYNC_UNSAFE": "true",
        "DJANGO_SETTINGS_MODULE": "fpl.settings",
    }
)
django.setup()

In [None]:
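# Requires scikit-learn >= 1.4 (root_mean_squared_error was added in 1.4). If it is missing, run: %pip install "scikit-learn>=1.4"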

In [None]:
from sklearn.model_selection import train_test_split
import pandas as pd
# Toy data set: one row per (player, game) holding the points scored in that
# game and an 'avg_points' column that the estimators use as their prediction.
# NOTE(review): Player 2's avg_points equals the running mean of Points, but
# Player1's last two values (20, 25) do not (the running mean would be 17.5, 20)
# - confirm which definition is intended.
data = {
    'Player': ['Player1', 'Player1', 'Player1', 'Player1', 'Player1', 'Player 2', 'Player 2', 'Player 2', 'Player 2', 'Player 2'],
    'Game': ['Game1', 'Game2', 'Game3', 'Game4', 'Game5', 'Game1', 'Game2', 'Game3', 'Game4', 'Game5'],
    'Points': [10, 15, 20, 25, 30, 5, 10, 15, 20, 25],
    'avg_points': [10, 12.5, 15, 20, 25, 5, 7.5, 10, 12.5, 15],
}

# Create DataFrame
df = pd.DataFrame(data)

# Hold out 40% of the rows for evaluation (fixed seed for reproducibility).
# Only 'avg_points' is passed as a feature: 'Points' is the target, so keeping
# it in X would leak the answer into the model inputs. The rows that land in
# each split are unaffected, because the split depends only on the number of
# rows and random_state.
X_train, X_test, y_train, y_test = train_test_split(
    df[['avg_points']], df['Points'], test_size=0.4, random_state=0
)

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import root_mean_squared_error
import numpy as np

class MyBaselineEstimator(BaseEstimator, TransformerMixin):
    """Baseline "model" that predicts by echoing one existing column of X.

    Parameters
    ----------
    column_name
        Key used to select the prediction column from X (``X[column_name]``).
    """

    def __init__(self, column_name):
        self.column_name = column_name

    def fit(self, X, y=None):
        """Learn nothing and return ``self`` so the estimator can sit in a Pipeline."""
        return self

    def transform(self, X):
        """Return the configured column of X, unchanged, as the prediction."""
        return X[self.column_name]

    def score(self, X, y):
        """Return the root mean squared error between ``y`` and the prediction.

        ``y`` may be any numeric array-like (list, ndarray, pandas Series); it
        no longer has to expose ``astype``. Values are compared positionally.

        NOTE: the result is an error, so LOWER is better. scikit-learn's own
        ``score`` methods are higher-is-better, so do not hand this estimator
        to tooling that maximises ``score`` without negating the value.
        """
        y_true = np.asarray(y, dtype=float)
        y_pred = np.asarray(self.transform(X), dtype=float)
        return root_mean_squared_error(y_true, y_pred)
    

class MyChallengerEstimator(MyBaselineEstimator):
    """Challenger variant: predicts twice the value of the configured column."""

    def transform(self, X):
        """Return the parent's column lookup multiplied by 2."""
        return super().transform(X) * 2

In [None]:
from sklearn.pipeline import Pipeline


# Wrap each estimator in a single-step Pipeline; both read the 'avg_points' column.
pipeline_champion = Pipeline(
    steps=[("classifier", MyBaselineEstimator(column_name="avg_points"))]
)
pipeline_challenger = Pipeline(
    steps=[("classifier", MyChallengerEstimator(column_name="avg_points"))]
)

In [None]:
def compare_two_pipelines_performance(pipeline_champion, pipeline_challenger,
                                      X_train, X_test, y_train, y_test):
    """Fit both pipelines on the training split and score them on the test split.

    The reported number is whatever each pipeline's ``score`` method returns,
    so it is labelled "score" rather than "accuracy": its meaning (and whether
    higher or lower is better) depends on the estimator inside the pipeline.

    Parameters
    ----------
    pipeline_champion, pipeline_challenger
        Objects exposing ``fit(X, y)`` and ``score(X, y)``.
    X_train, y_train
        Data passed to ``fit`` of both pipelines.
    X_test, y_test
        Data passed to ``score`` of both pipelines.

    Returns
    -------
    tuple
        ``(champion_score, challenger_score)``.
    """
    # Fit both candidates on the same training data before evaluating either.
    pipeline_champion.fit(X_train, y_train)
    pipeline_challenger.fit(X_train, y_train)

    # Evaluate both candidates on the same held-out data.
    champion_score = pipeline_champion.score(X_test, y_test)
    challenger_score = pipeline_challenger.score(X_test, y_test)

    print(f"pipeline_champion score: {champion_score}")
    print(f"pipeline_challenger score: {challenger_score}")
    return champion_score, challenger_score

In [None]:

# Run the champion/challenger comparison; the cell displays the returned pair of scores.
compare_two_pipelines_performance(
    pipeline_champion=pipeline_champion,
    pipeline_challenger=pipeline_challenger,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)