In [1]:
# Don't use strict note for type checking yet
%nb_mypy mypy-options --pretty

In [2]:
from abc import abstractmethod, abstractproperty, ABC
from typing import Any, TypeAlias, NewType, TypeVar, Type, NoReturn

import pandas as pd
import numpy as np


# Data set
# ==========

# Interface
# ---------
class DataSetInterface(ABC):
    """
    This serves as the *abstract* type under which all the concrete dataset 
    interfaces fall. We can use when we want to depend only on the data set 
    abstraction, but not the concrete type of data set. 
    
    At the moment, this interface does not yet defined any shared behavior,
    so it would also be possible to use virtual subclasses (e.g., registering) 
    instead. However, we want to keep the option open for the future to define 
    shared behavior that all the concrete dataset interfaces must implement.
    """
    pass

class StructuredDataSetInterface(DataSetInterface):
    
    @abstractproperty
    def X(self):
        pass

    @abstractproperty
    def y(self):
        pass


    # Format conversions
    # ------------------
    @classmethod
    @abstractmethod
    def from_pandas(cls, input_data: pd.DataFrame) -> None:
        pass

    @abstractmethod
    def to_pandas(self) -> pd.DataFrame:
        pass

    @classmethod
    @abstractmethod
    def from_numpy(cls, input_data: np.ndarray) -> None:
        pass

    @abstractmethod
    def to_numpy(self) -> np.ndarray:
        pass

    # @abstractmethod
    # def get_column_names(self) -> list[str]:
    #     pass


# Implementation
# ===============
class StructuredDataSetImplementation(DataSetInterface):
    def __init__(self, X: pd.DataFrame, y: pd.DataFrame):
        self._X = X
        self._y = y

    @property
    def X(self):
        return self._X
    
    @property
    def y(self):
        return self._y
    
    @classmethod
    def from_pandas(cls, input_data: pd.DataFrame, target_name: str):
        return cls(pd_data_frame=input_data, target_name=target_name)

    def to_pandas(self) -> pd.DataFrame:
        return self.data    
            
    @classmethod
    def from_numpy(cls, input_data: np.ndarray):
        pd_data_frame=pd.DataFrame(input_data)
        return cls(pd_data_frame=pd_data_frame)

    def to_numpy(self) -> np.ndarray:
        return self.data.to_numpy()
    
    def get_column_names(self) -> list[str]:
        return self.columns.tolist()
    

# Container
# =========

class DataContainerInterface(ABC):
    @abstractproperty
    def train(self) -> DataSetInterface:
        pass

    @abstractproperty
    def val(self) -> DataSetInterface:
        pass

    @abstractproperty
    def test(self) -> DataSetInterface:
        pass


class DataContainer():
    def __init__(self, train: DataSetInterface, val: DataSetInterface, test: DataSetInterface):
        self._train = train
        self._val = val
        self._test = test

    @property
    def train(self):
        return self._train
    
    @property
    def val(self):
        return self._val
    
    @property
    def test(self):
        return self._test


class NumpyDataLoader():
    def __init__(self, train: DataSetInterface, val: DataSetInterface, test: DataSetInterface):
        self._train = train
        self._val = val
        self._test = test

    @property
    def train(self):
        return self._train.to_numpy()
    
    @property
    def val(self):
        return self._val.to_numpy()
    
    @property
    def test(self):
        return self._test.to_numpy()
    

class StructuredDataContainer(DataContainerInterface):
    def __init__(self, train: StructuredDataSetInterface, val: StructuredDataSetInterface, test: StructuredDataSetInterface):
        self._train = train
        self._val = val
        self._test = test

    @property
    def train(self):
        return self._train
    
    @property
    def val(self):
        return self._val
    
    @property
    def test(self):
        return self._test
    
    def to_numpy_dataloader():
        return NumpyDataLoader(self.train, self.val, self.test)


<cell>65: [34mnote:[m [m[1m"StructuredDataSetImplementation"[m defined here[m
<cell>79: [1m[31merror:[m Unexpected keyword argument [m[1m"pd_data_frame"[m for
<cell>79: [1m[31merror:[m Unexpected keyword argument [m[1m"target_name"[m for
<cell>82: [1m[31merror:[m [m[1m"StructuredDataSetImplementation"[m has no attribute [m[1m"data"[m 
<cell>87: [1m[31merror:[m Unexpected keyword argument [m[1m"pd_data_frame"[m for
<cell>90: [1m[31merror:[m [m[1m"StructuredDataSetImplementation"[m has no attribute [m[1m"data"[m 
<cell>93: [1m[31merror:[m [m[1m"StructuredDataSetImplementation"[m has no attribute
<cell>169: [1m[31merror:[m Method must have at least one argument. Did you forget the


In [3]:
from abc import abstractmethod, ABC
from typing import Generic

from sklearn.linear_model import ElasticNet
# from sklearn.datasets import

from oo_ml.interface.data.container import DataContainerInterface
from oo_ml.interface.data.data_set_type import StructuredData
from oo_ml.interface.data.data_set import DataSetInterface


class EstimatorInterface(ABC):
    @abstractmethod
    def optimize_hyperparameters(self) -> None:
        pass

    @abstractmethod
    def fit(self) -> None:
        pass

    @abstractmethod
    def predict(self) -> DataSetInterface:
        pass

    # @abstractmethod
    # def score(self):
    #     """To do: Create return type for score."""
    #     pass


class SKLearnRegressorAdapter():
    def __init__(self, data_container: DataContainerInterface[StructuredData]) -> None:
        self.data_container = data_container

    # def optimize_hyperparameters(self):


    def fit(self) -> None:
        training_data = self.data_container.get_train_data()
        model = ElasticNet()
        model.fit(
            X=training_data.X,
            y=training_data.y,
        )

    # def predict(self) -> DataSetInterface:


<cell>7: [1m[31merror:[m Name [m[1m"DataContainerInterface"[m already defined on line -72 
<cell>9: [1m[31merror:[m Name [m[1m"DataSetInterface"[m already defined on line -158 
<cell>32: [1m[31merror:[m [m[1m"DataContainerInterface"[m expects no type arguments, but 1
<cell>39: [1m[31merror:[m [m[1m"DataContainerInterface"[m has no attribute [m[1m"get_train_data"[m


TypeError: type 'InMemoryFormat' is not subscriptable