### Data loading class

In [1]:
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_absolute_error, r2_score, mean_absolute_error
from sklearn.pipeline import Pipeline
import joblib
import logging
from typing import List, Dict, Union, Optional
from abc import ABC, abstractmethod

class DataLoader:
    """Load a tabular file into a DataFrame and summarise its columns.

    The file type is chosen from the path suffix: ``.csv`` is read with
    ``pandas.read_csv`` and ``.xlsx`` with ``pandas.read_excel``.  The suffix
    comparison is case-insensitive and works for ``str`` as well as
    path-like objects.
    """

    def __init__(self, filepath: str):
        """Remember the path; nothing is read until ``load_data`` is called.

        Args:
            filepath: Location of the ``.csv`` or ``.xlsx`` file to load.
        """
        self.filepath = filepath
        # Populated by load_data(); None means "not loaded yet".
        self.data = None
        self.logger = logging.getLogger(__name__)

    def load_data(self) -> pd.DataFrame:
        """Read the file at ``self.filepath`` and cache it on ``self.data``.

        Returns:
            The loaded DataFrame (the same object stored in ``self.data``).

        Raises:
            ValueError: If the suffix is neither ``.csv`` nor ``.xlsx``.
            Exception: Any error raised by the pandas reader is logged and
                re-raised unchanged.
        """
        try:
            # Normalise once so 'DATA.CSV' and pathlib.Path inputs are both
            # recognised; the original object is still what pandas receives.
            suffix_probe = str(self.filepath).lower()
            if suffix_probe.endswith('.csv'):
                self.data = pd.read_csv(self.filepath)
            elif suffix_probe.endswith('.xlsx'):
                self.data = pd.read_excel(self.filepath)
            else:
                raise ValueError(f"Unsupported file format: {self.filepath}")
            self.logger.info("Data loaded sucessfully. Shape: %s", self.data.shape)
            return self.data
        except Exception as e:
            # Log for visibility, then let the caller decide how to recover.
            self.logger.error("Error loading data: %s", e)
            raise

    def get_feature_info(self) -> Dict:
        """Group column names by dtype family and report missing-value rates.

        Returns:
            A dict with four keys:

            * ``'numerical'`` - columns of any numeric dtype (all integer and
              float widths), excluding timedeltas.
            * ``'categorical'`` - ``object`` and ``category`` columns.
            * ``'datetime'`` - naive and timezone-aware datetime columns.
            * ``'missing_percentages'`` - column name -> percentage of null
              values (NaN for a frame with zero rows).

        Raises:
            ValueError: If ``load_data`` has not been called yet.
        """
        if self.data is None:
            raise ValueError("Data not loaded. Call load_data() first.")

        frame = self.data
        # 'number' covers every int/float width; numpy treats timedelta64 as
        # a numeric subtype, so it is excluded explicitly.
        numerical = frame.select_dtypes(include=['number'], exclude=['timedelta'])
        categorical = frame.select_dtypes(include=['object', 'category'])
        # 'datetimetz' is needed in addition to 'datetime64' to pick up
        # timezone-aware columns.
        datetimes = frame.select_dtypes(include=['datetime64', 'datetimetz'])
        return {
            'numerical': numerical.columns.tolist(),
            'categorical': categorical.columns.tolist(),
            'datetime': datetimes.columns.tolist(),
            # mean of the boolean null mask == null count / row count
            'missing_percentages': (frame.isnull().mean() * 100).to_dict(),
        }

### Feature transformer

In [None]:
class BaseFeatureTransformer(ABC, TransformerMixin, BaseEstimator):
    """Abstract base class for feature transformers.

    Subclasses must implement ``transform``.  ``fit`` defaults to a no-op
    that returns ``self`` so stateless transformers work out of the box;
    override it when the transformer has to learn something from the data.

    ``TransformerMixin`` is listed before ``BaseEstimator`` because
    scikit-learn expects mixins to the left of ``BaseEstimator`` in the
    inheritance list.
    """

    def fit(self, X: pd.DataFrame, y=None, **fit_params):
        """Do nothing and return ``self``.

        Args:
            X: Training data (unused by this default implementation).
            y: Optional target (unused).
            **fit_params: Extra fit arguments (unused).

        Returns:
            This transformer instance, so calls can be chained.
        """
        return self

    @abstractmethod
    def transform(self, X: pd.DataFrame, y=None):
        """Transform the data.

        Args:
            X: Data to transform.
            y: Optional target; accepted for signature compatibility.

        Returns:
            The transformed data, as defined by the subclass.
        """

    def fit_transform(self, X: pd.DataFrame, y=None, **fit_params):
        """Fit on ``X`` (and ``y``) and return the transformed ``X``.

        Args:
            X: Data to fit on and then transform.
            y: Optional target forwarded to ``fit``.
            **fit_params: Extra keyword arguments forwarded to ``fit``.

        Returns:
            The result of ``transform(X)`` on the fitted transformer.
        """
        return self.fit(X, y, **fit_params).transform(X)
