In [1]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import LabelEncoder
import pandas as pd

class MultiColumnLabelEncoder(BaseEstimator, TransformerMixin):
    """Label-encode several DataFrame columns, one ``LabelEncoder`` per column.

    Every value is converted to a string before encoding; missing values are
    replaced by the empty string ``''`` so they receive a label of their own.

    Parameters
    ----------
    columns : list, optional
        Names of the columns to encode. ``None`` (the default) means "every
        column of the DataFrame passed to ``fit``".

    Attributes
    ----------
    encoders : dict
        Maps each fitted column name to its ``LabelEncoder``. Rebuilt from
        scratch on every call to ``fit``.
    columns_ : list
        Column names resolved by the most recent ``fit``. The ``columns``
        constructor parameter itself is never modified, so refitting an
        encoder created with ``columns=None`` on a different DataFrame picks
        up that DataFrame's columns.
    """

    def __init__(self, columns: list = None):
        self.columns = columns  # Columns to encode (None = all columns seen at fit time)
        self.encoders = {}

    @staticmethod
    def _to_labels(values: pd.Series) -> pd.Series:
        """Return ``values`` as strings, with missing entries mapped to ``''``."""
        # Replace missing values *before* the string cast. Casting first can
        # stringify NaN to the literal 'nan' (pandas < 3), after which a
        # fillna('') has nothing left to fill.
        # Going through object dtype lets '' be inserted into numeric or
        # categorical columns as well.
        as_object = values.astype(object)
        return as_object.where(values.notna(), '').astype(str)

    def _fitted_columns(self) -> list:
        """Return the columns to process in transform/inverse_transform."""
        # Fall back to the constructor parameter so that an unfitted encoder
        # with explicit columns still reports "Column ... was not fitted".
        columns = getattr(self, 'columns_', self.columns)
        if columns is None:
            raise ValueError(
                "This MultiColumnLabelEncoder is not fitted yet; call 'fit' first"
            )
        return columns

    def fit(self, X: pd.DataFrame, y: pd.Series = None) -> 'MultiColumnLabelEncoder':
        """Fit one ``LabelEncoder`` per selected column of ``X``.

        Parameters
        ----------
        X : pd.DataFrame
            Data whose columns are used to learn the labels.
        y : pd.Series, optional
            Ignored; accepted for scikit-learn API compatibility.

        Returns
        -------
        MultiColumnLabelEncoder
            ``self``, to allow call chaining.

        Raises
        ------
        ValueError
            If a requested column is not present in ``X``. Nothing is fitted
            in that case and any previously fitted state is left untouched.
        """
        # If no columns specified, encode all columns
        columns = list(X.columns) if self.columns is None else list(self.columns)

        # Validate everything up front so a failing fit leaves no partial state.
        for col in columns:
            if col not in X:
                raise ValueError(f"Column '{col}' not found in the DataFrame")

        # Build into a fresh dict so encoders from an earlier fit never leak through.
        encoders = {}
        for col in columns:
            encoders[col] = LabelEncoder().fit(self._to_labels(X[col]))

        self.columns_ = columns
        self.encoders = encoders
        return self

    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Replace each fitted column of ``X`` by its integer labels.

        Parameters
        ----------
        X : pd.DataFrame
            Data containing every fitted column. It is not modified.

        Returns
        -------
        pd.DataFrame
            A copy of ``X`` with the fitted columns encoded; all other
            columns are passed through unchanged.

        Raises
        ------
        ValueError
            If the encoder was not fitted (for a given column or at all).
            The underlying ``LabelEncoder`` raises the same type for values
            it did not see during ``fit``.
        """
        # Apply transformations using the fitted encoders
        X_transformed = X.copy()
        for col in self._fitted_columns():
            if col not in self.encoders:
                raise ValueError(f"Column '{col}' was not fitted")
            X_transformed[col] = self.encoders[col].transform(self._to_labels(X[col]))
        return X_transformed

    def fit_transform(self, X: pd.DataFrame, y: pd.Series = None) -> pd.DataFrame:
        """Fit on ``X`` and return the encoded copy of ``X``.

        Equivalent to ``fit(X, y)`` followed by ``transform(X)``.
        """
        return self.fit(X, y).transform(X)

    def inverse_transform(self, X: pd.DataFrame) -> pd.DataFrame:
        """Map integer labels in the fitted columns back to their string values.

        The result holds the *string* form used for encoding, so original
        dtypes are not restored and values that were missing come back as ``''``.

        Parameters
        ----------
        X : pd.DataFrame
            Data whose fitted columns contain labels produced by ``transform``.
            It is not modified.

        Returns
        -------
        pd.DataFrame
            A copy of ``X`` with the fitted columns decoded.

        Raises
        ------
        ValueError
            If the encoder was not fitted (for a given column or at all).
        """
        X_inverse_transformed = X.copy()
        for col in self._fitted_columns():
            if col not in self.encoders:
                raise ValueError(f"Column '{col}' was not fitted")
            X_inverse_transformed[col] = self.encoders[col].inverse_transform(X[col])
        return X_inverse_transformed


In [2]:
import pandas as pd

# Read the CSV at the given relative path into the DataFrame `data`,
# which the following cells operate on.
data = pd.read_csv(filepath_or_buffer="../Dataset/sampled_data.csv")

In [4]:
# Remove the 'wip' column. Reassigning the result (instead of inplace=True)
# together with errors='ignore' keeps this cell idempotent: running it again
# after 'wip' is already gone no longer raises KeyError.
data = data.drop(columns=['wip'], errors='ignore')
data

Unnamed: 0,date,quarter,department,day,team,targeted_productivity,smv,over_time,incentive,idle_time,idle_men,no_of_style_change,no_of_workers,actual_productivity
0,1/17/2015,Quarter3,sweing,Saturday,4,0.7,22.52,10170,50,0.0,0,0,56.5,0.700542
1,1/10/2015,Quarter2,sweing,Saturday,2,0.8,28.08,10350,63,0.0,0,0,57.5,0.800594
2,1/27/2015,Quarter4,finishing,Tuesday,7,0.5,4.15,960,0,0.0,0,0,8.0,0.803542
3,1/3/2015,Quarter1,finishing,Saturday,10,0.7,4.15,960,0,0.0,0,0,8.0,0.701812
4,1/13/2015,Quarter2,sweing,Tuesday,8,0.8,25.9,10260,60,0.0,0,0,57.0,0.850253
5,2/11/2015,Quarter2,finishing,Wednesday,8,0.35,4.15,1440,0,0.0,0,0,12.0,0.994375
6,1/5/2015,Quarter1,sweing,Monday,5,0.6,21.98,6960,23,0.0,0,0,58.0,0.600029
7,1/29/2015,Quarter5,finishing,Thursday,10,0.8,3.94,1200,0,0.0,0,0,10.0,0.85695
8,2/3/2015,Quarter1,finishing,Tuesday,1,0.8,3.94,1200,0,0.0,0,0,10.0,0.99485
9,2/23/2015,Quarter4,finishing,Monday,2,0.8,5.13,960,0,0.0,0,0,8.0,0.368719


In [8]:
# Label-encode only the 'quarter' column: learn its labels from `data`,
# then produce an encoded copy (`data` itself is left untouched).
encoder = MultiColumnLabelEncoder(columns=['quarter'])
encoder.fit(data)
data_ev = encoder.transform(data)
data_ev

Unnamed: 0,date,quarter,department,day,team,targeted_productivity,smv,over_time,incentive,idle_time,idle_men,no_of_style_change,no_of_workers,actual_productivity
0,1/17/2015,2,sweing,Saturday,4,0.7,22.52,10170,50,0.0,0,0,56.5,0.700542
1,1/10/2015,1,sweing,Saturday,2,0.8,28.08,10350,63,0.0,0,0,57.5,0.800594
2,1/27/2015,3,finishing,Tuesday,7,0.5,4.15,960,0,0.0,0,0,8.0,0.803542
3,1/3/2015,0,finishing,Saturday,10,0.7,4.15,960,0,0.0,0,0,8.0,0.701812
4,1/13/2015,1,sweing,Tuesday,8,0.8,25.9,10260,60,0.0,0,0,57.0,0.850253
5,2/11/2015,1,finishing,Wednesday,8,0.35,4.15,1440,0,0.0,0,0,12.0,0.994375
6,1/5/2015,0,sweing,Monday,5,0.6,21.98,6960,23,0.0,0,0,58.0,0.600029
7,1/29/2015,4,finishing,Thursday,10,0.8,3.94,1200,0,0.0,0,0,10.0,0.85695
8,2/3/2015,0,finishing,Tuesday,1,0.8,3.94,1200,0,0.0,0,0,10.0,0.99485
9,2/23/2015,3,finishing,Monday,2,0.8,5.13,960,0,0.0,0,0,8.0,0.368719


In [16]:
# Re-use the already fitted encoder on a single row of `data` (position 8).
# Slicing, rather than indexing, keeps the input a one-row DataFrame.
data_1 = encoder.transform(data.iloc[8:9])
data_1

Unnamed: 0,date,quarter,department,day,team,targeted_productivity,smv,over_time,incentive,idle_time,idle_men,no_of_style_change,no_of_workers,actual_productivity
8,2/3/2015,0,finishing,Tuesday,1,0.8,3.94,1200,0,0.0,0,0,10.0,0.99485
