In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

class DataPreprocessor:
    def __init__(self, numeric_columns, categorical_column):
        """
        Initializes the DataPreprocessor class with columns to scale and encode.
        
        Parameters:
        numeric_columns (list): List of numeric columns to be scaled.
        categorical_column (str): Categorical column to be encoded.
        """
        self.numeric_columns = numeric_columns
        self.categorical_column = categorical_column
        
        # Define the scaling pipeline for numeric columns
        self.numeric_pipeline = Pipeline([
            ('scaler', StandardScaler())  # Scaling numeric columns using StandardScaler
        ])
        
        # Define the encoding pipeline for categorical column
        self.categorical_pipeline = Pipeline([
            ('encoder', LabelEncoder())  # Encoding categorical columns using LabelEncoder
        ])
        
        # Combine both pipelines into a column transformer
        self.preprocessor = ColumnTransformer([
            ('numeric', self.numeric_pipeline, self.numeric_columns),
            ('categorical', self.categorical_pipeline, self.categorical_column)
        ])

    def fit_transform(self, df):
        """
        Applies the fit_transform process on the DataFrame and returns the transformed data.
        
        Parameters:
        df (DataFrame): Input DataFrame containing the data to preprocess.
        
        Returns:
        DataFrame: Transformed DataFrame.
        """
        df_transformed = df.copy()  # Create a copy to avoid changing the original data
        
        # Apply the preprocessor to the dataframe
        transformed_data = self.preprocessor.fit_transform(df)
        
        # Update the dataframe with transformed columns
        df_transformed[self.numeric_columns] = transformed_data[:, :len(self.numeric_columns)]
        df_transformed[self.categorical_column] = transformed_data[:, len(self.numeric_columns):]
        
        return df_transformed

# Create a synthetic dataset
data = {
    'Mileage': [30, 25.5, 35, 20, 28, 45, 50, 15, 60, 40],
    'Horsepower': [100, 150, 120, 180, 110, 160, 200, 190, 220, 180],
    'Fuel_Type': ['Petrol', 'Diesel', 'Petrol', 'Electric', 'Diesel', 'Petrol', 'Electric', 'Diesel', 'Petrol', 'Electric']
}

# Create DataFrame
df = pd.DataFrame(data)

# Instantiate the preprocessor with the columns to scale and encode
preprocessor = DataPreprocessor(numeric_columns=['Mileage', 'Horsepower'], categorical_column='Fuel_Type')

# Apply transformations
transformed_df = preprocessor.fit_transform(df)

# Display the transformed DataFrame
print("\nTransformed Data (After Scaling and Encoding):")
print(transformed_df)

TypeError: LabelEncoder.fit_transform() takes 2 positional arguments but 3 were given