In [11]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
import pandas as pd

# Load the Titanic training CSV and discard the columns this example does not
# feed into the preprocessing steps below.
data = pd.read_csv(
    r"C:\Users\hp\OneDrive\codes\python\Learn_ML\1. Datasets\train.csv"
).drop(
    columns=[
        "PassengerId",
        "Name",
        "Ticket",
        "Cabin",
        "SibSp",
        "Pclass",
        "Parch",
        "Survived",
    ]
)

# Numerical features: fill missing values with the column mean, then
# standardize.
numerical_pipeline = Pipeline(
    steps=[
        ("impute", SimpleImputer(strategy="mean")),
        ("scale", StandardScaler()),
    ]
)

# "Sex": one-hot encode into a dense array; categories unseen during fit are
# ignored instead of raising.
sex_encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)

# "Embarked": fill missing values with the most frequent category first, then
# one-hot encode with the same encoder settings as "Sex".
embarked_pipeline = Pipeline(
    steps=[
        ("impute", SimpleImputer(strategy="most_frequent")),
        ("encode", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
    ]
)

# Define ColumnTransformer: route each group of columns to its preprocessing
# step. The order of the entries fixes the order of the output columns.
clt = ColumnTransformer(
    transformers=[
        ("num_tnf", numerical_pipeline, ["Age", "Fare"]),  # Numerical pipeline
        ("embarked_tnf", embarked_pipeline, ["Embarked"]),  # Embarked pipeline
        ("sex_tnf", sex_encoder, ["Sex"]),  # One-hot encode Sex
    ],
    remainder="passthrough",  # Pass other columns untouched
    # Emit plain names ("Age", "Embarked_C", ...) instead of
    # "<transformer>__<column>" so the labels below stay unprefixed.
    verbose_feature_names_out=False,
)

# Fit and transform the data
transformed_data = clt.fit_transform(data)

# ** Get column names **
# Per-transformer names, kept available for inspection.
embarked_col_names = (
    clt.named_transformers_["embarked_tnf"]
    .named_steps["encode"]
    .get_feature_names_out(["Embarked"])
)
sex_col_names = clt.named_transformers_["sex_tnf"].get_feature_names_out(["Sex"])
numerical_col_names = ["Age", "Fare"]  # These columns are already named

# Ask the fitted ColumnTransformer for the full, ordered list of output names
# rather than concatenating the pieces by hand: this always matches the number
# and order of columns in `transformed_data`, including any columns that fall
# through to `remainder="passthrough"`.
all_col_names = list(clt.get_feature_names_out())

# Create a DataFrame with the transformed data, keeping the row index of the
# input so rows can still be aligned with `data`.
transformed_df = pd.DataFrame(
    transformed_data, columns=all_col_names, index=data.index
)
transformed_df.head()


Unnamed: 0,Age,Fare,Embarked_C,Embarked_Q,Embarked_S,Sex_female,Sex_male
0,-0.592481,-0.502445,0.0,0.0,1.0,0.0,1.0
1,0.638789,0.786845,1.0,0.0,0.0,1.0,0.0
2,-0.284663,-0.488854,0.0,0.0,1.0,1.0,0.0
3,0.407926,0.42073,0.0,0.0,1.0,1.0,0.0
4,0.407926,-0.486337,0.0,0.0,1.0,0.0,1.0
