In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier


In [2]:
# Toy employee records, stored column-wise: two categorical features,
# two numeric features and the binary Left_Company target.
data = dict(
    Gender=['Male', 'Female', 'Female', 'Male', 'Female'],
    City=['Delhi', 'Mumbai', 'Pune', 'Delhi', 'Bangalore'],
    Experience=[2, 5, 3, 8, 4],
    Salary=[25000, 50000, 30000, 80000, 40000],
    Left_Company=[0, 1, 0, 1, 0],
)

# Build the frame from the column dict and show the whole (five-row) table.
df = pd.DataFrame(data)
print(df)


   Gender       City  Experience  Salary  Left_Company
0    Male      Delhi           2   25000             0
1  Female     Mumbai           5   50000             1
2  Female       Pune           3   30000             0
3    Male      Delhi           8   80000             1
4  Female  Bangalore           4   40000             0


In [3]:
# Separate the predictor columns (X) from the target column (y).
feature_cols = ['Gender', 'City', 'Experience', 'Salary']
target_col = 'Left_Company'

X = df[feature_cols]
y = df[target_col]


In [4]:
# Columns holding string categories; these are the ones handed to the encoder.
categorical_cols = [
    'Gender',
    'City',
]


In [5]:
# One-hot encoder for the categorical columns. drop='first' omits the first
# level of each feature, which avoids the dummy-variable trap.
categorical_encoder = OneHotEncoder(drop='first')

# Apply the encoder to categorical_cols only; every other column is passed
# through unchanged.
ct = ColumnTransformer(
    transformers=[('encoder', categorical_encoder, categorical_cols)],
    remainder='passthrough',
)


In [6]:
# Fit the column transformer on X, then encode the same rows with it.
X_encoded = ct.fit(X).transform(X)
print(X_encoded)


[[1.0e+00 1.0e+00 0.0e+00 0.0e+00 2.0e+00 2.5e+04]
 [0.0e+00 0.0e+00 1.0e+00 0.0e+00 5.0e+00 5.0e+04]
 [0.0e+00 0.0e+00 0.0e+00 1.0e+00 3.0e+00 3.0e+04]
 [1.0e+00 1.0e+00 0.0e+00 0.0e+00 8.0e+00 8.0e+04]
 [0.0e+00 0.0e+00 0.0e+00 0.0e+00 4.0e+00 4.0e+04]]


In [7]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.3, random_state=42)

# NOTE(review): X_encoded appears to come from a transformer fitted on all
# rows before this split, so the encoder has already seen the held-out rows.
# On real data, split first and fit the preprocessing on the training rows only.

# Seed the forest as well: bootstrap sampling and feature selection are random,
# so without random_state the reported accuracy can change from run to run.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Mean accuracy on the held-out rows.
accuracy = model.score(X_test, y_test)
print("Model Accuracy:", accuracy)


Model Accuracy: 0.5
