In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

In [None]:
# Build a small in-memory sample dataset; each column deliberately
# contains one missing entry (np.nan) for the later cleaning steps.
data = pd.DataFrame(
    dict(
        Name=['Alice', 'Bob', 'Charlie', 'David', np.nan, 'Eve'],
        Age=[25, np.nan, 30, 22, 35, 29],
        Salary=[50000, 60000, np.nan, 45000, 70000, 55000],
        Department=['HR', 'IT', 'Finance', 'IT', 'Finance', np.nan],
    )
)

In [None]:
# Show the untouched frame first so the missing entries are visible
# before any cleaning is applied.
print(
    "Original Dataset:\n",
    data,
)

Original Dataset:
       Name   Age   Salary Department
0    Alice  25.0  50000.0         HR
1      Bob   NaN  60000.0         IT
2  Charlie  30.0      NaN    Finance
3    David  22.0  45000.0         IT
4      NaN  35.0  70000.0    Finance
5      Eve  29.0  55000.0        NaN


In [None]:
# 1. Handling Missing Values
# Fill missing values in 'Name' and 'Department' with the most frequent value.
#
# The filled Series is assigned back to its column rather than calling
# fillna(..., inplace=True) on data[col]: the chained in-place form acts on an
# intermediate object, raises a FutureWarning, and stops updating `data`
# in pandas 3.0.
#
# NOTE(review): every non-missing name in this sample is unique, so the "mode"
# is simply the first name in sorted order ('Alice') — confirm that imputing
# a name this way is actually intended.
data['Name'] = data['Name'].fillna(data['Name'].mode()[0])
data['Department'] = data['Department'].fillna(data['Department'].mode()[0])

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Name'].fillna(data['Name'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Department'].fillna(data['Department'].mode()[0], inplace=True)


In [None]:
# Fill missing numerical values with the mean.
#
# Assign the result back to the column instead of using the chained
# data[col].fillna(..., inplace=True) form, which raises a FutureWarning and
# no longer modifies `data` in pandas 3.0.
# The categorical columns ('Name', 'Department') are handled in the previous
# cell, so they are not filled again here.
data['Age'] = data['Age'].fillna(data['Age'].mean())
data['Salary'] = data['Salary'].fillna(data['Salary'].mean())

print("\nDataset after handling missing values:\n", data)


Dataset after handling missing values:
       Name   Age   Salary Department
0    Alice  25.0  50000.0         HR
1      Bob  28.2  60000.0         IT
2  Charlie  30.0  56000.0    Finance
3    David  22.0  45000.0         IT
4    Alice  35.0  70000.0    Finance
5      Eve  29.0  55000.0    Finance


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Age'].fillna(data['Age'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Salary'].fillna(data['Salary'].mean(), inplace=True)


In [None]:
# 2. Encoding Categorical Variables
# Learn the distinct department labels first, then replace each label with
# its integer code (in the printed result: Finance -> 0, HR -> 1, IT -> 2).
encoder = LabelEncoder()
encoder.fit(data['Department'])
data['Department'] = encoder.transform(data['Department'])
print(
    "\nDataset after encoding categorical variables:\n",
    data,
)


Dataset after encoding categorical variables:
       Name   Age   Salary  Department
0    Alice  25.0  50000.0           1
1      Bob  28.2  60000.0           2
2  Charlie  30.0  56000.0           0
3    David  22.0  45000.0           2
4    Alice  35.0  70000.0           0
5      Eve  29.0  55000.0           0


In [None]:
# 3. Feature Scaling
# Normalize Age and Salary: fit the min-max scaler on both columns, then
# overwrite them with the rescaled values (each column ends up in [0, 1]).
scaler = MinMaxScaler()
scaler.fit(data[['Age', 'Salary']])
data[['Age', 'Salary']] = scaler.transform(data[['Age', 'Salary']])
print(
    "\nDataset after normalization:\n",
    data,
)


Dataset after normalization:
       Name       Age  Salary  Department
0    Alice  0.230769    0.20           1
1      Bob  0.476923    0.60           2
2  Charlie  0.615385    0.44           0
3    David  0.000000    0.00           2
4    Alice  1.000000    1.00           0
5      Eve  0.538462    0.40           0
