Q1. Difference between Ordinal Encoding and Label Encoding
Ordinal Encoding: Assigns a unique integer to each category while preserving the order of categories. It is used for ordinal variables where the categories have a meaningful order.

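Label Encoding: Assigns a unique integer to each category without considering any order; the integers are arbitrary (scikit-learn's LabelEncoder numbers the categories in sorted, i.e. alphabetical, order). It is used for nominal variables where the categories do not have a meaningful order, most safely for target labels, because a model may misread the integers of an input feature as a ranking.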

In [1]:
#Q4. Label Encoding with Scikit-Learn

In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Sample data: three categorical columns to be label-encoded.
data = pd.DataFrame({
    'Color': ['red', 'green', 'blue', 'green', 'red'],
    'Size': ['small', 'medium', 'large', 'small', 'large'],
    'Material': ['wood', 'metal', 'plastic', 'wood', 'metal']
})

# Label encoding.
# LabelEncoder numbers the sorted unique values of a column 0..n-1, so the
# codes are alphabetical (blue=0, green=1, red=2). Note that this also gives
# 'Size' the codes large=0, medium=1, small=2, which do not follow size order.
#
# One encoder is fitted per column and kept in `label_encoders`. Refitting a
# single shared encoder would overwrite its `classes_` on every call, leaving
# only the last column's mapping available for `inverse_transform`.
label_encoders = {}
for column in ['Color', 'Size', 'Material']:
    label_encoders[column] = LabelEncoder()
    data[f'{column} Encoded'] = label_encoders[column].fit_transform(data[column])

print(data)


   Color    Size Material  Color Encoded  Size Encoded  Material Encoded
0    red   small     wood              2             2                 2
1  green  medium    metal              1             1                 0
2   blue   large  plastic              0             0                 1
3  green   small     wood              1             2                 2
4    red   large    metal              2             0                 0


In [3]:
# Q5. Covariance Matrix for Age, Income, and Education Level

import numpy as np
import pandas as pd

# Sample data, one row per person: (Age, Income, Education Level).
# Education Level is an assumed ordinal code:
# 1=High School, 2=Bachelor's, 3=Master's, 4=PhD
records = [
    (25, 50000, 2),
    (45, 100000, 4),
    (35, 75000, 3),
    (50, 120000, 4),
    (23, 55000, 2),
]
data = pd.DataFrame(records, columns=['Age', 'Income', 'Education Level'])

# Pairwise sample covariance of every numeric column (pandas divides by N-1).
cov_matrix = data.cov()
print(cov_matrix)


                       Age       Income  Education Level
Age                 141.80     350000.0            11.75
Income           350000.00  887500000.0         28750.00
Education Level      11.75      28750.0             1.00


In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Sample data: two continuous and two categorical weather features.
data = pd.DataFrame({
    'Temperature': [20, 22, 25, 19, 23],
    'Humidity': [30, 40, 35, 45, 50],
    'Weather Condition': ['Sunny', 'Cloudy', 'Rainy', 'Sunny', 'Rainy'],
    'Wind Direction': ['North', 'South', 'East', 'West', 'North']
})

# One-Hot Encoding for categorical variables.
# The encoders are left at their default (sparse) output and densified with
# .toarray(): the old `sparse=False` keyword was removed in scikit-learn 1.4
# and its replacement `sparse_output` does not exist before 1.2, so this form
# runs on either side of that change.
# Each column gets its own encoder so both fitted category lists stay available.
weather_encoder = OneHotEncoder()
wind_encoder = OneHotEncoder()
encoded_weather = weather_encoder.fit_transform(data[['Weather Condition']]).toarray()
encoded_wind = wind_encoder.fit_transform(data[['Wind Direction']]).toarray()

# Combine continuous and encoded categorical data
encoded_data = np.hstack((data[['Temperature', 'Humidity']].values, encoded_weather, encoded_wind))

# Convert to DataFrame.
# The one-hot columns come out in each encoder's sorted category order
# (Cloudy, Rainy, Sunny / East, North, South, West), not in order of first
# appearance, so the labels are read from `categories_` instead of being typed
# by hand; hand-written labels would silently name the wrong columns.
encoded_df = pd.DataFrame(encoded_data, columns=[
    'Temperature', 'Humidity',
    *weather_encoder.categories_[0].tolist(),
    *wind_encoder.categories_[0].tolist(),
])

# Calculate covariance matrix for continuous variables
cov_matrix = encoded_df[['Temperature', 'Humidity']].cov()
print(cov_matrix)


             Temperature  Humidity
Temperature          5.7       0.0
Humidity             0.0      62.5


