In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

# Load the dataset; change the path below to point at a different CSV file.
data = pd.read_csv('wine.csv')

# Step 1: Check for Null Values
# Per-column count of missing entries, printed so gaps are visible before encoding.
null_values = data.isnull().sum()
print("Null Values:")
print(null_values)

# Step 2: Identify Categorical Variables
# Columns whose dtype is `object` are the ones treated as categorical below.
categorical_variables = data.select_dtypes(include=['object']).columns
print("Categorical Variables:")
print(categorical_variables)

# Step 3: Encode Categorical Variables
for column in categorical_variables:
    # At most two distinct values: replace the column in place with integer codes.
    if len(data[column].unique()) <= 2:
        label_encoder = LabelEncoder()
        data[column] = label_encoder.fit_transform(data[column])
    # More than two distinct values: expand into one indicator column per value.
    else:
        # No `sparse=`/`sparse_output=` keyword is passed: the name of that
        # argument differs between scikit-learn releases, so the default
        # (sparse) result is densified explicitly with `.toarray()` instead.
        onehot_encoder = OneHotEncoder()
        encoded_values = onehot_encoder.fit_transform(data[[column]]).toarray()

        # `get_feature_names` was replaced by `get_feature_names_out`; prefer
        # the new method and fall back only when it does not exist.
        if hasattr(onehot_encoder, "get_feature_names_out"):
            encoded_names = onehot_encoder.get_feature_names_out([column])
        else:
            encoded_names = onehot_encoder.get_feature_names([column])

        # Reuse `data.index` so the column-wise concat lines rows up correctly
        # even if `data` does not carry a default 0..n-1 index.
        encoded_columns = pd.DataFrame(
            encoded_values, columns=encoded_names, index=data.index
        )

        # Drop the original column and append its indicator columns at the end.
        data = pd.concat([data.drop(columns=column), encoded_columns], axis=1)

# Display the encoded dataset
print("Encoded Dataset:")
print(data.head())


Null Values:
fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
dtype: int64
Categorical Variables:
Index(['quality'], dtype='object')
Encoded Dataset:
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0