In [6]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from google.colab import drive

# Mount Google Drive so the dataset under /content/drive is readable
drive.mount('/content/drive')

# Load the combined wine dataset. No `sep` is passed, so read_csv uses its
# default comma delimiter (the file is not read as semicolon-delimited).
combined_wine_df = pd.read_csv("/content/drive/MyDrive/Neural Network Soft Computing Assignment/combined_wine_dataset.csv")

# Report the number of missing values per column
missing_values = combined_wine_df.isnull().sum()
print(missing_values)

# One-hot encode 'Wine_Type'. dtype=int pins the indicator columns to 0/1;
# without it pandas >= 2.0 emits True/False, which would change the saved CSV.
combined_wine_df = pd.get_dummies(combined_wine_df, columns=['Wine_Type'], prefix='Wine_Type', dtype=int)

# Numerical feature columns to standardize
numerical_cols = [
    'fixed acidity',
    'volatile acidity',
    'citric acid',
    'residual sugar',
    'chlorides',
    'free sulfur dioxide',
    'total sulfur dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
]

# Initialize the scaler
scaler = StandardScaler()

# Standardize the numerical features in place.
# NOTE(review): the scaler is fitted on the full dataset here. If a train/test
# split happens downstream, fitting on the training portion only would avoid
# leaking test statistics -- confirm against the training code.
combined_wine_df[numerical_cols] = scaler.fit_transform(combined_wine_df[numerical_cols])

# Initialize the label encoder
label_encoder = LabelEncoder()

# Encode 'Quality_Category' as integer class codes
combined_wine_df['Quality_Category_Encoded'] = label_encoder.fit_transform(combined_wine_df['Quality_Category'])

# Show which integer code stands for which category; the original label column
# is dropped below, so this is the only record of the mapping.
print(dict(enumerate(label_encoder.classes_)))

# Drop the raw target columns that are not needed for training
combined_wine_df = combined_wine_df.drop(columns=['Quality_Category', 'quality'])

# Display the preprocessed dataset
combined_wine_df.head()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
Wine_Type               0
Quality_Category        0
dtype: int64


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,Wine_Type_Red,Wine_Type_White,Quality_Category_Encoded
0,0.142473,2.188833,-2.192833,-0.744778,0.569958,-1.10014,-1.446359,1.034993,1.81309,0.193097,-0.915464,1,0,2
1,0.451036,3.282235,-2.192833,-0.59764,1.197975,-0.31132,-0.862469,0.701486,-0.115073,0.999579,-0.580068,1,0,2
2,0.451036,2.5533,-1.917553,-0.660699,1.026697,-0.874763,-1.092486,0.768188,0.25812,0.797958,-0.580068,1,0,2
3,3.073817,-0.362438,1.661085,-0.744778,0.541412,-0.762074,-0.986324,1.101694,-0.363868,0.32751,-0.580068,1,0,2
4,0.142473,2.188833,-2.192833,-0.744778,0.569958,-1.10014,-1.446359,1.034993,1.81309,0.193097,-0.915464,1,0,2


In [8]:
# Destination on the mounted Google Drive for the preprocessed dataset
output_path = "/content/drive/MyDrive/Neural Network Soft Computing Assignment/combined_wine_dataset1.csv"

# Write the frame to CSV; index=False keeps the pandas row index out of the file
combined_wine_df.to_csv(path_or_buf=output_path, index=False)

# Report where the file was written
print(f"Preprocessed dataset saved to {output_path}")


Preprocessed dataset saved to /content/drive/MyDrive/Neural Network Soft Computing Assignment/combined_wine_dataset1.csv


# New Section

# New Section