<a href="https://colab.research.google.com/github/sanjanabayya30/Generative_AI_2025/blob/main/2019_W9_A8_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adadelta

# -------------------------
# Step 1: Load and clean dataset
# -------------------------
raw_df = pd.read_csv("/content/drive/MyDrive/archive (1).zip")

# Try to split into columns manually in case data is comma-separated in one column
if raw_df.shape[1] == 1:
    df = raw_df.iloc[:, 0].str.split(",", expand=True)
    df.columns = df.iloc[0]
    df = df.drop(index=0).reset_index(drop=True)
else:
    df = raw_df

# Clean column names
df.columns = df.columns.str.strip().str.lower()

# Convert all data to numeric
df = df.apply(pd.to_numeric)

# -------------------------
# Step 2: Prepare features and labels
# -------------------------
# Find the quality column reliably
quality_col = [col for col in df.columns if "quality" in col.lower()][0]

X = df.drop(columns=[quality_col])
y = (df[quality_col] >= 6).astype(int)  # Binary classification: Good (>=6) vs Bad (<6)

# -------------------------
# Step 3: Split and scale the data
# -------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# -------------------------
# Step 4: Build the ANN model
# -------------------------
model = Sequential([
    Dense(16, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(20, activation='relu'),
    Dense(25, activation='relu'),
    Dense(10, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary output
])

model.compile(optimizer=Adadelta(), loss='binary_crossentropy', metrics=['accuracy'])

# -------------------------
# Step 5: Train the model
# -------------------------
model.fit(X_train, y_train, epochs=100, batch_size=64, validation_data=(X_test, y_test), verbose=1)

# -------------------------
# Step 6: Save the trained model
# -------------------------
model.save("wine_quality_ann.h5")
print("✅ Model saved as 'wine_quality_ann.h5'")

# -------------------------
# Step 7: Evaluate the model
# -------------------------
train_loss, train_acc = model.evaluate(X_train, y_train, verbose=0)
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)

print(f"\n✅ Training Accuracy: {train_acc:.4f}")
print(f"✅ Testing Accuracy: {test_acc:.4f}")

# Predictions
y_pred = (model.predict(X_test) > 0.5).astype("int32")

# Metrics
print("\n📊 Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\n📋 Classification Report:")
print(classification_report(y_test, y_pred))

# -------------------------
# Step 8: Load and test saved model
# -------------------------
loaded_model = load_model("wine_quality_ann.h5")
sample = X_test[0].reshape(1, -1)
sample_pred = loaded_model.predict(sample)

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.5891 - loss: 0.6906 - val_accuracy: 0.5594 - val_loss: 0.6849
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5638 - loss: 0.6937 - val_accuracy: 0.5594 - val_loss: 0.6848
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5878 - loss: 0.6896 - val_accuracy: 0.5594 - val_loss: 0.6848
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5753 - loss: 0.6899 - val_accuracy: 0.5594 - val_loss: 0.6848
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5827 - loss: 0.7018 - val_accuracy: 0.5594 - val_loss: 0.6847
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5482 - loss: 0.7006 - val_accuracy: 0.5594 - val_loss: 0.6847
Epoch 7/100
[1m20/20[0m [32m━━━━━━━━━━━━━━



✅ Model saved as 'wine_quality_ann.h5'

✅ Training Accuracy: 0.5629
✅ Testing Accuracy: 0.5750
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 

📊 Confusion Matrix:
[[ 12 129]
 [  7 172]]

📋 Classification Report:




              precision    recall  f1-score   support

           0       0.63      0.09      0.15       141
           1       0.57      0.96      0.72       179

    accuracy                           0.57       320
   macro avg       0.60      0.52      0.43       320
weighted avg       0.60      0.57      0.47       320

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
