In [1]:
'''Q1. Install and load the latest versions of TensorFlow and Keras. Print their versions.'''



import tensorflow as tf
from keras import __version__ as keras_version

# Print TensorFlow version
print("TensorFlow version:", tf.__version__)

# Print Keras version
print("Keras version:", keras_version)


TensorFlow version: 2.17.0
Keras version: 3.4.1


In [4]:
'''Q2. Load the Wine Quality dataset and explore its dimensions.'''


import pandas as pd

# Load the Wine Quality dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
wine_data = pd.read_csv(url, sep=';')  # Note: The CSV uses ';' as a separator

# Explore the dimensions of the dataset
print("Dimensions of the dataset:", wine_data.shape)  # (rows, columns)
print("Number of rows:", wine_data.shape[0])
print("Number of columns:", wine_data.shape[1])


print(wine_data.head())

print(wine_data.info())

Dimensions of the dataset: (1599, 12)
Number of rows: 1599
Number of columns: 12
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   al

In [6]:
'''Q3. Check for null values, identify categorical variables, and encode them.'''

import pandas as pd

# Load the Wine Quality dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
wine_data = pd.read_csv(url, sep=';')  # The CSV uses ';' as a separator

# Step 1: Check for null values
null_values = wine_data.isnull().sum()
print("Null values in each column:\n", null_values)

# Step 2: Identify categorical variables
# Here, we assume 'quality' is the categorical variable
categorical_vars = wine_data.select_dtypes(include=['object']).columns.tolist()
print("\nCategorical variables:", categorical_vars)

# In the Wine Quality dataset, 'quality' is typically the only categorical variable
# If we treat 'quality' as categorical, we can see its unique values
print("\nUnique values in 'quality':", wine_data['quality'].unique())

# Step 3: Encode categorical variables
# For encoding, we can use Label Encoding or One-Hot Encoding.
# Here we'll use Label Encoding for the 'quality' column

from sklearn.preprocessing import LabelEncoder

# Create a label encoder
label_encoder = LabelEncoder()

# Encode the 'quality' column
wine_data['quality'] = label_encoder.fit_transform(wine_data['quality'])

# Display the first few rows of the updated DataFrame
print("\nDataFrame after encoding categorical variable 'quality':\n", wine_data.head())




Null values in each column:
 fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
dtype: int64

Categorical variables: []

Unique values in 'quality': [5 6 7 4 8 3]

DataFrame after encoding categorical variable 'quality':
    fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  de

In [8]:
'''Q4. Separate the features and target variables from the dataframe.'''

import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load the Wine Quality dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
wine_data = pd.read_csv(url, sep=';')  # The CSV uses ';' as a separator

# Encode the 'quality' column (as done previously)
label_encoder = LabelEncoder()
wine_data['quality'] = label_encoder.fit_transform(wine_data['quality'])

# Step 1: Separate features and target variable
# Define the target variable
target_variable = 'quality'

# Features are all columns except the target variable
X = wine_data.drop(columns=[target_variable])  # Features
y = wine_data[target_variable]                  # Target variable

# Print the shapes of the features and target
print("Features shape:", X.shape)
print("Target shape:", y.shape)

# Display the first few rows of features and target
print("\nFirst few rows of features (X):\n", X.head())
print("\nFirst few rows of target (y):\n", y.head())


Features shape: (1599, 11)
Target shape: (1599,)

First few rows of features (X):
    fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0                  34.0   0.9978  3.51       0.56   

   

In [10]:
'''Q5. Perform a train-test split and divide the data into training, validation, and test datasets.'''


import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the Wine Quality dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
wine_data = pd.read_csv(url, sep=';')  # The CSV uses ';' as a separator

# Encode the 'quality' column
label_encoder = LabelEncoder()
wine_data['quality'] = label_encoder.fit_transform(wine_data['quality'])

# Separate features and target variable
target_variable = 'quality'
X = wine_data.drop(columns=[target_variable])  # Features
y = wine_data[target_variable]                  # Target variable

# Step 1: Split data into training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 2: Split training data further into training and validation datasets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

# Print the shapes of the datasets
print("Training set shape:", X_train.shape, y_train.shape)
print("Validation set shape:", X_val.shape, y_val.shape)
print("Test set shape:", X_test.shape, y_test.shape)


Training set shape: (959, 11) (959,)
Validation set shape: (320, 11) (320,)
Test set shape: (320, 11) (320,)


In [12]:
'''Q6. Perform scaling on the dataset.'''


import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the Wine Quality dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
wine_data = pd.read_csv(url, sep=';')  # The CSV uses ';' as a separator

# Encode the 'quality' column
label_encoder = LabelEncoder()
wine_data['quality'] = label_encoder.fit_transform(wine_data['quality'])

# Separate features and target variable
target_variable = 'quality'
X = wine_data.drop(columns=[target_variable])  # Features
y = wine_data[target_variable]                  # Target variable

# Split data into training, validation, and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

# Step: Perform scaling
scaler = StandardScaler()  # You can also use MinMaxScaler() if preferred

# Fit the scaler on the training data and transform it
X_train_scaled = scaler.fit_transform(X_train)

# Transform the validation and test datasets using the same scaler
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Print the first few rows of the scaled datasets
print("\nFirst few rows of scaled training data (X_train_scaled):\n", X_train_scaled[:5])
print("\nFirst few rows of scaled validation data (X_val_scaled):\n", X_val_scaled[:5])
print("\nFirst few rows of scaled test data (X_test_scaled):\n", X_test_scaled[:5])



First few rows of scaled training data (X_train_scaled):
 [[-0.07720091 -0.83498749  0.23204161 -0.02597314 -0.15713964  0.8020062
   0.73648303 -0.26987976 -0.27478917  1.04133133  0.53682064]
 [-0.3675369   0.38524982 -1.08890028 -0.38079133 -0.17740278  0.31497229
  -0.16050354  0.11686153  0.49462051 -0.24600446 -0.3259643 ]
 [ 0.03893348  0.16338849 -1.1905112  -0.38079133 -0.11661338 -0.56168874
  -0.72859503  0.45062457 -0.59537654 -1.25348464 -0.80528927]
 [-0.07720091 -1.61150214  0.33365252  1.8190815  -0.52187604 -0.7565023
  -0.72859503  0.34466805 -0.59537654  1.54507142  0.44095564]
 [-0.94820886 -1.16777948  0.8417071  -0.09693678 -0.54213917  0.8020062
  -0.36980041 -1.0115754  -0.14655423  0.76147572  1.1120106 ]]

First few rows of scaled validation data (X_val_scaled):
 [[-0.65787288 -0.66859149  0.13043069 -0.30982769 -0.09635025  1.87348079
   0.76638258  0.13275501  0.81520788 -0.80571567 -0.99701925]
 [ 1.72288218  0.49618049  2.01023262  1.89004514 -0.03556085 

In [13]:
'''Q7. Create at least 2 hidden layers and an output layer for the binary categorical variables.'''

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow import keras
from tensorflow.keras import layers

# Load the Wine Quality dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
wine_data = pd.read_csv(url, sep=';')  # The CSV uses ';' as a separator

# Encode the 'quality' column
label_encoder = LabelEncoder()
wine_data['quality'] = label_encoder.fit_transform(wine_data['quality'])

# Separate features and target variable
target_variable = 'quality'
X = wine_data.drop(columns=[target_variable])  # Features
y = wine_data[target_variable]                  # Target variable

# Convert target variable to binary for classification
# Let's classify quality <= 5 as 0 (low quality) and > 5 as 1 (high quality)
y = (y > 5).astype(int)  # Binary classification

# Split data into training, validation, and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

# Perform scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Create the neural network model
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),  # Input layer
    layers.Dense(32, activation='relu'),  # Hidden layer 1
    layers.Dense(16, activation='relu'),  # Hidden layer 2
    layers.Dense(1, activation='sigmoid')  # Output layer for binary classification
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Print the model summary
model.summary()

# Train the model
history = model.fit(X_train_scaled, y_train,
                    validation_data=(X_val_scaled, y_val),
                    epochs=50,
                    batch_size=32)

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f'\nTest accuracy: {test_accuracy:.4f}')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 28ms/step - accuracy: 0.7430 - loss: 0.6016 - val_accuracy: 1.0000 - val_loss: 0.2425
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.1744 - val_accuracy: 1.0000 - val_loss: 0.0403
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 1.0000 - loss: 0.0303 - val_accuracy: 1.0000 - val_loss: 0.0113
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 1.0000 - loss: 0.0101 - val_accuracy: 1.0000 - val_loss: 0.0053
Epoch 5/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 1.0000 - loss: 0.0049 - val_accuracy: 1.0000 - val_loss: 0.0032
Epoch 6/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 1.0000 - loss: 0.0030 - val_accuracy: 1.0000 - val_loss: 0.0022
Epoch 7/50
[1m30/30[0m [32m━━━━━━━━━

In [14]:
'''Q8. Create a Sequential model and add all the layers to it.'''

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow import keras
from tensorflow.keras import layers

# Load the Wine Quality dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
wine_data = pd.read_csv(url, sep=';')  # The CSV uses ';' as a separator

# Encode the 'quality' column
label_encoder = LabelEncoder()
wine_data['quality'] = label_encoder.fit_transform(wine_data['quality'])

# Separate features and target variable
target_variable = 'quality'
X = wine_data.drop(columns=[target_variable])  # Features
y = wine_data[target_variable]                  # Target variable

# Convert target variable to binary for classification
# Let's classify quality <= 5 as 0 (low quality) and > 5 as 1 (high quality)
y = (y > 5).astype(int)  # Binary classification

# Split data into training, validation, and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

# Perform scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Create the Sequential model
model = keras.Sequential()

# Add layers to the model
model.add(layers.Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)))  # Input layer
model.add(layers.Dense(32, activation='relu'))  # Hidden layer 1
model.add(layers.Dense(16, activation='relu'))  # Hidden layer 2
model.add(layers.Dense(1, activation='sigmoid'))  # Output layer for binary classification

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Print the model summary
model.summary()

# Train the model
history = model.fit(X_train_scaled, y_train,
                    validation_data=(X_val_scaled, y_val),
                    epochs=50,
                    batch_size=32)

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f'\nTest accuracy: {test_accuracy:.4f}')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - accuracy: 1.0000 - loss: 0.3564 - val_accuracy: 1.0000 - val_loss: 0.0768
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 1.0000 - loss: 0.0510 - val_accuracy: 1.0000 - val_loss: 0.0109
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 1.0000 - loss: 0.0081 - val_accuracy: 1.0000 - val_loss: 0.0030
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 1.0000 - loss: 0.0030 - val_accuracy: 1.0000 - val_loss: 0.0011
Epoch 5/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 1.0000 - loss: 0.0013 - val_accuracy: 1.0000 - val_loss: 5.7270e-04
Epoch 6/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 1.0000 - loss: 6.0383e-04 - val_accuracy: 1.0000 - val_loss: 3.5486e-04
Epoch 7/50
[1m30/30[0m [

In [15]:
'''Q9. Implement a TensorBoard callback to visualize and monitor the model's training process.'''

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import TensorBoard
import os

# Load the Wine Quality dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
wine_data = pd.read_csv(url, sep=';')  # The CSV uses ';' as a separator

# Encode the 'quality' column
label_encoder = LabelEncoder()
wine_data['quality'] = label_encoder.fit_transform(wine_data['quality'])

# Separate features and target variable
target_variable = 'quality'
X = wine_data.drop(columns=[target_variable])  # Features
y = wine_data[target_variable]                  # Target variable

# Convert target variable to binary for classification
# Let's classify quality <= 5 as 0 (low quality) and > 5 as 1 (high quality)
y = (y > 5).astype(int)  # Binary classification

# Split data into training, validation, and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

# Perform scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Create the Sequential model
model = keras.Sequential()

# Add layers to the model
model.add(layers.Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)))  # Input layer
model.add(layers.Dense(32, activation='relu'))  # Hidden layer 1
model.add(layers.Dense(16, activation='relu'))  # Hidden layer 2
model.add(layers.Dense(1, activation='sigmoid'))  # Output layer for binary classification

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Set up TensorBoard callback
log_dir = os.path.join("logs", "fit", "run")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Print the model summary
model.summary()

# Train the model with the TensorBoard callback
history = model.fit(X_train_scaled, y_train,
                    validation_data=(X_val_scaled, y_val),
                    epochs=50,
                    batch_size=32,
                    callbacks=[tensorboard_callback])  # Include the TensorBoard callback

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f'\nTest accuracy: {test_accuracy:.4f}')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 17ms/step - accuracy: 0.5399 - loss: 0.6643 - val_accuracy: 1.0000 - val_loss: 0.3124
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 1.0000 - loss: 0.2327 - val_accuracy: 1.0000 - val_loss: 0.0607
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 1.0000 - loss: 0.0391 - val_accuracy: 1.0000 - val_loss: 0.0096
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 1.0000 - loss: 0.0074 - val_accuracy: 1.0000 - val_loss: 0.0038
Epoch 5/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 1.0000 - loss: 0.0032 - val_accuracy: 1.0000 - val_loss: 0.0023
Epoch 6/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 1.0000 - loss: 0.0018 - val_accuracy: 1.0000 - val_loss: 0.0016
Epoch 7/50
[1m30/30[0m [32m━━━━━━━━━

In [16]:
'''Q10. Use Early Stopping to prevent overfitting by monitoring a chosen metric and stopping the training if
no improvement is observed.'''

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping
import os

# Load the Wine Quality dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
wine_data = pd.read_csv(url, sep=';')  # The CSV uses ';' as a separator

# Encode the 'quality' column
label_encoder = LabelEncoder()
wine_data['quality'] = label_encoder.fit_transform(wine_data['quality'])

# Separate features and target variable
target_variable = 'quality'
X = wine_data.drop(columns=[target_variable])  # Features
y = wine_data[target_variable]                  # Target variable

# Convert target variable to binary for classification
y = (y > 5).astype(int)  # Binary classification

# Split data into training, validation, and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

# Perform scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Create the Sequential model
model = keras.Sequential()

# Add layers to the model
model.add(layers.Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)))  # Input layer
model.add(layers.Dense(32, activation='relu'))  # Hidden layer 1
model.add(layers.Dense(16, activation='relu'))  # Hidden layer 2
model.add(layers.Dense(1, activation='sigmoid'))  # Output layer for binary classification

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Set up TensorBoard callback
log_dir = os.path.join("logs", "fit", "run")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Set up EarlyStopping callback
early_stopping_callback = EarlyStopping(monitor='val_loss',  # Monitor validation loss
                                         patience=5,          # Number of epochs with no improvement after which training will be stopped
                                         restore_best_weights=True)  # Restore model weights from the epoch with the best value of the monitored quantity

# Print the model summary
model.summary()

# Train the model with the TensorBoard and EarlyStopping callbacks
history = model.fit(X_train_scaled, y_train,
                    validation_data=(X_val_scaled, y_val),
                    epochs=50,
                    batch_size=32,
                    callbacks=[tensorboard_callback, early_stopping_callback])  # Include both callbacks

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f'\nTest accuracy: {test_accuracy:.4f}')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 21ms/step - accuracy: 0.6952 - loss: 0.6026 - val_accuracy: 1.0000 - val_loss: 0.2526
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 1.0000 - loss: 0.1700 - val_accuracy: 1.0000 - val_loss: 0.0360
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 1.0000 - loss: 0.0247 - val_accuracy: 1.0000 - val_loss: 0.0088
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 1.0000 - loss: 0.0071 - val_accuracy: 1.0000 - val_loss: 0.0042
Epoch 5/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 1.0000 - loss: 0.0035 - val_accuracy: 1.0000 - val_loss: 0.0026
Epoch 6/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 1.0000 - loss: 0.0024 - val_accuracy: 1.0000 - val_loss: 0.0017
Epoch 7/50
[1m30/30[0m [32m━━━━━━━

In [17]:
'''Q11. Implement a ModelCheckpoint callback to save the best model based on a chosen metric during
training.'''


import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint
import os

# Load the Wine Quality dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
wine_data = pd.read_csv(url, sep=';')  # The CSV uses ';' as a separator

# Encode the 'quality' column
label_encoder = LabelEncoder()
wine_data['quality'] = label_encoder.fit_transform(wine_data['quality'])

# Separate features and target variable
target_variable = 'quality'
X = wine_data.drop(columns=[target_variable])  # Features
y = wine_data[target_variable]                  # Target variable

# Convert target variable to binary for classification
y = (y > 5).astype(int)  # Binary classification

# Split data into training, validation, and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

# Perform scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Create the Sequential model
model = keras.Sequential()

# Add layers to the model
model.add(layers.Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)))  # Input layer
model.add(layers.Dense(32, activation='relu'))  # Hidden layer 1
model.add(layers.Dense(16, activation='relu'))  # Hidden layer 2
model.add(layers.Dense(1, activation='sigmoid'))  # Output layer for binary classification

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Set up TensorBoard callback
log_dir = os.path.join("logs", "fit", "run")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Set up EarlyStopping callback
early_stopping_callback = EarlyStopping(monitor='val_loss',  # Monitor validation loss
                                         patience=5,          # Number of epochs with no improvement
                                         restore_best_weights=True)  # Restore best weights

# Set up ModelCheckpoint callback
model_checkpoint_callback = ModelCheckpoint(filepath='best_model.h5',  # Filepath to save the model
                                             monitor='val_loss',       # Metric to monitor
                                             save_best_only=True,      # Save only the best model
                                             mode='min',               # Save the model when the monitored metric is minimized
                                             verbose=1)                # Verbosity mode

# Print the model summary
model.summary()

# Train the model with the callbacks
history = model.fit(X_train_scaled, y_train,
                    validation_data=(X_val_scaled, y_val),
                    epochs=50,
                    batch_size=32,
                    callbacks=[tensorboard_callback, early_stopping_callback, model_checkpoint_callback])  # Include all callbacks

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f'\nTest accuracy: {test_accuracy:.4f}')

# Load the best model
best_model = keras.models.load_model('best_model.h5')
best_test_loss, best_test_accuracy = best_model.evaluate(X_test_scaled, y_test)
print(f'Best model test accuracy: {best_test_accuracy:.4f}')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


ValueError: The filepath provided must end in `.keras` (Keras model format). Received: filepath=best_model.h5