<a href="https://colab.research.google.com/github/sowmyarshetty/NNClass/blob/main/branching.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from tensorflow.keras import layers, models, Model
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split

In [2]:
# Load the wine-quality CSV from its remote URL into a DataFrame
path = 'https://static.bc-edx.com/ai/ail-v-1-0/m19/lesson_3/datasets/wine_quality.csv'
df = pd.read_csv(filepath_or_buffer=path)

# Preview the first five rows
df.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,color
0,7.2,0.39,0.63,11.0,0.044,55.0,156.0,0.9974,3.09,0.44,8.7,ok,white
1,6.9,0.63,0.02,1.9,0.078,18.0,30.0,0.99712,3.4,0.75,9.8,ok,red
2,6.9,0.3,0.33,4.1,0.035,26.0,155.0,0.9925,3.25,0.79,12.3,good,white
3,7.3,0.42,0.38,6.8,0.045,29.0,122.0,0.9925,3.19,0.37,12.6,good,white
4,6.9,0.18,0.38,8.1,0.049,44.0,176.0,0.9958,3.3,0.54,9.8,ok,white


In [3]:
# Preprocess y: encode the two target columns ("quality" and "color")

# One-hot encode "quality": one indicator column per category.
quality_encoder = OneHotEncoder(sparse_output=False)
quality_encoded = quality_encoder.fit_transform(df[["quality"]])
quality_columns = quality_encoder.get_feature_names_out(["quality"])
# Reuse df's index so the axis=1 concat below pairs rows by label even when df
# does not carry a default 0..n-1 index (e.g. after rows were filtered upstream).
quality_encoded_df = pd.DataFrame(quality_encoded, columns=quality_columns, index=df.index)

# Label-encode "color" (label encoding for a binary column; a column with
# multiple categories would be one-hot encoded instead).
color_encoder = LabelEncoder()
df["color_encoded"] = color_encoder.fit_transform(df["color"])

# Attach the one-hot columns to the original DataFrame
processed_df = pd.concat([df, quality_encoded_df], axis=1)

# Drop the original string-valued "quality" and "color" columns now that the
# encoded versions are present
processed_df = processed_df.drop(["quality", "color"], axis=1)
processed_df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,color_encoded,quality_bad,quality_good,quality_ok
0,7.2,0.39,0.63,11.0,0.044,55.0,156.0,0.9974,3.09,0.44,8.7,1,0.0,0.0,1.0
1,6.9,0.63,0.02,1.9,0.078,18.0,30.0,0.99712,3.4,0.75,9.8,0,0.0,0.0,1.0
2,6.9,0.3,0.33,4.1,0.035,26.0,155.0,0.9925,3.25,0.79,12.3,1,0.0,1.0,0.0
3,7.3,0.42,0.38,6.8,0.045,29.0,122.0,0.9925,3.19,0.37,12.6,1,0.0,1.0,0.0
4,6.9,0.18,0.38,8.1,0.049,44.0,176.0,0.9958,3.3,0.54,9.8,1,0.0,0.0,1.0


In [4]:
# Split data into X and two separate y variables
y_color = processed_df["color_encoded"]
y_quality = processed_df[["quality_bad", "quality_good", "quality_ok"]]

# Features are every column that is not one of the targets; deriving the names
# from the target objects keeps this list from drifting out of sync with them.
X = processed_df.drop(columns=[y_color.name, *y_quality.columns])

# Split data into training and testing sets. X and both targets go through one
# call so their rows stay aligned; random_state pins the shuffle so the same
# rows land in train/test on every Restart & Run All.
X_train, X_test, y_color_train, y_color_test, y_quality_train, y_quality_test = train_test_split(
    X, y_color, y_quality, random_state=42
)


In [5]:
# Check the training split's dimensions: (rows, feature columns)
X_train.shape

(4872, 11)

In [6]:
# Number of feature columns; the same expression sizes the model's input layer
X_train.shape[1]

11

In [7]:
# Build the part of the network that both outputs share.
#
# TensorFlow offers two ways to create a neural network: sequential and
# functional. This notebook uses the functional style, where each layer is
# called on the tensor produced by the previous one:
#   input layer > shared layer 1 > shared layer 2 > 1) quality output 2) color output

# Input layer: one slot per feature column of the training data
n_features = X_train.shape[1]
input_layer = layers.Input(shape=(n_features,), name="input_features")

# Shared hidden layers: two dense ReLU layers stacked on the input
shared_layer1 = layers.Dense(units=64, activation="relu")(input_layer)
shared_layer2 = layers.Dense(units=32, activation="relu")(shared_layer1)

In [8]:
# Quality branch: 3-unit softmax layer fed by the second shared layer
quality_head = layers.Dense(units=3, activation="softmax", name="quality_output")
quality_output = quality_head(shared_layer2)

# Color branch: single sigmoid unit fed by the same shared layer
color_head = layers.Dense(units=1, activation="sigmoid", name="color_output")
color_output = color_head(shared_layer2)

In [9]:
# Assemble the model: one input, two outputs (quality first, then color)
model = Model(inputs=input_layer, outputs=[quality_output, color_output])

# Loss and metric for each output, keyed by the output layer's name
head_losses = {
    "quality_output": "categorical_crossentropy",
    "color_output": "binary_crossentropy",
}
head_metrics = {
    "quality_output": "accuracy",
    "color_output": "accuracy",
}

# Compile with the Adam optimizer and the per-output settings above
model.compile(optimizer="adam", loss=head_losses, metrics=head_metrics)

# Show the model summary
model.summary()

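To fit the model to the data, we pass X_train as usual, but supply the targets as a dictionary keyed by output-layer name (`quality_output`, `color_output`) so that each branch is trained against its own labels.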

In [10]:
# Fit the model. Targets are passed as a dict keyed by output-layer name so each
# branch is trained against its own labels.
# Keep the returned History object: it records the per-epoch loss/metric values,
# and assigning it stops its bare repr from being echoed as the cell's output.
history = model.fit(
    X_train,
    {"quality_output": y_quality_train, "color_output": y_color_train},
    epochs=4,
    batch_size=32,
    validation_split=0.2,  # hold out 20% of the training rows for per-epoch validation
)

Epoch 1/4
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - color_output_accuracy: 0.8325 - color_output_loss: 0.3445 - loss: 7.1620 - quality_output_accuracy: 0.5204 - quality_output_loss: 6.8174 - val_color_output_accuracy: 0.9190 - val_color_output_loss: 0.2443 - val_loss: 0.9179 - val_quality_output_accuracy: 0.7590 - val_quality_output_loss: 0.6764
Epoch 2/4
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - color_output_accuracy: 0.9354 - color_output_loss: 0.2027 - loss: 0.8590 - quality_output_accuracy: 0.7608 - quality_output_loss: 0.6563 - val_color_output_accuracy: 0.9272 - val_color_output_loss: 0.1973 - val_loss: 0.9582 - val_quality_output_accuracy: 0.7600 - val_quality_output_loss: 0.7616
Epoch 3/4
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - color_output_accuracy: 0.9385 - color_output_loss: 0.1868 - loss: 0.8601 - quality_output_accuracy: 0.7404 - quality_output_loss: 0.6733 - val_col

<keras.src.callbacks.history.History at 0x7c1f1cf5e3d0>

In [11]:
# Score the trained model on the held-out test split; as with fitting, the
# targets are supplied as a dict keyed by output-layer name
test_targets = {"quality_output": y_quality_test, "color_output": y_color_test}
results = model.evaluate(X_test, test_targets)

# Display the returned loss/metric values
results

[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - color_output_accuracy: 0.9484 - color_output_loss: 0.1634 - loss: 0.8059 - quality_output_accuracy: 0.7705 - quality_output_loss: 0.6426


[0.8167828917503357,
 0.6629427671432495,
 0.1534889042377472,
 0.947692334651947,
 0.7612307667732239]