## Deep Learning Activity

In [1]:
# TensorFlow and Keras for deep learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# For handling the dataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Import metrics for regression 
from sklearn.metrics import mean_squared_error, r2_score

# Libraries needed for Streamlit app
import nbformat
import pickle

#### Text Classification

In [5]:

# Load and clean the dataset
df = pd.read_csv("ecommerceDataset.csv", header=None)
df.columns = ["Category", "Text"]

# Drop any rows with missing data
df.dropna(inplace=True)

# Encode text labels
label_encoder = LabelEncoder()
df["Label"] = label_encoder.fit_transform(df["Category"])

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(df["Text"], df["Label"], test_size=0.2, random_state=42)

# Tokenize text
max_vocab = 10000
max_len = 200

tokenizer = Tokenizer(num_words=max_vocab, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

X_train_pad = pad_sequences(X_train_seq, maxlen=max_len, padding='post', truncating='post')
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len, padding='post', truncating='post')

# Build model
model = keras.Sequential([
    layers.Embedding(input_dim=max_vocab, output_dim=64, input_length=max_len),
    layers.GlobalAveragePooling1D(),
    layers.Dense(64, activation='relu'),
    layers.Dense(len(label_encoder.classes_), activation='softmax')  # Output layer for classification
])

# Compile model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train model
model.fit(X_train_pad, y_train, epochs=10, batch_size=32, validation_split=0.1)

# Evaluate model
loss, accuracy = model.evaluate(X_test_pad, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# Classification report
y_pred_probs = model.predict(X_test_pad)
y_pred = np.argmax(y_pred_probs, axis=1)
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

# ----------------------------------------------- Code needed to create the corresponding pickle files -----------------------------------------------

# Save model
model.save("model_text.keras")

# Save tokenizer
with open("tokenizer_text.pkl", "wb") as f:
    pickle.dump(tokenizer, f)

# Save label encoder
with open("label_encoder_text.pkl", "wb") as f:
    pickle.dump(label_encoder, f)


Epoch 1/10




[1m1135/1135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.6712 - loss: 0.8484 - val_accuracy: 0.9219 - val_loss: 0.2854
Epoch 2/10
[1m1135/1135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9341 - loss: 0.2330 - val_accuracy: 0.9155 - val_loss: 0.2424
Epoch 3/10
[1m1135/1135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.9560 - loss: 0.1585 - val_accuracy: 0.9415 - val_loss: 0.1994
Epoch 4/10
[1m1135/1135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.9641 - loss: 0.1214 - val_accuracy: 0.9192 - val_loss: 0.2381
Epoch 5/10
[1m1135/1135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9702 - loss: 0.0984 - val_accuracy: 0.9351 - val_loss: 0.2189
Epoch 6/10
[1m1135/1135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9767 - loss: 0.0774 - val_accuracy: 0.9623 - val_loss: 0.1605
Epoch 7/10
[1m1135/1135[0

#### Regression

In [3]:
# Load the dataset
data = pd.read_csv("Walmart_Sales.csv")

# Convert Date column to datetime format
data["Date"] = pd.to_datetime(data["Date"], format="%d-%m-%Y")

# Extract useful features from Date
data["Year"] = data["Date"].dt.year
data["Month"] = data["Date"].dt.month
data["Day"] = data["Date"].dt.day
data["WeekOfYear"] = data["Date"].dt.isocalendar().week

# Drop the original Date column
data = data.drop(columns=["Date"])

# Define features (X) and target (y)
X = data.drop(columns=["Weekly_Sales"])
y = data["Weekly_Sales"]

# Normalize feature values
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into training and testing
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.25, random_state=42)

# Define a regression model
model = keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(1)  # Single output for regression
])

# Compile the model
model.compile(optimizer='adam',
              loss='mean_squared_error')

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1)

# Evaluate the model
predictions = model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, predictions)

# Use the model 
custom_input = np.array([[1, 0, 45.0, 2.60, 211.0, 8.1, 2012, 11, 3, 44]]) # Construct a custom input in order to obtain the corresponding prediction value
custom_input_scaled = scaler.transform(custom_input) # Normalize the custom input
custom_prediction = model.predict(custom_input_scaled) # Obtain the predicted value that corresponds to the custom input

# Show results
print()
print("Evaluation metrics for the model:")
print()
print(f"Root Mean Squared Error: {rmse:.2f}")
print(f"R^2 Score: {r2:.4f}")
print()
print(f"Predicted Weekly Sales for custom input: {custom_prediction[0][0]:.2f}") # Print the predicted value

# ----------------------------------------- Code needed to create the corresponding pickle file -----------------------------------------

# Save model
model.save("model_regression.h5")

# Save scaler
with open("scaler_regression.pkl", "wb") as f:
    pickle.dump(scaler, f)








Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 583us/step - loss: 1388529844224.0000
Epoch 2/50
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 625us/step - loss: 1465060163584.0000
Epoch 3/50
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 570us/step - loss: 1419510022144.0000
Epoch 4/50
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 559us/step - loss: 1414898647040.0000
Epoch 5/50
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 550us/step - loss: 1391384199168.0000
Epoch 6/50
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 521us/step - loss: 1334003105792.0000
Epoch 7/50
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 539us/step - loss: 1294334689280.0000
Epoch 8/50
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 552us/step - loss: 1248728711168.0000
Epoch 9/50
[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 549us/step - l




Evaluation metrics for the model:

Root Mean Squared Error: 532464.14
R^2 Score: 0.1167

Predicted Weekly Sales for custom input: 1031579.69


#### Image Classification

In [4]:

# Load metadata
train_df = pd.read_csv("Butterflies_Training_set.csv")
test_df = pd.read_csv("Butterflies_Testing_set.csv")

# Paths
train_dir = "train_images_butterflies"  
test_dir = "test_images_butterflies"

# Parameters
img_size = (128, 128)

# Load and preprocess training images
X = []
y = []

for i, row in train_df.iterrows():
    img_path = os.path.join(train_dir, row["filename"])
    image = load_img(img_path, target_size=img_size)
    image = img_to_array(image) / 255.0
    X.append(image)
    y.append(row["label"])

X = np.array(X)
y = np.array(y)

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split for validation
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Build model
model = keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(img_size[0], img_size[1], 3)),
    layers.MaxPooling2D(2, 2),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(len(label_encoder.classes_), activation='softmax')
])

# Compile
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))

# Evaluate
val_loss, val_acc = model.evaluate(X_val, y_val)
print()
print(f"Validation Accuracy of the model: {val_acc:.4f}")
print(f"Validation Loss of the model: {val_loss:.4f}")

# Predict on test set
X_test = []

for fname in test_df["filename"]:
    img_path = os.path.join(test_dir, fname)
    image = load_img(img_path, target_size=img_size)
    image = img_to_array(image) / 255.0
    X_test.append(image)

X_test = np.array(X_test)
pred_probs = model.predict(X_test)
pred_classes = np.argmax(pred_probs, axis=1)
pred_labels = label_encoder.inverse_transform(pred_classes)

# Save predictions
test_df["predicted_label"] = pred_labels
test_df.to_csv("predicted_test_labels.csv", index=False)
print()
print(test_df.head())

# ------------------------------------------ Create the corresponding pickle file ------------------------------------------

# Save model
model.save("model_image.h5")

# Save label encoder
with open("label_encoder_image.pkl", "wb") as f:
    pickle.dump(label_encoder, f)



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 108ms/step - accuracy: 0.0405 - loss: 4.2606 - val_accuracy: 0.2392 - val_loss: 3.0320
Epoch 2/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 105ms/step - accuracy: 0.3807 - loss: 2.3889 - val_accuracy: 0.3769 - val_loss: 2.4092
Epoch 3/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 103ms/step - accuracy: 0.6799 - loss: 1.1841 - val_accuracy: 0.4292 - val_loss: 2.3588
Epoch 4/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 103ms/step - accuracy: 0.8862 - loss: 0.4506 - val_accuracy: 0.3992 - val_loss: 2.9147
Epoch 5/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 103ms/step - accuracy: 0.9669 - loss: 0.1478 - val_accuracy: 0.4131 - val_loss: 3.1355
Epoch 6/10
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 103ms/step - accuracy: 0.9811 - loss: 0.0948 - val_accuracy: 0.4285 - val_loss: 3.5661
Epoch 7/10




      filename         predicted_label
0  Image_1.jpg      CLODIUS PARNASSIAN
1  Image_2.jpg           CRIMSON PATCH
2  Image_3.jpg          ORANGE OAKLEAF
3  Image_4.jpg             RED POSTMAN
4  Image_5.jpg  MILBERTS TORTOISESHELL
