# Image Classification with Google MobileNetV2

## Import Libraries

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import os
from IPython.display import Image
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from matplotlib.pyplot import imread
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator # type: ignore
import warnings
warnings.filterwarnings("ignore")
import kagglehub


In [None]:
# Class names: one per sub-directory of the training folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to keep
# the class-index mapping stable between runs; non-directory entries are skipped
# because every class is later listed as a directory.
train_dir = 'C:/fruit_recognition/client/src/predict/archive/train'
kinds = np.array(sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
))
print(f"Kinds in this dataset: {kinds}")

In [None]:
# Root of the training set and the directory of every class below it
path = 'C:/fruit_recognition/client/src/predict/archive/train'
kind_path = []
for class_name in kinds:
    kind_path.append(f"{path}/{class_name}")
kind_path

In [None]:
# Report how many entries each class directory contains
for class_name, class_dir in zip(kinds, kind_path):
    n_pictures = len(os.listdir(class_dir))
    print(f"There are {n_pictures} pictures in {class_name}")

In [None]:
# Example images: show the first JPG (by sorted file name) of every class
# in a grid that is at most 9 columns wide.
plt.figure(figsize=(10, 4))
plt.suptitle("Example Images")

num_kinds = len(kinds)
# At least one column so the row computation never divides by zero
num_cols = max(1, min(num_kinds, 9))
num_rows = (num_kinds + num_cols - 1) // num_cols

for i, kind in enumerate(kinds):
    ax = plt.subplot(num_rows, num_cols, i + 1)
    # Hide the frame up front so a class without any JPG leaves a blank cell
    ax.axis("off")
    jpg_files = sorted(
        file for file in os.listdir(kind_path[i]) if file.lower().endswith('.jpg')
    )
    if not jpg_files:
        print(f"No JPG file found in directory: {kind_path[i]}")
        continue
    # Use a dedicated name so the dataset root stored in `path` is not overwritten
    example_path = os.path.join(kind_path[i], jpg_files[0])
    ax.imshow(plt.imread(example_path))
    ax.set_title(kind.capitalize())

plt.show()

In [None]:
# All ids: the file name without its extension, for every JPG in every class directory
id_df = []
for class_dir in kind_path:
    for file_name in os.listdir(class_dir):
        if file_name.lower().endswith('.jpg'):
            # splitext removes only the final extension, so a name that contains
            # additional dots (e.g. "apple.v2.jpg") keeps its full stem
            id_df.append(os.path.splitext(file_name)[0])

len(id_df)

In [None]:
# All kinds: repeat each class name once per JPG file found in its directory
kind_df = []
for kind, class_dir in zip(kinds, kind_path):
    n_jpgs = sum(1 for name in os.listdir(class_dir) if name.lower().endswith('.jpg'))
    kind_df.extend([kind] * n_jpgs)

len(kind_df)

In [None]:
# Assemble the id and kind lists into a two-column dataframe
df = pd.DataFrame({"id": id_df, "kind": kind_df}, columns=["id", "kind"])
df.tail()

In [None]:
# Check numbers: how many rows each value of the "kind" column has
df["kind"].value_counts()

In [None]:
# Full path of every JPG file, collected class directory by class directory
filenames = []
for class_dir in kind_path:
    for file_name in os.listdir(class_dir):
        if file_name.lower().endswith('.jpg'):
            filenames.append(class_dir + "/" + file_name)

filenames[:5]


In [None]:
# Check one sample: the file path and the dataframe row at the same position
# should describe the same picture. The index is clamped to the last element so
# datasets with fewer than 2318 pictures do not raise IndexError / KeyError.
if filenames:
    sample_idx = min(2317, len(filenames) - 1)
    print(filenames[sample_idx])
    print(df.loc[sample_idx])
else:
    print("No JPG files were found, nothing to check")

In [None]:
# One boolean vector per sample: True at the position of its class in `kinds`
boolean_kinds = []
for label in kind_df:
    boolean_kinds.append(label == kinds)
boolean_kinds[:5]

In [None]:
# Number of label vectors (one per entry of kind_df)
len(boolean_kinds)

## Creating test and train sets

In [None]:
# Features are the image file paths, targets are the boolean label vectors
X = filenames
y = boolean_kinds

In [None]:
# Hold out 20% of the samples; the fixed random_state keeps the split reproducible
split = train_test_split(X, y, test_size=0.2, random_state=18)
X_train, X_test, y_train, y_test = split
len(X_train), len(X_test), len(y_train), len(y_test)

In [None]:
# First three training paths with their label vectors
X_train[:3], y_train[:3]

## Preprocessing Images

In [None]:
# Read one picture with matplotlib's imread and inspect the array shape
image = imread(filenames[15])
image.shape

In [None]:
# Largest and smallest value stored in the image array
image.max(), image.min()

In [None]:
# First 50 entries along the first axis of the image array
image[:50]

In [None]:
# Turn the image array into a tensor and show its first two entries along the first axis
tf.constant(image)[:2]

In [None]:
# Side length, in pixels, that process_image resizes every picture to
img_size = 224

In [None]:
# Create a function for preprocessing images
def process_image(image_path, img_size=img_size):
  """Load a JPEG file and return it as a float32 tensor of size img_size x img_size.

  Args:
    image_path: Path of the JPEG file to read.
    img_size: Target height and width in pixels.

  Returns:
    The decoded 3-channel image, converted to float32 and resized to
    [img_size, img_size].
  """
  raw_bytes = tf.io.read_file(image_path)
  # Decode to a numerical tensor with 3 colour channels (red, green, blue)
  decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
  # Converting to float32 rescales the 0-255 channel values to 0-1
  scaled = tf.image.convert_image_dtype(decoded, tf.float32)
  return tf.image.resize(scaled, size=[img_size, img_size])

In [None]:
# Preprocess sample 17 and show it next to its label converted to a tensor
process_image(X[17]), tf.constant(y[17])

## Creating data batches

In [None]:
# Create a simple function to return a tuple (image, label)
def get_image_label (image_path, label):
  """Return (preprocessed image, label) for one image path; the label is passed through unchanged."""
  return process_image(image_path), label

In [None]:
# Default number of samples per batch used by create_data_batches
batch_size = 32

In [None]:
# Create a function to turn data into batches
def create_data_batches(X, y=None, batch_size=batch_size, test_data=False):
    """Turn image paths (and optionally labels) into a batched tf.data.Dataset.

    Args:
        X: Sequence of image file paths.
        y: Sequence of labels aligned with X. May be omitted for unlabelled data.
        batch_size: Number of samples per batch.
        test_data: When True, labels are ignored and only images are produced.

    Returns:
        A batched dataset of images when test_data is True or y is None
        (order preserved, no shuffling); otherwise a shuffled, batched dataset
        of (image, label) pairs.
    """
    # Unlabelled data: the signature allows y=None, and tf.constant(None) would fail
    if test_data or y is None:
        paths = tf.data.Dataset.from_tensor_slices(tf.constant(X))
        return paths.map(process_image).batch(batch_size)

    data = tf.data.Dataset.from_tensor_slices((tf.constant(X),
                                               tf.constant(y)))
    # Shuffling path names and labels before mapping the image processor is
    # faster than shuffling decoded images
    data = data.shuffle(buffer_size=len(X))

    # Create (image, label) tuples (this also turns each path into a preprocessed image)
    data = data.map(get_image_label)

    # Group the samples into batches
    return data.batch(batch_size)

In [None]:
# Create training and validation data batches (both built from paths plus labels)
train_data = create_data_batches(X_train, y_train)
test_data = create_data_batches(X_test, y_test)

In [None]:
# Inspect the element structure of both datasets
train_data.element_spec, test_data.element_spec

In [None]:
# Pull the first batch of the training set as NumPy arrays
train_images, train_labels = next(train_data.as_numpy_iterator())
#test_images, test_labels = next(test_data.as_numpy_iterator())
# Show the first two images and labels of that batch
train_images[:2], train_labels[:2]

In [None]:
# Visualizing data batches: up to 25 images of the batch in a 5 x 5 grid, each
# titled with the class at the argmax of its label vector
plt.figure(figsize=(10, 10))
# A batch can hold fewer than 25 samples, so never index past its end
for i in range(min(25, len(train_images))):
    ax = plt.subplot(5, 5, i+1)
    plt.imshow(train_images[i])
    plt.title(kinds[train_labels[i].argmax()])
    plt.axis("off")

## Model

In [None]:
# Setup input shape to the model; None leaves the batch dimension unspecified
input_shape = (None, img_size, img_size, 3) # batch, height, width, colour channels
# Setup output shape of our model: one unit per class in `kinds`
output_shape = len(kinds)

In [None]:
import kagglehub

# Download latest version
# NOTE: this rebinds `path`, the same name an earlier cell uses for the training directory
path = kagglehub.model_download("google/mobilenet-v2/tensorFlow2/140-224-classification")

print("Path to model files:", path)

In [None]:
# Load the hub module once. Constructing hub.KerasLayer inside the lambda would
# create a fresh layer every time the Lambda layer is called.
mobilenet_layer = hub.KerasLayer("C:/Users/thang/.cache/kagglehub/models/google/mobilenet-v2/tensorFlow2/140-224-classification/2")

model = tf.keras.Sequential([
    tf.keras.layers.Lambda(lambda x: mobilenet_layer(x)),  # MobileNet V2 used as the backbone
    tf.keras.layers.BatchNormalization(),  # batch normalization layer
    tf.keras.layers.Dense(units=output_shape, activation="softmax")  # output layer

])

In [87]:
import tensorflow as tf
import tensorflow_hub as hub

# Define the output shape (number of classes) from the dataset itself, so the
# Dense layer always has the same width as the label vectors built from `kinds`
output_shape = len(kinds)

# Define a function to wrap the TensorFlow Hub layer
def mobilenet_v2_layer():
    """Return a non-trainable hub.KerasLayer for the locally cached MobileNet V2 model."""
    return hub.KerasLayer("C:/Users/thang/.cache/kagglehub/models/google/mobilenet-v2/tensorFlow2/140-224-classification/2",
                          trainable=False)

# Instantiate the hub layer once; calling mobilenet_v2_layer() inside the lambda
# would build a new layer on every call of the Lambda layer
mobilenet_layer = mobilenet_v2_layer()

# Create the Sequential model
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(224, 224, 3)),  # Input shape for MobileNet V2
    tf.keras.layers.Lambda(lambda x: mobilenet_layer(x)),  # Wrap the Hub layer
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(units=output_shape, activation="softmax")
])

# Print the model summary
model.summary()


In [None]:
  # Configure training: categorical cross-entropy loss, Adam optimizer, accuracy metric
  model.compile(
      loss=tf.keras.losses.CategoricalCrossentropy(),
      optimizer=tf.keras.optimizers.Adam(),
      metrics=["accuracy"]
  )

In [None]:
# Build the model for inputs of shape input_shape
model.build(input_shape)

In [None]:
# Print the layers of the model
model.summary()

In [None]:
# Create early stopping callback.
# Monitor the validation metric: fit() receives validation_data, and stopping on
# the training accuracy alone cannot react to overfitting.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor="val_accuracy",
                                                  patience=3)

In [None]:
import tensorflow as tf
# Use the stable tf.config API rather than the legacy `experimental` alias
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


In [None]:
# Train for at most 10 epochs, validating on test_data after every epoch
history = model.fit(
    train_data,
    validation_data=test_data,
    epochs=10,
    callbacks=[early_stopping],
)


In [None]:
# Save the trained model to final_model_path
final_model_path ="C:/fruit_recognition/client/src/predict/model.h5"
model.save(final_model_path)


In [None]:
from tensorflow.keras.utils import plot_model
# Write a diagram of the model, including tensor shapes, to mobilenet.png
plot_model(model, to_file='mobilenet.png', show_shapes=True)

In [None]:
# Plot the training and validation curves, first for the loss and then for the accuracy
for metric_key, metric_name in (('loss', 'Loss'), ('accuracy', 'Accuracy')):
    plt.plot(history.history[metric_key], label=f'Train {metric_name}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_name}')
    plt.title(f'Training and Validation {metric_name}')
    plt.xlabel('Epochs')
    plt.ylabel(metric_name)
    plt.legend()
    plt.show()

# Compute the loss and accuracy on test_data
loss, accuracy = model.evaluate(test_data)
print(f'Validation Loss: {loss:.4f}')
print(f'Validation Accuracy: {accuracy:.4f}')

## Predictions

In [None]:
def create_data_batches(X, y=None, batch_size=batch_size, test_data=False):
    """Build a batched tf.data.Dataset from image paths and, unless test_data, labels.

    Args:
        X: Sequence of image file paths.
        y: Sequence of labels aligned with X; only read when test_data is False.
        batch_size: Number of samples per batch.
        test_data: When True, produce image-only batches without shuffling.

    Returns:
        Batches of images when test_data is True, otherwise shuffled batches of
        (image, label) pairs.
    """
    if not test_data:
        print("Creating data batches...")
        # Pair every path with its label
        pairs = tf.data.Dataset.from_tensor_slices((tf.constant(X), tf.constant(y)))
        # Shuffle the lightweight paths and labels before decoding any image
        shuffled = pairs.shuffle(buffer_size=len(X))
        # Decode each path into a preprocessed image, then batch
        return shuffled.map(get_image_label).batch(batch_size)

    print("Creating test data batches...")
    # Only file paths, no labels
    paths = tf.data.Dataset.from_tensor_slices(tf.constant(X))
    return paths.map(process_image).batch(batch_size)

In [None]:
# Rebuild the batches: labelled training data, and image-only test data for predict()
train_data = create_data_batches(X_train, y_train)
test_data = create_data_batches(X_test, test_data=True)

In [None]:
# Run the model on the image-only test batches
predictions = model.predict(test_data)
predictions

In [None]:
# Map a score / label vector to the class name at its argmax
def _label_of(scores):
    return kinds[np.argmax(scores)]

true_labels = list(map(_label_of, y_test))
pred_labels = list(map(_label_of, predictions))

In [None]:
# First 20 true class names
true_labels[:20]

In [None]:
# First 20 predicted class names
pred_labels[:20]

In [None]:
# Accuracy score of the predicted class names against the true ones
print(accuracy_score(true_labels, pred_labels))

In [None]:
import tensorflow as tf
# Report the installed TensorFlow and tf.keras versions
print("TensorFlow version:", tf.__version__)
print("tf.keras version:", tf.keras.__version__)


In [None]:
import numpy as np
# Function to preprocess the image
def preprocess_image(image_path):
    """Read a JPEG file and return it as a single-image batch scaled by 1/255.

    Args:
        image_path: Path of the JPEG file to read.

    Returns:
        The 3-channel image resized to 224 x 224, with a leading batch axis
        added and every value divided by 255.
    """
    raw_bytes = tf.io.read_file(image_path)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    # Resize to match the model input shape
    resized = tf.image.resize(pixels, [224, 224])
    # Add the batch dimension
    batched = tf.expand_dims(resized, axis=0)
    # Normalize to the [0, 1] range
    return batched / 255.0
# Load the trained model from disk
model_path = "C:/fruit_recognition/client/src/predict/model.h5"
model = tf.keras.models.load_model(model_path, custom_objects={'KerasLayer': hub.KerasLayer})

# Preprocess the test image and run the loaded model on it
test_image_path = 'C:/fruit_recognition/client/src/predict/Image/Image_1.jpg'
test_image = preprocess_image(test_image_path)
predictions = model.predict(test_image)

# Scores of the single image in the batch
scores = predictions[0]

# Indices of the five highest scores, best first, with their probabilities
top5_indices = np.argsort(scores)[::-1][:5]
top5_probabilities = scores[top5_indices]

# Class names come from `kinds`, in the same order as the model outputs
class_names = kinds
top5_labels = [class_names[i] for i in top5_indices]

# Display the top 5 predictions
print("Top 5 Predictions:")
for label, probability in zip(top5_labels, top5_probabilities):
    print(f"{label}: {probability:.4f}")

# Display the predicted class (highest score)
class_index = np.argmax(scores)
class_label = class_names[class_index]
print(f"\nThe predicted class of the test image is: {class_label}")


In [None]:

# Installed TensorFlow version
print(tf.__version__)


In [None]:
# importlib.metadata is the standard-library replacement for the deprecated
# pkg_resources API
from importlib.metadata import PackageNotFoundError, version as _dist_version

package_name = 'tensorflow-metadata'
try:
    version = _dist_version(package_name)
except PackageNotFoundError:
    # Report a missing package instead of aborting the cell with a traceback
    version = None
    print(f"{package_name} is not installed")
else:
    print(f"{package_name} version: {version}")


In [None]:
import json
from datetime import datetime, timezone

# Class names as a plain Python list. The metadata stores them as a JSON array
# (one entry per class); a single comma-joined string would read as one label.
labels = kinds.tolist()
# Comma-separated form, kept for display purposes
kinds_str_list = ', '.join(labels)

# Information needed to create metadata.json
metadata = {
    "tfjsVersion": "2.15.0",
    "tmVersion": "0.14.0",
    # Timezone-aware UTC timestamp, rendered with the same trailing "Z" as before
    # (datetime.utcnow() is deprecated)
    "timeStamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
    "userMetadata": {},
    "modelName": "tm-my-image-model",
    "labels": labels,
    "imageSize": 224
}

# Path where metadata.json is saved
output_path = '/kaggle/working/metadata.json'

# Create the metadata.json file; UTF-8 so non-ASCII class names survive as written
with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, indent=2, ensure_ascii=False)

print(f"metadata.json file created successfully at {output_path}.")


In [None]:
import subprocess

# Convert the saved Keras model to the TensorFlow.js format.
# The command is passed as an argument list (no shell involved), and check=True
# raises CalledProcessError when the converter exits with a non-zero status
# instead of silently ignoring the failure.
command = [
    "tensorflowjs_converter",
    "--input_format=keras",
    "/kaggle/working/model.h5",
    "/kaggle/working/",
]

subprocess.run(command, check=True)


In [None]:
# Show the array of class names
kinds