In [None]:
import inspect
import json
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from PIL import Image
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import confusion_matrix, make_scorer
from sklearn.model_selection import GridSearchCV, ParameterGrid, train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model, clone_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
#Getting path to downloaded dataset
# NOTE(review): absolute, machine-specific path. The split directories below
# are derived from it, so change base_dir before running on another machine.
base_dir = r"C:\Users\Dell Inspiron\Documents\School_2024_2\Intro to AI\Nigeria\Data_Nigeria"
train_dir = os.path.join(base_dir, 'train')  # training split
test_dir = os.path.join(base_dir, 'test')    # held-out test split
val_dir = os.path.join(base_dir, 'val')      # validation split
whole_data = base_dir  # alias for the dataset root

In [None]:
#To download directly
#Installing kaggle
!pip install kaggle


In [None]:
#Placing Kaggle API at the right directory
# Copies ./kaggle.json to ~/.kaggle/kaggle.json. The content goes through the
# JSON parser on the way, so a malformed credentials file fails here.
kaggle_dir = Path.home() / '.kaggle'
kaggle_json_path = kaggle_dir / 'kaggle.json'
kaggle_dir.mkdir(exist_ok=True)

kaggle_creds = json.loads(Path('kaggle.json').read_text())
kaggle_json_path.write_text(json.dumps(kaggle_creds))

# Owner read/write only.
kaggle_json_path.chmod(0o600)

In [None]:
#Downloading dataset
# Shell command: asks the Kaggle CLI to download the named dataset.
# NOTE(review): no cell in this notebook extracts the download or points
# base_dir at it -- confirm how the files reach base_dir.
!kaggle datasets download -d peaceedogun/nigerian-foods-and-snacks-multiclass

In [None]:
#Cross-checking for any corrupted images in dataset
def check_corrupt_images(directory):
    """Return the paths of files under ``directory`` that Pillow cannot verify.

    The directory tree is walked recursively and every file found is opened
    and passed through ``Image.verify()``.

    Args:
        directory: Root folder to scan.

    Returns:
        list[str]: Paths that raised ``IOError`` or ``SyntaxError`` while
        being opened or verified. Each one is also printed as it is found.
    """
    corrupt_images = []
    for subdir, _dirs, files in os.walk(directory):
        for file in files:
            filepath = os.path.join(subdir, file)
            try:
                # The context manager closes the file handle even when
                # verification fails, so a large scan does not leak handles.
                with Image.open(filepath) as img:
                    img.verify()  # Verify that it is, in fact, an image
            except (IOError, SyntaxError):
                print(f'Bad file: {filepath}')
                corrupt_images.append(filepath)
    return corrupt_images
#Checking dataset
# Scan the dataset root configured in base_dir rather than repeating the
# literal path, so the location only has to be changed in one place.
corrupt_images = check_corrupt_images(base_dir)
print(f'Found {len(corrupt_images)} corrupt images.')

In [None]:
def load_data(train_fp, test_fp, val_fp, target_size=(299, 299), batch_size=32):
    """Create the training, validation and test directory iterators.

    Training images are rescaled to [0, 1] and augmented (shear, zoom,
    horizontal flip). Validation and test images are only rescaled.

    Args:
        train_fp: Directory holding the training images.
        test_fp: Directory holding the test images.
        val_fp: Directory holding the validation images.
        target_size: (height, width) every image is resized to.
        batch_size: Number of images per batch for all three iterators.

    Returns:
        tuple: ``(train_gen, validation_gen, test_gen)``. Note that this order
        differs from the order of the path arguments.
    """
    datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest',
    )
    val_datagen = ImageDataGenerator(
        rescale=1./255,
    )

    # One definition of the iterator settings keeps the three splits in sync.
    flow_options = dict(
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical',
    )

    train_gen = datagen.flow_from_directory(train_fp, **flow_options)
    validation_gen = val_datagen.flow_from_directory(val_fp, **flow_options)
    test_gen = val_datagen.flow_from_directory(test_fp, **flow_options)
    return train_gen, validation_gen, test_gen

# Keyword arguments make the mapping explicit: load_data takes its paths as
# (train, test, val) but returns the iterators as (train, validation, test).
train_gen, validation_gen, test_gen = load_data(
    train_fp=train_dir,
    test_fp=test_dir,
    val_fp=val_dir,
)
print(f"Number of training samples: {train_gen.samples}")
print(f"Number of validation samples: {validation_gen.samples}")

In [None]:
#Visualizing training data distribution to gain insights
def plot_class_distribution(generator, title="Class Distribution for Training Data"):
    """Draw a bar chart of the number of samples per class.

    Args:
        generator: Object exposing ``classes`` (one integer class index per
            sample) and ``class_indices`` (class name -> index).
        title: Title shown above the chart.
    """
    # Getting the class names from the generator
    class_names = list(generator.class_indices.keys())

    # Counting the samples per class. minlength keeps one count per class
    # name even when the highest class indices have no samples.
    class_counts = np.bincount(generator.classes, minlength=len(class_names))

    # Class names on the x-axis, class counts on the y-axis
    sns.barplot(x=class_names, y=class_counts)

    # Rotating the x-axis labels for better readability if they are long
    plt.xticks(rotation=90)
    plt.title(title)
    plt.show()

# Render the class-count chart for the training generator.
plot_class_distribution(train_gen)

In [None]:
#Visualizing validation data distribution to gain insights
def plot_class_distribution(generator, title="Class Distribution for Validation Data"):
    """Draw a bar chart of the number of samples per class.

    Args:
        generator: Object exposing ``classes`` (one integer class index per
            sample) and ``class_indices`` (class name -> index).
        title: Title shown above the chart.
    """
    # Getting the class names from the generator
    class_names = list(generator.class_indices.keys())

    # Counting the samples per class. minlength keeps one count per class
    # name even when the highest class indices have no samples.
    class_counts = np.bincount(generator.classes, minlength=len(class_names))

    # Class names on the x-axis, class counts on the y-axis
    sns.barplot(x=class_names, y=class_counts)

    # Rotating the x-axis labels for better readability if they are long
    plt.xticks(rotation=90)
    plt.title(title)
    plt.show()

# Render the class-count chart for the validation generator.
plot_class_distribution(validation_gen)


In [None]:
#Visualizing test data distribution to gain insights
def plot_class_distribution(generator, title="Class Distribution for Testing Data"):
    """Draw a bar chart of the number of samples per class.

    Args:
        generator: Object exposing ``classes`` (one integer class index per
            sample) and ``class_indices`` (class name -> index).
        title: Title shown above the chart.
    """
    # Getting the class names from the generator
    class_names = list(generator.class_indices.keys())

    # Counting the samples per class. minlength keeps one count per class
    # name even when the highest class indices have no samples.
    class_counts = np.bincount(generator.classes, minlength=len(class_names))

    # Class names on the x-axis, class counts on the y-axis
    sns.barplot(x=class_names, y=class_counts)

    # Rotating the x-axis labels for better readability if they are long
    plt.xticks(rotation=90)
    plt.title(title)
    plt.show()

# Render the class-count chart for the test generator.
plot_class_distribution(test_gen)

The dataset is imbalanced, so higher weights will be assigned to the underrepresented classes during model training.

In [None]:
#Loading the InceptionV3 model
# ImageNet weights, without the original classification head, for 299x299 RGB input.
base_model = InceptionV3(weights = 'imagenet', include_top = False, input_shape = (299,299,3))
#Adding new layers
out = base_model.output
pool = GlobalAveragePooling2D()(out)
#Adding dropout layers to reduce overfitting
pool = Dropout(0.5)(pool)
output = Dense(1024, activation = 'relu')(pool)
output = Dropout(0.5)(output)
# One softmax unit per class found in the training directory.
predictions = Dense(train_gen.num_classes, activation = 'softmax')(output)
#Assembling the layers into a new model (it is compiled in a later cell)
model = Model(inputs = base_model.input, outputs = predictions)
#Freezing the InceptionV3 model layers
# Only the layers added above remain trainable.
for layer in base_model.layers:
    layer.trainable = False

In [None]:
#Compiling the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
#Specifying the callback functions to be used in fine tuning model
callbacks = [
    # Writes 'best_model.keras' (relative to the working directory) whenever val_loss improves.
    ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss', mode='min'),
    # Stops training after 5 epochs without a val_loss improvement.
    EarlyStopping(monitor='val_loss', patience=5, mode='min', verbose=1)
]


In [None]:
#Adjusting weights based on differing class sizes
class_labels = np.unique(train_gen.classes)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=class_labels,
    y=train_gen.classes
)
# Key every weight by the class index it was computed for. dict(enumerate(...))
# is only correct when every index 0..K-1 occurs in train_gen.classes;
# a class with no training images would shift all later weights by one.
class_weights = dict(zip(class_labels.tolist(), class_weights.tolist()))


In [None]:
#Training the new classification head (the InceptionV3 layers are frozen)
# steps_per_epoch / validation_steps are left to Keras, which then runs one
# full pass over each generator per epoch. samples // batch_size skipped the
# final partial batch and evaluates to 0 when a split holds fewer images than
# one batch.
history = model.fit(
    train_gen,
    validation_data=validation_gen,
    epochs=10,
    class_weight=class_weights,
    callbacks=callbacks
)

In [None]:
#Redefining the model creation function
def create_model(learning_rate=0.001, dropout_rate=0.5, num_classes=None):
    """Build and compile an InceptionV3-based classifier with a new head.

    The head is global average pooling, dropout, Dense(1024, relu), dropout
    and a softmax layer. All InceptionV3 layers are frozen.

    Args:
        learning_rate: Learning rate passed to the Adam optimizer.
        dropout_rate: Rate used by both dropout layers.
        num_classes: Number of units in the softmax output layer. Required.

    Returns:
        The compiled Keras model (categorical cross-entropy loss, accuracy metric).

    Raises:
        ValueError: If ``num_classes`` is not provided.
    """
    # Fail before the backbone is built instead of inside the Dense layer.
    if num_classes is None:
        raise ValueError("num_classes must be provided: it sets the size of the softmax output layer")

    base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3))
    out = base_model.output
    pool = GlobalAveragePooling2D()(out)
    pool = Dropout(dropout_rate)(pool)
    output = Dense(1024, activation='relu')(pool)
    output = Dropout(dropout_rate)(output)
    predictions = Dense(num_classes, activation='softmax')(output)
    model = Model(inputs=base_model.input, outputs=predictions)
    # Freeze the pretrained layers so only the new head is trained.
    for layer in base_model.layers:
        layer.trainable = False
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
#Creating a custom KerasClassifier
class KerasClassifier(BaseEstimator, ClassifierMixin):
    """Minimal scikit-learn style wrapper around a Keras model factory.

    Args:
        build_fn: Callable returning a compiled Keras model.
        **sk_params: Hyper-parameters. Keys that ``build_fn`` accepts are
            passed to it; keys that the model's ``fit`` accepts (for example
            ``epochs`` or ``verbose``) are passed to ``fit``.
    """

    def __init__(self, build_fn=None, **sk_params):
        self.build_fn = build_fn
        self.sk_params = sk_params
        self.model = None

    def get_params(self, deep=True):
        """Return ``build_fn`` and every stored hyper-parameter.

        BaseEstimator only discovers parameters named in the ``__init__``
        signature, so values passed through ``**sk_params`` must be exposed
        here for ``clone`` and parameter searches to see them.
        """
        params = dict(self.sk_params)
        params['build_fn'] = self.build_fn
        return params

    def set_params(self, **params):
        """Update ``build_fn`` and/or stored hyper-parameters; returns ``self``."""
        if 'build_fn' in params:
            self.build_fn = params.pop('build_fn')
        self.sk_params.update(params)
        return self

    def fit(self, X, y=None, **fit_params):
        """Build a fresh model and train it.

        Args:
            X: Training input passed to the model's ``fit``.
            y: Targets. May be omitted when ``X`` already yields
                ``(inputs, targets)`` batches.
            **fit_params: Extra arguments for the model's ``fit``. They take
                precedence over values stored in ``sk_params``.

        Returns:
            Whatever the model's ``fit`` returns.
        """
        self.model = self.build_fn(**self.filter_sk_params(self.build_fn))
        # Forward stored settings such as epochs/verbose, which would
        # otherwise be silently ignored.
        fit_args = self.filter_sk_params(self.model.fit)
        fit_args.update(fit_params)
        return self.model.fit(X, y, **fit_args)

    def predict(self, X):
        """Return the index of the highest-scoring class for each sample."""
        return np.argmax(self.model.predict(X), axis=1)

    def filter_sk_params(self, fn):
        """Return the stored hyper-parameters that ``fn`` accepts as arguments."""
        # inspect.signature lists real parameters only. __code__.co_varnames
        # also contains the function's local variables.
        accepted = inspect.signature(fn).parameters
        return {k: v for k, v in self.sk_params.items() if k in accepted}


In [None]:
#Creating a model with KerasClassifier wrapper for GridSearchCV
num_classes = train_gen.num_classes
# NOTE: this rebinds `model`, which until this cell referred to the Keras
# model built and trained above, to a KerasClassifier wrapper.
model = KerasClassifier(build_fn=create_model, num_classes=num_classes, epochs=5, batch_size=16, verbose=0)


In [None]:
#Creating custom scorer to work with modified GridSearchCV
def evaluate_model(model, generator):
    """Evaluate ``model`` on ``generator`` and return its second metric.

    The models in this notebook are compiled with ``metrics=['accuracy']``,
    so index 1 of the evaluation result is the accuracy.
    """
    # No explicit `steps`: samples // batch_size drops the final partial
    # batch and is 0 when there are fewer samples than one batch.
    scores = model.evaluate(generator)
    return scores[1]  #Assuming accuracy is the main focus

def custom_scorer(estimator, X, y=None):
    """Scorer with the ``(estimator, X, y)`` signature scikit-learn calls.

    It is passed to ``scoring=`` as is. ``make_scorer`` is meant for metric
    functions of the form ``(y_true, y_pred)`` and would call this function
    with the wrong arguments.
    """
    return evaluate_model(estimator.model, X)

In [None]:
#Defining the parameter grid for the hyper-parameter search
param_grid = {
    'learning_rate': [1e-3, 1e-2],
    'dropout_rate': [ 0.5, 0.7],
    'batch_size': [16, 32]
}

#Searching the grid
# GridSearchCV needs array-like data that it can slice into folds. train_gen
# is a directory iterator yielding batches, so each combination is instead
# trained on the training directory and scored on the validation generator.
search_epochs = 5
search_results = []
best_model, best_score, best_params = None, -np.inf, None
for params in ParameterGrid(param_grid):
    # Batch size belongs to the iterator, so build one per candidate with the
    # same augmentation settings, directory and image size as train_gen.
    search_train_gen = train_gen.image_data_generator.flow_from_directory(
        train_gen.directory,
        target_size=train_gen.target_size,
        batch_size=params['batch_size'],
        class_mode=train_gen.class_mode
    )
    candidate = create_model(
        learning_rate=params['learning_rate'],
        dropout_rate=params['dropout_rate'],
        num_classes=train_gen.num_classes
    )
    candidate.fit(search_train_gen, epochs=search_epochs, verbose=0)
    # create_model compiles with metrics=['accuracy'], so evaluate returns [loss, accuracy].
    _, val_accuracy = candidate.evaluate(validation_gen, verbose=0)
    search_results.append({**params, 'val_accuracy': val_accuracy})
    print(f"{params}: validation accuracy {val_accuracy}")
    if val_accuracy > best_score:
        best_model, best_score, best_params = candidate, val_accuracy, params

#Summarizing the results of the search
print(f"Best: {best_score} using {best_params}")

In [None]:
#Evaluating the model on the test set
# Score of the model selected by the search, before the additional training below.
test_loss, test_acc = best_model.evaluate(test_gen)
print(f'Test accuracy: {test_acc}')

In [None]:
#Fine-tuning the best model
callbacksnew = [
    # Same file name as the earlier checkpoint callback, so that file is overwritten.
    ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss', mode='min'),
    EarlyStopping(monitor='val_loss', patience=5, mode='min', verbose=1)
]

class_labels_new = np.unique(train_gen.classes)
class_weights_new = compute_class_weight(
    class_weight='balanced',
    classes=class_labels_new,
    y=train_gen.classes
)
# Key every weight by the class index it was computed for. dict(enumerate(...))
# is only correct when every index 0..K-1 occurs in train_gen.classes.
class_weightsn = dict(zip(class_labels_new.tolist(), class_weights_new.tolist()))

historynew = best_model.fit(train_gen, validation_data=validation_gen, epochs=10, class_weight=class_weightsn, callbacks=callbacksnew)

In [None]:
#Evaluating the model on the test set
# Same evaluation as before, repeated after the additional training of best_model.
test_loss, test_acc = best_model.evaluate(test_gen)
print(f'Test accuracy: {test_acc}')

In [None]:
#Generating confusion matrix
def get_predictions_and_labels(generator, model):
    """Collect predicted and true class indices over one pass of ``generator``.

    Args:
        generator: Batch source supporting ``len()`` and integer indexing,
            where each item is ``(images, one_hot_labels)``.
        model: Object whose ``predict(images)`` returns per-class scores.

    Returns:
        tuple: ``(predicted_indices, true_indices)`` as 1-D numpy arrays.
    """
    all_preds = []
    all_labels = []

    # Index exactly len(generator) batches. Keras data generators keep
    # yielding batches indefinitely, so `for batch in generator` never ends.
    for batch_index in range(len(generator)):
        imgs, labels = generator[batch_index]
        preds = model.predict(imgs, verbose=0)  # verbose=0: no progress bar per batch
        all_preds.extend(np.argmax(preds, axis=1))
        all_labels.extend(np.argmax(labels, axis=1))

    return np.array(all_preds), np.array(all_labels)
#Getting predictions and true labels from the test generator
# y_pred / y_true hold one class index per test image and feed the confusion matrix.
y_pred, y_true = get_predictions_and_labels(test_gen, best_model)

In [None]:
#Computing the confusion matrix
# Passing every class index as `labels` keeps the matrix K x K even when a
# class never occurs in y_true or y_pred, so it lines up with the class names
# used as tick labels.
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(test_gen.class_indices))))
print('Confusion Matrix:\n', cm)

#Plotting confusion matrix
def plot_confusion_matrix(cm, class_names):
    """Show ``cm`` as an annotated heatmap labelled with ``class_names``.

    Rows are labelled as true labels and columns as predicted labels.
    """
    _, ax = plt.subplots(figsize=(10, 7))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
        ax=ax,
    )
    ax.set_xlabel('Predicted Labels')
    ax.set_ylabel('True Labels')
    ax.set_title('Confusion Matrix')
    plt.show()

# Class names are taken from the test generator's class_indices mapping,
# in the order its keys are stored.
class_names = list(test_gen.class_indices.keys())
plot_confusion_matrix(cm, class_names)

In [None]:
#Loading the trained model
# Rebinds `model` to the model stored on disk; the prediction helpers below use it.
# NOTE(review): absolute, machine-specific path. The checkpoint callbacks above
# write to the relative path 'best_model.keras' -- confirm this is the same file.
model = tf.keras.models.load_model("C:\\Users\\user\\OneDrive - Ashesi University\\intro to ai\\venv\\best_model.keras")

In [None]:
#Loading additional information from Excel
# info_df is read by get_additional_info, which looks rows up by the 'food_name' column.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
info_df = pd.read_excel("C:/Users/user/OneDrive - Ashesi University/intro to ai/Nigerianfood_additionalinfo.xlsx")

In [None]:
#Preprocessing the image for prediction
def preprocess_image(img):
    """Turn a PIL image into a model-ready batch of one.

    Args:
        img: PIL image in any mode.

    Returns:
        numpy.ndarray: float32 array of shape ``(1, 299, 299, 3)`` with
        values scaled to [0, 1].
    """
    # Force exactly three channels. Grayscale images are expanded, and RGBA,
    # palette or CMYK images no longer reach the model with the wrong
    # channel count.
    if img.mode != 'RGB':
        img = img.convert('RGB')
    img = img.resize((299, 299))  # Resize image to the model's expected input size
    img_array = np.array(img, dtype=np.float32)  # Convert image to numpy array with float32 type
    img_array /= 255.0  # Normalize to [0, 1]
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    return img_array

In [None]:
#Getting the food name based on predicted class index
def get_food_name(predicted_class):
    """Return the food name stored at position ``predicted_class``.

    NOTE(review): the order is assumed to match the class indices the model
    was trained with -- confirm against the training generator's class_indices.
    """
    food_names = [
        'Abacha and Ugba',
        'Akara and Eko',
        'Amala and Gbegiri-Ewedu',
        'Asaro',
        'Boli(Bole)',
        'Chin Chin',
        'Egusi Soup',
        'Ewa-Agoyin',
        'Fried plantains(Dodo)',
        'Jollof Rice',
        'Meat Pie',
        'Moin-moin',
        'Nkwobi',
        'Okro Soup',
        'Pepper Soup',
        'Puff Puff',
        'Suya',
        'Vegetable Soup',
    ]
    return food_names[predicted_class]

In [1]:
#Getting additional info based on the food name
def get_additional_info(food_name):
    """Look up the first ``info_df`` row whose 'food_name' equals ``food_name``.

    Returns:
        dict | None: Display label -> cell value for that row, or ``None``
        when no row matches.
    """
    # Display label -> column name in info_df.
    label_to_column = {
        'Origin or State': 'Origin_or_State',
        'Popular Countries': 'Pop_Countries',
        'Health Benefits': 'Health_Benefits',
        'Calories': 'calories',
        'Nutrient Ratio': 'Nutrient_Ratio',
        'Ingredients': 'Ingredients',
        'Protein Content': 'Protein_Content',
        'Fat Content': 'Fat_Content',
        'Carbohydrate Content': 'Carbohydrate_Content',
        'Allergens': 'Allergens',
        'Mineral Content': 'Mineral-Content',
        'Vitamin Content': 'Vitamin_Content',
        'Suitability': 'Suitability',
        'Fiber Content': 'Fiber_Content',
    }
    matches = info_df[info_df['food_name'] == food_name]
    if matches.empty:
        return None
    row = matches.iloc[0]
    return {label: row[column] for label, column in label_to_column.items()}

In [None]:
#Predict and retrieve additional information
def predict_and_get_info(image):
    """Classify ``image`` and look up the details of the predicted food.

    Returns:
        tuple: ``(food_name, additional_info)``; ``additional_info`` is
        whatever ``get_additional_info`` returns for that name.
    """
    batch = preprocess_image(image)
    class_scores = model.predict(batch)
    # Highest-scoring class of the single image in the batch.
    top_class = np.argmax(class_scores, axis=1)[0]
    name = get_food_name(top_class)
    return name, get_additional_info(name)

In [None]:
#Image to be predicted
# NOTE(review): absolute, machine-specific path -- point it at a local image before running.
image_path = "C:\\Users\\user\\OneDrive - Ashesi University\\intro to ai\\Nigeria\\Nkwobi\\20180617_095955.jpg"

In [None]:
#Loading the image
# Opened with Pillow; the image is passed to predict_and_get_info in the next cell.
image = Image.open(image_path)

In [None]:
#Predicting and getting additional information
# additional_info is None when the predicted name has no row in info_df.
food_name, additional_info = predict_and_get_info(image)

In [None]:
#Displaying results
print(f"Predicted Food: {food_name}")
if not additional_info:
    # Nothing was found for the predicted name (or the lookup result is empty).
    print("No additional information available.")
else:
    # One "label: value" line per entry.
    for key, value in additional_info.items():
        print(f"{key}: {value}")