

In [39]:
!mkdir ~/.kaggle
!mv kaggle.json ~/.kaggle/kaggle.json

# Set appropriate permissions
!chmod 600 ~/.kaggle/kaggle.json

mkdir: cannot create directory ‘/root/.kaggle’: File exists
mv: cannot stat 'kaggle.json': No such file or directory


In [40]:
import os
import kaggle
import gdown

def download_kaggle_dataset_to_drive(dataset_name, drive_folder_path):
    # Create a folder in Google Drive to store the downloaded dataset
    if not os.path.exists(drive_folder_path):
        os.makedirs(drive_folder_path)

    # Set the path where you want to download the dataset in the drive_folder_path
    download_path = os.path.join(drive_folder_path, dataset_name)

    # Use the Kaggle API to download the dataset files
    kaggle.api.dataset_download_files(dataset_name, path=download_path, unzip=True, quiet=True)

    # List the downloaded files
    downloaded_files = os.listdir(download_path)
    print(f'Downloaded {len(downloaded_files)} files from the dataset: {dataset_name}')

# Example usage:
dataset_name = 'kmader/food41'
drive_folder_path = '/content/drive/MyDrive/KaggleDatasets'
download_kaggle_dataset_to_drive(dataset_name, drive_folder_path)
csv_file_path = os.path.join(drive_folder_path, dataset_name)


Downloaded 13 files from the dataset: kmader/food41


In [41]:
file_path = os.path.join(drive_folder_path, dataset_name)

In [42]:
file_path

'/content/drive/MyDrive/KaggleDatasets/kmader/food41'

In [26]:
import numpy as np
import pandas as pd
from pathlib import Path
import os.path

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

import tensorflow as tf

from sklearn.metrics import confusion_matrix,classification_report

In [44]:
image_dir = Path(file_path+"/images")

In [45]:
image_dir

PosixPath('/content/drive/MyDrive/KaggleDatasets/kmader/food41/images')

### Creating Dataframe


In [46]:
filepaths=list(image_dir.glob(r"**/*.jpg"))

In [47]:
labels=list(map(lambda x:os.path.split(os.path.split(x)[0])[1],filepaths))

In [48]:
len(labels)

101000

In [49]:
filepaths = pd.Series(filepaths,name="Filepath").astype(str)
labels = pd.Series(labels,name="Label")


In [50]:
images = pd.concat([filepaths,labels],axis=1)

In [51]:
images.head()

Unnamed: 0,Filepath,Label
0,/content/drive/MyDrive/KaggleDatasets/kmader/f...,macaroni_and_cheese
1,/content/drive/MyDrive/KaggleDatasets/kmader/f...,macaroni_and_cheese
2,/content/drive/MyDrive/KaggleDatasets/kmader/f...,macaroni_and_cheese
3,/content/drive/MyDrive/KaggleDatasets/kmader/f...,macaroni_and_cheese
4,/content/drive/MyDrive/KaggleDatasets/kmader/f...,macaroni_and_cheese


In [69]:
category_samples=[]

for category in images["Label"].unique():
  category_slice = images.query("Label == @category")
  category_samples.append(category_slice.sample(300,random_state=2))
image_df = pd.concat(category_samples,axis=0).sample(frac=1.0,random_state=2).reset_index(drop=True)

In [70]:
image_df

Unnamed: 0,Filepath,Label
0,/content/drive/MyDrive/KaggleDatasets/kmader/f...,pho
1,/content/drive/MyDrive/KaggleDatasets/kmader/f...,hummus
2,/content/drive/MyDrive/KaggleDatasets/kmader/f...,dumplings
3,/content/drive/MyDrive/KaggleDatasets/kmader/f...,baby_back_ribs
4,/content/drive/MyDrive/KaggleDatasets/kmader/f...,panna_cotta
...,...,...
30295,/content/drive/MyDrive/KaggleDatasets/kmader/f...,tuna_tartare
30296,/content/drive/MyDrive/KaggleDatasets/kmader/f...,gyoza
30297,/content/drive/MyDrive/KaggleDatasets/kmader/f...,grilled_salmon
30298,/content/drive/MyDrive/KaggleDatasets/kmader/f...,frozen_yogurt


#Train test Split



In [72]:
train_df, test_df = train_test_split(image_df,train_size=0.7,shuffle=True,random_state=2)

In [73]:
import tensorflow as tf

train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input,
    validation_split=0.2
)

test_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input
)


In [74]:
train_images=train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col="Filepath",
    y_col="Label",
    target_size=(224,224),
    color_mode="rgb",
    class_mode="categorical",
    batch_size=32,
    shuffle=True,
    seed=42,
    subset="training"
)
val_images=train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col="Filepath",
    y_col="Label",
    target_size=(224,224),
    color_mode="rgb",
    class_mode="categorical",
    batch_size=32,
    shuffle=True,
    seed=42,
    subset="validation"
)
test_images=test_generator.flow_from_dataframe(
    dataframe=test_df,
    x_col="Filepath",
    y_col="Label",
    target_size=(224,224),
    color_mode="rgb",
    class_mode="categorical",
    batch_size=32,
    shuffle=False
)


Found 16968 validated image filenames belonging to 101 classes.
Found 4242 validated image filenames belonging to 101 classes.
Found 9090 validated image filenames belonging to 101 classes.


#Model MobileNetV2



In [75]:
pretrained_model=tf.keras.applications.MobileNetV2(
    input_shape=(224,224,3),
    include_top=False,
    weights="imagenet",
    pooling="avg"
)
pretrained_model.trainable=False

In [76]:
 inputs=pretrained_model.input

 x=tf.keras.layers.Dense(129,activation="relu")(pretrained_model.output)
 x=tf.keras.layers.Dense(129,activation="relu")(x)

outputs= tf.keras.layers.Dense(101,activation="softmax")(x)
model = tf.keras.Model(inputs,outputs)

#Training

In [77]:
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)
history=model.fit(
    train_images,
    validation_data=val_images,
    epochs=100,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor="val_loss",
            patience=10,
            restore_best_weights=True
        )
    ]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100

KeyboardInterrupt: ignored

#Result 1


In [None]:
results = model.evaluate(test_images,verbose=0)
print("Test Accuracy: {:.2f}%".format(results[1]*100))

In [None]:
predictions=np.argmax(model.predict(test_images),axis=1)

cm = confusion_matrix(test_images.labels,predictions)
clr = classification_report(test_images.labels,predictions,target_names=test_images.class_indices)

In [65]:
plt.figure(figsize=(30,30))
sns.heatmap(cm,annot=True,fmt="g",vmin=0,cmap="Blues",cbar=False,rotation=90)
plt.xticks(ticks=np.arange(101)+0.5,labels=test_images.class_indices)
plt.xlabel("predicted")
plt.ylabel("actual")
plt.title("confusion matrix")
plt.show()


([<matplotlib.axis.XTick at 0x7cc108313e50>,
  <matplotlib.axis.XTick at 0x7cc108313e20>,
  <matplotlib.axis.XTick at 0x7cc10838bbe0>,
  <matplotlib.axis.XTick at 0x7cc10838be20>,
  <matplotlib.axis.XTick at 0x7cc1081acb50>,
  <matplotlib.axis.XTick at 0x7cc1081ad330>,
  <matplotlib.axis.XTick at 0x7cc1081adb10>,
  <matplotlib.axis.XTick at 0x7cc1081ae2f0>,
  <matplotlib.axis.XTick at 0x7cc1081ad900>,
  <matplotlib.axis.XTick at 0x7cc1081ac250>,
  <matplotlib.axis.XTick at 0x7cc1081aec80>,
  <matplotlib.axis.XTick at 0x7cc1081af460>,
  <matplotlib.axis.XTick at 0x7cc1081afc40>,
  <matplotlib.axis.XTick at 0x7cc1081af2e0>,
  <matplotlib.axis.XTick at 0x7cc1081ad4e0>,
  <matplotlib.axis.XTick at 0x7cc1081dc6a0>,
  <matplotlib.axis.XTick at 0x7cc1081dce80>,
  <matplotlib.axis.XTick at 0x7cc1081dd660>,
  <matplotlib.axis.XTick at 0x7cc1081aee30>,
  <matplotlib.axis.XTick at 0x7cc1081dd030>,
  <matplotlib.axis.XTick at 0x7cc1081de080>,
  <matplotlib.axis.XTick at 0x7cc1081de860>,
  <matplot

Error in callback <function flush_figures at 0x7cc1ab189360> (for post_execute):


KeyboardInterrupt: ignored

In [66]:
print(clr)

              precision    recall  f1-score   support

           0       0.14      0.03      0.05        31
           1       0.20      0.54      0.29        26
           2       0.44      0.35      0.39        34
           3       0.62      0.32      0.42        25
           4       0.17      0.48      0.26        29
           5       0.29      0.18      0.22        22
           6       0.59      0.79      0.68        28
           7       0.77      0.57      0.65        30
           8       0.27      0.23      0.25        26
           9       0.48      0.45      0.47        31
          10       0.25      0.68      0.36        31
          11       0.28      0.72      0.40        25
          12       0.44      0.43      0.44        28
          13       0.32      0.43      0.37        21
          14       0.17      0.53      0.26        32
          15       0.50      0.06      0.10        36
          16       0.32      0.44      0.38        27
          17       0.25    

#model 2


In [None]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50

pretrained_model = ResNet50(
    input_shape=(224, 224, 3),
    include_top=False,
    weights="imagenet",
    pooling="avg"
)
pretrained_model.trainable = False

inputs = pretrained_model.input
x = tf.keras.layers.Dense(129, activation="relu")(pretrained_model.output)
x = tf.keras.layers.Dense(129, activation="relu")(x)

outputs = tf.keras.layers.Dense(101, activation="softmax")(x)
model = tf.keras.Model(inputs, outputs)
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

history = model.fit(
    train_images,
    validation_data=val_images,
    epochs=20,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor="val_loss",
            patience=10,
            restore_best_weights=True
        )
    ]
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
 77/531 [===>..........................] - ETA: 1:29 - loss: 4.1880 - accuracy: 0.0657

In [None]:
results = model.evaluate(test_images,verbose=0)
print("Test Accuracy: {:.2f}%".format(results[1]*100))

In [None]:
predictions=np.argmax(model.predict(test_images),axis=1)

cm = confusion_matrix(test_images.labels,predictions)
clr = classification_report(test_images.labels,predictions,target_names=test_images.class_indices)

In [None]:
plt.figure(figsize=(30,30))
sns.heatmap(cm,annot=True,fmt="g",vmin=0,cmap="Blues",cbar=False,rotation=90)
plt.xticks(ticks=np.arange(101)+0.5,labels=test_images.class_indices)
plt.xlabel("predicted")
plt.ylabel("actual")
plt.title("confusion matrix")
plt.show()
