In [None]:
import csv
import os
import cv2
import numpy as np
import pandas as pd
import imgaug.augmenters as iaa
from sklearn.model_selection import train_test_split, KFold
from sklearn.utils import shuffle
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_validate, StratifiedKFold
from sklearn import metrics
from sklearn.metrics import precision_score, recall_score, f1_score, auc
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications.resnet50 import preprocess_input
from keras.utils import to_categorical

### Save the filenames to a CSV file and read it back

In [None]:
# Save the image filenames of every split to a CSV index (columns: filename,
# ground_truth), then read the three indexes back as shuffled DataFrames.
DATASET_DIR = "/kaggle/input/food11-image-dataset"
INDEX_DIR = "/kaggle/working"
# The position of a food type in this list is its integer class label.
FOOD_TYPES = ["Egg", "Meat", "Noodles-Pasta", "Rice", "Vegetable-Fruit"]
# Fixed seed so the shuffled row order is identical on every run.
SHUFFLE_SEED = 42

for split in ["training", "validation", "evaluation"]:
    index_path = os.path.join(INDEX_DIR, "dataset-1-" + split + ".csv")
    # Open each index once and write the header plus every row in a single pass.
    with open(index_path, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["filename", "ground_truth"])
        for label, food_type in enumerate(FOOD_TYPES):
            # os.listdir returns entries in arbitrary order; sort them so the
            # index file has a stable row order.
            filenames = sorted(os.listdir(os.path.join(DATASET_DIR, split, food_type)))
            # Keep "/" as the separator: the stored value is later appended to
            # a "/"-terminated directory string when the images are loaded.
            writer.writerows(
                (food_type + "/" + filename, label) for filename in filenames
            )

# read the csv files and shuffle the rows (seeded, so the order is repeatable)
train_data = shuffle(
    pd.read_csv(os.path.join(INDEX_DIR, "dataset-1-training.csv")),
    random_state=SHUFFLE_SEED,
)
valid_data = shuffle(
    pd.read_csv(os.path.join(INDEX_DIR, "dataset-1-validation.csv")),
    random_state=SHUFFLE_SEED,
)
test_data = shuffle(
    pd.read_csv(os.path.join(INDEX_DIR, "dataset-1-evaluation.csv")),
    random_state=SHUFFLE_SEED,
)

### Preprocess the data

In [None]:
# different amounts of training data
# train_data = train_data.sample(frac=0.5)

# preprocess the data

IMAGE_ROOT = "/kaggle/input/food11-image-dataset"
IMAGE_SIZE = (100, 100)  # target size passed to cv2.resize
N_AUGMENTED = 5  # augmented copies generated per original image

# data augmentation
seq = iaa.Sequential([
    iaa.Fliplr(p=0.5), # horizontally flip 50% of all images
    iaa.Affine(rotate=(-20, 20), mode='symmetric'), # rotate the images between -20 and 20 degrees, use symmetric padding mode
    iaa.Crop(percent=(0, 0.2)), # crop images by 0 to 20% of their height/width
])


def load_split(index, split, n_augmented=0):
    """Read, resize and optionally augment every image listed in ``index``.

    Parameters
    ----------
    index : pandas.DataFrame
        Frame with a 'filename' column (path relative to the split directory)
        and a 'ground_truth' column (class label).
    split : str
        Name of the split sub-directory under IMAGE_ROOT.
    n_augmented : int, default 0
        Number of augmented copies (produced by ``seq``) appended after each
        original image; every copy carries the label of its original.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The stacked images and their labels, in matching order.

    Raises
    ------
    OSError
        If cv2.imread cannot read one of the listed files.
    """
    images = []
    labels = []
    for filename, label in zip(index['filename'], index['ground_truth']):
        path = IMAGE_ROOT + "/" + split + "/" + filename
        img = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cv2.resize.
        if img is None:
            raise OSError(f"cv2.imread could not read image: {path}")
        img = cv2.resize(img, IMAGE_SIZE) # resize the image size
        # img = preprocess_input(img) # standardize (left disabled, as before)
        images.append(img)
        labels.append(label)
        for _ in range(n_augmented): # generate augmented images per original image
            img_aug = seq(image=img)
            img_aug = cv2.resize(img_aug, IMAGE_SIZE)
            images.append(img_aug)
            labels.append(label)
    return np.array(images), np.array(labels)


# read the image files
x_train, y_train = load_split(train_data, "training", n_augmented=N_AUGMENTED)
print(x_train.shape)

# NOTE(review): the validation split is augmented exactly like the training
# split, which is the behaviour this cell always had. Confirm that is intended;
# validation data is usually left un-augmented.
x_valid, y_valid = load_split(valid_data, "validation", n_augmented=N_AUGMENTED)
print(x_valid.shape)

x_test, y_test = load_split(test_data, "evaluation")
print(x_test.shape)

### ResNet

In [None]:
# convert the labels to one_hot_encoding
y_train_one_hot = to_categorical(y_train, num_classes=5)
y_valid_one_hot = to_categorical(y_valid, num_classes=5)
y_test_one_hot = to_categorical(y_test, num_classes=5)


def to_resnet_input(images_bgr):
    """Convert a batch of OpenCV images into the input ResNet50 expects.

    The images were loaded with cv2.imread, which yields BGR channel order.
    Keras' ResNet50 ``preprocess_input`` expects RGB input (it converts to BGR
    itself and zero-centres each channel against ImageNet), so the channel
    axis is reversed first. ``astype`` makes a float32 copy, so the array
    passed in is left unmodified for other cells.
    """
    images_rgb = images_bgr[..., ::-1].astype("float32")
    return preprocess_input(images_rgb)


# The ImageNet weights are frozen below, so the inputs must follow the
# preprocessing those weights were trained with.
x_train_resnet = to_resnet_input(x_train)
x_valid_resnet = to_resnet_input(x_valid)
x_test_resnet = to_resnet_input(x_test)

# define the model
base_model = ResNet50(weights="imagenet", include_top=False, input_shape=(100, 100, 3))
x = base_model.output
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
predictions = Dense(5, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

# freeze the weight of ResNet50 (only the new Dense head is trained)
for layer in base_model.layers:
    layer.trainable = False

# compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

model.fit(
    x_train_resnet,
    y_train_one_hot,
    validation_data=(x_valid_resnet, y_valid_one_hot),
    epochs=5,
)
print("\n")

# calculate and print the results
test_loss, test_acc = model.evaluate(x_test_resnet, y_test_one_hot)
test_pred = model.predict(x_test_resnet)
# class with the highest softmax score for each test image
test_pred_classes = np.argmax(test_pred, axis=1)
precision = precision_score(y_test, test_pred_classes, average='weighted')
recall = recall_score(y_test, test_pred_classes, average='weighted')
f1 = f1_score(y_test, test_pred_classes, average='weighted')
print("\n")
print(f"Test accuracy: {test_acc:.3f}")
print(f'Precision: {precision:.3f}')
print(f'Recall: {recall:.3f}')
print(f'F1 score: {f1:.3f}')

### LogisticRegression, KNN, Decision Tree

In [None]:
# # logisticRegression model
# model = LogisticRegression(penalty='l1', C=0.01, solver='liblinear')

# # KNN model
# # model = KNeighborsClassifier(n_neighbors=5)

# # DecisionTree
# # model = DecisionTreeClassifier(max_depth=5, random_state=42)

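# # train model on all training data
# # (NOTE: scikit-learn estimators expect 2-D input of shape (n_samples, n_features),
# #  so the image arrays must be flattened first, e.g. x_train.reshape(len(x_train), -1),
# #  and likewise for x_test)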
# model.fit(x_train, y_train)

# # predict test data
# y_pred_one_hot = model.predict_proba(x_test)
# y_pred = np.argmax(y_pred_one_hot, axis=1)

# # calculate accuracy, precision, recall, f1
# precision = metrics.precision_score(y_test, y_pred, average='weighted')
# recall = metrics.recall_score(y_test, y_pred, average='weighted')
# accuracy = metrics.accuracy_score(y_test, y_pred)
# f1 = metrics.f1_score(y_test, y_pred, average='weighted')

# print("Test data:")
# print(f'Accuracy: {accuracy:.3f}')
# print(f'Precision: {precision:.3f}')
# print(f'Recall: {recall:.3f}')
# print(f'F1 Score: {f1:.3f}')