In [None]:

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read() call while streaming a download.
CHUNK_SIZE = 40960
# Comma-separated list of "<directory>:<percent-encoded download URL>" entries.
# The URL is a signed link carrying X-Goog-Date=20240622T202750Z and
# X-Goog-Expires=259200, so presumably it stopped working a few days after that
# date -- regenerate it if the download fails.
DATA_SOURCE_MAPPING = 'muffin-vs-chihuahua-image-classification:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F2733586%2F4749004%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240622%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240622T202750Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D428067383cfc00054695b559bf65458b53aae0dcd7685d04330835bd3624d0b709b0b44c4132eab944394f09350b0a3810b31fb0cc335b4156067c9457df702a0a241155e83374efe15542d4b08cf0d1cf57cf9a2b080789cc585adf26ed29e3132a1818807864cd41e0f64c27f7fff973d47660456bdd6f2300766ff102ed828ecab39c5d1f41713b97d9e2559ed2c7192e012a20f7bf97d4b3c97455f26e0139a966670d134d0a5fe82475948677a7ac033099217cd5d99d1e59837d0efb3288aa450ba3dc8428d2eb304b449f278dcea8a44413afb6983b8fb1a3e70356a26c10f7c9e39b0faebf9b5f63a2b780ef66b087ca1fbde8a84de960f9f56c269a'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every "<directory>:<percent-encoded URL>" entry of DATA_SOURCE_MAPPING
# and unpack the archive into KAGGLE_INPUT_PATH/<directory>.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only, so the directory name is always the part
    # before it, whatever the URL half contains.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # Content-Length is optional (e.g. chunked responses); without it
            # the download still proceeds, only the progress bar stays empty.
            try:
                total_length = int(fileres.headers['content-length'])
            except (TypeError, ValueError):
                total_length = None
            size_text = total_length if total_length is not None else 'unknown'
            print(f'Downloading {directory}, {size_text} bytes compressed')
            dl = 0
            # read() returns b'' at end of stream, which terminates the loop.
            for data in iter(lambda: fileres.read(CHUNK_SIZE), b''):
                dl += len(data)
                tfile.write(data)
                # Guard against a missing or zero length and clamp the bar to 50 cells.
                done = min(50, int(50 * dl / total_length)) if total_length else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
            # Push buffered bytes to disk: the tar branch reopens the file by
            # name and would otherwise see a truncated archive.
            tfile.flush()
            tfile.seek(0)
            # NOTE(review): extractall() trusts the member paths stored in the
            # archive; only use this with archives from a trusted source.
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; reusing `tarfile` would
                # replace the imported module for every later iteration.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        # Signed download URLs stop working once they expire.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Task: develop a neural network that classifies images into 2 classes.
First, build a simple NN and look at the result, then apply CNN layers to solve the task.
In this notebook I will not use pre-trained models.


In [None]:
# pip install tensorflow  # this notebook uses the TensorFlow library

In [None]:
import tensorflow as tf
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Rescaling, BatchNormalization, Dropout
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
from keras import regularizers
import matplotlib.pyplot as plt

In [None]:
# Load the images from disk with Keras' ImageDataGenerator.
# Each variable must point at the dataset folder of the same name; with the two
# paths swapped the models would be fitted on the test split and validated on
# the training split.
train_dir = '/kaggle/input/muffin-vs-chihuahua-image-classification/train'
test_dir = '/kaggle/input/muffin-vs-chihuahua-image-classification/test'

datagen = ImageDataGenerator(rescale=1./255)  # Scale pixel values to [0, 1]

# Training batches are shuffled: with shuffle=False the files are served in
# sorted order, i.e. one class sub-folder after the other, so most batches
# would contain a single class. The fixed seed keeps the order reproducible.
train_data = datagen.flow_from_directory(train_dir, target_size=(128, 128), batch_size=64, class_mode='sparse', shuffle=True, seed=42)

# Sample order does not change the evaluation metrics, so the test generator
# stays unshuffled.
test_data = datagen.flow_from_directory(test_dir, target_size=(128, 128), batch_size=64, class_mode='sparse', shuffle=False)

In [None]:
# Bare expression as the cell's last line: the notebook displays its repr.
train_data

In [None]:
# Fetch the next batch from the training generator and print the shapes of
# its two parts (x and y are reused by the following cell).
x, y = next(train_data)
print('Shape of x:', x.shape)
print('Shape of y:', y.shape)

In [None]:
# Look at the first element of the batch held in x and y.
first_image = x[0]
first_label = y[0]
print('Shape of first_image:', first_image.shape)
print('First label:', first_label)

In [None]:
# Bare expression as the cell's last line: the notebook displays its repr.
test_data

In [None]:
# Baseline: a plain fully connected network without convolutional layers.
def fcc_nn_model():
    """Build and compile a small dense classifier for 128x128x3 inputs.

    Returns:
        A compiled ``Sequential`` model: Flatten -> Dense(64, relu) ->
        Dense(32, relu) -> Dense(2, softmax), using the Adam optimizer,
        sparse categorical cross-entropy loss and the accuracy metric.
    """
    network = Sequential([
        Flatten(input_shape=[128, 128, 3]),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(2, activation='softmax'),
    ])
    network.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network


In [None]:
# Fit the baseline dense model, validating on test_data after every epoch.
# batch_size is intentionally not passed to fit(): train_data is a generator
# that already yields batches, and Keras documents that batch_size must not be
# specified for generator / Sequence inputs.
model = fcc_nn_model()
history_fcc_nn = model.fit(train_data,
          epochs=50,
          validation_data=test_data)

# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(test_data)

In [None]:
# Visualize how a metric evolved while the model was fitted
def check_metrics(history, figure_name):
    """Plot a training metric and, when recorded, its validation counterpart.

    Args:
        history: Object exposing a ``history`` dict that maps metric names to
            per-epoch value lists (the object returned by ``model.fit``).
        figure_name: Name of the metric to plot, e.g. ``'loss'`` or
            ``'accuracy'``. The series ``f'val_{figure_name}'`` is drawn as
            well when it is present.

    Raises:
        KeyError: If ``figure_name`` is not a key of ``history.history``.
    """
    train = history.history[figure_name]
    # Validation values are only recorded when fit() received validation data.
    val = history.history.get(f'val_{figure_name}')

    # Epochs are numbered from 1 on the x axis.
    epochs = range(1, len(train) + 1)
    # Give each line exactly one colour specification: combining the 'b' format
    # string with color='orange' makes matplotlib warn about a redundant colour.
    plt.plot(epochs, train, color='orange', label=f'Training {figure_name}')
    if val is not None:
        plt.plot(epochs, val, 'b', label=f'Validation {figure_name}')
    plt.title(f'Training and Validation {figure_name}')
    plt.xlabel('Epochs')
    plt.ylabel(f'{figure_name}')
    plt.legend()
    plt.show()

In [None]:
# Draw the curves of the baseline dense model, one figure per metric.
for metric_name in ('loss', 'accuracy'):
    check_metrics(history_fcc_nn, metric_name)

*The model tries to approximate the target, but does it no better than a simple dummy model.*

In [None]:
# Second dense variant: add normalization layers.
def fcc_nn_model_2():
    """Build and compile a dense classifier with batch normalization.

    Returns:
        A compiled ``Sequential`` model: Flatten, then Dense(128), Dense(64)
        and Dense(32) (all relu), each followed by BatchNormalization, and a
        final Dense(2, softmax). Compiled with Adam, sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    network = Sequential()
    network.add(Flatten(input_shape=[128, 128, 3]))
    # Every hidden Dense layer is immediately followed by BatchNormalization.
    for units in (128, 64, 32):
        network.add(Dense(units, activation='relu'))
        network.add(BatchNormalization())
    network.add(Dense(2, activation='softmax'))
    network.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

In [None]:
# Fit the batch-normalized dense model, validating on test_data every epoch.
# batch_size is intentionally not passed to fit(): train_data is a generator
# that already yields batches, and Keras documents that batch_size must not be
# specified for generator / Sequence inputs.
model_fcc_2 = fcc_nn_model_2()
history_fcc_nn_2 = model_fcc_2.fit(train_data,
          epochs=30,
          validation_data=test_data)

# Evaluate the model on the test data
test_loss_fcc, test_accuracy_fcc = model_fcc_2.evaluate(test_data)

In [None]:
# Draw the curves of the batch-normalized dense model, one figure per metric.
for metric_name in ('loss', 'accuracy'):
    check_metrics(history_fcc_nn_2, metric_name)

In [None]:
# Third dense variant: add regularization (L2 weight penalty and dropout).
def fcc_nn_model_3():
    """Build and compile a dense classifier with L2 regularization and dropout.

    Returns:
        A compiled ``Sequential`` model: Flatten, then Dense layers of 128,
        64, 32 and 16 units (relu, L2 kernel regularizer with factor 0.01),
        each followed by Dropout(0.1), and a final Dense(2, softmax).
        Compiled with Adam, sparse categorical cross-entropy loss and the
        accuracy metric.
    """
    network = Sequential()
    network.add(Flatten(input_shape=[128, 128, 3]))
    # Each hidden block is a regularized Dense layer plus a light dropout.
    for units in (128, 64, 32, 16):
        network.add(Dense(units,
                          activation='relu',
                          kernel_regularizer=regularizers.l2(0.01)))
        network.add(Dropout(0.1))
    network.add(Dense(2, activation='softmax'))
    network.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

In [None]:
# Fit the regularized dense model, validating on test_data after every epoch.
model_fcc_3 = fcc_nn_model_3()
history_fcc_nn_3 = model_fcc_3.fit(x=train_data, validation_data=test_data, epochs=30)

# Evaluate the model on the test data
test_loss_fcc_3, test_accuracy_fcc_3 = model_fcc_3.evaluate(x=test_data)
print('Accuracy on test', test_accuracy_fcc_3)

**Conclusion: simple fully connected networks are not able to explain the target; let's use a CNN.**

In [None]:
# Add convolutional layers, which are more suitable for this kind of task.
def conv_nn_model():
    """Build and compile a minimal convolutional classifier for 128x128x3 inputs.

    Returns:
        A compiled ``Sequential`` model: Conv2D(64, 2x2, relu) ->
        MaxPooling2D(2x2) -> Flatten -> Dense(32, relu) -> Dense(2, softmax),
        using Adam, sparse categorical cross-entropy loss and the accuracy
        metric.
    """
    cnn = Sequential([
        Conv2D(64, kernel_size=(2, 2), activation='relu', input_shape=[128, 128, 3]),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(32, activation='relu'),
        Dense(2, activation='softmax'),
    ])
    cnn.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return cnn

In [None]:
# Fit the single-convolution model, validating on test_data after every epoch.
model_1 = conv_nn_model()
history_1 = model_1.fit(x=train_data, validation_data=test_data, epochs=50)

# Evaluate the fitted model on test_data.
test_loss_1, test_accuracy_1 = model_1.evaluate(x=test_data)

In [None]:
# Let's look at the plots, one figure per metric.
for metric_name in ('loss', 'accuracy'):
    check_metrics(history_1, metric_name)

*A very simple structure cannot capture the dependencies in our target.*

In [None]:
# Deeper CNN with regularization layers added.
def conv_nn_model_2():
    """Build and compile a three-stage CNN with batch normalization.

    Returns:
        A compiled ``Sequential`` model made of three
        Conv2D(3x3, relu, padding='same') -> MaxPooling2D(3x3) ->
        BatchNormalization stages with 64, 64 and 32 filters, followed by
        Flatten, Dense(32, relu), Dropout(0.1), Dense(16, relu),
        BatchNormalization and Dense(2, softmax). Compiled with Adam, sparse
        categorical cross-entropy loss and the accuracy metric.
    """
    cnn = Sequential([
        # Convolutional stage 1 (declares the input shape).
        Conv2D(64, kernel_size=(3, 3), activation='relu',
               input_shape=[128, 128, 3], padding='same'),
        MaxPooling2D(pool_size=(3, 3)),
        BatchNormalization(),
        # Convolutional stage 2.
        Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'),
        MaxPooling2D(pool_size=(3, 3)),
        BatchNormalization(),
        # Convolutional stage 3.
        Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same'),
        MaxPooling2D(pool_size=(3, 3)),
        BatchNormalization(),
        # Dense classification head.
        Flatten(),
        Dense(32, activation='relu'),
        Dropout(0.1),
        Dense(16, activation='relu'),
        BatchNormalization(),
        Dense(2, activation='softmax'),
    ])
    cnn.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return cnn

In [None]:
# Fit the batch-normalized CNN, validating on test_data after every epoch.
model_2 = conv_nn_model_2()
history_2 = model_2.fit(x=train_data, validation_data=test_data, epochs=50)

# Evaluate the model on the test data
test_loss_2, test_accuracy_2 = model_2.evaluate(x=test_data)
print('Accuracy on test', test_accuracy_2)

In [None]:
# Draw the curves of the batch-normalized CNN, one figure per metric.
for metric_name in ('loss', 'accuracy'):
    check_metrics(history_2, metric_name)

In [None]:
# Remove the regularization around the convolutional layers, as it had a bad
# impact on the results of the previous model.
def conv_nn_model_3():
    """Build and compile a three-stage CNN with dropout only in the dense head.

    Returns:
        A compiled ``Sequential`` model: Conv2D(64, 3x3) -> MaxPooling2D(3x3)
        -> Conv2D(64, 2x2) -> MaxPooling2D(2x2) -> Conv2D(32, 3x3) ->
        MaxPooling2D(3x3) -> Flatten -> Dense(32) -> Dropout(0.01) ->
        Dense(16) -> Dropout(0.01) -> Dense(2, softmax); hidden layers use
        relu. Compiled with Adam, sparse categorical cross-entropy loss and
        the accuracy metric.
    """
    cnn = Sequential([
        # Convolutional feature extractor.
        Conv2D(64, kernel_size=(3, 3), activation='relu', input_shape=[128, 128, 3]),
        MaxPooling2D(pool_size=(3, 3)),
        Conv2D(64, kernel_size=(2, 2), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(32, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(3, 3)),
        # Dense classification head with very light dropout.
        Flatten(),
        Dense(32, activation='relu'),
        Dropout(0.01),
        Dense(16, activation='relu'),
        Dropout(0.01),
        Dense(2, activation='softmax'),
    ])
    cnn.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return cnn

In [None]:
# Fit the CNN built by conv_nn_model_3, validating on test_data every epoch.
model_2_2 = conv_nn_model_3()
history_2_2 = model_2_2.fit(x=train_data, validation_data=test_data, epochs=60)

# Evaluate the model on the test data
test_loss_2_2, test_accuracy_2_2 = model_2_2.evaluate(x=test_data)
print('Accuracy on test', test_accuracy_2_2)

In [None]:
# Draw the curves of model_2_2, one figure per metric.
for metric_name in ('loss', 'accuracy'):
    check_metrics(history_2_2, metric_name)

In [None]:
# The model already starts to capture the target; try to improve it further.
def model_cnn():
    """Build and compile a three-stage CNN with light dropout after every stage.

    Returns:
        A compiled ``Sequential`` model: three Conv2D(relu) -> MaxPooling2D ->
        Dropout(0.01) stages (64 filters 3x3 / pool 3x3, 64 filters 2x2 /
        pool 2x2, 32 filters 3x3 / pool 3x3), then Flatten, Dense(32) and
        Dense(16) (relu, each followed by Dropout(0.01)) and a final
        Dense(2, softmax). Compiled with Adam, sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    # (filters, kernel_size, pool_size) of each convolutional stage, in order.
    conv_stages = (
        (64, (3, 3), (3, 3)),
        (64, (2, 2), (2, 2)),
        (32, (3, 3), (3, 3)),
    )

    cnn = Sequential()
    for stage_index, (filters, kernel, pool) in enumerate(conv_stages):
        # Only the very first layer declares the input shape.
        input_kwargs = {'input_shape': [128, 128, 3]} if stage_index == 0 else {}
        cnn.add(Conv2D(filters, kernel_size=kernel, activation='relu', **input_kwargs))
        cnn.add(MaxPooling2D(pool_size=pool))
        cnn.add(Dropout(0.01))

    cnn.add(Flatten())
    for units in (32, 16):
        cnn.add(Dense(units, activation='relu'))
        cnn.add(Dropout(0.01))
    cnn.add(Dense(2, activation='softmax'))

    cnn.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return cnn

In [None]:
# Fit the CNN built by model_cnn, validating on test_data after every epoch.
model_3 = model_cnn()
history_3 = model_3.fit(
    train_data,
    epochs=60,
    validation_data=test_data,
)

# Evaluate the fitted model on test_data and report its accuracy.
test_loss_3, test_accuracy_3 = model_3.evaluate(x=test_data)
print('Accuracy on test', test_accuracy_3)

In [None]:
# Best per-epoch training accuracy recorded while fitting model_3.
highest_accuracy_3 = max(history_3.history['accuracy'])
print('Highest train accuracy:', highest_accuracy_3)

# fit() was given validation_data=test_data, so 'val_accuracy' holds the test
# accuracy after every epoch; its maximum is the highest test accuracy reached.
# (Calling evaluate() again would only repeat the final-epoch value.)
highest_test_accuracy_3 = max(history_3.history['val_accuracy'])
print('Highest test accuracy:', highest_test_accuracy_3)

In [None]:
# Draw the curves of model_3, one figure per metric.
for metric_name in ('loss', 'accuracy'):
    check_metrics(history_3, metric_name)

**The best model has the following structure:**
-  model= Sequential()
- model.add(Conv2D(64, kernel_size=(3,3), activation='relu', input_shape=[128,128,3]))
- model.add(MaxPooling2D(pool_size=(3,3)))
- model.add(Dropout(0.01))
- model.add(Conv2D(64, kernel_size=(2,2), activation='relu'))
- model.add(MaxPooling2D(pool_size=(2,2)))
- model.add(Dropout(0.01))
- model.add(Conv2D(32, kernel_size=(3,3), activation='relu'))
- model.add(MaxPooling2D(pool_size=(3,3)))
- model.add(Dropout(0.01))
- model.add(Flatten())
- model.add(Dense (32, activation='relu'))
- model.add(Dropout(0.01))
- model.add(Dense(16, activation='relu'))
- model.add(Dropout(0.01))
- model.add(Dense(2, activation='softmax'))

- model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics = ['accuracy'])


**Best accuracy, reached at 60 epochs: 0.83, without using any pre-trained models**

*This task needs convolutional layers to be solved.*