In [1]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'chest-xray-pneumonia:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F17810%2F23812%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240918%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240918T144215Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3Da28f56d4c4ed28f71d0eb40c3bfe602b5b1769f7accaf7419f9cbb5ef7650a6b1c22b43beec2e1fd3026b3a2d8577d5cde414c86669c09055e2286a751af762f125c571ae554f4819dd36996bace219897abc040d6a5fbaa8527962c6ecd9a66d229e12d46231492eb2483af0be8953aaabee2e86b8d57e738c59d6d986335ad1b9cf5dee8b9618e766ee854c0014156fb5d6beeba1e034838fede5e1715d7008d2be4f9745c6c57a94cd0cf159f0df3f07e1a980077bf9aa83d67c4e816584121c668206d0c2497a0651081b98bf6df00902a3ee91bef7e6096cf74528016a633acec81989acc226a93f176057ee17d405aad18a3e3fae3951ead1b3eafabcc'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading chest-xray-pneumonia, 2463365435 bytes compressed
Downloaded and uncompressed: chest-xray-pneumonia
Data source import complete.


In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
/kaggle/input/chest-xray-pneumonia/chest_xray/chest_xray/train/PNEUMONIA/person1463_bacteria_3811.jpeg
/kaggle/input/chest-xray-pneumonia/chest_xray/chest_xray/train/PNEUMONIA/person1414_bacteria_3628.jpeg
/kaggle/input/chest-xray-pneumonia/chest_xray/chest_xray/train/PNEUMONIA/person1423_bacteria_3650.jpeg
/kaggle/input/chest-xray-pneumonia/chest_xray/chest_xray/train/PNEUMONIA/person512_virus_1029.jpeg
/kaggle/input/chest-xray-pneumonia/chest_xray/chest_xray/train/PNEUMONIA/person735_bacteria_2638.jpeg
/kaggle/input/chest-xray-pneumonia/chest_xray/chest_xray/train/PNEUMONIA/person1502_virus_2612.jpeg
/kaggle/input/chest-xray-pneumonia/chest_xray/chest_xray/train/PNEUMONIA/person372_virus_755.jpeg
/kaggle/input/chest-xray-pneumonia/chest_xray/chest_xray/train/PNEUMONIA/person1230_bacteria_3185.jpeg
/kaggle/input/chest-xray-pneumonia/chest_xray/chest_xray/train/PNEUMONIA/person70_bacteria_343.jpeg
/kaggle/input/chest-xray

In [3]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Image data generator for augmentation
datagen = ImageDataGenerator(
    rescale=1./255,            # Normalize the pixel values
    rotation_range=15,         # Randomly rotate images by 15 degrees
    width_shift_range=0.1,     # Randomly shift images horizontally (10%)
    height_shift_range=0.1,    # Randomly shift images vertically (10%)
    shear_range=0.2,           # Apply random shear transformations
    zoom_range=0.2,            # Randomly zoom into images
    brightness_range=[0.8, 1.2],  # Adjust brightness
    fill_mode='nearest'        # Fill missing pixels with the nearest pixel
)

# Load data from the directory
train_data = datagen.flow_from_directory(
    '/kaggle/input/chest-xray-pneumonia/chest_xray/train',
    target_size=(224, 224),  # Resize to match input size for model
    batch_size=8,            # Small batch size for efficient training
    class_mode='categorical'  # Two classes: 'normal' and 'pneumonia'
)

validation_data = datagen.flow_from_directory(
    '/kaggle/input/chest-xray-pneumonia/chest_xray/val',
    target_size=(224, 224),
    batch_size=8,
    class_mode='categorical'
)

from tensorflow.keras.applications import EfficientNetB0, DenseNet121
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Concatenate, Flatten

# Input layer
input_shape = (224, 224, 3)
inputs = tf.keras.Input(shape=input_shape)

# EfficientNetB0 feature extraction
efficient_net = EfficientNetB0(include_top=False, weights='imagenet', input_tensor=inputs)
efficient_net_output = GlobalAveragePooling2D()(efficient_net.output)

# DenseNet121 feature extraction
dense_net = DenseNet121(include_top=False, weights='imagenet', input_tensor=inputs)
dense_net_output = GlobalAveragePooling2D()(dense_net.output)

# Concatenate features from both networks
concatenated_features = Concatenate()([efficient_net_output, dense_net_output])

# Add Dropout for regularization
concatenated_features = Dropout(0.3)(concatenated_features)

# Final classification layer
outputs = Dense(2, activation='softmax')(concatenated_features)
model = tf.keras.Model(inputs, outputs)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


Found 5216 images belonging to 2 classes.
Found 16 images belonging to 2 classes.
Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [4]:
# Train the model
history = model.fit(train_data, validation_data=validation_data, epochs=5)
model.save("history")

Epoch 1/5


  self._warn_if_super_not_called()


[1m652/652[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m430s[0m 247ms/step - accuracy: 0.8900 - loss: 0.3064 - val_accuracy: 0.5000 - val_loss: 12.0213
Epoch 2/5
[1m652/652[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 222ms/step - accuracy: 0.9421 - loss: 0.1557 - val_accuracy: 0.6875 - val_loss: 0.5617
Epoch 3/5
[1m652/652[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 221ms/step - accuracy: 0.9630 - loss: 0.1074 - val_accuracy: 0.8125 - val_loss: 0.8608
Epoch 4/5
[1m652/652[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 220ms/step - accuracy: 0.9687 - loss: 0.0870 - val_accuracy: 0.5625 - val_loss: 98.6624
Epoch 5/5
[1m652/652[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 224ms/step - accuracy: 0.9712 - loss: 0.0811 - val_accuracy: 0.9375 - val_loss: 0.1278


ValueError: Invalid filepath extension for saving. Please add either a `.keras` extension for the native Keras format (recommended) or a `.h5` extension. Use `model.export(filepath)` if you want to export a SavedModel for use with TFLite/TFServing/etc. Received: filepath=history.

In [6]:
# Image data generator for the test set (no augmentation, just rescaling)
test_datagen = ImageDataGenerator(rescale=1./255)

# Load the test data
test_data = test_datagen.flow_from_directory(
    '/kaggle/input/chest-xray-pneumonia/chest_xray/test',  # Path to the test dataset
    target_size=(224, 224),  # Resize to match input size for model
    batch_size=8,            # Batch size can be the same as training
    class_mode='categorical',  # Two classes: 'normal' and 'pneumonia'
    shuffle=False            # Important: don't shuffle for evaluation
)


# Evaluate the model
test_loss, test_acc = model.evaluate(test_data)
print(f'Test accuracy: {test_acc}')


Found 624 images belonging to 2 classes.
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 91ms/step - accuracy: 0.9434 - loss: 0.1589
Test accuracy: 0.9182692170143127


In [7]:
model.save("model.h5")



In [8]:
model.save("model.keras")

In [9]:
tf.saved_model.save(model, "saved_model")