In [1]:
from tensorflow.keras.layers.experimental.preprocessing import Rescaling, RandomZoom
from code.data_loader import get_datasets
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.metrics import Recall
import tensorflow as tf
tf.random.set_seed(42)
import pandas as pd

# Loader options, passed to get_datasets below
config = dict(
    batch_size=64,
    image_size=(64, 64),
    color_mode="grayscale",
    label_mode="binary",
    shuffle_buffer_size=1000,
)

# Rescaling layer (factor 1/255) built outside the loader and handed to it
rescaling_layer = Rescaling(1./255)

# On-disk location of each data split
directories = dict(
    train="data/chest_xray/new_train",
    test="data/chest_xray/new_test",
    val="data/chest_xray/new_val",
)

# get_datasets returns the splits in (train, test, val) order;
# the rescaling layer is supplied as its preprocessing_layer argument
train_ds, test_ds, val_ds = get_datasets(
    directories,
    config,
    preprocessing_layer=rescaling_layer,
)


2024-02-24 23:15:08.415108: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Found 4684 files belonging to 2 classes.
Found 587 files belonging to 2 classes.
Found 585 files belonging to 2 classes.


In [None]:
## Data Preparation

Describe and justify the process for preparing the data for analysis.

***
Questions to consider:
* Were there variables you dropped or created?
* How did you address missing values or outliers?
* Why are these choices appropriate given the data and the business problem?
***

In [None]:
# from tensorflow.keras.utils import image_dataset_from_director
# from tensorflow.keras.layers.experimental.preprocessing import Rescaling
# from tensorflow.keras.callbacks import EarlyStopping
# import matplotlib.pyplot as plt
# from tensorflow.keras.regularizers import l1, l2
# from tensorflow.keras import layers
# import warnings
# warnings.filterwarnings('ignore')
# from tensorflow.keras.optimizers import SGD
# import time
# import random
# random.seed(42)
# import numpy as np
# np.random.seed(42)


# Baseline CNN: three Conv2D/MaxPooling2D stages over a 64x64 single-channel
# input, then a 128-unit dense layer and one sigmoid unit for the binary label.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# `metrics` is documented as a list, so the single Recall metric is wrapped in one.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=[Recall(name='recall')])

# verbose=0 keeps the 50 per-epoch progress lines out of the cell output.
history = model.fit(train_ds,
                    epochs=50,
                    validation_data=val_ds,
                    verbose=0)

# return_dict=True pairs every score with its metric name, so the table's
# columns no longer depend on the ordering or count of history.history keys.
train_scores = model.evaluate(train_ds, return_dict=True)
val_scores = model.evaluate(val_ds, return_dict=True)
# Diff is validation minus training for each metric.
diff_scores = {name: val_scores[name] - train_scores[name] for name in train_scores}
display(pd.DataFrame([train_scores, val_scores, diff_scores],
                     index=['Train', 'Val', 'Diff']))
print('------------------------------')
print('')

In [None]:
# Same architecture as the baseline, but every ReLU layer uses He-normal
# weight initialisation.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', input_shape=(64, 64, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu', kernel_initializer='he_normal'),
    Dense(1, activation='sigmoid'),
])

# `metrics` is documented as a list, so the single Recall metric is wrapped in one.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=[Recall(name='recall')])

history = model.fit(train_ds,
                    epochs=50,
                    validation_data=val_ds,
                    verbose=1)

# return_dict=True pairs every score with its metric name, so the table's
# columns no longer depend on the ordering or count of history.history keys.
train_scores = model.evaluate(train_ds, return_dict=True)
val_scores = model.evaluate(val_ds, return_dict=True)
# Diff is validation minus training for each metric.
diff_scores = {name: val_scores[name] - train_scores[name] for name in train_scores}
display(pd.DataFrame([train_scores, val_scores, diff_scores],
                     index=['Train', 'Val', 'Diff']))
print('------------------------------')
print('')


In [2]:
# He-normal CNN with Dropout(0.5) inserted between the dense layer and the
# sigmoid output.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', input_shape=(64, 64, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu', kernel_initializer='he_normal'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# `metrics` is documented as a list, so the single Recall metric is wrapped in one.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=[Recall(name='recall')])

history = model.fit(train_ds,
                    epochs=1,
                    validation_data=val_ds,
                    verbose=1)

# return_dict=True pairs every score with its metric name, so the table's
# columns no longer depend on the ordering or count of history.history keys.
train_scores = model.evaluate(train_ds, return_dict=True)
val_scores = model.evaluate(val_ds, return_dict=True)
# Diff is validation minus training for each metric.
diff_scores = {name: val_scores[name] - train_scores[name] for name in train_scores}
display(pd.DataFrame([train_scores, val_scores, diff_scores],
                     index=['Train', 'Val', 'Diff']))
print('------------------------------')
print('')



Unnamed: 0,loss,recall
Train,0.231767,0.899649
Val,0.225081,0.885246
Diff,-0.006686,-0.014403


------------------------------



In [None]:
 # Define your data augmentation
# RandomZoom is used through the name imported in the first cell; the
# `layers` module is not in scope at this point when cells run top to bottom.
data_augmentation = Sequential([
    RandomZoom(0.3),
    # Add more augmentation layers if needed
])

# Apply augmentation to the training dataset only; training=True forces the
# random zoom to be active inside the map call.
augmented_train_ds = train_ds.map(lambda x, y: (data_augmentation(x, training=True), y))

# He-normal CNN with Dropout(0.5) before the sigmoid output.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', input_shape=(64, 64, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu', kernel_initializer='he_normal'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# `metrics` is documented as a list, so the single Recall metric is wrapped in one.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=[Recall(name='recall')])

# Fit on the augmented dataset; fitting on train_ds would leave the
# augmentation pipeline above unused.
history = model.fit(augmented_train_ds,
                    epochs=1,
                    validation_data=val_ds,
                    verbose=1)

# Scores are measured on the un-augmented training and validation data.
# return_dict=True pairs every score with its metric name, so the table's
# columns no longer depend on the ordering or count of history.history keys.
train_scores = model.evaluate(train_ds, return_dict=True)
val_scores = model.evaluate(val_ds, return_dict=True)
# Diff is validation minus training for each metric.
diff_scores = {name: val_scores[name] - train_scores[name] for name in train_scores}
display(pd.DataFrame([train_scores, val_scores, diff_scores],
                     index=['Train', 'Val', 'Diff']))
print('------------------------------')
print('')

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, Sequential
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Augmentation pipeline: a single random zoom of up to 0.2
data_augmentation = Sequential([
    layers.RandomZoom(0.2),
    # Add more augmentation layers if needed
])


def _zoom_batch(x, y):
    """Run the augmentation pipeline on the images and pass the labels through."""
    return data_augmentation(x, training=True), y


# Only the training split is augmented
augmented_train_ds = train_ds.map(_zoom_batch)

# Two-stage CNN; augmentation stays in the input pipeline, not in the model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', input_shape=(64, 64, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu', kernel_initializer='he_normal'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[Recall(name='recall')],
)

# One epoch on the augmented training data, validated on the untouched val split
history = model.fit(
    augmented_train_ds,
    epochs=1,
    validation_data=val_ds,
    verbose=1,
)


In [None]:
# He-normal CNN with Dropout(0.5), trained without augmentation for a long run.
model = Sequential([
    # RandomZoom(0.3),
    Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', input_shape=(64, 64, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu', kernel_initializer='he_normal'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# `metrics` is documented as a list, so the single Recall metric is wrapped in one.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=[Recall(name='recall')])

history = model.fit(train_ds,
                    epochs=76,
                    validation_data=val_ds,
                    verbose=1)

In [None]:
# Score the current `model` on the training and validation datasets.
# return_dict=True pairs every score with its metric name, so the table's
# columns no longer depend on the ordering or count of history.history keys.
train_scores = model.evaluate(train_ds, return_dict=True)
val_scores = model.evaluate(val_ds, return_dict=True)
# Diff is validation minus training for each metric.
diff_scores = {name: val_scores[name] - train_scores[name] for name in train_scores}
display(pd.DataFrame([train_scores, val_scores, diff_scores],
                     index=['Train', 'Val', 'Diff']))
print('------------------------------')
print('')

In [None]:
## Data Modeling
Describe and justify the process for analyzing or modeling the data.

***
Questions to consider:
* How did you analyze or model the data?
* How did you iterate on your initial approach to make it better?
* Why are these choices appropriate given the data and the business problem?
***

In [None]:
## Evaluation
Evaluate how well your work solves the stated business problem.

***
Questions to consider:
* How do you interpret the results?
* How well does your model fit your data? How much better is this than your baseline model?
* How confident are you that your results would generalize beyond the data you have?
* How confident are you that this model would benefit the business if put into use?
***

In [None]:
## Conclusions
Provide your conclusions about the work you've done, including any limitations or next steps.

***
Questions to consider:
* What would you recommend the business do as a result of this work?
* What are some reasons why your analysis might not fully solve the business problem?
* What else could you do in the future to improve this project?
***