In [18]:
avg_ratio = 1.3675285705588607
import tensorflow as tf
import pandas as pd
from tensorflow.keras.preprocessing import image_dataset_from_directory
tf.random.set_seed(42)
import warnings
warnings.filterwarnings('ignore')
from tensorflow.keras.metrics import BinaryAccuracy, Recall, AUC
import time
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
def create_dataset(directory, width, batch_size, ratio = avg_ratio):
    """Create a batched grayscale image dataset with binary labels.

    Args:
        directory: Root folder handed to ``image_dataset_from_directory``.
        width: Target image width in pixels.
        batch_size: Number of images per batch.
        ratio: Width-to-height ratio used to derive the target height as
            ``int(width / ratio)``. Defaults to the module-level ``avg_ratio``.

    Returns:
        The dataset returned by ``image_dataset_from_directory``.
    """
    # Keras expects image_size as (height, width).
    target_size = (int(width/ratio), width)
    return image_dataset_from_directory(
        directory,
        label_mode='binary',
        # Single channel; per the original author the images are already
        # grayscale, so this saves memory.
        color_mode="grayscale",
        batch_size=batch_size,
        image_size=target_size,
        shuffle=True,
        crop_to_aspect_ratio=True,
        seed=42,
    )
from tensorflow.keras.layers.experimental.preprocessing import Rescaling

def process_dataset(dataset,buffer_size):
    """Attach rescale, cache, shuffle and prefetch stages to a dataset.

    Args:
        dataset: A ``tf.data.Dataset`` yielding ``(x, y)`` pairs.
        buffer_size: Size of the shuffle buffer, counted in dataset elements.
            NOTE(review): if ``dataset`` is already batched (as the output of
            ``create_dataset`` is), each element is a whole batch, so this
            shuffles batch order rather than individual images.

    Returns:
        The transformed ``tf.data.Dataset``.
    """
    to_unit_scale = Rescaling(1./255)

    def _rescale_inputs(x, y):
        # Multiply the inputs by 1/255; labels pass through untouched.
        return to_unit_scale(x), y

    return (
        dataset
        .map(_rescale_inputs)
        # Cache after rescaling so later epochs skip reading and rescaling.
        .cache()
        # Fixed seed for the shuffle stage.
        .shuffle(buffer_size=buffer_size, seed=42)
        # Let tf.data choose how many elements to prepare ahead of the consumer.
        .prefetch(tf.data.AUTOTUNE)
    )

def model_timer(num_epochs, batch_size, image_size, buffer_size,
                train_dir="data/chest_xray/new_train",
                val_dir="data/chest_xray/new_val"):
    """Build the input pipelines, train a small CNN, and report wall-clock timings.

    Args:
        num_epochs: Number of epochs passed to ``model.fit``.
        batch_size: Batch size used for both the training and validation datasets.
        image_size: Target image width in pixels; the height is derived inside
            ``create_dataset`` from ``avg_ratio``.
        buffer_size: Shuffle-buffer size forwarded to ``process_dataset``.
        train_dir: Directory with the training images (one sub-folder per class).
        val_dir: Directory with the validation images.

    Returns:
        ``[num_epochs, batch_size, image_size, buffer_size, load_time, training_time]``
        where both times are in seconds.

    Notes:
        ``load_time`` covers only the construction of the two pipelines. tf.data
        pipelines are evaluated lazily, so the actual image reading happens
        while ``model.fit`` iterates the data and is therefore part of
        ``training_time``.
    """
    start_load = time.perf_counter()
    train_ds = process_dataset(
        create_dataset(train_dir, width=image_size, batch_size=batch_size, ratio=avg_ratio),
        buffer_size=buffer_size,
    )
    val_ds = process_dataset(
        create_dataset(val_dir, width=image_size, batch_size=batch_size, ratio=avg_ratio),
        buffer_size=buffer_size,
    )
    load_time = time.perf_counter() - start_load

    # Read the per-sample shape from the dataset's static signature instead of
    # iterating it: pulling a batch would run the cache/shuffle stages outside
    # of both timed regions and would leave input_shape unbound on an empty
    # dataset. Index 0 is the image component; [1:] drops the batch axis.
    input_shape = train_ds.element_spec[0].shape[1:]

    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal'),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu', kernel_initializer='he_normal'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    start_time = time.perf_counter()
    model.fit(train_ds, epochs=num_epochs, validation_data=val_ds, verbose=0)
    training_time = time.perf_counter() - start_time

    print(f'Training Time: {round(training_time)} seconds')
    print('')
    return [num_epochs,batch_size, image_size,buffer_size,load_time,training_time]

In [None]:
# Candidate values for the timing experiments.
# Powers of two from 1024 down to 16.
batch_sizes = [2 ** exponent for exponent in range(10, 3, -1)]
image_size = [2 ** exponent for exponent in range(10, 3, -1)]
# Shuffle-buffer sizes: 5271 divided (and truncated) by 1, 2, 5, 10 and 52.
buffer_size = [int(5271 / divisor) for divisor in (1, 2, 5, 10, 52)]

In [23]:
# One single-epoch run (batch size 1000, image width 64) per shuffle-buffer size.
timing_data = [
    model_timer(1, 1000, 64, shuffle_buffer)
    for shuffle_buffer in (5271, int(5271/2), int(5271/5), int(5271/10), int(5271/52))
]
# Tabulate the records; column order matches the list returned by model_timer.
df = pd.DataFrame(
    data=timing_data,
    columns=[
        'num_epochs',
        'batch_size',
        'image_size',
        'buffer_size',
        'load_time',
        'train_time',
    ],
)
df
    

Found 4684 files belonging to 2 classes.
Found 585 files belonging to 2 classes.
Training Time: 7 seconds

Found 4684 files belonging to 2 classes.
Found 585 files belonging to 2 classes.
Training Time: 9 seconds

Found 4684 files belonging to 2 classes.
Found 585 files belonging to 2 classes.
Training Time: 7 seconds

Found 4684 files belonging to 2 classes.
Found 585 files belonging to 2 classes.
Training Time: 7 seconds

Found 4684 files belonging to 2 classes.
Found 585 files belonging to 2 classes.
Training Time: 7 seconds



Unnamed: 0,num_epochs,batch_size,image_size,buffer_size,load_time,train_time
0,1,1000,64,5271,0.579167,6.861868
1,1,1000,64,2635,0.47843,8.69253
2,1,1000,64,1054,0.67893,7.008754
3,1,1000,64,527,0.445698,6.996885
4,1,1000,64,101,0.456078,6.697848


In [22]:
import pandas as pd
# Tabulate whatever timing records are currently held in `timing_data`.
df = pd.DataFrame(
    data=timing_data,
    columns=[
        'num_epochs',
        'batch_size',
        'image_size',
        'buffer_size',
        'load_time',
        'train_time',
    ],
)
df

Unnamed: 0,num_epochs,batch_size,image_size,buffer_size,load_time,train_time
0,1,1000,64,1000,0.567773,6.988521
1,1,1000,64,1001,0.463299,6.834014
2,1,1000,64,1002,0.45008,10.910749
3,1,1000,64,1003,0.484972,7.628691
4,1,1000,64,1004,0.481132,6.723069


Found 4684 files belonging to 2 classes.
Found 585 files belonging to 2 classes.


In [4]:
  # We only need the first batch to determine the input shape

Input shape: (187, 256, 1)


In [None]:
# Metric objects for binary classification: accuracy, recall and ROC AUC.
metrics = [
    BinaryAccuracy(name='accuracy'),
    Recall(name='recall'),
    AUC(name='auc'),
]

In [None]:
from scikeras.wrappers import KerasClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import time
import numpy as np
from sklearn.datasets import make_classification

# Example model function (define your own)
def get_model(input_shape):
    """Build and compile a small CNN for binary classification.

    Args:
        input_shape: Per-sample input shape ``(height, width, channels)``.
            A two-element ``(height, width)`` shape is treated as a
            single-channel image and a trailing channel axis of 1 is added.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, binary
        cross-entropy loss, accuracy metric).
    """
    input_shape = tuple(input_shape)
    if len(input_shape) == 2:
        # Conv2D needs an explicit channel axis.
        input_shape = input_shape + (1,)

    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal',
               input_shape=input_shape),
        MaxPooling2D((2, 2)),

        Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal'),
        MaxPooling2D((2, 2)),

        Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal'),
        MaxPooling2D((2, 2)),

        Flatten(),

        Dense(128, activation='relu', kernel_initializer='he_normal'),

        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    # The caller (e.g. KerasClassifier) needs the built model back.
    return model
    
    
    

# Function to train the model and measure training time
def train_and_time_model(batch_size, image_size, epochs):
    """Train the CNN from ``get_model`` on synthetic data and time the fit.

    Args:
        batch_size: Batch size passed to ``KerasClassifier``.
        image_size: ``(height, width)`` or ``(height, width, channels)`` of the
            synthetic images. A two-element size is treated as single-channel.
        epochs: Number of training epochs.

    Returns:
        The wall-clock duration of ``model.fit`` in seconds (also printed).
    """
    # Conv2D layers need a channel axis, so promote (H, W) to (H, W, 1).
    input_shape = tuple(image_size)
    if len(input_shape) == 2:
        input_shape = input_shape + (1,)

    # Generate dummy data for demonstration (replace with your own dataset).
    # A fixed random_state keeps the synthetic data identical between runs.
    X, y = make_classification(
        n_samples=1000,
        n_features=int(np.prod(input_shape)),
        n_informative=10,
        random_state=42,
    )
    X = X.reshape((-1,) + input_shape)  # (samples, height, width, channels)

    # `model__input_shape` is routed by scikeras to get_model(input_shape=...).
    model = KerasClassifier(model=get_model, model__input_shape=input_shape,
                            batch_size=batch_size, epochs=epochs)

    # perf_counter is monotonic, so the interval is not affected by clock adjustments.
    start_time = time.perf_counter()
    model.fit(X, y)
    training_time = time.perf_counter() - start_time

    # Output training time, batch size, image size, and number of epochs
    print(f"Training Time: {training_time:.2f} seconds")
    print(f"Batch Size: {batch_size}")
    print(f"Image Size: {image_size}")
    print(f"Number of Epochs: {epochs}")
    return training_time

# Example usage
# Demo settings: 28x28 images (e.g., MNIST-sized), batches of 32, 10 epochs.
batch_size, image_size, epochs = 32, (28, 28), 10
train_and_time_model(batch_size=batch_size, image_size=image_size, epochs=epochs)
