In [None]:
import sys
# Install kecam into a directory under /kaggle/working and make it importable.
# NOTE(review): presumably kecam is what provides the `keras_cv_attention_models`
# package imported in the next cell — confirm. The version is not pinned, so a
# re-run may pull a different release.
!pip install kecam --target=/kaggle/working/mysitepackages
# Expose the --target directory to this kernel's import machinery.
sys.path.append('/kaggle/working/mysitepackages')

In [None]:
import os
import numpy as np
import pandas as pd
import PIL.Image as Image
import tensorflow as tf
import json
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.layers import Dense, Concatenate, Dropout
from tensorflow.keras import Model, Input
from tensorflow.keras.callbacks import Callback
import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras import layers
from keras_cv_attention_models import tinyvit
from keras_cv_attention_models import efficientnet
from keras.layers import GlobalAveragePooling2D

# Learning-rate schedule callback: multiply the learning rate by `factor` once
# the monitored validation loss has gone `patience` epochs without improving.
red_on_plat = tf.keras.callbacks.ReduceLROnPlateau(
    # What to watch and how to interpret it.
    monitor='val_loss', mode='auto',
    # How hard and how soon to react.
    factor=0.5, patience=3,
    # No improvement threshold, no cooldown, no lower bound on the rate.
    min_delta=0, cooldown=0, min_lr=0,
    verbose=0,
)

In [None]:
# Image dimensions (in pixels) used as the default model input shape.
# NOTE(review): parse_image() does not resize, so the image files are assumed to
# already be HEIGHT x WIDTH — confirm.
HEIGHT = 256
WIDTH = 1024
# Length of every one-hot label vector and default size of the softmax output layer.
NUM_CLASSES = 12017 # CHANGE TO NUMBER OF CELLS

def data_augmentation(image):
    """Return a randomly perturbed copy of `image`.

    The perturbations are applied in a fixed order: horizontal flip, vertical
    flip, brightness, contrast, saturation, hue. Every step is random, so two
    calls on the same input generally give different results.
    """
    # Geometric perturbations.
    flipped = tf.image.random_flip_up_down(
        tf.image.random_flip_left_right(image)
    )

    # Photometric perturbations, each kept within a narrow range.
    relit = tf.image.random_brightness(flipped, max_delta=0.1)
    recontrasted = tf.image.random_contrast(relit, lower=0.8, upper=1.2)
    resaturated = tf.image.random_saturation(recontrasted, lower=0.8, upper=1.2)
    return tf.image.random_hue(resaturated, max_delta=0.02)

def process_path_labels(dir_paths):
    """Resolve one image file and a one-hot label for every sample directory.

    The last component of each directory path is expected to look like
    ``"<class_index>,<anything>"``; the integer before the first comma selects
    the hot entry of that sample's label vector.

    Args:
        dir_paths: Iterable of directory paths, one directory per sample.

    Returns:
        A tuple ``(image_paths, output1_list)`` of two parallel lists: the path
        of the image chosen from each directory, and a float64 one-hot numpy
        array of length ``NUM_CLASSES`` for that directory.

    Raises:
        IndexError: If a directory is empty or its class index falls outside
            ``[0, NUM_CLASSES)``.
        ValueError: If the leading field of the directory name is not an integer.
        OSError: If a directory cannot be listed.
    """
    # NOTE(review): every label is a dense float64 vector (NUM_CLASSES * 8 bytes
    # per sample). For very large splits, integer labels with a sparse loss
    # would be far cheaper — that would need coordinated changes in the callers.
    image_paths = []
    output1_list = []
    for dir_path in dir_paths:
        # os.listdir returns entries in arbitrary order; sorting makes the
        # chosen file reproducible when a directory holds more than one entry.
        entries = sorted(os.listdir(dir_path))
        if not entries:
            raise IndexError('No image file found in directory: {}'.format(dir_path))

        # normpath drops a trailing separator, which would otherwise leave an
        # empty final component and make the int() conversion fail.
        dir_name = os.path.basename(os.path.normpath(dir_path))
        class_index = int(dir_name.split(',')[0])
        if not 0 <= class_index < NUM_CLASSES:
            # A negative index would silently wrap around in numpy and mark the
            # wrong class, so reject it along with too-large values.
            raise IndexError(
                'Class index {} from {!r} is outside [0, {})'.format(
                    class_index, dir_path, NUM_CLASSES
                )
            )

        output1 = np.zeros(NUM_CLASSES)
        output1[class_index] = 1

        image_paths.append(os.path.join(dir_path, entries[0]))
        output1_list.append(output1)
    return image_paths, output1_list

def parse_image(filename):
    """Read the file at `filename` and decode it as a 3-channel JPEG image tensor.

    No resizing or value scaling is performed here.
    """
    raw_bytes = tf.io.read_file(filename)
    return tf.image.decode_jpeg(raw_bytes, channels=3)

def process_image_and_label(filename, label):
    """Turn a (filename, label) pair into an (augmented image, label) pair.

    The image is decoded with parse_image and then passed through
    data_augmentation; the label is returned untouched.
    """
    augmented = data_augmentation(parse_image(filename))
    return augmented, label

def load_and_preprocess_data(image_paths, output1):
    """Build an unbatched tf.data pipeline of (augmented image, label) pairs.

    Args:
        image_paths: Sequence of image file paths.
        output1: Sequence of labels, parallel to `image_paths`.

    Returns:
        A dataset whose elements come from process_image_and_label, i.e. the
        image is decoded and randomly augmented on every read.
    """
    paths_and_labels = tf.data.Dataset.zip((
        tf.data.Dataset.from_tensor_slices(image_paths),
        tf.data.Dataset.from_tensor_slices(output1),
    ))
    # Decode and augment in parallel; the degree of parallelism is left to tf.data.
    return paths_and_labels.map(
        process_image_and_label,
        num_parallel_calls=tf.data.AUTOTUNE,
    )

def prepare_for_training(ds, cache=False, shuffle_buffer_size=5000, batch_size=256):
    """Shuffle, repeat, batch and prefetch `ds` for use with Model.fit.

    Args:
        ds: The unbatched dataset to wrap.
        cache: Falsy for no caching; a non-empty string to cache to that file
            path; any other truthy value to cache in memory.
        shuffle_buffer_size: Buffer size handed to Dataset.shuffle.
        batch_size: Number of elements per batch.

    Returns:
        A dataset that repeats indefinitely, so the caller must bound each
        epoch with an explicit step count.
    """
    if cache:
        ds = ds.cache(cache) if isinstance(cache, str) else ds.cache()

    return (
        ds.shuffle(buffer_size=shuffle_buffer_size)
        .repeat()
        .batch(batch_size)
        .prefetch(buffer_size=tf.data.AUTOTUNE)
    )


In [None]:
def initialize_model(inp_shape=(HEIGHT,WIDTH,3),out_1_shape=(NUM_CLASSES),weights_initial='imagenet',trainable=True,feature_model=InceptionResNetV2):
    """Build a single-output classifier: backbone with average pooling + softmax head.

    Args:
        inp_shape: Shape of one input image, (height, width, channels).
        out_1_shape: Number of units in the softmax output layer.
        weights_initial: Value forwarded as the backbone's `weights` argument.
        trainable: Whether the backbone's weights are trainable.
        feature_model: Callable that builds the backbone; it is called with
            `include_top`, `weights`, `input_shape`, `pooling` and
            `input_tensor` keyword arguments.

    Returns:
        An un-compiled Keras Model. Despite the log line printed below, this
        function never calls `compile`; the caller must do so.
    """
    # NOTE(review): no input scaling layer is added here, and parse_image does
    # not scale either — confirm the backbone is meant to see raw pixel values.
    print('Initialising Network')
    net_input = Input(shape=inp_shape)

    backbone = feature_model(
        include_top=False,
        weights=weights_initial,
        input_tensor=net_input,
        input_shape=inp_shape,
        pooling='avg',
    )
    backbone.trainable = trainable

    # Classification head on top of the pooled backbone features.
    grid_probs = Dense(out_1_shape, activation='softmax', name='Grid')(backbone.output)
    model = Model(inputs=net_input, outputs=grid_probs)

    shapes_message = 'Network Initialised and compiled. Input shape: {}, Output shape: {}'.format(model.input_shape,model.output_shape)
    print(shapes_message)
    print('Loss: Categorical Cross Entropy')
    return model

# Disabled alternative `initialize_model` built on TinyViT_21M. It is kept as a
# bare string literal, so none of it is executed.
# NOTE(review): if re-enabled it would shadow the `initialize_model` defined
# earlier in this cell — confirm which variant is wanted before un-quoting.
"""def initialize_model():
    input_layer = tf.keras.Input(shape=(HEIGHT, WIDTH, 3))
    
    feature_extraction = tinyvit.TinyViT_21M(input_shape=(HEIGHT, WIDTH, 3), num_classes=0, pretrained='imagenet21k-ft1k')(input_layer)
    feature_extraction.trainable = True
    
    gap = GlobalAveragePooling2D()(feature_extraction) 
    out_1 = tf.keras.layers.Dense(NUM_CLASSES, activation='softmax', name='Grid')(gap)
    
    model = tf.keras.Model(inputs=input_layer, outputs=out_1)
    print('Model initialized.')
    return model"""



In [None]:
# Kaggle input locations: the image root and the JSON file holding the
# precomputed train/val/test directory lists.
dir_path = '/kaggle/input/500k-images/500k_world'  # CHANGE TO RIGHT DIRECTORY
json_file = '/kaggle/input/500k-split/500k_split.json' # preprocessed paths CHANGE TO RIGHT PATH

with open(json_file) as infile:
    data_dict = json.load(infile)

# One list of sample directories per split.
train_paths, val_paths, test_paths = (
    data_dict['train_paths'],
    data_dict['val_paths'],
    data_dict['test_paths'],
)

In [None]:
print('Preparing for images for training.')
batch_size = 256
image_paths_train, output1_train = process_path_labels(train_paths)
image_paths_val, output1_val = process_path_labels(val_paths)

# Training pipeline: decode + random augmentation, then shuffle/repeat/batch.
train_ds = load_and_preprocess_data(image_paths_train, output1_train)
train_ds = prepare_for_training(train_ds, batch_size=batch_size)

# Validation pipeline: decode only. Random augmentation and shuffling are
# deliberately left out so that val_loss is comparable from epoch to epoch;
# it is the quantity the ReduceLROnPlateau callback monitors.
val_ds = tf.data.Dataset.zip((
    tf.data.Dataset.from_tensor_slices(image_paths_val),
    tf.data.Dataset.from_tensor_slices(output1_val),
))
val_ds = val_ds.map(
    lambda filename, label: (parse_image(filename), label),
    num_parallel_calls=tf.data.AUTOTUNE,
)
# repeat() is kept so the dataset stays unbounded, as it was before; the fit
# call limits each validation pass through `validation_steps`.
val_ds = val_ds.repeat().batch(batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)
print('Finished image preparation.')

In [None]:
# Connect to the TPU and create the model and optimizer inside the strategy
# scope so their variables are placed by the strategy.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect('local')
tpu_strategy = tf.distribute.TPUStrategy(tpu)
with tpu_strategy.scope():
    model = initialize_model()
    model.compile(
        optimizer=tf.keras.optimizers.AdamW(learning_rate=0.001),
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=['accuracy'],
    )

# Both datasets repeat indefinitely, so each epoch and each validation pass is
# bounded by an explicit number of steps.
history_classifier = model.fit(
    train_ds,
    steps_per_epoch=len(train_paths)//batch_size,
    epochs=10,
    validation_data=val_ds,
    validation_steps=len(val_paths)//batch_size,
    callbacks=[red_on_plat],
)

# Persist the full model, the weights alone, and the per-epoch metrics.
model.save('500k_1_Model')
model.save_weights('500k_weights.h5')

hist_df = pd.DataFrame(history_classifier.history)
hist_df.to_csv('history_classifier.csv')

In [None]:
# Disabled test-set evaluation. It is kept as a bare string literal, so none of
# it is executed.
# NOTE(review): it builds the test set with load_and_preprocess_data, which
# applies random augmentation — confirm that is intended before re-enabling.
"""print('Evaluation')

image_paths_test, output1_test = process_path_labels(test_paths) 
test_ds = load_and_preprocess_data(image_paths_test, output1_test) 

def prepare_for_testing(ds, batch_size=256):
    ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=tf.data.AUTOTUNE)
    return ds

test_ds = prepare_for_testing(test_ds, batch_size=256)

# Evaluate model:
test_loss, test_accuracy = model.evaluate(test_ds)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")"""