In [None]:
import os
import numpy as np
import pandas as pd
import PIL.Image as Image
import tensorflow as tf
import json
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.layers import Dense, Concatenate, Dropout
from tensorflow.keras import Model, Input
from tensorflow.keras.callbacks import Callback
import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras import layers
from keras.layers import GlobalAveragePooling2D

# Multiply the learning rate by 0.3 whenever val_loss has not improved for
# 3 consecutive epochs. min_delta=0 means any decrease counts as improvement.
red_on_plat = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    mode='auto',
    factor=0.3,
    patience=3,
    min_delta=0,
    cooldown=0,
    min_lr=0,
    verbose=0,
)

# Stop training once val_loss has not improved for 10 consecutive epochs.
# restore_best_weights=False: the weights kept after stopping are those of
# the last epoch run, not those of the best epoch.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='auto',
    min_delta=0,
    patience=10,
    baseline=None,
    restore_best_weights=False,
    verbose=1,
)

def degrees_to_radians(deg):
    """Convert an angle (scalar or tensor) from degrees to radians."""
    radians_per_degree = 0.017453292519943295  # pi / 180
    return deg * radians_per_degree

def km_away(observation, prediction):
    """Mean haversine distance, in kilometres, between two batches of coordinates.

    Args:
        observation: Tensor of true coordinates in degrees, indexed as
            ``[:, 0]`` and ``[:, 1]``.
        prediction: Tensor of predicted coordinates in degrees, same layout.

    Returns:
        Scalar tensor: the distance averaged over the batch dimension.

    NOTE(review): the formula applies ``cos`` to column 1, i.e. it treats
    column 1 as latitude and column 0 as longitude -- confirm the labels are
    stored in that order.
    """
    prediction = tf.convert_to_tensor(prediction)
    # Align dtypes so mixed float32/float64 inputs do not fail in the subtraction.
    observation = tf.cast(observation, prediction.dtype)

    # Elementwise scaling; no need for tf.map_fn over the batch.
    obv_rad = degrees_to_radians(observation)
    prev_rad = degrees_to_radians(prediction)

    # sin^2(delta / 2) for both coordinate columns.
    half_delta_sin_sq = tf.sin((obv_rad - prev_rad) / 2) ** 2
    a = (
        half_delta_sin_sq[:, 1]
        + tf.cos(obv_rad[:, 1]) * tf.cos(prev_rad[:, 1]) * half_delta_sin_sq[:, 0]
    )
    # Rounding can push `a` marginally outside [0, 1] (e.g. near-antipodal
    # points), which would make asin return NaN and poison the metric.
    a = tf.clip_by_value(a, 0.0, 1.0)

    # 6378.1 km is the sphere radius used by the original metric (kept as is).
    distance_km = 2 * tf.math.asin(tf.sqrt(a)) * 6378.1
    return tf.reduce_mean(distance_km)

In [None]:
# Image dimensions; not referenced by any of the visible cells.
HEIGHT, WIDTH = 256, 1024
# Length of the one-hot grid-cell label vector built in process_path_labels.
NUM_CLASSES = 12_017  # CHANGE TO NUMBER OF CELLS

def data_augmentation(image):
    """Apply the random augmentation pipeline to one image tensor.

    The transforms run in a fixed order: horizontal flip, vertical flip,
    brightness, contrast, saturation, hue.
    """
    pipeline = (
        (tf.image.random_flip_left_right, {}),
        (tf.image.random_flip_up_down, {}),
        (tf.image.random_brightness, {'max_delta': 0.1}),
        (tf.image.random_contrast, {'lower': 0.8, 'upper': 1.2}),
        (tf.image.random_saturation, {'lower': 0.8, 'upper': 1.2}),
        (tf.image.random_hue, {'max_delta': 0.02}),
    )
    for transform, options in pipeline:
        image = transform(image, **options)
    return image

def process_path_labels(dir_paths):
    """Build image paths and labels from a list of sample directories.

    Each directory's base name is parsed as comma-separated fields
    ``<class_index>,<value1>,<value2>``; the first file (in sorted order)
    inside the directory is taken as the sample image.

    Args:
        dir_paths: Iterable of directory paths, one per sample.

    Returns:
        Tuple ``(image_paths, output1_list, output2_list)`` where
        ``output1_list`` holds one-hot vectors of length ``NUM_CLASSES`` and
        ``output2_list`` holds 2-element float arrays built from the second
        and third name fields.

    Raises:
        FileNotFoundError: If a directory contains no files.
        ValueError: If a directory name has fewer than three fields or a
            field cannot be parsed as a number.

    NOTE(review): every one-hot vector is a dense float64 array of
    ``NUM_CLASSES`` entries, so memory grows as ``len(dir_paths) * NUM_CLASSES
    * 8`` bytes -- check this fits for the full dataset.
    """
    image_paths = []
    output1_list = []
    output2_list = []

    for dir_path in tqdm(dir_paths, desc="Processing paths"):
        # os.listdir order is arbitrary; sorting makes the chosen file reproducible.
        entries = sorted(os.listdir(dir_path))
        if not entries:
            raise FileNotFoundError(f"No image file found in directory: {dir_path!r}")
        img_path = os.path.join(dir_path, entries[0])

        # normpath strips a trailing separator so the base name is never empty.
        dir_name = os.path.basename(os.path.normpath(dir_path))
        path_info = dir_name.split(',')
        if len(path_info) < 3:
            raise ValueError(
                f"Expected '<class>,<value1>,<value2>' directory name, got {dir_name!r}"
            )

        output1 = np.zeros(NUM_CLASSES)
        output1[int(path_info[0])] = 1
        output2 = np.array([float(path_info[1]), float(path_info[2])])

        output1_list.append(output1)
        output2_list.append(output2)
        image_paths.append(img_path)
    return image_paths, output1_list, output2_list

def parse_image(filename):
    """Read the file at `filename` and decode it as a 3-channel JPEG tensor."""
    raw_bytes = tf.io.read_file(filename)
    return tf.image.decode_jpeg(raw_bytes, channels=3)

def process_image_and_label(filename, label1, label2):
    """Load and randomly augment one image, pairing it with its two labels.

    Returns:
        ``(image, (label1, label2))``.
    """
    augmented = data_augmentation(parse_image(filename))
    labels = (label1, label2)
    return augmented, labels

def load_and_preprocess_data(image_paths, output1, output2):
    """Create a dataset of ``(image, (label1, label2))`` elements.

    The three inputs are sliced in lock-step along their first axis, then each
    element is passed through `process_image_and_label` (decode + random
    augmentation) in parallel.
    """
    samples = tf.data.Dataset.from_tensor_slices((image_paths, output1, output2))
    return samples.map(
        process_image_and_label,
        num_parallel_calls=tf.data.AUTOTUNE,
    )

def prepare_for_training(ds, cache=False, shuffle_buffer_size=5000, batch_size=256):
    """Turn a dataset into an endlessly repeating, shuffled, batched stream.

    Args:
        ds: Source dataset.
        cache: Falsy for no caching, a non-empty string to cache to that file
            path, any other truthy value to cache in memory.
        shuffle_buffer_size: Buffer size passed to `shuffle`.
        batch_size: Number of elements per batch.

    Returns:
        The dataset after (optional) cache, shuffle, repeat, batch, prefetch.
    """
    if isinstance(cache, str) and cache:
        ds = ds.cache(cache)
    elif cache:
        ds = ds.cache()

    return (
        ds.shuffle(buffer_size=shuffle_buffer_size)
        .repeat()
        .batch(batch_size)
        .prefetch(buffer_size=tf.data.AUTOTUNE)
    )

In [None]:
# Connect to the TPU and build everything below inside its distribution scope.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
tpu_strategy = tf.distribute.TPUStrategy(tpu)
with tpu_strategy.scope():
    # Load the previously saved model; km_away must be registered so the
    # saved custom metric can be deserialized.
    model = tf.keras.models.load_model('/kaggle/input/500k-model/500k_25_Model', custom_objects={'km_away': km_away})

    # Output tensor of the layer named 'Grid'; the new coordinate head is
    # attached to it (the variable is called `gap` but it is that layer's output).
    gap = model.get_layer('Grid').output

    # Freeze every layer of the loaded model; only the new head is trained.
    for layer in model.layers:
        layer.trainable = False

    # New regression head.
    # NOTE(review): none of the intermediate Dense layers is given an
    # activation, so apart from the Dropout layers this stack is a chain of
    # linear maps -- confirm this is intended.
    dense_2 =  Dense(250,name='mid_layer')(gap)
    drop = Dropout(0.5)(dense_2)
    dense_3 =  Dense(200,name='mid_layer_1')(drop)
    dense_4 =  Dense(150,name='mid_layer_2')(dense_3)

    dense_7 =  Dense(100,kernel_regularizer='l2',name='mid_layer_3')(dense_4)
    dense_8 =  Dense(80,kernel_regularizer='l2',name='mid_layer_4')(dense_7)
    drop = Dropout(0.5)(dense_8)
    dense_11 = Dense(50,kernel_regularizer='l2',name='mid_layer_5')(drop)
    # Two linear outputs, named 'Coords' so the loss/metric dicts below can address them.
    out_2 =  Dense(2,activation='linear',name='Coords')(dense_11)

    # Two-output model: the loaded model's original output plus the new coordinates.
    new_model = tf.keras.Model(inputs=model.input, outputs=[model.output, out_2])

    # Only 'Coords' gets a loss (MAE) and a metric (km_away); 'Grid' is given
    # None for both. Presumably 'Grid' is the name of the loaded model's
    # output -- verify against the saved model.
    with tf.keras.utils.custom_object_scope({'km_away': km_away}):
        new_model.compile(optimizer=tf.keras.optimizers.AdamW(learning_rate=0.001), loss={'Grid': None, 'Coords': 'mae'}, metrics={'Grid': None, 'Coords': km_away})
        
    print('Model compiled.')

In [None]:
# Root directory of the image dataset.
dir_path = '/kaggle/input/500k-images/500k_world'  # CHANGE TO RIGHT DIRECTORY

# JSON file holding the precomputed train/val/test directory lists.
json_file = '/kaggle/input/500k-split/500k_split.json' # preprocessed paths CHANGE TO RIGHT PATH
with open(json_file, 'r') as infile:
    data_dict = json.load(infile)

train_paths, val_paths, test_paths = (
    data_dict[split_key]
    for split_key in ('train_paths', 'val_paths', 'test_paths')
)

In [None]:
print('starting image processing')
image_paths_train, output1_train, output2_train = process_path_labels(train_paths)
print('train processed')
image_paths_val, output1_val, output2_val = process_path_labels(val_paths)
print('val processed')

# Training images go through the random augmentation pipeline.
train_ds = load_and_preprocess_data(image_paths_train, output1_train, output2_train)
print('train loaded')

# Validation images are only decoded, never randomly augmented, so that
# val_loss (monitored by the LR scheduler and early stopping) measures the
# model on unmodified images and is comparable from epoch to epoch.
val_ds = tf.data.Dataset.from_tensor_slices((image_paths_val, output1_val, output2_val))
val_ds = val_ds.map(
    lambda filename, label1, label2: (parse_image(filename), (label1, label2)),
    num_parallel_calls=tf.data.AUTOTUNE,
)
print('val loaded')

batch_size = 256
train_ds = prepare_for_training(train_ds, batch_size=batch_size)
print('train prepared')
# The validation stream is still shuffled and repeated; the number of batches
# consumed per epoch is bounded by validation_steps in the fit call.
val_ds = prepare_for_training(val_ds, batch_size=batch_size)
print('finished image processing')

In [None]:
# Both datasets repeat forever, so the epoch length is given explicitly as
# the number of full batches in each split.
history_classifier = new_model.fit(
    train_ds,
    epochs=25,
    steps_per_epoch=len(train_paths) // batch_size,
    validation_data=val_ds,
    validation_steps=len(val_paths) // batch_size,
    callbacks=[red_on_plat, early_stop],
)

# Persist the full model and, separately, its weights in TF checkpoint format.
new_model.save('500k_finetuned')
new_model.save_weights('500k_finetuned_weights', save_format='tf')

# Persist the per-epoch training history.
hist_df = pd.DataFrame(history_classifier.history)
hist_df.to_csv('history_classifier.csv')

In [None]:
print('Evaluation')

image_paths_test, output1_test, output2_test = process_path_labels(test_paths)

# Test images are only decoded, never randomly augmented, so evaluation is
# deterministic and measures the model on unmodified images.
test_ds = tf.data.Dataset.from_tensor_slices((image_paths_test, output1_test, output2_test))
test_ds = test_ds.map(
    lambda filename, label1, label2: (parse_image(filename), (label1, label2)),
    num_parallel_calls=tf.data.AUTOTUNE,
)

def prepare_for_testing(ds, batch_size=256):
    """Batch and prefetch a dataset for a single, in-order evaluation pass."""
    ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=tf.data.AUTOTUNE)
    return ds

test_ds = prepare_for_testing(test_ds)

results = new_model.evaluate(test_ds)