A basic version has already been more or less completed in Colab.

Transfer-learning model with VGG and ResNet.
Choose one of the two, or keep both types.

In [5]:
!pip install --upgrade keras-cv tensorflow
!pip install --upgrade keras



In [6]:
import tensorflow as tf
from tensorflow import keras
import keras_cv
from keras_cv import layers as cv_layers
import numpy as np
import pandas as pd
from tqdm import tqdm
import pickle
import glob
import cv2
import re
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [7]:
# Report the installed version of each deep-learning library, in the order
# TensorFlow, Keras, KerasCV.
for library in (tf, keras, keras_cv):
    print(library.__version__)

2.17.0
3.6.0
0.9.0


In [8]:
# Mount Google Drive and point `folder_path` at the folder holding the data.
# TODO: this cell is Colab-specific; when running elsewhere (e.g. VS Code),
# skip the mount and set `folder_path` to the local data directory instead.
from google.colab import drive

folder_path = '/content/drive/My Drive/Sarah_Master_Project/Data/Colab_data'
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
# The .npy file stores a single pickled Python object wrapped in a 0-d object
# array; `.item()` unwraps it (it is used as a mapping keyed by image code
# further down).
# NOTE: allow_pickle=True can execute arbitrary code while loading, so only
# use it on files from a trusted source.
image_file = os.path.join(folder_path, 'Kenya2019_array_augmented.npy')
images = np.load(image_file, allow_pickle=True).item()


In [10]:
# Read the tab-separated table holding one row per image code.
df = pd.read_csv(
    os.path.join(folder_path, 'clustered_geopoints.txt'),
    sep='\t',
)

# Roof-category values of exactly 0.5 are folded into class 0.
df['avg_roofcat'] = df['avg_roofcat'].replace(0.5, 0)

# Alternative target kept for reference:
#labels = df['avg_wealthscore'].values
labels = df['avg_roofcat'].values
image_codes = df['image_code'].astype(str).values

# Keep only the rows that actually have a label (drop NaN targets), applying
# the same mask to the image codes so the two arrays stay parallel.
non_nan = np.logical_not(np.isnan(labels))
labels, image_codes = labels[non_nan], image_codes[non_nan]

In [11]:
# Class balance of the roof-category target (counts per distinct value).
df['avg_roofcat'].value_counts()

Unnamed: 0_level_0,count
avg_roofcat,Unnamed: 1_level_1
1.0,2190
0.0,436


In [12]:
# Normalise images
images = {key: np.array(img).astype('float32') / 255.0 for key, img in images.items()}

X = np.array([images[code] for code in image_codes if code in images])


print(f"Image data shape: {X.shape}")
print(f"Labels shape: {labels.shape}")

Image data shape: (2626, 224, 224, 3)
Labels shape: (2626,)


In [13]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    labels,
    test_size=0.2,
    random_state=42,
)

# Flat arrays: one row per image, all remaining dimensions collapsed.
X_train_flat = X_train.reshape(len(X_train), -1)
X_test_flat = X_test.reshape(len(X_test), -1)


In [14]:
# Preprocess
NUM_CLASSES = 2   # > 1 selects classification; otherwise regression
BATCH_SIZE = 16


def preprocess_data(images, labels, augment=False):
    """Prepare one (images, labels) pair for the model.

    Args:
        images: Image tensor; returned unchanged.
        labels: Target tensor. In classification mode (NUM_CLASSES > 1) the
            values are treated as integer class ids; otherwise they are
            treated as continuous regression targets.
        augment: Accepted for interface compatibility but currently ignored;
            no augmentation is applied.
            TODO: implement augmentation or drop the flag.

    Returns:
        Tuple ``(images, labels)`` where ``labels`` is one-hot encoded with
        NUM_CLASSES columns in classification mode, or cast to float32 in
        regression mode.
    """
    if NUM_CLASSES > 1:
        # Class ids must be integers before one-hot encoding.
        labels = tf.one_hot(tf.cast(labels, tf.int32), NUM_CLASSES)
    else:
        # Regression targets are continuous: casting them to int32 (as was
        # previously done unconditionally) would truncate them.
        labels = tf.cast(labels, tf.float32)
    return images, labels

In [15]:
# Convert your images and labels to TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))

# Apply preprocessing, batching, and prefetching to datasets
train_dataset = train_dataset.batch(BATCH_SIZE).map(
    lambda x, y: preprocess_data(x, y, augment=True),
    num_parallel_calls=tf.data.AUTOTUNE
).prefetch(tf.data.AUTOTUNE)

test_dataset = test_dataset.batch(BATCH_SIZE).map(
    lambda x, y: preprocess_data(x, y, augment=False),
    num_parallel_calls=tf.data.AUTOTUNE
).prefetch(tf.data.AUTOTUNE)


# Model: ResNet-50

To use a different architecture (e.g. VGG or EfficientNetV2), change the backbone, for example:

backbone = keras_cv.models.EfficientNetV2Backbone.from_preset("efficientnetv2_b0_imagenet")


In [16]:
# ResNet-50 backbone loaded from the "resnet50_imagenet" preset.
backbone = keras_cv.models.ResNetBackbone.from_preset("resnet50_imagenet")

# Pick the output activation, loss and metric according to the task:
# more than one class means classification, otherwise regression.
is_classification = NUM_CLASSES > 1
if is_classification:
    activation, loss, metrics = "softmax", 'categorical_crossentropy', ['accuracy']
else:
    activation, loss, metrics = "linear", 'mean_squared_error', ['mae']

# Classification head on top of the backbone.
model = keras_cv.models.ImageClassifier(
    backbone=backbone,
    num_classes=NUM_CLASSES,
    activation=activation,
)

optimizer = keras.optimizers.Adam(learning_rate=1e-5)
model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

Downloading from https://www.kaggle.com/api/v1/models/keras/resnetv1/keras/resnet50_imagenet/2/download/config.json...


100%|██████████| 777/777 [00:00<00:00, 1.79MB/s]


Downloading from https://www.kaggle.com/api/v1/models/keras/resnetv1/keras/resnet50_imagenet/2/download/model.weights.h5...


100%|██████████| 90.3M/90.3M [00:02<00:00, 36.3MB/s]


In [17]:
# Train for 16 epochs with per-epoch progress output.
# NOTE: the test dataset doubles as the validation data here, so the reported
# val_* metrics are computed on the same samples as the final evaluation.
model.fit(train_dataset, validation_data=test_dataset, epochs=16, verbose=1)

Epoch 1/16
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 390ms/step - accuracy: 0.6620 - loss: 0.6145 - val_accuracy: 0.7966 - val_loss: 0.6566
Epoch 2/16
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 147ms/step - accuracy: 0.8866 - loss: 0.3093 - val_accuracy: 0.1540 - val_loss: 0.7711
Epoch 3/16
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 149ms/step - accuracy: 0.9453 - loss: 0.1801 - val_accuracy: 0.1540 - val_loss: 0.7888
Epoch 4/16
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 151ms/step - accuracy: 0.9904 - loss: 0.0842 - val_accuracy: 0.1844 - val_loss: 0.9819
Epoch 5/16
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 154ms/step - accuracy: 0.9984 - loss: 0.0356 - val_accuracy: 0.1597 - val_loss: 1.8356
Epoch 6/16
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 152ms/step - accuracy: 1.0000 - loss: 0.0180 - val_accuracy: 0.1559 - val_loss: 2.0803
Epoch 7/1

<keras.src.callbacks.history.History at 0x7fe5e31b7070>

In [18]:
# Evaluate on the held-out test dataset; returns the loss followed by the
# compiled metric(s).
model.evaluate(test_dataset)

[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.8486 - loss: 0.4421


[0.4350639283657074, 0.8612167239189148]

In [23]:
from sklearn.metrics import mean_absolute_error

# The model outputs one score per class; argmax over the class axis turns
# them into predicted class indices.
y_pred = np.argmax(model.predict(test_dataset), axis=1)

# With 0/1 labels and 0/1 predictions the mean absolute error is simply the
# fraction of misclassified samples.
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae}')

[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step
Mean Absolute Error: 0.13878326996197718


In [22]:
# Sanity check: show the shape of the predictions, then of the test labels.
for array in (y_pred, y_test):
    print(array.shape)

(526, 2)
(526,)


In [24]:
from sklearn.metrics import r2_score

# R-squared of the predicted class indices against the true labels.
# NOTE(review): R-squared is a regression metric; on class labels it is hard
# to interpret -- consider relying on the classification report instead.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f'R-squared: {r2}')


R-squared: -0.06527951172145952


In [25]:
from sklearn.metrics import classification_report

# Per-class precision, recall and F1 for the predicted class indices.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

         0.0       0.55      0.54      0.55        81
         1.0       0.92      0.92      0.92       445

    accuracy                           0.86       526
   macro avg       0.73      0.73      0.73       526
weighted avg       0.86      0.86      0.86       526

