In [2]:
import torch
import torch.nn as nn
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split

# Load LSOA data
lsoas = pd.read_csv('data/test_lsoas.csv')
print(lsoas.columns.tolist())

# Build list of all images with their IMD scores
image_dir = 'data/images/'
records = []

for fname in os.listdir(image_dir):
    if not fname.endswith('.jpg') or fname == 'test_image.jpg':
        continue
    lsoa_code = fname.rsplit('_', 1)[0]
    match = lsoas[lsoas['LSOA11CD'] == lsoa_code]
    if not match.empty:
        score = match['IMD19'].values[0]
        records.append({
            'image_path': os.path.join(image_dir, fname),
            'lsoa': lsoa_code,
            'imd_score': float(score)
        })

records_df = pd.DataFrame(records)
print(f"Total images found: {len(records_df)}")
print(records_df.head())

['LSOA11CD', 'IMD19', 'lat', 'lon']
Total images found: 36
                      image_path       lsoa  imd_score
0    data/images/E01030408_0.jpg  E01030408    32736.0
1    data/images/E01002082_0.jpg  E01002082      546.0
2   data/images/E01030542_90.jpg  E01030542    32796.0
3  data/images/E01030353_180.jpg  E01030353    32727.0
4  data/images/E01030542_180.jpg  E01030542    32796.0


In [4]:
# Normalise IMD score to 0-1
score_min = records_df['imd_score'].min()
score_max = records_df['imd_score'].max()
records_df['imd_norm'] = (records_df['imd_score'] - score_min) / (score_max - score_min)

print(f"Score range: {score_min:.0f} to {score_max:.0f}")
print(records_df[['lsoa', 'imd_score', 'imd_norm']].drop_duplicates('lsoa'))

# Split 80/20
train_df, val_df = train_test_split(records_df, test_size=0.2, random_state=42)
print(f"\nTrain: {len(train_df)} | Val: {len(val_df)}")

Score range: 546 to 32796
         lsoa  imd_score  imd_norm
0   E01030408    32736.0  0.998140
1   E01002082      546.0  0.000000
2   E01030542    32796.0  1.000000
3   E01030353    32727.0  0.997860
6   E01000601     1192.0  0.020031
7   E01030518    32789.0  0.999783
8   E01001178     1096.0  0.017054
11  E01023839    32742.0  0.998326
14  E01002857     1012.0  0.014450

Train: 28 | Val: 8


In [5]:
class StreetViewDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image = Image.open(row['image_path']).convert('RGB')
        if self.transform:
            image = self.transform(image)
        label = torch.tensor(row['imd_norm'], dtype=torch.float32)
        return image, label

train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], 
                         [0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], 
                         [0.229, 0.224, 0.225])
])

train_dataset = StreetViewDataset(train_df, transform=train_transform)
val_dataset = StreetViewDataset(val_df, transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8)

print(f"Train batches: {len(train_loader)}")
print(f"Val batches: {len(val_loader)}")

Train batches: 4
Val batches: 1


In [8]:
from sklearn.metrics import r2_score

device = torch.device('cpu')

model = models.resnet18(weights='IMAGENET1K_V1')
model.fc = nn.Sequential(
    nn.Dropout(0.3),
    nn.Linear(512, 1),
    nn.Sigmoid()
)
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.MSELoss()

best_val_loss = float('inf')
best_epoch = 0

for epoch in range(25):
    model.train()
    train_loss = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device).unsqueeze(1)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    model.eval()
    val_loss = 0
    all_preds, all_labels = [], []
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device).unsqueeze(1)
            outputs = model(images)
            val_loss += criterion(outputs, labels).item()
            all_preds.extend(outputs.cpu().numpy().flatten())
            all_labels.extend(labels.cpu().numpy().flatten())

    avg_train = train_loss / len(train_loader)
    avg_val = val_loss / len(val_loader)
    r2 = r2_score(all_labels, all_preds)

    print(f"Epoch {epoch+1:2d}/25 | Train: {avg_train:.4f} | Val: {avg_val:.4f} | R²: {r2:.3f}")

    # Save best model
    if avg_val < best_val_loss:
        best_val_loss = avg_val
        best_epoch = epoch + 1
        torch.save(model.state_dict(), 'models/resnet18_best.pth')
        print(f"  → Best model saved (epoch {best_epoch})")

print(f"\nBest model was at epoch {best_epoch} with val loss {best_val_loss:.4f}")

Epoch  1/25 | Train: 0.2164 | Val: 0.2004 | R²: 0.121
  → Best model saved (epoch 1)
Epoch  2/25 | Train: 0.0382 | Val: 0.1713 | R²: 0.249
  → Best model saved (epoch 2)
Epoch  3/25 | Train: 0.0349 | Val: 0.1340 | R²: 0.412
  → Best model saved (epoch 3)
Epoch  4/25 | Train: 0.0058 | Val: 0.0973 | R²: 0.573
  → Best model saved (epoch 4)
Epoch  5/25 | Train: 0.0190 | Val: 0.0803 | R²: 0.648
  → Best model saved (epoch 5)
Epoch  6/25 | Train: 0.0045 | Val: 0.0425 | R²: 0.814
  → Best model saved (epoch 6)
Epoch  7/25 | Train: 0.0145 | Val: 0.0296 | R²: 0.870
  → Best model saved (epoch 7)
Epoch  8/25 | Train: 0.0470 | Val: 0.0132 | R²: 0.942
  → Best model saved (epoch 8)
Epoch  9/25 | Train: 0.0009 | Val: 0.0156 | R²: 0.931
Epoch 10/25 | Train: 0.0069 | Val: 0.0219 | R²: 0.904
Epoch 11/25 | Train: 0.0085 | Val: 0.0237 | R²: 0.896
Epoch 12/25 | Train: 0.0373 | Val: 0.0161 | R²: 0.929
Epoch 13/25 | Train: 0.0007 | Val: 0.0170 | R²: 0.925
Epoch 14/25 | Train: 0.0004 | Val: 0.0180 | R²: 0.