In [None]:
import numpy as np
import geopandas as gpd
import pandas as pd
import torch
import os
import time

from torch import nn, Tensor
from torchvision.models import resnet50, inception_v3
from sklearn.neighbors import NearestNeighbors
from torchvision import transforms
from datetime import datetime
from torch.utils.data import DataLoader
from PIL import Image
from tqdm import tqdm


# Show every column when a DataFrame is rendered (None removes the column limit)
pd.options.display.max_columns = None

In [None]:
class Embedder(nn.Module):
    """Frozen Inception-v3 feature extractor.

    Wraps ``inception_v3`` loaded with pretrained weights and replaces its
    final fully connected layer with ``nn.Identity``, so a forward pass
    returns the backbone's features instead of classification outputs.
    The backbone is put in evaluation mode and none of its parameters
    require gradients.
    """

    def __init__(self):
        super().__init__()

        # Pretrained backbone
        backbone = inception_v3(pretrained=True)

        # Swap the final affine layer for a pass-through to expose the features
        backbone.fc = nn.Identity()

        # Evaluation mode, so dropout is not applied
        backbone.eval()

        # Freeze every parameter of the backbone
        backbone.requires_grad_(False)

        self.model = backbone

    def forward(self, images):
        """Return the backbone's output for the batch ``images``."""
        features = self.model(images)
        return features

In [None]:
# Mount Google Drive at /content/drive (Google Colab only); the dataset and image
# paths used in this notebook are located under drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Select the compute device: CUDA when a GPU is visible, CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Current Device:", device.type)

# GeoJSON file describing the split to embed.
# To process the test split, use ".../csci 1470: final project/test_set.geojson" instead.
DATA_FILE = "drive/MyDrive/semester 4/csci 1470: final project/training_set.geojson"

# Preprocessing applied to every image before it is embedded:
# 299-pixel center crop, conversion to a tensor, then per-channel mean/std
# normalization (the values commonly used with ImageNet-pretrained torchvision models).
transform = transforms.Compose(
    [
        transforms.CenterCrop(size=299),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

Current Device: cuda


In [None]:
# Directories that receive the saved embeddings for this split
AERIAL_IMAGES_DIR = "drive/MyDrive/semester 4/csci 1470: final project/datasetimages/train/aerial_embeddings_julia"
STREETVIEW_IMAGES_DIR = "drive/MyDrive/semester 4/csci 1470: final project/datasetimages/train/streetview_embeddings_julia"

# Load the cleaned dataset and keep an independent copy of its first 10,000 rows
data_df = gpd.read_file(DATA_FILE).iloc[:10000].copy()

# Preview the first rows
data_df.head()

Unnamed: 0,_count,_mean,_variance,CURRENT_ENERGY_RATING,STREET_ADDRESS,county,streetview,aerial,geometry
0,0.026031,0.345563,0.002113278,E,"30 BROWNSHILL GREEN ROAD , Coventry, CV6 2DT",coventry,/content/drive/MyDrive/semester 4/csci 1470: f...,/content/drive/MyDrive/semester 4/csci 1470: f...,"POLYGON ((-1.53086 52.42845, -1.53091 52.42839..."
1,0.011097,0.406351,0.002842315,D,"253, Abingdon Road, Oxford, OX1 4TH",oxford,/content/drive/MyDrive/semester 4/csci 1470: f...,/content/drive/MyDrive/semester 4/csci 1470: f...,"POLYGON ((-1.25049 51.73521, -1.25048 51.73521..."
2,0.005652,0.368617,0.01354495,E,"34 Paxton Road, Coventry, CV6 1AG",coventry,/content/drive/MyDrive/semester 4/csci 1470: f...,/content/drive/MyDrive/semester 4/csci 1470: f...,"POLYGON ((-1.52776 52.41461, -1.52788 52.41462..."
3,0.01399,0.306281,1.345053e-27,E,"54 Dickens Road, Coventry, CV6 2JR",coventry,/content/drive/MyDrive/semester 4/csci 1470: f...,/content/drive/MyDrive/semester 4/csci 1470: f...,"POLYGON ((-1.52866 52.4333, -1.52865 52.43327,..."
4,0.030122,0.294832,0.0002894256,D,"21 Linwood Drive, Coventry, CV2 2LZ",coventry,/content/drive/MyDrive/semester 4/csci 1470: f...,/content/drive/MyDrive/semester 4/csci 1470: f...,"POLYGON ((-1.44713 52.43537, -1.44715 52.43517..."


**Computes an embedding for each image, saves it to the embeddings directory of the current split (train, val, or test), and adds the path of each saved embedding to the dataframe**

In [None]:
# Register tqdm with pandas so that DataFrame.progress_apply (used below) shows a progress bar
tqdm.pandas()

def _embed_and_save(row, column, marker, output_dir):
    """Embed the image referenced by ``row[column]`` and save it as a compressed ``.npz``.

    Shared implementation of ``embed_aerial_view`` and ``embed_street_view``.

    Parameters
    ----------
    row : mapping
        Dataframe row (or any mapping) holding the image path under ``column``.
    column : str
        Key of the image path in ``row``.
    marker : str
        Path fragment such as ``'aerial/'``; everything after its first
        occurrence in the image path becomes the output file name.
    output_dir : str
        Directory that receives the ``.npz`` file (created if missing).

    Returns
    -------
    str
        Path of the written file; it always ends in ``.npz``.

    Raises
    ------
    Exception
        Anything raised while reading, embedding or saving the image is
        propagated; the public wrappers turn it into ``np.nan``.
    """
    source_path = row[column]

    # The image is opened via the path starting at "drive/MyDrive". If that
    # fragment is absent, use the path unchanged (slicing with find() == -1
    # would otherwise keep only its last character).
    drive_index = source_path.find("drive/MyDrive")
    image_path = source_path[drive_index:] if drive_index >= 0 else source_path

    # convert() returns a new image, so the file handle can be released right away
    with Image.open(image_path) as image_file:
        image = image_file.convert("RGB")
    batch = transform(image).unsqueeze(0)

    # Feed the batch on whatever device the embedder's weights live on
    first_param = next(embedder.parameters(), None)
    if first_param is not None:
        batch = batch.to(first_param.device)

    # Pure inference: no autograd bookkeeping needed
    with torch.no_grad():
        embedding = embedder(batch)
    embedding = embedding.cpu().detach().numpy()

    # Everything after the first "<marker>" is the file name; maxsplit=1 keeps the
    # remainder intact even if the marker text occurs again later in the path.
    file_name = source_path.split(marker, 1)[1]

    # Swap a trailing ".jpg" for ".npz". For any other name ".npz" is appended, which
    # is what np.savez_compressed would do itself - this way the returned path
    # always matches the file that is actually written.
    if file_name.lower().endswith('.jpg'):
        file_name = file_name[:-len('.jpg')]
    filepath = os.path.join(output_dir, file_name + '.npz')

    # A missing output directory would otherwise make every single row fail
    parent_dir = os.path.dirname(filepath)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    np.savez_compressed(filepath, embedding)
    return filepath


def embed_aerial_view(row):
    """Embed the aerial image of ``row`` and save it under ``AERIAL_IMAGES_DIR``.

    ``row['aerial']`` is the path of the image (it includes ``.jpg`` at the end).

    Returns the path of the saved ``.npz`` file, or ``np.nan`` if anything
    fails; in that case the reason is printed and processing can continue
    with the next row.
    """
    try:
        return _embed_and_save(row, 'aerial', 'aerial/', AERIAL_IMAGES_DIR)
    except Exception as e:
        print(f"aerial failed because: {e}")
        return np.nan


def embed_street_view(row):
    """Embed the street-view image of ``row`` and save it under ``STREETVIEW_IMAGES_DIR``.

    ``row['streetview']`` is the path of the image.

    Returns the path of the saved ``.npz`` file, or ``np.nan`` if anything
    fails; in that case the reason is printed and processing can continue
    with the next row.
    """
    try:
        return _embed_and_save(row, 'streetview', 'streetview/', STREETVIEW_IMAGES_DIR)
    except Exception as e:
        print(f"streetview failed because: {e}")
        return np.nan

# Feature extractor used by embed_aerial_view / embed_street_view
embedder = Embedder()

# Both path columns start out as NaN
for path_column in ('embedded_aerial_path', 'embedded_street_path'):
    data_df[path_column] = np.nan

# Aerial pass, currently disabled (complete for test and val, still to do for train):
# data_df['embedded_aerial_path'] = data_df.progress_apply(embed_aerial_view, axis=1)

# Street-view pass (complete for test and val, this run covers train):
# embed every row and store the path of each saved embedding
street_paths = data_df.progress_apply(embed_street_view, axis=1)
data_df['embedded_street_path'] = street_paths

100%|██████████| 10000/10000 [1:43:44<00:00,  1.61it/s]


In [None]:
# Display GeoDataFrame with paths to embedded images
# (a path column holds NaN wherever no embedding was written for that row)
data_df

Unnamed: 0,_count,_mean,_variance,CURRENT_ENERGY_RATING,STREET_ADDRESS,county,streetview,aerial,geometry,embedded_aerial_path,embedded_street_path
0,0.007042,0.386985,0.000000,C,"16 PORTMAN CLOSE, LONDON, Westminster, W1H 6BR",westminster,/content/drive/MyDrive/semester 4/csci 1470: f...,/content/drive/MyDrive/semester 4/csci 1470: f...,"POLYGON ((-0.15689 51.51695, -0.15703 51.51693...",,drive/MyDrive/semester 4/csci 1470: final proj...
1,0.005975,0.318649,0.000000,D,"14 Calvert Close, Coventry, CV3 5PQ",coventry,/content/drive/MyDrive/semester 4/csci 1470: f...,/content/drive/MyDrive/semester 4/csci 1470: f...,"POLYGON ((-1.50494 52.3874, -1.50503 52.38745,...",,drive/MyDrive/semester 4/csci 1470: final proj...
2,0.005428,0.276488,0.000279,D,"19 William Bristow Road, Coventry, CV3 5LN",coventry,/content/drive/MyDrive/semester 4/csci 1470: f...,/content/drive/MyDrive/semester 4/csci 1470: f...,"POLYGON ((-1.49754 52.3918, -1.49765 52.39179,...",,drive/MyDrive/semester 4/csci 1470: final proj...
3,0.012454,0.244667,0.002731,E,"17, Ouseley Close, Marston, Oxford, OX3 0JS",oxford,/content/drive/MyDrive/semester 4/csci 1470: f...,/content/drive/MyDrive/semester 4/csci 1470: f...,"POLYGON ((-1.23487 51.76713, -1.2349 51.76717,...",,drive/MyDrive/semester 4/csci 1470: final proj...
4,0.009106,0.341013,0.010524,D,"34 St. Nicholas Street, Coventry, CV1 4BP",coventry,/content/drive/MyDrive/semester 4/csci 1470: f...,/content/drive/MyDrive/semester 4/csci 1470: f...,"POLYGON ((-1.51322 52.41421, -1.51309 52.41424...",,drive/MyDrive/semester 4/csci 1470: final proj...
...,...,...,...,...,...,...,...,...,...,...,...
3586,0.006264,0.297146,0.000848,D,"45 SUSSEX ROAD , Coventry, CV5 8JW",coventry,/content/drive/MyDrive/semester 4/csci 1470: f...,/content/drive/MyDrive/semester 4/csci 1470: f...,"POLYGON ((-1.53564 52.41328, -1.53558 52.41326...",,drive/MyDrive/semester 4/csci 1470: final proj...
3587,0.010919,0.509448,0.018055,E,"Flat C, 146 Ashmore Road, Westminster, W9 3DE",westminster,/content/drive/MyDrive/semester 4/csci 1470: f...,/content/drive/MyDrive/semester 4/csci 1470: f...,"POLYGON ((-0.20275 51.5295, -0.20276 51.52955,...",,drive/MyDrive/semester 4/csci 1470: final proj...
3588,0.004510,0.424533,0.000000,D,"133 Eastcotes, Coventry, CV4 9AT",coventry,/content/drive/MyDrive/semester 4/csci 1470: f...,/content/drive/MyDrive/semester 4/csci 1470: f...,"POLYGON ((-1.56228 52.40046, -1.56239 52.40046...",,drive/MyDrive/semester 4/csci 1470: final proj...
3589,0.024755,0.145999,0.007129,D,"219 Aldermans Green Road, Coventry, CV2 1PJ",coventry,/content/drive/MyDrive/semester 4/csci 1470: f...,/content/drive/MyDrive/semester 4/csci 1470: f...,"POLYGON ((-1.47275 52.44796, -1.4728 52.44783,...",,drive/MyDrive/semester 4/csci 1470: final proj...


In [None]:
# Load one saved street-view embedding and show it.
# An embedding is a numeric array, not a picture, so it is displayed as an array.
import matplotlib.pyplot as plt

example_path = f"{STREETVIEW_IMAGES_DIR}/1_Acland_Close_Headington_Oxford_OX3_7XE.npz"
example_archive = np.load(example_path)
example_img = example_archive['arr_0.npy']
example_img

array([[0.28313094, 0.8117503 , 0.99038243, ..., 0.02529472, 0.49273926,
        0.3191418 ]], dtype=float32)

In [None]:
import shutil

ALL_AERIAL_IMAGES_DIR = "drive/MyDrive/semester 4/csci 1470: final project/datasetimages/train/aerial_embeddings_all"
ALL_STREETVIEW_IMAGES_DIR = "drive/MyDrive/semester 4/csci 1470: final project/datasetimages/train/streetview_embeddings_all"

# List the directory the files are actually copied from. (The listing used to come
# from AERIAL_IMAGES_DIR, so any name present there but absent from the street-view
# directory aborted the loop, and street-view-only files were never copied.)
# Sorted because os.listdir returns names in arbitrary order.
files = sorted(os.listdir(STREETVIEW_IMAGES_DIR))

# Make sure the destination exists before copying into it
os.makedirs(ALL_STREETVIEW_IMAGES_DIR, exist_ok=True)

# Copy files with tqdm progress bar.
# Re-running resumes safely: a file is skipped when the destination already holds a
# copy of the same size, instead of relying on a hard-coded offset into the listing.
for file in tqdm(files, desc="Moving files"):
    source_path = os.path.join(STREETVIEW_IMAGES_DIR, file)
    destination_path = os.path.join(ALL_STREETVIEW_IMAGES_DIR, file)

    # Only regular files are copied; sub-directories are left alone
    if not os.path.isfile(source_path):
        continue

    already_copied = (
        os.path.exists(destination_path)
        and os.path.getsize(destination_path) == os.path.getsize(source_path)
    )
    if already_copied:
        continue

    shutil.copy(source_path, destination_path)

print("File transfer completed!")

Moving files: 100%|██████████| 9764/9764 [1:47:50<00:00,  1.51it/s]

File transfer completed!



