<a href="https://colab.research.google.com/github/kelsdoerksen/giga-connectivity/blob/main/SatCLIP_Embedding_Extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Notebook for extracting SatCLIP feature embeddings at the (lat, lon) locations of schools, for use in connectivity prediction.

In [None]:
# Workspace setup (shell commands): delete the `sample_data` and `.config`
# directories from the working directory, then clone the SatCLIP repository
# into the working directory itself (note the trailing `.` target).
# NOTE(review): neither command looks safe to re-run on an already set-up
# runtime (the directories are gone and the clone target is no longer empty)
# - confirm before using Run All twice in one session.
!rm -r sample_data .config # Empty current directory
!git clone https://github.com/microsoft/satclip.git . # Clone SatCLIP repository

In [None]:
!pip install lightning --quiet
!pip install rasterio --quiet
!pip install torchgeo --quiet
!pip install basemap --quiet

In [None]:
# Loading required packages
import sys
import pandas as pd
sys.path.append('./satclip')

import torch
from load import get_satclip

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

In [None]:
# Get [lon, lat] of schools as a float64 tensor to extract embeddings for

def get_coords(df):
  """
  Return the school locations in `df` as a 2D tensor of coordinates
  to extract SatCLIP embeddings for.

  Args:
    df: pandas DataFrame with numeric 'lon' and 'lat' columns. Any index is
      accepted; rows are taken in their positional order.

  Returns:
    torch.float64 tensor of shape (len(df), 2). Column 0 is lon and column 1
    is lat, in the same row order as `df`. An empty frame gives a (0, 2)
    tensor.

  Raises:
    KeyError: if `df` has no 'lon' or 'lat' column.
  """

  # Select both columns at once instead of looking rows up one label at a
  # time: `df.loc[i]` for i in range(len(df)) only works when the index is the
  # default 0..n-1 and breaks on filtered or re-indexed frames.
  lon_lat = df[['lon', 'lat']].to_numpy(dtype='float64')

  # Fix the dtype explicitly rather than relying on inference; torch.tensor
  # copies the data, so the result does not share memory with `df`.
  locations = torch.tensor(lon_lat, dtype=torch.float64)

  return locations

SatCLIP model names:

*   satclip-resnet18-l10
*   satclip-resnet18-l40
*   satclip-resnet50-l10
*   satclip-resnet50-l40
*   satclip-vit16-l10
*   satclip-vit16-l40

In [None]:
# Choose the area of interest and the dataset split, then load the table of
# locations to embed. The split name is the prefix of the input CSV file name.
aoi = 'RWA'
split = 'Testing'
aoi_df = pd.read_csv(f'{split}Data_uncorrelated_fixed.csv')

In [None]:
# Build the (lon, lat) coordinate tensor for every row of the loaded table
coords = get_coords(df=aoi_df)

In [None]:
# Name of the SatCLIP model variant to use; it should be one of the model
# names listed in the markdown cell above. The name is embedded in the file
# name of the exported embeddings CSV.
satclip_model = 'satclip-resnet50-l40'

In [None]:
# Grab embeddings for each model type
embeddings = []

!wget 'https://satclip.z13.web.core.windows.net/satclip/satclip-resnet50-l40.ckpt'
model = get_satclip('satclip-resnet50-l40.ckpt', device=device)
model.eval()
with torch.no_grad():
  x  = model(coords.double().to(device)).detach().cpu()


In [None]:
# Columns carried over from the input table alongside the embeddings.
identifying_info_df = aoi_df[['giga_id_school', 'connectivity', 'lat', 'lon']]

# Wrap the embedding matrix in a DataFrame that shares aoi_df's index.
# The frames are later combined column-wise, which aligns rows by index label;
# reusing the index keeps each embedding on the row it was computed from even
# if aoi_df is not indexed 0..n-1. pandas raises a ValueError here if the
# number of embedding rows does not match the number of rows in aoi_df.
emb_df = pd.DataFrame(x.numpy(), index=aoi_df.index)

In [None]:
# Place the identifying columns and the embedding columns side by side
# (column-wise concatenation, rows matched on index labels).
emb_df_labelled = pd.concat(
    objs=[identifying_info_df, emb_df],
    axis='columns',
)

In [None]:
# Tag every row with the dataset split it came from (new trailing column).
emb_df_labelled = emb_df_labelled.assign(data_split=split)

In [None]:
# Write the labelled embeddings to a CSV file named after the area of
# interest, the SatCLIP model and the dataset split.
emb_df_labelled.to_csv(f'{aoi}_{satclip_model}_embeddings_{split}.csv')