<a href="https://colab.research.google.com/github/pahdsn/ati_sense_hackathon/blob/master/HarryCode.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Installations

!pip install geopandas
!pip install rasterio

Collecting geopandas
[?25l  Downloading https://files.pythonhosted.org/packages/2a/9f/e8a440a993e024c0d3d4e5c7d3346367c50c9a1a3d735caf5ee3bde0aab1/geopandas-0.8.2-py2.py3-none-any.whl (962kB)
[K     |████████████████████████████████| 972kB 8.9MB/s 
[?25hCollecting pyproj>=2.2.0
[?25l  Downloading https://files.pythonhosted.org/packages/05/0c/d7c2c7c370ea5368b813a44e772247ed1a461dc47de70c5d02e079abc7e0/pyproj-3.0.0.post1-cp37-cp37m-manylinux2010_x86_64.whl (6.4MB)
[K     |████████████████████████████████| 6.5MB 27.0MB/s 
[?25hCollecting fiona
[?25l  Downloading https://files.pythonhosted.org/packages/47/c2/67d1d0acbaaee3b03e5e22e3b96c33219cb5dd392531c9ff9cee7c2eb3e4/Fiona-1.8.18-cp37-cp37m-manylinux1_x86_64.whl (14.8MB)
[K     |████████████████████████████████| 14.8MB 320kB/s 
Collecting click-plugins>=1.0
  Downloading https://files.pythonhosted.org/packages/e9/da/824b92d9942f4e472702488857914bdd50f73021efea15b4cad9aca8ecef/click_plugins-1.1.1-py2.py3-none-any.whl
Collecting mu

In [66]:
# Imports

import numpy as np
from matplotlib import pyplot as plt
import geopandas as gpd
import pandas as pd
import rasterio
import os
import glob
from google.colab import drive
import gdal
from shapely.geometry import Point
from PIL import Image
from tqdm import tqdm

In [10]:
# Mount Drive and set up paths

drive.mount('/content/drive')
os.chdir('/content/drive/My Drive/Polar_Hack')
SAMPLING_DIR = "./samples/"
META_DIR = "./samples_meta/"
SHAPEFILE_DIR = "./EE_Polar_Training_Dataset_v-1-0-0/Sea_Ice/"
TIFF_DIR = "./Sentinel geotiffs/"

# The sampling code writes PNGs and a CSV into these folders; create them up
# front so a fresh project directory does not fail on the first save.
os.makedirs(SAMPLING_DIR, exist_ok=True)
os.makedirs(META_DIR, exist_ok=True)

# glob returns paths in arbitrary order; sort so that the processing order and
# the "first matching image" lookup are reproducible between runs.
shapefiles = sorted(glob.glob(SHAPEFILE_DIR+'*.shp'))
images = sorted(glob.glob(TIFF_DIR+'*.tif'))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [83]:
# Define routine for extracting label on subset of image

def get_id(shapefile):
  '''
  Return the identifier embedded in a shapefile name, upper-cased.

  The identifier is the text after the last underscore with its final four
  characters (e.g. a ".shp" suffix) dropped; the notebook treats it as the
  datetime component of the name.
  '''
  last_token = shapefile.rsplit("_", 1)[-1]
  stem = last_token[:-4]
  return stem.upper()

def geo_ref(x,y,GT):
  '''
  Convert pixel coordinates (x, y) into a georeferenced shapely Point.

  GT supplies six affine coefficients: GT[0..2] produce the first coordinate
  and GT[3..5] the second, each as offset + x * coeff + y * coeff. The caller
  in this notebook passes the result of a GDAL dataset's GetGeoTransform().
  '''
  def _affine(offset, x_coeff, y_coeff):
    # One row of the affine transform applied to the pixel position.
    return offset + x * x_coeff + y * y_coeff

  geo_x = _affine(GT[0], GT[1], GT[2])
  geo_y = _affine(GT[3], GT[4], GT[5])
  return Point(geo_x, geo_y)

def sample(shapefile,x,y,N):
  '''
  Cut an NxN patch out of the GeoTIFF that matches a shapefile and label it.

  x is the pixel column and y the pixel row of the patch origin, i.e. the same
  convention geo_ref applies to the GDAL geotransform. The label is the
  'poly_type' of the first polygon that contains the georeferenced patch
  centre and has a non-None 'poly_type'.

  Returns (id, classification, image_name) after saving the patch as a PNG in
  SAMPLING_DIR. Returns (None, None, None) when no polygon labels the centre
  or when the NxN window does not lie fully inside the raster.
  Raises IndexError when no path in images contains the shapefile's id.
  '''
  sample_id = get_id(shapefile)
  matches = [path for path in images if sample_id in path]
  if not matches:
    raise IndexError('no GeoTIFF path contains id ' + sample_id)
  tiff = gdal.Open(matches[0])

  # Only complete NxN windows are sampled; a window hanging over the raster
  # edge cannot be read as a full patch.
  if x < 0 or y < 0 or x+N > tiff.RasterXSize or y+N > tiff.RasterYSize:
    return None, None, None

  point = geo_ref(x+N/2,y+N/2,tiff.GetGeoTransform())
  shape_data = gpd.read_file(shapefile)
  classification = None
  for geometry, poly_type in zip(shape_data['geometry'], shape_data['poly_type']):
    # Rows without a geometry or without a label cannot classify the point.
    if geometry is None or poly_type is None:
      continue
    if geometry.contains(point):
      classification = poly_type
      break
  if classification is None:
    return None, None, None

  # Read just the window (xoff, yoff, xsize, ysize) instead of the whole
  # raster. GDAL returns (band, row, col), so x must select columns and y rows
  # for the patch to line up with the point that was classified above.
  patch = tiff.ReadAsArray(int(x), int(y), int(N), int(N))
  im = Image.fromarray(np.transpose(patch,(1,2,0)))
  image_name = SAMPLING_DIR+sample_id+'X'+str(x)+'Y'+str(y)+'.png'
  im.save(image_name)
  return sample_id, classification, image_name

In [84]:
# Raw image dimensions in pixels, used by get_samples as the upper bounds of
# its sampling grid: xx bounds the x loop and yy bounds the y loop.
# NOTE(review): hard-coded; presumably the width and height shared by every
# GeoTIFF in TIFF_DIR -- confirm against the rasters.
xx = 15564
yy = 15218

def get_samples(grid_space, sample_size):
  '''
  Sample every shapefile on a regular pixel grid and record the labelled patches.

  grid_space  - step in pixels between sample origins (the grid starts at
                pixel 100 on both axes and stops before xx / yy)
  sample_size - side length in pixels of each square sample image

  Each labelled sample is saved as a PNG by sample(); the metadata collected
  so far is rewritten to META_DIR + 'samples.csv' after every shapefile.
  Returns the metadata as a DataFrame with columns id, x, y, label, image.
  '''
  columns = ['id','x','y','label','image']
  # Collect plain records and build the frame once per shapefile: growing a
  # DataFrame row by row is quadratic and DataFrame.append no longer exists
  # in pandas 2.x.
  records = []
  metadata = pd.DataFrame(columns=columns)
  for S in tqdm(shapefiles):
    for x in np.arange(100,xx,grid_space):
      for y in np.arange(100,yy,grid_space):
        id, label, name = sample(S,x,y,sample_size)
        if label is not None:
          records.append({'id':id,'x':x,'y':y,'label':label,'image':name})
    # Checkpoint after each shapefile so an interrupted run keeps its progress.
    metadata = pd.DataFrame(records, columns=columns)
    metadata.to_csv(META_DIR+'samples.csv')
  return metadata

In [85]:
# Sample every 5000 pixels, producing 64x64 patches.
samples = get_samples(grid_space=5000, sample_size=64)

100%|██████████| 12/12 [02:15<00:00, 11.32s/it]
