# Sentinel-2 Data Processing
The purpose of this notebook is to process the Sentinel-2 data for Ghana such that we can use it in a DL model for school mapping

## Imports

In [32]:
import rasterio
import numpy as np
from rasterio.windows import Window
import geopandas as gpd
import numpy as np

## Loading Data

In [33]:
# Loading binary school data
school_locations = rasterio.open('/Users/kelseydoerksen/Desktop/Giga/Ghana/schools_location_binary.tif')
data = school_locations.read(1)

# Loading probability data with geometry (will be useful for filename saving later)
ghana_prob = gpd.read_file('/Users/kelseydoerksen/Desktop/Giga/Ghana/ghana_schools_utm30_prob.gpkg')

# Loading sentinel-2 band we are interested in
sent2_band = 'B12'
sent2_img = rasterio.open('/Users/kelseydoerksen/Desktop/Giga/Ghana/{}.tif'.format(sent2_band))
out_meta = sent2_img.meta.copy()
sent2_data = sent2_img.read()

## Data Processing

In [3]:
# Get coordinates of schools and sort by y so we can grab filenames
coords = ghana_prob.get_coordinates()
ghana_prob['x'] = coords['x']
ghana_prob['y'] = coords['y']
ghana_prob = ghana_prob.sort_values(by='y', ascending=False)
uid_sorted = ghana_prob['UID'].to_list()

In [4]:
# Get locations of schools as arrays
xindex, yindex = np.where(data==1)
for x,y in zip(xindex, yindex):
    print('School at location X: {}, Y: {}'.format(x, y))

School at location X: 483, Y: 33547
School at location X: 925, Y: 35955
School at location X: 978, Y: 32928
School at location X: 1012, Y: 34346
School at location X: 1106, Y: 34128
School at location X: 1133, Y: 35680
School at location X: 1142, Y: 34377
School at location X: 1149, Y: 35675
School at location X: 1153, Y: 35664
School at location X: 1158, Y: 35657
School at location X: 1185, Y: 34506
School at location X: 1188, Y: 34963
School at location X: 1193, Y: 34965
School at location X: 1202, Y: 32360
School at location X: 1221, Y: 32887
School at location X: 1221, Y: 35319
School at location X: 1234, Y: 33023
School at location X: 1317, Y: 33044
School at location X: 1321, Y: 34587
School at location X: 1327, Y: 34611
School at location X: 1350, Y: 35888
School at location X: 1353, Y: 33449
School at location X: 1354, Y: 32497
School at location X: 1356, Y: 33448
School at location X: 1360, Y: 32919
School at location X: 1374, Y: 34268
School at location X: 1377, Y: 34272
Scho

In [None]:
# Generate new 512x512 images with school at center
xsize, ysize = 512, 512
for i in range(len(xindex)):
    filename_suffix = str(uid_sorted[i])
    window = Window(yindex[i]-256, xindex[i]-256, xsize, ysize)
    transform = sent2_img.window_transform(window)

    out_meta.update({
        'height': xsize,
        'width': ysize,
        'transform': transform,
        'dtype': 'uint16'})

    with rasterio.open('/Users/kelseydoerksen/Desktop/Giga/Ghana/{}_{}.tif'.format(sent2_band, filename_suffix), 'w', **out_meta) as dst:
        # Read the data from the window and write it to the output raster
        cropped_img = sent2_img.read(window=window)
        dst.write(cropped_img)