In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier

import os
from tqdm import tqdm

import rasterio
from rasterio.windows import Window
from pyproj import CRS

### Entrenamos un modelo de juguete con el dataset de la clase 11

El dataset es el que sale de SampleExtraction, que luego se usa en TrainVectorClassifier.

Link: https://storage.googleapis.com/gis2022-teledeteccion/clase11/extra/samples.sqlite

Con `ogr2ogr -f "CSV" samples.csv samples.sqlite` se convierte a CSV; el resultado es el archivo `samples.csv` que se lee en la celda siguiente.

In [None]:
# Load the training samples exported to CSV.
df = pd.read_csv('samples.csv')

# Band-ratio features. The exact same features must be rebuilt on the image
# at prediction time.
df['ratio_1'] = df['band_0']/df['band_5']
df['ratio_2'] = df['band_0']/df['band_4']

# A zero denominator yields inf (or NaN for 0/0). Map those to the same -99
# sentinel that the prediction pipeline uses, so training and inference see
# identical feature encodings and the classifier never receives inf.
ratio_cols = ['ratio_1', 'ratio_2']
df[ratio_cols] = df[ratio_cols].replace([np.inf, -np.inf], np.nan).fillna(-99)
df.head()

In [None]:
# Target: the `id` column of the samples table.
y = df['id']
# Predictors: the six band columns plus the two ratio columns.
X = df[['band_0','band_1','band_2','band_3','band_4','band_5', 'ratio_1', 'ratio_2']]

# `fit` returns the estimator itself, so construction and training can be
# chained; the trailing expression displays the fitted model.
clf = RandomForestClassifier(max_depth=5, random_state=0).fit(X, y)
clf

### Se lo aplicamos a la imagen

La imagen se encuentra en https://storage.googleapis.com/gis2022-teledeteccion/clase11/extra/input_merge.tif

`wget https://storage.googleapis.com/gis2022-teledeteccion/clase11/extra/input_merge.tif`

In [None]:
# Path of the input raster to classify (relative to the working directory).
tile = 'input_merge.tif'

#### Funciones auxiliares

In [None]:
def metadata_from_tile(in_raster):
    """Return ``(width, height, transform)`` of the raster at ``in_raster``.

    The dataset is opened only long enough to read these three attributes
    and is closed before returning.
    """
    with rasterio.open(in_raster) as dataset:
        raster_width = dataset.width
        raster_height = dataset.height
        raster_transform = dataset.transform
    return (raster_width, raster_height, raster_transform)

def sliding_windows(size, step_size, width, height, whole=False):
    """Slide a square window of +size+ pixels by moving it +step_size+ pixels.

    Parameters
    ----------
    size : int
        Side length of the window, in pixels.
    step_size : int
        Stride between consecutive windows, in pixels (same in both axes).
    width, height : int
        Dimensions of the raster being covered.
    whole : bool, default False
        If False, windows at the right/bottom edges are clipped to the
        raster, so they may be smaller than ``size``. If True, only windows
        that fit entirely inside the raster are produced.

    Yields
    ------
    (Window, (pos_i, pos_j))
        ``Window(col_off, row_off, win_width, win_height)`` together with
        the row/column position of the window in the grid of windows.
    """
    if whole:
        # +1 so that a window whose far edge lands exactly on the raster
        # edge (offset == dimension - size) is still produced.
        end_i = height - size + 1
        end_j = width - size + 1
    else:
        end_i, end_j = height, width

    for pos_i, i in enumerate(range(0, end_i, step_size)):
        # In clipped mode the last row of windows may be shorter than `size`.
        win_h = size if whole else min(size, height - i)
        for pos_j, j in enumerate(range(0, end_j, step_size)):
            win_w = size if whole else min(size, width - j)
            yield Window(j, i, win_w, win_h), (pos_i, pos_j)

In [None]:
# Raster dimensions and affine transform of the input tile.
width, height, transform = metadata_from_tile(tile)
# Lazy generator of non-overlapping 100x100 windows (stride equals size).
windows = sliding_windows(size=100, step_size=100, width=width, height=height)

In [None]:
# Display the raster dimensions and transform read from the tile.
width, height, transform

#### Ejemplo para una window cualquiera

In [None]:
# Build a fresh generator for the example: `windows` is single-use, so
# indexing `list(windows)` would exhaust it and make this cell fail with
# IndexError when re-run. Each element is a (Window, (pos_i, pos_j)) tuple.
window = list(sliding_windows(100, 100, width, height))[1400]

In [None]:
# Read that part of the image and build the same features that were created
# for the original model.
bands = ['band_0','band_1','band_2','band_3','band_4','band_5']
# Context manager so the dataset handle is closed once the pixels are read.
with rasterio.open(tile) as src:
    img = src.read(window=window[0])
# rasterio returns (bands, rows, cols) — see rasterio `read` docs.
r,m,n = img.shape
# One row per pixel (row-major order), one column per band.
img_df = pd.DataFrame(img.reshape(r,m*n)).T
img_df.columns = bands
img_df['ratio_1'] = img_df['band_0']/img_df['band_5']
img_df['ratio_2'] = img_df['band_0']/img_df['band_4']
# Sanitize AFTER computing the ratios: division by zero produces inf/NaN,
# which must become the -99 sentinel before reaching the classifier.
img_df = img_df.replace([np.inf, -np.inf], np.nan).fillna(-99)
img_df

In [None]:
# One prediction per pixel, in the same row-major order used to flatten `img`.
res = clf.predict(img_df).astype(np.float64)
# `img` had shape (r, m, n) and was flattened as (r, m*n), so predictions
# must be folded back as (m, n); (n, m) would scramble any non-square window.
# The leading axis is the single output band.
res = np.expand_dims(res.reshape(m,n), axis=0)
res.shape

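Pasemos este procedimiento a una función para que sea más cómodo. Recuerden aplicar dentro de la función exactamente las mismas combinaciones de bandas y transformaciones que se aplicaron sobre el dataset de entrada al entrenar el modelo; de lo contrario, las predicciones no serán válidas.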

In [None]:
def create_windowed_dataset(in_raster, window, bands_list):
    """Build the per-pixel feature table for one window of a raster.

    Parameters
    ----------
    in_raster : path of the raster, passed to ``rasterio.open``.
    window : window to read, passed to ``read(window=...)``.
    bands_list : column names for the raster bands, in band order. Must
        contain ``'band_0'``, ``'band_4'`` and ``'band_5'``, which are used
        to compute the ratio features.

    Returns
    -------
    pandas.DataFrame
        One row per pixel (row-major order) with the band columns plus
        ``ratio_1`` and ``ratio_2``; inf and NaN values are replaced by -99.
    """
    # Close the dataset as soon as the pixels are read: this function is
    # called once per window, so a leaked handle per call adds up quickly.
    with rasterio.open(in_raster) as src:
        img = src.read(window=window)
    r,m,n = img.shape
    img_df = pd.DataFrame(img.reshape(r,m*n)).T
    img_df.columns = bands_list
    img_df['ratio_1'] = img_df['band_0']/img_df['band_5']
    img_df['ratio_2'] = img_df['band_0']/img_df['band_4']
    # Division by zero yields inf/NaN; map both to the -99 sentinel.
    img_df = img_df.replace([np.inf, -np.inf], np.nan).fillna(-99)
    return img_df

## Aplicamos para toda el área del tile de entrada

In [None]:
width, height, transform = metadata_from_tile(tile)
bands = ['band_0','band_1','band_2','band_3','band_4','band_5']

# Georeference the output with the CRS of the input raster; fall back to
# EPSG:4326 only when the input carries no CRS.
with rasterio.open(tile) as src:
    out_crs = src.crs or CRS.from_epsg(4326)

# Increase the window size if more memory is available; it runs a bit faster.
# Windows on the right/bottom edges are smaller than the nominal size.
windows = sliding_windows(100, 100, width, height)
os.makedirs('./predictions/', exist_ok=True)
out_raster = f'./predictions/{tile}'
with rasterio.open(
    out_raster, 'w', driver='GTiff', count=1, width=width, height=height,
    dtype=np.float64, transform=transform, crs=out_crs, compress='lzw'
) as dst:
    for window, _ in tqdm(windows):
        img_df = create_windowed_dataset(tile, window, bands)
        # clf is the classifier trained earlier in the notebook.
        res = clf.predict(img_df).astype(np.float64)
        # Pixels were flattened row by row, so fold them back as
        # (band, rows, cols) = (1, height, width). Using (width, height)
        # corrupts every non-square edge window.
        res = res.reshape(1, window.height, window.width)
        dst.write(res, window=window)

In [None]:
from matplotlib import pyplot

# Read the single prediction band inside a context manager so the dataset
# handle is closed once the data is in memory.
with rasterio.open(out_raster) as src:
    prediction = src.read(1)
pyplot.imshow(prediction)
pyplot.show()