In [16]:
import glob
import os 
import rasterio as rio
import pandas as pd
import geopandas as gpd
import rioxarray as rxr
import xarray as xr 
from shapely import Point, Polygon
import matplotlib.pyplot as plt

In [2]:
PATH=os.path.join('..','data','landsat_tiles','')
CSV_PATH=os.path.join('..','data','wealth_index.csv')

In [3]:
# GLOB FILES
files=dict()
csv=pd.read_csv(CSV_PATH)
for year in csv.year.unique():
    if int(year) < 2016:
        continue
    files[year]=dict()
    for country in csv.country.unique():
        files[year][country]=[]
        country_cap=country.title()
        files[year][country]=glob.glob(os.path.join(PATH,str(country_cap)+"_"+str(year)+"*.tif"))

files

{2017: {'angola': [],
  'benin': ['../data/landsat_tiles/Benin_2017-0000017920-0000000000.tif'],
  'burkina_faso': [],
  'cameroon': [],
  'cote_d_ivoire': [],
  'democratic_republic_of_congo': [],
  'ethiopia': [],
  'ghana': [],
  'guinea': [],
  'kenya': [],
  'lesotho': [],
  'madagascar': [],
  'malawi': ['../data/landsat_tiles/Malawi_2017-0000026880-0000000000.tif',
   '../data/landsat_tiles/Malawi_2017-0000017920-0000000000.tif',
   '../data/landsat_tiles/Malawi_2017-0000000000-0000000000.tif'],
  'mali': [],
  'mozambique': [],
  'nigeria': [],
  'rwanda': [],
  'senegal': [],
  'sierra_leone': [],
  'tanzania': ['../data/landsat_tiles/Tanzania_2017-0000000000-0000035840.tif',
   '../data/landsat_tiles/Tanzania_2017-0000035840-0000035840.tif',
   '../data/landsat_tiles/Tanzania_2017-0000026880-0000035840.tif',
   '../data/landsat_tiles/Tanzania_2017-0000000000-0000017920.tif',
   '../data/landsat_tiles/Tanzania_2017-0000017920-0000035840.tif',
   '../data/landsat_tiles/Tanzania

In [4]:
# UPDATE POINT COORDINATES & CRS
                  
gdf = gpd.GeoDataFrame(csv, geometry=gpd.points_from_xy(csv.lon, csv.lon))
gdf.set_crs(epsg="4326", inplace=True)
gdf = gdf.to_crs('EPSG:3857')

gdf.head()

Unnamed: 0,country,year,month,day,cluster,lat,lon,households,wealthpooled,geometry
0,angola,2011,,,1,-12.350257,13.534922,36,2.312757,POINT (1506700.586 1520912.661)
1,angola,2011,,,2,-12.360865,13.551494,32,2.010293,POINT (1508545.372 1522810.211)
2,angola,2011,,,3,-12.613421,13.413085,36,0.877744,POINT (1493137.790 1506965.991)
3,angola,2011,,,4,-12.581454,13.397711,35,1.066994,POINT (1491426.344 1505206.609)
4,angola,2011,,,5,-12.578135,13.418748,37,1.750153,POINT (1493768.184 1507614.069)


In [13]:
def open_tif(file):
    return rxr.open_rasterio(file, masked=True)

def get_bb_from_point(point, margin=3000):
    minx=point.x-margin
    miny=point.y-margin
    maxx=point.x+margin
    maxy=point.y+margin
    return Polygon([
        Point(minx, miny), 
        Point(maxx, miny), 
        Point(maxx, maxy), 
        Point(minx, maxy), 
        Point(minx, miny)
    ])

def clip_geometry(point, tif, margin=3000):
    try :
        tile = tif.rio.clip_box(
                minx=point.x-margin,
                miny=point.y-margin,
                maxx=point.x+margin,
                maxy=point.y+margin
            )
    except rxr.exceptions.NoDataInBounds:
        return None
    return tile

def write_tile(tile, name, path=os.path.join('..','data','landsat_tiles','')):
    tile.rio.to_raster(path+name+'.tif')
    return 

def add_roi_coords(gdf, margin=3000):
    gdf['roi'] = gdf['geometry'].map( lambda point: get_bb_from_point(point) )
    return gdf

def write_gdf(gdf, name, path=PATH):
    gdf.to_file(name+".geojson", driver="GeoJSON")

In [10]:
gdf = add_roi_coords(gdf)
gdf.head()

Unnamed: 0,country,year,month,day,cluster,lat,lon,households,wealthpooled,geometry,roi
0,angola,2011,,,1,-12.350257,13.534922,36,2.312757,POINT (1506700.586 1520912.661),"POLYGON ((1503700.586 1517912.661, 1509700.586..."
1,angola,2011,,,2,-12.360865,13.551494,32,2.010293,POINT (1508545.372 1522810.211),"POLYGON ((1505545.372 1519810.211, 1511545.372..."
2,angola,2011,,,3,-12.613421,13.413085,36,0.877744,POINT (1493137.790 1506965.991),"POLYGON ((1490137.790 1503965.991, 1496137.790..."
3,angola,2011,,,4,-12.581454,13.397711,35,1.066994,POINT (1491426.344 1505206.609),"POLYGON ((1488426.344 1502206.609, 1494426.344..."
4,angola,2011,,,5,-12.578135,13.418748,37,1.750153,POINT (1493768.184 1507614.069),"POLYGON ((1490768.184 1504614.069, 1496768.184..."


In [None]:
# def plot_tile(tile, bands=[0,1,2]): # default bands: rgb
#     tile.plot.imshow()

 # cp -r landsat/*/*/*.tif landsat_tiles/


In [28]:
# LOOPING OVER OCCURENCES
for year in files:
    countries = files[year]
    for country in countries:
        occurences = gdf[gdf.year == year]
        occurences = occurences[occurences.country == country]
        for idx, row in occurences.iterrows():
            for file in countries[country]:
                tif = open_tif(file)
                tile = clip_geometry(row.geometry, tif)
                if tile is not None:
                    print(tile)
                    break
      
    
                # if tile is not None:
                #     print('ok')
                #     write_tile(
                #         tile=tile,
                #         name=str(row.geometry.x)+'_'+str(row.geometry.y)
                #     )
                #     if idx%25==0:
                #         plt.figure()
                #         tile.plot.imshow()
                #         plt.show()

<xarray.DataArray (band: 7, y: 201, x: 201)>
[282807 values with dtype=float64]
Coordinates:
  * band         (band) int64 1 2 3 4 5 6 7
  * x            (x) float64 1.264e+06 1.264e+06 1.264e+06 ... 1.27e+06 1.27e+06
  * y            (y) float64 1.278e+06 1.278e+06 ... 1.272e+06 1.272e+06
    spatial_ref  int64 0
Attributes:
    AREA_OR_POINT:  Area
    scale_factor:   1.0
    add_offset:     0.0
    long_name:      ('B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7')
<xarray.DataArray (band: 7, y: 201, x: 201)>
[282807 values with dtype=float64]
Coordinates:
  * band         (band) int64 1 2 3 4 5 6 7
  * x            (x) float64 1.283e+06 1.283e+06 ... 1.288e+06 1.289e+06
  * y            (y) float64 1.297e+06 1.297e+06 ... 1.291e+06 1.291e+06
    spatial_ref  int64 0
Attributes:
    AREA_OR_POINT:  Area
    scale_factor:   1.0
    add_offset:     0.0
    long_name:      ('B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7')
<xarray.DataArray (band: 7, y: 201, x: 201)>
[282807 values with dtype=float64]
