# Malawi Case Study

In [1]:
# Imports
import descarteslabs as dl
from datetime import datetime
import sys
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
from datetime import date
import shapefile
import numpy as np

### Reading survey data

In [2]:
# Load the survey responses (one row per household / garden / plot) into a
# dataframe and preview the first rows.
# NOTE(review): the preview shows an "Unnamed: 0" column, which suggests the
# CSV carries a saved index column -- confirm whether it should be kept.
survery_filename = "/home/ec2-user/crop-loss-EPAR/data/Malawi Survey Responses EPAR Radiant 2Sigma.csv"
LSMS_df = pd.read_csv(filepath_or_buffer=survery_filename)
LSMS_df.head()

Unnamed: 0,y3_hhid,gardenid,plotid,crop_loss,crop_loss_reason_rain_excess,crop_loss_reason_rain_little,crop_loss_reason_insects,crop_loss_reason_disease,crop_loss_reason_weeds,crop_loss_reason_hail,...,plot_twi,hh_gps_lat,hh_gps_long,annual_temp,annual_precip,ea_id,region,district,ta_code,reside
0,0001-002,RG01,R01,,,,,,,,...,12.0,-9.50446,33.2302,212.0,1367.0,10101215,North,Chitipa,1,RURAL
1,0001-002,RG02,R01,,,,,,,,...,12.0,-9.50446,33.2302,212.0,1367.0,10101215,North,Chitipa,1,RURAL
2,0001-002,RG03,R01,,,,,,,,...,12.0,-9.50446,33.2302,212.0,1367.0,10101215,North,Chitipa,1,RURAL
3,0001-002,,,,,,,,,,...,,,,,,10101215,North,Chitipa,1,RURAL
4,0003-001,RG01,R01,,,,,,,,...,13.0,-9.50446,33.2302,212.0,1374.0,10101215,North,Chitipa,1,RURAL


### Reading AOIs 

In [3]:
# Methods
def filter_duplicates(items):
    """Yield the elements of every sequence in ``items``, dropping consecutive repeats.

    Each inner sequence is scanned independently: an element is emitted only
    when it differs from the element immediately before it in the same
    sequence. Non-adjacent repeats are kept. Because the "previous" marker
    starts as ``None`` for every sequence, leading ``None`` elements are
    never emitted.

    Parameters
    ----------
    items : iterable of iterables
        The sequences to flatten and de-duplicate.

    Yields
    ------
    object
        Elements of the inner sequences, in order, without consecutive repeats.
    """
    for sequence in items:
        last_emitted = None
        for element in sequence:
            if element != last_emitted:
                last_emitted = element
                yield element

            
def search_scenes(geom, product, start_datetime, end_datetime):
    """Search the Descartes Labs catalog for scenes over an area of interest.

    Thin wrapper around ``dl.scenes.search``.

    Parameters
    ----------
    geom : dict
        Area of interest, passed through as the first argument of the search.
    product : str
        Product identifier, forwarded as ``products``.
    start_datetime, end_datetime
        Bounds of the acquisition window, forwarded unchanged.

    Returns
    -------
    tuple
        ``(scenes, ctx)`` exactly as returned by ``dl.scenes.search``.
    """
    query = dict(
        products=product,
        start_datetime=start_datetime,
        end_datetime=end_datetime,
    )
    scenes, ctx = dl.scenes.search(geom, **query)
    return scenes, ctx


In [4]:
# Read the plot shapefile into a dataframe with the three plot identifiers
# (household, garden, plot) and the polygon coordinates of each AOI.
# Data cleaning done outside of this notebook: duplicate entries are cleaned
# and missing data are removed.
aoi_filename = '/home/ec2-user/crop-loss-EPAR/data/Malawi_LSMS_Enumeration_Areas_100m_Example.shp'
shapes = shapefile.Reader(aoi_filename)
aois = shapes.shapeRecords()
col_names = ["y3_hhid", "gardenid", "plotid", "coordinates"]

# Build the frame once from a list of records instead of filling a
# pre-allocated frame row by row: `.at` is a scalar accessor and is not meant
# for whole-row assignment. Only the first ring of each shape is kept,
# wrapped in a list so it has the layout of a polygon "coordinates" member.
aoi_records = [
    [
        r.record[0],
        r.record[1],
        r.record[2],
        [r.shape.__geo_interface__['coordinates'][0]],
    ]
    for r in aois
]
# dtype=object keeps the identifiers exactly as read and lets the
# "coordinates" cells hold nested Python sequences.
aois_df = pd.DataFrame(aoi_records, columns=col_names, dtype=object)

In [5]:
# Template polygon geometry; its "coordinates" member starts out empty.
geom = dict(type="Polygon", coordinates=list())

In [6]:
# Bands sampled for every plot.
bands = ["blue", "green", "red", "nir", "swir1", "swir2"]

# Development cap on the number of survey rows processed; raise it to
# len(LSMS_df) to run over the full survey.
max_plots = 20
n_plots = min(max_plots, len(LSMS_df))

# Rows are collected in plain lists and turned into a dataframe once at the
# end; growing a dataframe inside the loop is quadratic and DataFrame.append
# no longer exists in pandas 2.x.
sample_rows = []
sample_index = []

for i_plot in range(n_plots):
    survey_row = LSMS_df.iloc[i_plot]

    # Match the survey row to its AOI polygon through the three identifiers.
    # Rows with a missing identifier (NaN) never match and are skipped.
    aoi = aois_df.loc[(aois_df["y3_hhid"] == survey_row["y3_hhid"])
                      & (aois_df["gardenid"] == survey_row["gardenid"])
                      & (aois_df["plotid"] == survey_row["plotid"])]
    if aoi.empty:
        continue

    # Drop consecutive duplicate vertices, then build a fresh geometry from
    # the template rather than mutating the shared `geom` dict.
    ring = tuple(filter_duplicates(aoi.iloc[0]["coordinates"]))
    plot_geom = dict(geom, coordinates=[ring])
    scenes, ctx = search_scenes(plot_geom, "landsat:LC08:PRE:LaSRC", "2015-12-01", "2016-04-30")
    if len(scenes) == 0:
        # No imagery in the window for this plot: nothing to sample.
        continue

    # `assign` returns a modified copy of the context, so the result must be
    # kept -- otherwise the requested 30 m resolution is never applied.
    ctx = ctx.assign(resolution=30)

    record = survey_row.to_dict()
    record["datetime"] = list(scenes.each.properties["acquired"])
    for band in bands:
        # A single-band raster comes back as (1, rows, cols); flatten each
        # scene to a pixel vector and stack the scenes as columns, giving a
        # (n_pixels, n_scenes) array. A new array is built for every band so
        # the bands never share one buffer.
        pixel_columns = [scene.ndarray(band, ctx).data[0].ravel() for scene in scenes]
        record[band] = np.stack(pixel_columns, axis=1).astype(np.float64)

    sample_rows.append(record)
    sample_index.append(survey_row.name)

# Survey columns first, then the new columns in alphabetical order, which is
# the layout shown in the notebook's saved preview of samples_df.
sample_columns = list(LSMS_df.columns) + sorted(["datetime"] + bands)
samples_df = pd.DataFrame(sample_rows, index=sample_index, columns=sample_columns)

# Remove the plot identifiers and household GPS coordinates before export.
samples_df = samples_df.drop(columns=["y3_hhid", "gardenid", "plotid", "hh_gps_lat", "hh_gps_long"])


In [7]:
# Preview the first five assembled samples.
samples_df.head(n=5)

Unnamed: 0,crop_loss,crop_loss_reason_rain_excess,crop_loss_reason_rain_little,crop_loss_reason_insects,crop_loss_reason_disease,crop_loss_reason_weeds,crop_loss_reason_hail,crop_loss_reason_floods,crop_loss_reason_frost,crop_loss_reason_animals,...,district,ta_code,reside,blue,datetime,green,nir,red,swir1,swir2
0,,,,,,,,,,,...,Chitipa,1,RURAL,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[2015-12-14T07:51:53.079426Z, 2015-12-30T07:51...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0...."
1,,,,,,,,,,,...,Chitipa,1,RURAL,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[2015-12-14T07:51:53.079426Z, 2015-12-30T07:51...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0...."
2,,,,,,,,,,,...,Chitipa,1,RURAL,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[2015-12-14T07:51:53.079426Z, 2015-12-30T07:51...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0...."
4,,,,,,,,,,,...,Chitipa,1,RURAL,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[2015-12-14T07:51:53.079426Z, 2015-12-30T07:51...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0...."
5,,,,,,,,,,,...,Chitipa,1,RURAL,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[2015-12-14T07:51:53.079426Z, 2015-12-30T07:51...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0....","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0...."


### Export data to csv 

In [8]:
# Write the assembled samples (index included) to the exports directory.
samples_df.to_csv(path_or_buf="/home/ec2-user/crop-loss-EPAR/exports/malawi_data.csv")