# FLAT input data prepping

This code prepares the final input file to the FLAT model. 

**Original code:** [Konstantinos Pegios](https://github.com/kopegios) <br />
**Conceptualization & Methodological review :** [Alexandros Korkovelos](https://github.com/akorkovelos) & [Konstantinos Pegios](https://github.com/kopegios)<br />
**Updates, Modifications:** [Alexandros Korkovelos](https://github.com/akorkovelos) & [Oluchi Monwe](https://github.com/oluchee)<br />
**Funding:** The World Bank (contract number: 7190531), [KTH](https://www.kth.se/en/itm/inst/energiteknik/forskning/desa/welcome-to-the-unit-of-energy-systems-analysis-kth-desa-1.197296)

In [311]:
#Import modules and libraries

import geopandas as gpd
import rasterio as rio
import pandas as pd
import fiona
import gdal
import osr
import ogr
import rasterio.mask
import time
import os
import ogr, gdal, osr, os
import numpy as np
import itertools
import re

from osgeo import gdal,ogr
import struct
import csv

import glob

from rasterio.warp import calculate_default_transform, reproject
from rasterio.enums import Resampling
from rasterstats import point_query, zonal_stats
from pyproj import Proj
from shapely.geometry import Point, Polygon

In [312]:
# Import data 

#path = r"N:\Agrodem\Downscaling\Output_Data\FAO_AgroMap_Crops"
path = r"C:\Benin\agrodem_preprocessing\Downscaling\Output_Data\Crop_Maps"
name_of_file = "Ben_Maize_2018_admin2.csv"

In [313]:
# Import csv as pandas dataframe
df = pd.read_csv(path + "\\" + name_of_file)
df.head(3)

Unnamed: 0.1,Unnamed: 0,country_code,country,admin2,geometry,Harvest_Area_By_District,iso3,prod_level,alloc_key,cell5m,x,y,harea_spam_ha,shareofsum,harv_area_ha,prod_tonnes,district_area_ha,Test,shareofdistrict
0,0,BEN,Benin,Banikoara,"POLYGON ((483285.0952511251 1262585.663534007,...",40793.503413,BEN,BN01001,9412189,4062988,2.375,11.625,568.4,0.000523,605.834085,748.784639,3.6e-05,False,16701680.0
1,1,BEN,Benin,Banikoara,"POLYGON ((483285.0952511251 1262585.663534007,...",40793.503413,BEN,BN01001,9412190,4062989,2.458333,11.625,801.5,0.000738,854.285747,1055.860113,3.6e-05,False,23551010.0
2,2,BEN,Benin,Banikoara,"POLYGON ((483285.0952511251 1262585.663534007,...",40793.503413,BEN,BN01001,9412191,4062990,2.541667,11.625,581.5,0.000535,619.796834,766.041991,3.6e-05,False,17086600.0


In [314]:
df.columns

Index(['Unnamed: 0', 'country_code', 'country', 'admin2', 'geometry',
       'Harvest_Area_By_District', 'iso3', 'prod_level', 'alloc_key', 'cell5m',
       'x', 'y', 'harea_spam_ha', 'shareofsum', 'harv_area_ha', 'prod_tonnes',
       'district_area_ha', 'Test', 'shareofdistrict'],
      dtype='object')

In [315]:
# Add geometry and convert to spatial dataframe in source CRS
df['geometry'] = list(zip(df['x'], df['y']))
df['geometry'] = df['geometry'].apply(Point)
df = gpd.GeoDataFrame(df, geometry='geometry', crs={'init': 'epsg:4326'})

# Reproject data in to Ordnance Survey GB coordinates
df = df.to_crs({'init': 'epsg:3395'})

  return _prepare_from_string(" ".join(pjargs))


In [316]:
#Create a new dataframe of the production in kg. Initial column unit is tonnes (Conversion: 1000kg = 1 tonne).
df['Production_kg'] = df['prod_tonnes'] *1000

#Determining yield 
df['yield'] =  df['Production_kg'] / df['harv_area_ha']   

#Calculating the area of each unit [ha]
df['area'] =  (df['geometry'].area)/10000

#Calculating the perimeter of each unit [km]
df["perimeter"] = (df["geometry"].length)/10000

In [317]:
df.rename(columns={'x': 'lon'}, inplace=True)
df.rename(columns={'y': 'lat'}, inplace=True)
df.rename(columns={'area': 'statearea_ha'}, inplace=True)
df.rename(columns={'country_code': 'c_code'}, inplace=True)

In [318]:
df.head()

Unnamed: 0.1,Unnamed: 0,c_code,country,admin2,geometry,Harvest_Area_By_District,iso3,prod_level,alloc_key,cell5m,...,shareofsum,harv_area_ha,prod_tonnes,district_area_ha,Test,shareofdistrict,Production_kg,yield,statearea_ha,perimeter
0,0,BEN,Benin,Banikoara,POINT (264383.791 1294455.762),40793.503413,BEN,BN01001,9412189,4062988,...,0.000523,605.834085,748.784639,3.6e-05,False,16701680.0,748784.6,1235.956605,0.0,0.0
1,1,BEN,Benin,Banikoara,POINT (273660.415 1294455.762),40793.503413,BEN,BN01001,9412190,4062989,...,0.000738,854.285747,1055.860113,3.6e-05,False,23551010.0,1055860.0,1235.956605,0.0,0.0
2,2,BEN,Benin,Banikoara,POINT (282937.039 1294455.762),40793.503413,BEN,BN01001,9412191,4062990,...,0.000535,619.796834,766.041991,3.6e-05,False,17086600.0,766042.0,1235.956605,0.0,0.0
3,3,BEN,Benin,Banikoara,POINT (255107.166 1285047.132),40793.503413,BEN,BN01001,9422188,4067307,...,0.0,0.0,0.0,3.6e-05,True,0.0,0.0,,0.0,0.0
4,4,BEN,Benin,Banikoara,POINT (264383.791 1285047.132),40793.503413,BEN,BN01001,9422189,4067308,...,0.000557,645.164271,797.395042,3.6e-05,False,17785930.0,797395.0,1235.956605,0.0,0.0


In [319]:
#Function to change the order of columns in the dataframe 
def change_column_order(df, col_name, index):
    cols = df.columns.tolist()
    cols.remove(col_name)
    cols.insert(index, col_name)
    return df[cols]

In [320]:
# Allocate order in dataframe columns
df = change_column_order(df, 'alloc_key', 1)
df = change_column_order(df, 'lon', 2)
df = change_column_order(df, 'lat', 3)
df = change_column_order(df, 'country', 4)
df = change_column_order(df, 'c_code', 5)
df = change_column_order(df, 'statearea_ha', 6)
df = change_column_order(df, 'district_area_ha', 7)
df = change_column_order(df, 'harv_area_ha', 8)

In [321]:
df.head()

Unnamed: 0.1,Unnamed: 0,alloc_key,lon,lat,country,c_code,statearea_ha,district_area_ha,harv_area_ha,admin2,...,prod_level,cell5m,harea_spam_ha,shareofsum,prod_tonnes,Test,shareofdistrict,Production_kg,yield,perimeter
0,0,9412189,2.375,11.625,Benin,BEN,0.0,3.6e-05,605.834085,Banikoara,...,BN01001,4062988,568.4,0.000523,748.784639,False,16701680.0,748784.6,1235.956605,0.0
1,1,9412190,2.458333,11.625,Benin,BEN,0.0,3.6e-05,854.285747,Banikoara,...,BN01001,4062989,801.5,0.000738,1055.860113,False,23551010.0,1055860.0,1235.956605,0.0
2,2,9412191,2.541667,11.625,Benin,BEN,0.0,3.6e-05,619.796834,Banikoara,...,BN01001,4062990,581.5,0.000535,766.041991,False,17086600.0,766042.0,1235.956605,0.0
3,3,9422188,2.291667,11.541667,Benin,BEN,0.0,3.6e-05,0.0,Banikoara,...,BN01001,4067307,0.0,0.0,0.0,True,0.0,0.0,,0.0
4,4,9422189,2.375,11.541667,Benin,BEN,0.0,3.6e-05,645.164271,Banikoara,...,BN01001,4067308,605.3,0.000557,797.395042,False,17785930.0,797395.0,1235.956605,0.0


In [322]:
df.head()

Unnamed: 0.1,Unnamed: 0,alloc_key,lon,lat,country,c_code,statearea_ha,district_area_ha,harv_area_ha,admin2,...,prod_level,cell5m,harea_spam_ha,shareofsum,prod_tonnes,Test,shareofdistrict,Production_kg,yield,perimeter
0,0,9412189,2.375,11.625,Benin,BEN,0.0,3.6e-05,605.834085,Banikoara,...,BN01001,4062988,568.4,0.000523,748.784639,False,16701680.0,748784.6,1235.956605,0.0
1,1,9412190,2.458333,11.625,Benin,BEN,0.0,3.6e-05,854.285747,Banikoara,...,BN01001,4062989,801.5,0.000738,1055.860113,False,23551010.0,1055860.0,1235.956605,0.0
2,2,9412191,2.541667,11.625,Benin,BEN,0.0,3.6e-05,619.796834,Banikoara,...,BN01001,4062990,581.5,0.000535,766.041991,False,17086600.0,766042.0,1235.956605,0.0
3,3,9422188,2.291667,11.541667,Benin,BEN,0.0,3.6e-05,0.0,Banikoara,...,BN01001,4067307,0.0,0.0,0.0,True,0.0,0.0,,0.0
4,4,9422189,2.375,11.541667,Benin,BEN,0.0,3.6e-05,645.164271,Banikoara,...,BN01001,4067308,605.3,0.000557,797.395042,False,17785930.0,797395.0,1235.956605,0.0


<div class="alert alert-block alert-warning">
<b>Note:</b> The geodataframe, contains crop points with needed attributes including a geometry column. It needs to be exported as shapefile. However, when this is done with geopandas, the column geometry (Point(x,y)) is not transfered in the attribute table. This is however needed for the extraction process below, otherwise "None" values are returned. In order to overcome this issue, the csv is imported into Qgis and then exported as feature layer in the same folder. This solves the issue. 
</div>

In [323]:
# Define output path
# Overwriting the csv file
#path = r"N:\Agrodem\Downscaling\Output_Data\FAO_AgroMap_Crops"
path = r"C:\Benin\agrodem_preprocessing\Downscaling\Output_Data"
shpname = "Ben_Maize_Harv_2018_admin2_10km_new.csv"

#MAIZE
df.to_csv(os.path.join(path,"{c}".format(c=shpname)), index=False)
df.to_file('Ben_Maize_Harv_2018_admin2_10km_new.shp',driver = 'ESRI Shapefile')
# Define output path
#path = r"N:\Agrodem\Downscaling\Output_Data\Crop_Maps"
#path = r"C:\Benin\agrodem_preprocessing\Downscaling\Output_Data\Crop_Maps"

#Maize


#### >>> Please open Qgis and act as suggested above<<<

In [324]:
y = gpd.read_file(r'C:\Github\agrodem_new\agrodem_preprocessing\Downscaling\Downscaling_prepping\Ben_Maize_Harv_2018_admin2_10km_new.shp')

In [325]:
y.head()

Unnamed: 0,Unnamed_ 0,alloc_key,lon,lat,country,c_code,statearea_,district_a,harv_area_,admin2,...,cell5m,harea_spam,shareofsum,prod_tonne,Test,shareofdis,Production,yield,perimeter,geometry
0,0,9412189,2.375,11.625,Benin,BEN,0.0,3.6e-05,605.834085,Banikoara,...,4062988,568.4,0.000523,748.784639,0,16701680.0,748784.6,1235.956605,0.0,POINT (264383.791 1294455.762)
1,1,9412190,2.458333,11.625,Benin,BEN,0.0,3.6e-05,854.285747,Banikoara,...,4062989,801.5,0.000738,1055.860113,0,23551010.0,1055860.0,1235.956605,0.0,POINT (273660.415 1294455.762)
2,2,9412191,2.541667,11.625,Benin,BEN,0.0,3.6e-05,619.796834,Banikoara,...,4062990,581.5,0.000535,766.041991,0,17086600.0,766042.0,1235.956605,0.0,POINT (282937.039 1294455.762)
3,3,9422188,2.291667,11.541667,Benin,BEN,0.0,3.6e-05,0.0,Banikoara,...,4067307,0.0,0.0,0.0,1,0.0,0.0,,0.0,POINT (255107.166 1285047.132)
4,4,9422189,2.375,11.541667,Benin,BEN,0.0,3.6e-05,645.164271,Banikoara,...,4067308,605.3,0.000557,797.395042,0,17785930.0,797395.0,1235.956605,0.0,POINT (264383.791 1285047.132)


## Extracting raster input to csv 

In [326]:
#raster_path = r"N:\Agrodem\Downscaling\Input_Data\rasters"
raster_path = r"C:\Oluchi\Downscaling\Input_Data\raster_reprojected"
raster_files =[]
#csvoutpath = r"N:\Agrodem\Downscaling\Output_Data\FLAT_input\1km_Rice"
csvoutpath = r"C:\Oluchi\Downscaling\Output_Data\FLAT_Input\10km_Maize"

print ("Reading independent variables...")

for i in os.listdir(raster_path):
    if i.endswith('.tif'):
        raster_files.append(i)  

#shp_filename = r"N:\Agrodem\Downscaling\Output_Data\FAO_AgroMap_Crops\shapefiles\Moz_Rice_Harv_2000_admin2_1km.shp"
shp_filename = r'C:\Github\agrodem_new\agrodem_preprocessing\Downscaling\Downscaling_prepping\Ben_Maize_Harv_2018_admin2_10km_new.shp'
print ("Extracting raster values to points...")

for i in raster_files:
    print("Extracting " + i + " values...")
    src_filename = raster_path + "\\" + i 
    li_values = list()

    src_ds=gdal.Open(src_filename) 
    gt=src_ds.GetGeoTransform()
    rb=src_ds.GetRasterBand(1)
    ds=ogr.Open(shp_filename)    
    lyr=ds.GetLayer()
    
    
    for feat in lyr:
        geom = feat.GetGeometryRef()
        feat_id = feat.GetField('alloc_key')
        mx,my=geom.GetX(), geom.GetY()  #coord in map units

        #Convert from map to pixel coordinates.
        #Only works for geotransforms with no rotation.
        px = int((mx - gt[0]) / gt[1]) #x pixel
        py = int((my - gt[3]) / gt[5]) #y pixel

        intval=rb.ReadAsArray(px,py,1,1)
        li_values.append([feat_id, intval[0]])
        
    print ("Writing " + i + " values to csv...")
    
    with open(csvoutpath + "\\" + i.split('.')[0] + '.csv', 'w') as csvfile:
        wr = csv.writer(csvfile)
        wr.writerows(li_values)

Reading independent variables...
Extracting raster values to points...
Extracting BDRICM.tif values...
Writing BDRICM.tif values to csv...
Extracting BLD.tif values...
Writing BLD.tif values to csv...
Extracting CLYPPT.tif values...
Writing CLYPPT.tif values to csv...
Extracting DRAINFAO.tif values...
Writing DRAINFAO.tif values to csv...
Extracting GFSAD-landcover.tif values...
Writing GFSAD-landcover.tif values to csv...
Extracting landcover.tif values...
Writing landcover.tif values to csv...
Extracting MODIS-Band2 (1).tif values...
Writing MODIS-Band2 (1).tif values to csv...
Extracting modis-evi.tif values...
Writing modis-evi.tif values to csv...
Extracting modis-ndvi.tif values...
Writing modis-ndvi.tif values to csv...
Extracting ORCDRC.tif values...
Writing ORCDRC.tif values to csv...
Extracting PHIHOX.tif values...
Writing PHIHOX.tif values to csv...
Extracting prec.tif values...
Writing prec.tif values to csv...
Extracting srad.tif values...
Writing srad.tif values to csv...

## Merge csv files with crop

In [327]:
#Import data 

#path = r"N:\Agrodem\Downscaling\Output_Data\FAO_AgroMap_Crops"
path = r"C:\Benin\agrodem_preprocessing\Downscaling\Output_Data"
name_of_file = "Ben_Maize_Harv_2018_admin2_10km_new.csv"

flat_input = pd.read_csv(path + "\\" + name_of_file)

In [328]:
#csvoutpath = r"N:\Agrodem\Downscaling\Output_Data\FLAT_input\1km_Rice"
csvoutpath = r"C:\Oluchi\Downscaling\Output_Data\FLAT_Input\10km_Maize"
csv_files = []

print ("Reading csv files...")

for i in os.listdir(csvoutpath):
    if i.endswith('.csv'):
        csv_files.append(i)  

for i in csv_files:
    print('Reading...'+ i)    
    df_csv = pd.read_csv(csvoutpath + "//" + i, index_col=None, header=None)
    df_csv.iloc[:,1] = df_csv.iloc[:,1].astype(str)
    df_csv.iloc[:,1] = df_csv.iloc[:,1].str.replace('[','')
    df_csv.iloc[:,1] = df_csv.iloc[:,1].str.replace(']','')
    columnName = i.split('.')[0]
    print("Merging..." + columnName)
    flat_input[columnName] = df_csv.iloc[:,1]

Reading csv files...
Reading...BDRICM.csv
Merging...BDRICM
Reading...BLD.csv
Merging...BLD
Reading...CLYPPT.csv
Merging...CLYPPT
Reading...DRAINFAO.csv
Merging...DRAINFAO
Reading...GFSAD-landcover.csv
Merging...GFSAD-landcover
Reading...landcover.csv
Merging...landcover
Reading...MODIS-Band2 (1).csv
Merging...MODIS-Band2 (1)
Reading...MODIS-Band2.csv
Merging...MODIS-Band2
Reading...modis-evi.csv
Merging...modis-evi
Reading...modis-ndvi.csv
Merging...modis-ndvi
Reading...ORCDRC.csv
Merging...ORCDRC
Reading...PHIHOX.csv
Merging...PHIHOX
Reading...prec.csv
Merging...prec
Reading...srad.csv
Merging...srad
Reading...SRTM-elevation.csv
Merging...SRTM-elevation
Reading...tavg.csv
Merging...tavg
Reading...TEXMHT.csv
Merging...TEXMHT
Reading...wind.csv
Merging...wind


## Cleaning and normalizing dataframe

In [329]:

flat_input.head(10)

Unnamed: 0.1,Unnamed: 0,alloc_key,lon,lat,country,c_code,statearea_ha,district_area_ha,harv_area_ha,admin2,...,modis-evi,modis-ndvi,ORCDRC,PHIHOX,prec,srad,SRTM-elevation,tavg,TEXMHT,wind
0,0,9412189,2.375,11.625,Benin,BEN,0.0,3.6e-05,605.834085,Banikoara,...,1799.0,2839.0,9.0,66.0,71.666664,19468.834,274,28.008337,7.0,2.9
1,1,9412190,2.458333,11.625,Benin,BEN,0.0,3.6e-05,854.285747,Banikoara,...,2177.0,3512.0,7.0,65.0,72.0,19466.334,301,27.950003,9.0,2.8
2,2,9412191,2.541667,11.625,Benin,BEN,0.0,3.6e-05,619.796834,Banikoara,...,2140.0,3783.0,11.0,64.0,71.833336,19471.084,311,28.083336,9.0,3.1
3,3,9422188,2.291667,11.541667,Benin,BEN,0.0,3.6e-05,0.0,Banikoara,...,2412.0,4778.0,10.0,65.0,73.083336,19440.5,267,27.900002,7.0,3.0
4,4,9422189,2.375,11.541667,Benin,BEN,0.0,3.6e-05,645.164271,Banikoara,...,2311.0,3777.0,8.0,65.0,73.333336,19390.166,316,27.966667,9.0,3.0
5,5,9422190,2.458333,11.541667,Benin,BEN,0.0,3.6e-05,862.173102,Banikoara,...,2416.0,4052.0,8.0,66.0,73.583336,19364.75,294,28.033335,9.0,2.8
6,6,9422191,2.541667,11.541667,Benin,BEN,0.0,3.6e-05,948.187899,Banikoara,...,1788.0,2966.0,8.0,64.0,73.166664,19296.584,286,28.133331,9.0,2.8
7,7,9422192,2.625,11.541667,Benin,BEN,0.0,3.6e-05,907.258925,Banikoara,...,1900.0,3016.0,8.0,64.0,73.333336,19269.916,275,28.025002,9.0,2.8
8,8,9432188,2.291667,11.458333,Benin,BEN,0.0,3.6e-05,28.991357,Banikoara,...,2535.0,4171.0,9.0,64.0,74.833336,19331.666,274,27.891665,9.0,3.0
9,9,9432189,2.375,11.458333,Benin,BEN,0.0,3.6e-05,890.205186,Banikoara,...,2151.0,3890.0,7.0,64.0,74.75,19284.584,288,27.966667,6.0,2.9


In [330]:
#Show columns
flat_input.columns

Index(['Unnamed: 0', 'alloc_key', 'lon', 'lat', 'country', 'c_code',
       'statearea_ha', 'district_area_ha', 'harv_area_ha', 'admin2',
       'geometry', 'Harvest_Area_By_District', 'iso3', 'prod_level', 'cell5m',
       'harea_spam_ha', 'shareofsum', 'prod_tonnes', 'Test', 'shareofdistrict',
       'Production_kg', 'yield', 'perimeter', 'BDRICM', 'BLD', 'CLYPPT',
       'DRAINFAO', 'GFSAD-landcover', 'landcover', 'MODIS-Band2 (1)',
       'MODIS-Band2', 'modis-evi', 'modis-ndvi', 'ORCDRC', 'PHIHOX', 'prec',
       'srad', 'SRTM-elevation', 'tavg', 'TEXMHT', 'wind'],
      dtype='object')

In [331]:
#Delete geometry column
flat_input = flat_input.drop(["geometry","Test"], axis=1)

#### Convert values in newly added columns into float 

In [332]:
flat_input["harv_area_ha"] = flat_input["harv_area_ha"].astype(float)
flat_input["BDRICM"] = flat_input["BDRICM"].astype(float)
flat_input["BLD"] = flat_input["BLD"].astype(float)
flat_input["CLYPPT"] = flat_input["CLYPPT"].astype(float)
flat_input["DRAINFAO"] = flat_input["DRAINFAO"].astype(float)
flat_input["GFSAD-landcover"] = flat_input["GFSAD-landcover"].astype(float)
flat_input["landcover"] = flat_input["landcover"].astype(float)
flat_input["MODIS-Band2 (1)"] = flat_input["MODIS-Band2 (1)"].astype(float)
flat_input["modis-evi"] = flat_input["modis-evi"].astype(float)
flat_input["modis-ndvi"] = flat_input["modis-ndvi"].astype(float)
flat_input["ORCDRC"] = flat_input["ORCDRC"].astype(float)
flat_input["PHIHOX"] = flat_input["PHIHOX"].astype(float)
flat_input["prec"] = flat_input["prec"].astype(float)
flat_input["srad"] = flat_input["srad"].astype(float)
flat_input["SRTM-elevation"] = flat_input["SRTM-elevation"].astype(float)
flat_input["tavg"] = flat_input["tavg"].astype(float)
flat_input["TEXMHT"] = flat_input["TEXMHT"].astype(float)
flat_input["wind"] = flat_input["wind"].astype(float)

In [333]:
flat_input.dtypes

Unnamed: 0                    int64
alloc_key                     int64
lon                         float64
lat                         float64
country                      object
c_code                       object
statearea_ha                float64
district_area_ha            float64
harv_area_ha                float64
admin2                       object
Harvest_Area_By_District    float64
iso3                         object
prod_level                   object
cell5m                        int64
harea_spam_ha               float64
shareofsum                  float64
prod_tonnes                 float64
shareofdistrict             float64
Production_kg               float64
yield                       float64
perimeter                   float64
BDRICM                      float64
BLD                         float64
CLYPPT                      float64
DRAINFAO                    float64
GFSAD-landcover             float64
landcover                   float64
MODIS-Band2 (1)             

#### Normalize EVI and NDVI 
See [reference](https://vip.arizona.edu/documents/MODIS/MODIS_VI_UsersGuide_June_2015_C6.pdf) page 9

In [334]:
flat_input['modis-evi'] = flat_input['modis-evi']*0.0001
flat_input['modis-ndvi'] = flat_input['modis-ndvi']*0.0001

In [335]:
flat_input.head()

Unnamed: 0.1,Unnamed: 0,alloc_key,lon,lat,country,c_code,statearea_ha,district_area_ha,harv_area_ha,admin2,...,modis-evi,modis-ndvi,ORCDRC,PHIHOX,prec,srad,SRTM-elevation,tavg,TEXMHT,wind
0,0,9412189,2.375,11.625,Benin,BEN,0.0,3.6e-05,605.834085,Banikoara,...,0.1799,0.2839,9.0,66.0,71.666664,19468.834,274.0,28.008337,7.0,2.9
1,1,9412190,2.458333,11.625,Benin,BEN,0.0,3.6e-05,854.285747,Banikoara,...,0.2177,0.3512,7.0,65.0,72.0,19466.334,301.0,27.950003,9.0,2.8
2,2,9412191,2.541667,11.625,Benin,BEN,0.0,3.6e-05,619.796834,Banikoara,...,0.214,0.3783,11.0,64.0,71.833336,19471.084,311.0,28.083336,9.0,3.1
3,3,9422188,2.291667,11.541667,Benin,BEN,0.0,3.6e-05,0.0,Banikoara,...,0.2412,0.4778,10.0,65.0,73.083336,19440.5,267.0,27.900002,7.0,3.0
4,4,9422189,2.375,11.541667,Benin,BEN,0.0,3.6e-05,645.164271,Banikoara,...,0.2311,0.3777,8.0,65.0,73.333336,19390.166,316.0,27.966667,9.0,3.0


#### Drop  areas that are indicated as non-cropland in Global Food Security-support Analysis Data at 30m(GFSAD30) [here](https://developers.google.com/earth-engine/datasets/catalog/USGS_GFSAD1000_V0#bands)

In [336]:
flat_input.drop(flat_input[flat_input["GFSAD-landcover"] == 0].index, inplace=True)
flat_input["GFSAD-landcover"]

0       8.0
1       8.0
2       8.0
3       8.0
4       8.0
       ... 
1178    6.0
1179    6.0
1180    6.0
1181    6.0
1182    6.0
Name: GFSAD-landcover, Length: 517, dtype: float64

#### Fixing out-of-range values

In [337]:
# remove blank spaces in state names    
flat_input["country"].replace('\s+', '_',regex=True,inplace=True)
flat_input["country"].replace("'", '_',regex=True,inplace=True)

In [338]:
# Re-indexing allocation keys to avoid duplicates
flat_input = flat_input.assign(alloc_key=np.arange(len(flat_input))).reset_index(drop=True)

In [339]:
# Turning NaN rows to 0
flat_input.fillna(0,inplace=True)

In [340]:
# Turn all negative values to 0
pred_columns = ['tavg', 'srad', 'prec', 'wind', 'PHIHOX',
                'BDRICM', 'BLD', 'CLYPPT', 'TEXMHT', 'ORCDRC',
                'DRAINFAO', 'MODIS-Band2 (1)', 'SRTM-elevation']

for col in pred_columns:
    mvalue = flat_input[col].mean()
    print (mvalue)
    flat_input[col][flat_input[col]<0] = 0

27.305867114119923
17629.828321083172
86.23613807350097
2.151644100580271
61.963249516441
141.53191489361703
1356.7117988394584
18.14119922630561
8.088974854932301
12.938104448742747
3.7214700193423598
3413.6754096399613
248.45261121856868


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [341]:
for col in pred_columns:
    mvalue = flat_input[col].mean()
    print (mvalue)

27.305867114119923
17629.828321083172
86.23613807350097
2.151644100580271
61.963249516441
141.53191489361703
1356.7117988394584
18.14119922630561
8.088974854932301
12.938104448742747
3.7214700193423598
3413.6754096399613
248.45261121856868


In [342]:
flat_input.columns

Index(['Unnamed: 0', 'alloc_key', 'lon', 'lat', 'country', 'c_code',
       'statearea_ha', 'district_area_ha', 'harv_area_ha', 'admin2',
       'Harvest_Area_By_District', 'iso3', 'prod_level', 'cell5m',
       'harea_spam_ha', 'shareofsum', 'prod_tonnes', 'shareofdistrict',
       'Production_kg', 'yield', 'perimeter', 'BDRICM', 'BLD', 'CLYPPT',
       'DRAINFAO', 'GFSAD-landcover', 'landcover', 'MODIS-Band2 (1)',
       'MODIS-Band2', 'modis-evi', 'modis-ndvi', 'ORCDRC', 'PHIHOX', 'prec',
       'srad', 'SRTM-elevation', 'tavg', 'TEXMHT', 'wind'],
      dtype='object')

In [343]:
flat_input = change_column_order(flat_input, 'alloc_key', 0)
flat_input = change_column_order(flat_input, 'admin2', 1)
flat_input = change_column_order(flat_input, 'lon', 2)
flat_input = change_column_order(flat_input, 'lat', 3)
flat_input = change_column_order(flat_input, 'statearea_ha', 4)
flat_input = change_column_order(flat_input, 'harv_area_ha', 5)
flat_input = change_column_order(flat_input, 'country', 6)
flat_input = change_column_order(flat_input, 'c_code', 7)

flat_input = change_column_order(flat_input, 'district_area_ha',8 )


flat_input = change_column_order(flat_input, 'prod_tonnes', 12)

flat_input = change_column_order(flat_input, 'tavg', 13)
flat_input = change_column_order(flat_input, 'srad', 14)
flat_input = change_column_order(flat_input, 'prec', 15)
flat_input = change_column_order(flat_input, 'wind', 16)

flat_input = change_column_order(flat_input, 'PHIHOX', 17)
flat_input = change_column_order(flat_input, 'BDRICM', 18)
flat_input = change_column_order(flat_input, 'BLD', 19)
flat_input = change_column_order(flat_input, 'CLYPPT', 20)
flat_input = change_column_order(flat_input, 'TEXMHT', 21)
flat_input = change_column_order(flat_input, 'ORCDRC', 22)
flat_input = change_column_order(flat_input, 'DRAINFAO', 23)

flat_input = change_column_order(flat_input, 'modis-evi', 24)
flat_input = change_column_order(flat_input, 'modis-ndvi', 25)

flat_input = change_column_order(flat_input, 'MODIS-Band2 (1)', 26)
flat_input = change_column_order(flat_input, 'SRTM-elevation', 27)
flat_input = change_column_order(flat_input, 'landcover', 28)
flat_input = change_column_order(flat_input, 'GFSAD-landcover', 28)

In [344]:
flat_input.head()

Unnamed: 0.1,alloc_key,admin2,lon,lat,statearea_ha,harv_area_ha,country,c_code,district_area_ha,Unnamed: 0,...,landcover,prod_level,cell5m,harea_spam_ha,shareofsum,shareofdistrict,Production_kg,yield,perimeter,MODIS-Band2
0,0,Banikoara,2.375,11.625,0.0,605.834085,Benin,BEN,3.6e-05,0,...,9.0,BN01001,4062988,568.4,0.000523,16701680.0,748784.6,1235.956605,0.0,3102.28571429
1,1,Banikoara,2.458333,11.625,0.0,854.285747,Benin,BEN,3.6e-05,1,...,14.0,BN01001,4062989,801.5,0.000738,23551010.0,1055860.0,1235.956605,0.0,3361.16666667
2,2,Banikoara,2.541667,11.625,0.0,619.796834,Benin,BEN,3.6e-05,2,...,9.0,BN01001,4062990,581.5,0.000535,17086600.0,766042.0,1235.956605,0.0,3025.16666667
3,3,Banikoara,2.291667,11.541667,0.0,0.0,Benin,BEN,3.6e-05,3,...,9.0,BN01001,4067307,0.0,0.0,0.0,0.0,0.0,0.0,2864.66666667
4,4,Banikoara,2.375,11.541667,0.0,645.164271,Benin,BEN,3.6e-05,4,...,9.0,BN01001,4067308,605.3,0.000557,17785930.0,797395.0,1235.956605,0.0,2916.72727273


In [345]:
flat_input.columns

Index(['alloc_key', 'admin2', 'lon', 'lat', 'statearea_ha', 'harv_area_ha',
       'country', 'c_code', 'district_area_ha', 'Unnamed: 0',
       'Harvest_Area_By_District', 'iso3', 'prod_tonnes', 'tavg', 'srad',
       'prec', 'wind', 'PHIHOX', 'BDRICM', 'BLD', 'CLYPPT', 'TEXMHT', 'ORCDRC',
       'DRAINFAO', 'modis-evi', 'modis-ndvi', 'MODIS-Band2 (1)',
       'SRTM-elevation', 'GFSAD-landcover', 'landcover', 'prod_level',
       'cell5m', 'harea_spam_ha', 'shareofsum', 'shareofdistrict',
       'Production_kg', 'yield', 'perimeter', 'MODIS-Band2'],
      dtype='object')

#### Final column fixing

In [346]:
# give crop name
crop_modelled ="Maize"

# Dropping columns
flat_input = flat_input.drop(["Unnamed: 0"], axis=1)
flat_input = flat_input.drop(["prod_level"], axis=1)
flat_input = flat_input.drop(["iso3"], axis=1)
flat_input = flat_input.drop(["harea_spam_ha"], axis=1)
flat_input = flat_input.drop(["c_code"], axis=1)
flat_input = flat_input.drop(["landcover"], axis=1)
flat_input = flat_input.drop(["shareofsum"], axis=1)  
flat_input = flat_input.drop(["district_area_ha"], axis=1)  
flat_input = flat_input.drop(["shareofdistrict"], axis=1)
flat_input = flat_input.drop(["prod_tonnes"],axis = 1)
flat_input = flat_input.drop(["country"],axis = 1)
flat_input = flat_input.drop(["Harvest_Area_By_District"],axis = 1)
flat_input = flat_input.drop(["cell5m"],axis = 1)
flat_input = flat_input.drop(["Production_kg"],axis = 1)
flat_input = flat_input.drop(["yield"],axis = 1)
flat_input = flat_input.drop(["perimeter"],axis = 1)
flat_input = flat_input.drop(["MODIS-Band2"],axis = 1)

#Remaning columns
flat_input.rename(columns={'statearea_ha': 'statearea'}, inplace=True)
flat_input.rename(columns={'harv_area_ha': 'maize'}, inplace=True)
flat_input.rename(columns={'admin_2': 'NAME'}, inplace=True)

In [347]:
flat_input

Unnamed: 0,alloc_key,admin2,lon,lat,statearea,maize,tavg,srad,prec,wind,...,BLD,CLYPPT,TEXMHT,ORCDRC,DRAINFAO,modis-evi,modis-ndvi,MODIS-Band2 (1),SRTM-elevation,GFSAD-landcover
0,0,Banikoara,2.375000,11.625000,0.0,605.834085,28.008337,19468.834,71.666664,2.9,...,1520.0,22.0,7.0,9.0,3.0,0.1799,0.2839,3102.285714,274.0,8.0
1,1,Banikoara,2.458333,11.625000,0.0,854.285747,27.950003,19466.334,72.000000,2.8,...,1440.0,16.0,9.0,7.0,4.0,0.2177,0.3512,3361.166667,301.0,8.0
2,2,Banikoara,2.541667,11.625000,0.0,619.796834,28.083336,19471.084,71.833336,3.1,...,1430.0,17.0,9.0,11.0,5.0,0.2140,0.3783,3025.166667,311.0,8.0
3,3,Banikoara,2.291667,11.541667,0.0,0.000000,27.900002,19440.500,73.083336,3.0,...,1400.0,20.0,7.0,10.0,3.0,0.2412,0.4778,2943.500000,267.0,8.0
4,4,Banikoara,2.375000,11.541667,0.0,645.164271,27.966667,19390.166,73.333336,3.0,...,1500.0,19.0,9.0,8.0,5.0,0.2311,0.3777,3050.400000,316.0,8.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,512,Zogbodome,2.041667,6.958333,0.0,1173.936771,27.175001,16438.750,81.583336,1.6,...,1380.0,31.0,6.0,24.0,3.0,0.4363,0.6668,4164.045455,55.0,6.0
513,513,Zogbodome,2.125000,6.958333,0.0,0.000000,26.999998,16449.750,82.916664,1.5,...,1470.0,40.0,4.0,29.0,5.0,0.4245,0.6853,4127.800000,67.0,6.0
514,514,Zogbodome,2.208333,6.958333,0.0,130.674277,27.275002,16422.334,83.500000,1.5,...,1440.0,49.0,1.0,26.0,4.0,0.4340,0.6863,4125.818182,42.0,6.0
515,515,Zogbodome,2.291667,6.958333,0.0,0.000000,27.424997,16209.833,85.666664,1.3,...,1490.0,33.0,6.0,28.0,3.0,0.4467,0.5948,4194.375000,32.0,6.0


In [348]:
flat_input.columns

Index(['alloc_key', 'admin2', 'lon', 'lat', 'statearea', 'maize', 'tavg',
       'srad', 'prec', 'wind', 'PHIHOX', 'BDRICM', 'BLD', 'CLYPPT', 'TEXMHT',
       'ORCDRC', 'DRAINFAO', 'modis-evi', 'modis-ndvi', 'MODIS-Band2 (1)',
       'SRTM-elevation', 'GFSAD-landcover'],
      dtype='object')

In [349]:
# This part prints full results

#path = r"N:\Agrodem\Downscaling\Output_Data\FLAT_input"
path =r"C:\Oluchi\Downscaling\Output_Data\FLAT_Input"
name_of_flat_input_file = "flat_input_Maize_10km"

flat_input.to_csv(os.path.join(path,"{c}.csv".format(c=name_of_flat_input_file)), index=False)