# FLAT input data prepping

This code prepares the final input file to the FLAT model. 

**Original code:** [Konstantinos Pegios](https://github.com/kopegios) <br />
**Conceptualization & Methodological review :** [Alexandros Korkovelos](https://github.com/akorkovelos) & [Konstantinos Pegios](https://github.com/kopegios)<br />
**Updates, Modifications:** [Alexandros Korkovelos](https://github.com/akorkovelos) & [Oluchi Monwe](https://github.com/oluchee)<br />
**Funding:** The World Bank (contract number: 7190531), [KTH](https://www.kth.se/en/itm/inst/energiteknik/forskning/desa/welcome-to-the-unit-of-energy-systems-analysis-kth-desa-1.197296)

In [34]:
#Import modules and libraries

import geopandas as gpd
import rasterio as rio
import pandas as pd
import fiona
import gdal
import osr
import ogr
import rasterio.mask
import time
import os
import ogr, gdal, osr, os
import numpy as np
import itertools
import re

from osgeo import gdal,ogr
import struct
import csv

import glob

from rasterio.warp import calculate_default_transform, reproject
from rasterio.enums import Resampling
from rasterstats import point_query, zonal_stats
from pyproj import Proj
from shapely.geometry import Point, Polygon

In [35]:
# Import data 

#path = r"N:\Agrodem\Downscaling\Output_Data\FAO_AgroMap_Crops"
path = r"C:\Benin\agrodem_preprocessing\Downscaling\Output_Data\Crop_Maps"
name_of_file = "Ben_Maize_2018_admin2.csv"

In [36]:
# Import csv as pandas dataframe
df = pd.read_csv(path + "\\" + name_of_file)
df.head(3)

Unnamed: 0.1,Unnamed: 0,country_code,country,admin2,geometry,Harvest_Area_By_District,iso3,prod_level,alloc_key,cell5m,x,y,harea_spam_ha,shareofsum,harv_area_ha,prod_tonnes,district_area_ha,Test,shareofdistrict
0,0,BEN,Benin,Banikoara,"POLYGON ((483285.0952511251 1262585.663534007,...",40793.503413,BEN,BN01001,9412189,4062988,2.375,11.625,568.4,0.000523,605.834085,748.784639,437677.965476,True,0.001384
1,1,BEN,Benin,Banikoara,"POLYGON ((483285.0952511251 1262585.663534007,...",40793.503413,BEN,BN01001,9412190,4062989,2.458333,11.625,801.5,0.000738,854.285747,1055.860113,437677.965476,True,0.001952
2,2,BEN,Benin,Banikoara,"POLYGON ((483285.0952511251 1262585.663534007,...",40793.503413,BEN,BN01001,9412191,4062990,2.541667,11.625,581.5,0.000535,619.796834,766.041991,437677.965476,True,0.001416


In [37]:
df.columns

Index(['Unnamed: 0', 'country_code', 'country', 'admin2', 'geometry',
       'Harvest_Area_By_District', 'iso3', 'prod_level', 'alloc_key', 'cell5m',
       'x', 'y', 'harea_spam_ha', 'shareofsum', 'harv_area_ha', 'prod_tonnes',
       'district_area_ha', 'Test', 'shareofdistrict'],
      dtype='object')

In [38]:
df.rename(columns={'x': 'lon'}, inplace=True)
df.rename(columns={'y': 'lat'}, inplace=True)
#df.rename(columns={'area_ha': 'statearea_ha'}, inplace=True)
df.rename(columns={'country_code': 'c_code'}, inplace=True)

In [39]:
df.head()

Unnamed: 0.1,Unnamed: 0,c_code,country,admin2,geometry,Harvest_Area_By_District,iso3,prod_level,alloc_key,cell5m,lon,lat,harea_spam_ha,shareofsum,harv_area_ha,prod_tonnes,district_area_ha,Test,shareofdistrict
0,0,BEN,Benin,Banikoara,"POLYGON ((483285.0952511251 1262585.663534007,...",40793.503413,BEN,BN01001,9412189,4062988,2.375,11.625,568.4,0.000523,605.834085,748.784639,437677.965476,True,0.001384
1,1,BEN,Benin,Banikoara,"POLYGON ((483285.0952511251 1262585.663534007,...",40793.503413,BEN,BN01001,9412190,4062989,2.458333,11.625,801.5,0.000738,854.285747,1055.860113,437677.965476,True,0.001952
2,2,BEN,Benin,Banikoara,"POLYGON ((483285.0952511251 1262585.663534007,...",40793.503413,BEN,BN01001,9412191,4062990,2.541667,11.625,581.5,0.000535,619.796834,766.041991,437677.965476,True,0.001416
3,3,BEN,Benin,Banikoara,"POLYGON ((483285.0952511251 1262585.663534007,...",40793.503413,BEN,BN01001,9422188,4067307,2.291667,11.541667,0.0,0.0,0.0,0.0,437677.965476,True,0.0
4,4,BEN,Benin,Banikoara,"POLYGON ((483285.0952511251 1262585.663534007,...",40793.503413,BEN,BN01001,9422189,4067308,2.375,11.541667,605.3,0.000557,645.164271,797.395042,437677.965476,True,0.001474


In [40]:
#Function to change the order of columns in the dataframe 
def change_column_order(df, col_name, index):
    cols = df.columns.tolist()
    cols.remove(col_name)
    cols.insert(index, col_name)
    return df[cols]

In [41]:
# Allocate order in dataframe columns
df = change_column_order(df, 'alloc_key', 1)
df = change_column_order(df, 'lon', 2)
df = change_column_order(df, 'lat', 3)
df = change_column_order(df, 'country', 4)
df = change_column_order(df, 'c_code', 5)
df = change_column_order(df, 'district_area_ha', 6)
df = change_column_order(df, 'harv_area_ha', 7)

In [42]:
df.head()

Unnamed: 0.1,Unnamed: 0,alloc_key,lon,lat,country,c_code,district_area_ha,harv_area_ha,admin2,geometry,Harvest_Area_By_District,iso3,prod_level,cell5m,harea_spam_ha,shareofsum,prod_tonnes,Test,shareofdistrict
0,0,9412189,2.375,11.625,Benin,BEN,437677.965476,605.834085,Banikoara,"POLYGON ((483285.0952511251 1262585.663534007,...",40793.503413,BEN,BN01001,4062988,568.4,0.000523,748.784639,True,0.001384
1,1,9412190,2.458333,11.625,Benin,BEN,437677.965476,854.285747,Banikoara,"POLYGON ((483285.0952511251 1262585.663534007,...",40793.503413,BEN,BN01001,4062989,801.5,0.000738,1055.860113,True,0.001952
2,2,9412191,2.541667,11.625,Benin,BEN,437677.965476,619.796834,Banikoara,"POLYGON ((483285.0952511251 1262585.663534007,...",40793.503413,BEN,BN01001,4062990,581.5,0.000535,766.041991,True,0.001416
3,3,9422188,2.291667,11.541667,Benin,BEN,437677.965476,0.0,Banikoara,"POLYGON ((483285.0952511251 1262585.663534007,...",40793.503413,BEN,BN01001,4067307,0.0,0.0,0.0,True,0.0
4,4,9422189,2.375,11.541667,Benin,BEN,437677.965476,645.164271,Banikoara,"POLYGON ((483285.0952511251 1262585.663534007,...",40793.503413,BEN,BN01001,4067308,605.3,0.000557,797.395042,True,0.001474


In [43]:
# Add geometry and convert to spatial dataframe in source CRS
df['geometry'] = list(zip(df['lon'], df['lat']))
df['geometry'] = df['geometry'].apply(Point)
df = gpd.GeoDataFrame(df, geometry='geometry', crs={'init': 'epsg:3395'})

# Reproject data in to Ordnance Survey GB coordinates
df_osgb = df.to_crs({'init': 'epsg:3395'})

In [44]:
df_osgb.head()

Unnamed: 0.1,Unnamed: 0,alloc_key,lon,lat,country,c_code,district_area_ha,harv_area_ha,admin2,geometry,Harvest_Area_By_District,iso3,prod_level,cell5m,harea_spam_ha,shareofsum,prod_tonnes,Test,shareofdistrict
0,0,9412189,2.375,11.625,Benin,BEN,437677.965476,605.834085,Banikoara,POINT (2.37500 11.62500),40793.503413,BEN,BN01001,4062988,568.4,0.000523,748.784639,True,0.001384
1,1,9412190,2.458333,11.625,Benin,BEN,437677.965476,854.285747,Banikoara,POINT (2.45833 11.62500),40793.503413,BEN,BN01001,4062989,801.5,0.000738,1055.860113,True,0.001952
2,2,9412191,2.541667,11.625,Benin,BEN,437677.965476,619.796834,Banikoara,POINT (2.54167 11.62500),40793.503413,BEN,BN01001,4062990,581.5,0.000535,766.041991,True,0.001416
3,3,9422188,2.291667,11.541667,Benin,BEN,437677.965476,0.0,Banikoara,POINT (2.29167 11.54167),40793.503413,BEN,BN01001,4067307,0.0,0.0,0.0,True,0.0
4,4,9422189,2.375,11.541667,Benin,BEN,437677.965476,645.164271,Banikoara,POINT (2.37500 11.54167),40793.503413,BEN,BN01001,4067308,605.3,0.000557,797.395042,True,0.001474


<div class="alert alert-block alert-warning">
<b>Note:</b> The geodataframe, contains crop points with needed attributes including a geometry column. It needs to be exported as shapefile. However, when this is done with geopandas, the column geometry (Point(x,y)) is not transfered in the attribute table. This is however needed for the extraction process below, otherwise "None" values are returned. In order to overcome this issue, the csv is imported into Qgis and then exported as feature layer in the same folder. This solves the issue. 
</div>

In [45]:
# Define output path
# Overwriting the csv file
#path = r"N:\Agrodem\Downscaling\Output_Data\FAO_AgroMap_Crops"
path = r"C:\Benin\agrodem_preprocessing\Downscaling\Output_Data"
shpname = "Ben_Maize_Harv_2018_admin2_10km_new.csv"

#MAIZE
df_osgb.to_csv(os.path.join(path,"{c}".format(c=shpname)), index=False)
df_osgb.to_file('Ben_Maize_Harv_2018_admin2_10km_new.shp',driver = 'ESRI Shapefile')
# Define output path
#path = r"N:\Agrodem\Downscaling\Output_Data\Crop_Maps"
#path = r"C:\Benin\agrodem_preprocessing\Downscaling\Output_Data\Crop_Maps"

#Maize


#### >>> Please open Qgis and act as suggested above<<<

In [46]:
y = gpd.read_file(r'C:\Github\agrodem_new\agrodem_preprocessing\Downscaling\Downscaling_prepping\Ben_Maize_Harv_2018_admin2_10km_new.shp')

In [33]:
y.head()

Unnamed: 0,Unnamed_ 0,alloc_key,lon,lat,country,c_code,district_a,harv_area_,admin2,Harvest_Ar,iso3,prod_level,cell5m,harea_spam,shareofsum,prod_tonne,Test,shareofdis,geometry
0,0,9412189,2.375,11.625,Benin,BEN,437677.965476,605.834085,Banikoara,40793.503413,BEN,BN01001,4062988,568.4,0.000523,748.784639,1,0.001384,POINT (2.37500 11.62500)
1,1,9412190,2.458333,11.625,Benin,BEN,437677.965476,854.285747,Banikoara,40793.503413,BEN,BN01001,4062989,801.5,0.000738,1055.860113,1,0.001952,POINT (2.45833 11.62500)
2,2,9412191,2.541667,11.625,Benin,BEN,437677.965476,619.796834,Banikoara,40793.503413,BEN,BN01001,4062990,581.5,0.000535,766.041991,1,0.001416,POINT (2.54167 11.62500)
3,3,9422188,2.291667,11.541667,Benin,BEN,437677.965476,0.0,Banikoara,40793.503413,BEN,BN01001,4067307,0.0,0.0,0.0,1,0.0,POINT (2.29167 11.54167)
4,4,9422189,2.375,11.541667,Benin,BEN,437677.965476,645.164271,Banikoara,40793.503413,BEN,BN01001,4067308,605.3,0.000557,797.395042,1,0.001474,POINT (2.37500 11.54167)


## Extracting raster input to csv 

In [55]:
#raster_path = r"N:\Agrodem\Downscaling\Input_Data\rasters"
raster_path = r"C:\Oluchi\Downscaling\Input_Data\raster_reprojected"
raster_files =[]
#csvoutpath = r"N:\Agrodem\Downscaling\Output_Data\FLAT_input\1km_Rice"
csvoutpath = r"C:\Oluchi\Downscaling\Output_Data\FLAT_Input\10km_Maize"

print ("Reading independent variables...")

for i in os.listdir(raster_path):
    if i.endswith('.tif'):
        raster_files.append(i)  

#shp_filename = r"N:\Agrodem\Downscaling\Output_Data\FAO_AgroMap_Crops\shapefiles\Moz_Rice_Harv_2000_admin2_1km.shp"
shp_filename = r'C:\Github\agrodem_new\agrodem_preprocessing\Downscaling\Downscaling_prepping\Ben_Maize_Harv_2018_admin2_10km_new.shp'
print ("Extracting raster values to points...")

for i in raster_files:
    print("Extracting " + i + " values...")
    src_filename = raster_path + "\\" + i 
    li_values = list()

    src_ds=gdal.Open(src_filename) 
    gt=src_ds.GetGeoTransform()
    rb=src_ds.GetRasterBand(1)
    ds=ogr.Open(shp_filename)    
    lyr=ds.GetLayer()
    
    
    for feat in lyr:
        geom = feat.GetGeometryRef()
        feat_id = feat.GetField('alloc_key')
        mx,my=geom.GetX(), geom.GetY()  #coord in map units

        #Convert from map to pixel coordinates.
        #Only works for geotransforms with no rotation.
        px = int((mx - gt[0]) / gt[1]) #x pixel
        py = int((my - gt[3]) / gt[5]) #y pixel

        intval=rb.ReadAsArray(px,py,1,1)
        li_values.append([feat_id, intval[0]])
        
    print ("Writing " + i + " values to csv...")
    
    with open(csvoutpath + "\\" + i.split('.')[0] + '.csv', 'w') as csvfile:
        wr = csv.writer(csvfile)
        wr.writerows(li_values)

Reading independent variables...
Extracting raster values to points...
Extracting BDRICM.tif values...
Writing BDRICM.tif values to csv...
Extracting BLD.tif values...
Writing BLD.tif values to csv...
Extracting CLYPPT.tif values...
Writing CLYPPT.tif values to csv...
Extracting DRAINFAO.tif values...
Writing DRAINFAO.tif values to csv...
Extracting GFSAD-landcover.tif values...
Writing GFSAD-landcover.tif values to csv...
Extracting landcover.tif values...
Writing landcover.tif values to csv...
Extracting MODIS-Band2 (1).tif values...
Writing MODIS-Band2 (1).tif values to csv...
Extracting modis-evi.tif values...
Writing modis-evi.tif values to csv...
Extracting modis-ndvi.tif values...
Writing modis-ndvi.tif values to csv...
Extracting ORCDRC.tif values...
Writing ORCDRC.tif values to csv...
Extracting PHIHOX.tif values...
Writing PHIHOX.tif values to csv...
Extracting prec.tif values...
Writing prec.tif values to csv...
Extracting srad.tif values...
Writing srad.tif values to csv...

## Merge csv files with crop

In [56]:
#Import data 

#path = r"N:\Agrodem\Downscaling\Output_Data\FAO_AgroMap_Crops"
path = r"C:\Benin\agrodem_preprocessing\Downscaling\Output_Data"
name_of_file = "Ben_Maize_Harv_2018_admin2_10km_new.csv"

flat_input = pd.read_csv(path + "\\" + name_of_file)

In [57]:
#csvoutpath = r"N:\Agrodem\Downscaling\Output_Data\FLAT_input\1km_Rice"
csvoutpath = r"C:\Oluchi\Downscaling\Output_Data\FLAT_Input\10km_Maize"
csv_files = []

print ("Reading csv files...")

for i in os.listdir(csvoutpath):
    if i.endswith('.csv'):
        csv_files.append(i)  

for i in csv_files:
    print('Reading...'+ i)    
    df_csv = pd.read_csv(csvoutpath + "//" + i, index_col=None, header=None)
    df_csv.iloc[:,1] = df_csv.iloc[:,1].astype(str)
    df_csv.iloc[:,1] = df_csv.iloc[:,1].str.replace('[','')
    df_csv.iloc[:,1] = df_csv.iloc[:,1].str.replace(']','')
    columnName = i.split('.')[0]
    print("Merging..." + columnName)
    flat_input[columnName] = df_csv.iloc[:,1]

Reading csv files...
Reading...BDRICM.csv
Merging...BDRICM
Reading...BLD.csv
Merging...BLD
Reading...CLYPPT.csv
Merging...CLYPPT
Reading...DRAINFAO.csv
Merging...DRAINFAO
Reading...GFSAD-landcover.csv
Merging...GFSAD-landcover
Reading...landcover.csv
Merging...landcover
Reading...MODIS-Band2 (1).csv
Merging...MODIS-Band2 (1)
Reading...modis-evi.csv
Merging...modis-evi
Reading...modis-ndvi.csv
Merging...modis-ndvi
Reading...ORCDRC.csv
Merging...ORCDRC
Reading...PHIHOX.csv
Merging...PHIHOX
Reading...prec.csv
Merging...prec
Reading...srad.csv
Merging...srad
Reading...SRTM-elevation.csv
Merging...SRTM-elevation
Reading...tavg.csv
Merging...tavg
Reading...TEXMHT.csv
Merging...TEXMHT
Reading...wind.csv
Merging...wind


## Cleaning and normalizing dataframe

In [58]:
#Show columns
flat_input.columns

Index(['Unnamed: 0', 'alloc_key', 'lon', 'lat', 'country', 'c_code',
       'district_area_ha', 'harv_area_ha', 'admin2', 'geometry',
       'Harvest_Area_By_District', 'iso3', 'prod_level', 'cell5m',
       'harea_spam_ha', 'shareofsum', 'prod_tonnes', 'Test', 'shareofdistrict',
       'BDRICM', 'BLD', 'CLYPPT', 'DRAINFAO', 'GFSAD-landcover', 'landcover',
       'MODIS-Band2 (1)', 'modis-evi', 'modis-ndvi', 'ORCDRC', 'PHIHOX',
       'prec', 'srad', 'SRTM-elevation', 'tavg', 'TEXMHT', 'wind'],
      dtype='object')

In [59]:

flat_input.head(10)

Unnamed: 0.1,Unnamed: 0,alloc_key,lon,lat,country,c_code,district_area_ha,harv_area_ha,admin2,geometry,...,modis-evi,modis-ndvi,ORCDRC,PHIHOX,prec,srad,SRTM-elevation,tavg,TEXMHT,wind
0,0,9412189,2.375,11.625,Benin,BEN,437677.965476,605.834085,Banikoara,POINT (2.374999999 11.625),...,,,0.0,0.0,0.0,0.0,0,-inf,0.0,0.0
1,1,9412190,2.458333,11.625,Benin,BEN,437677.965476,854.285747,Banikoara,POINT (2.458333333 11.625),...,,,0.0,0.0,0.0,0.0,0,-inf,0.0,0.0
2,2,9412191,2.541667,11.625,Benin,BEN,437677.965476,619.796834,Banikoara,POINT (2.541666666 11.625),...,,,0.0,0.0,0.0,0.0,0,-inf,0.0,0.0
3,3,9422188,2.291667,11.541667,Benin,BEN,437677.965476,0.0,Banikoara,POINT (2.291666666 11.54166667),...,,,0.0,0.0,0.0,0.0,0,-inf,0.0,0.0
4,4,9422189,2.375,11.541667,Benin,BEN,437677.965476,645.164271,Banikoara,POINT (2.374999999 11.54166667),...,,,0.0,0.0,0.0,0.0,0,-inf,0.0,0.0
5,5,9422190,2.458333,11.541667,Benin,BEN,437677.965476,862.173102,Banikoara,POINT (2.458333333 11.54166667),...,,,0.0,0.0,0.0,0.0,0,-inf,0.0,0.0
6,6,9422191,2.541667,11.541667,Benin,BEN,437677.965476,948.187899,Banikoara,POINT (2.541666666 11.54166667),...,,,0.0,0.0,0.0,0.0,0,-inf,0.0,0.0
7,7,9422192,2.625,11.541667,Benin,BEN,437677.965476,907.258925,Banikoara,POINT (2.624999999 11.54166667),...,,,0.0,0.0,0.0,0.0,0,-inf,0.0,0.0
8,8,9432188,2.291667,11.458333,Benin,BEN,437677.965476,28.991357,Banikoara,POINT (2.291666666 11.45833333),...,,,0.0,0.0,0.0,0.0,0,-inf,0.0,0.0
9,9,9432189,2.375,11.458333,Benin,BEN,437677.965476,890.205186,Banikoara,POINT (2.374999999 11.45833333),...,,,0.0,0.0,0.0,0.0,0,-inf,0.0,0.0


In [65]:
#Delete geometry column
flat_input = flat_input.drop(["Geometry","Test"], axis=1)

#### Convert values in newly added columns into float 

In [66]:
flat_input["harv_area_ha"] = flat_input["harv_area_ha"].astype(float)
flat_input["BDRICM"] = flat_input["BDRICM"].astype(float)
flat_input["BLD"] = flat_input["BLD"].astype(float)
flat_input["CLYPPT"] = flat_input["CLYPPT"].astype(float)
flat_input["DRAINFAO"] = flat_input["DRAINFAO"].astype(float)
flat_input["GFSAD-landcover"] = flat_input["GFSAD-landcover"].astype(float)
flat_input["landcover"] = flat_input["landcover"].astype(float)
flat_input["MODIS-Band2 (1)"] = flat_input["MODIS-Band2 (1)"].astype(float)
flat_input["modis-evi"] = flat_input["modis-evi"].astype(float)
flat_input["modis-ndvi"] = flat_input["modis-ndvi"].astype(float)
flat_input["ORCDRC"] = flat_input["ORCDRC"].astype(float)
flat_input["PHIHOX"] = flat_input["PHIHOX"].astype(float)
flat_input["prec"] = flat_input["prec"].astype(float)
flat_input["srad"] = flat_input["srad"].astype(float)
flat_input["SRTM-elevation"] = flat_input["SRTM-elevation"].astype(float)
flat_input["tavg"] = flat_input["tavg"].astype(float)
flat_input["TEXMHT"] = flat_input["TEXMHT"].astype(float)
flat_input["wind"] = flat_input["wind"].astype(float)

In [67]:
flat_input.dtypes

Unnamed: 0                    int64
alloc_key                     int64
lon                         float64
lat                         float64
country                      object
c_code                       object
district_area_ha            float64
harv_area_ha                float64
admin2                       object
Harvest_Area_By_District    float64
iso3                         object
prod_level                   object
cell5m                        int64
harea_spam_ha               float64
shareofsum                  float64
prod_tonnes                 float64
shareofdistrict             float64
BDRICM                      float64
BLD                         float64
CLYPPT                      float64
DRAINFAO                    float64
GFSAD-landcover             float64
landcover                   float64
MODIS-Band2 (1)             float64
modis-evi                   float64
modis-ndvi                  float64
ORCDRC                      float64
PHIHOX                      

#### Normalize EVI and NDVI 
See [reference](https://vip.arizona.edu/documents/MODIS/MODIS_VI_UsersGuide_June_2015_C6.pdf) page 9

In [68]:
flat_input['modis-evi'] = flat_input['modis-evi']*0.0001
flat_input['modis-ndvi'] = flat_input['modis-ndvi']*0.0001

In [69]:
flat_input.head()

Unnamed: 0.1,Unnamed: 0,alloc_key,lon,lat,country,c_code,district_area_ha,harv_area_ha,admin2,Harvest_Area_By_District,...,modis-evi,modis-ndvi,ORCDRC,PHIHOX,prec,srad,SRTM-elevation,tavg,TEXMHT,wind
0,0,9412189,2.375,11.625,Benin,BEN,437677.965476,605.834085,Banikoara,40793.503413,...,,,0.0,0.0,0.0,0.0,0.0,-inf,0.0,0.0
1,1,9412190,2.458333,11.625,Benin,BEN,437677.965476,854.285747,Banikoara,40793.503413,...,,,0.0,0.0,0.0,0.0,0.0,-inf,0.0,0.0
2,2,9412191,2.541667,11.625,Benin,BEN,437677.965476,619.796834,Banikoara,40793.503413,...,,,0.0,0.0,0.0,0.0,0.0,-inf,0.0,0.0
3,3,9422188,2.291667,11.541667,Benin,BEN,437677.965476,0.0,Banikoara,40793.503413,...,,,0.0,0.0,0.0,0.0,0.0,-inf,0.0,0.0
4,4,9422189,2.375,11.541667,Benin,BEN,437677.965476,645.164271,Banikoara,40793.503413,...,,,0.0,0.0,0.0,0.0,0.0,-inf,0.0,0.0


#### Drop  areas that are indicated as non-cropland in Global Food Security-support Analysis Data at 30m(GFSAD30) [here](https://developers.google.com/earth-engine/datasets/catalog/USGS_GFSAD1000_V0#bands)

In [70]:
flat_input.drop(flat_input[flat_input["GFSAD-landcover"] == 0].index, inplace=True)
flat_input["GFSAD-landcover"]

Series([], Name: GFSAD-landcover, dtype: float64)

#### Fixing out-of-range values

In [72]:
# remove blank spaces in state names    
flat_input["country"].replace('\s+', '_',regex=True,inplace=True)
flat_input["country"].replace("'", '_',regex=True,inplace=True)

Series([], Name: country, dtype: object)

In [73]:
# Re-indexing allocation keys to avoid duplicates
flat_input = flat_input.assign(alloc_key=np.arange(len(flat_input))).reset_index(drop=True)

In [74]:
# Turning NaN rows to 0
flat_input.fillna(0,inplace=True)

Unnamed: 0.1,Unnamed: 0,alloc_key,lon,lat,country,c_code,district_area_ha,harv_area_ha,admin2,Harvest_Area_By_District,...,modis-evi,modis-ndvi,ORCDRC,PHIHOX,prec,srad,SRTM-elevation,tavg,TEXMHT,wind


In [76]:
# Turn all negative values to 0
pred_columns = ['tavg', 'srad', 'prec', 'wind', 'PHIHOX',
                'BDRICM', 'BLD', 'CLYPPT', 'TEXMHT', 'ORCDRC',
                'DRAINFAO', 'MODIS-Band2 (1)', 'SRTM-elevation']

for col in pred_columns:
    mvalue = flat_input[col].mean()
    print (mvalue)
    flat_input[col][flat_input[col]<0] = 0

nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [77]:
for col in pred_columns:
    mvalue = flat_input[col].mean()
    print (mvalue)

nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan


In [78]:
flat_input.columns

Index(['Unnamed: 0', 'alloc_key', 'lon', 'lat', 'country', 'c_code',
       'district_area_ha', 'harv_area_ha', 'admin2',
       'Harvest_Area_By_District', 'iso3', 'prod_level', 'cell5m',
       'harea_spam_ha', 'shareofsum', 'prod_tonnes', 'shareofdistrict',
       'BDRICM', 'BLD', 'CLYPPT', 'DRAINFAO', 'GFSAD-landcover', 'landcover',
       'MODIS-Band2 (1)', 'modis-evi', 'modis-ndvi', 'ORCDRC', 'PHIHOX',
       'prec', 'srad', 'SRTM-elevation', 'tavg', 'TEXMHT', 'wind'],
      dtype='object')

In [84]:
flat_input = change_column_order(flat_input, 'alloc_key', 0)
flat_input = change_column_order(flat_input, 'country', 1)
flat_input = change_column_order(flat_input, 'lon', 2)
flat_input = change_column_order(flat_input, 'lat', 3)
flat_input = change_column_order(flat_input, 'country', 4)
flat_input = change_column_order(flat_input, 'c_code', 5)

flat_input = change_column_order(flat_input, 'district_area_ha', 6)
flat_input = change_column_order(flat_input, 'harv_area_ha', 10)

flat_input = change_column_order(flat_input, 'prod_tonnes', 12)

flat_input = change_column_order(flat_input, 'tavg', 13)
flat_input = change_column_order(flat_input, 'srad', 14)
flat_input = change_column_order(flat_input, 'prec', 15)
flat_input = change_column_order(flat_input, 'wind', 16)

flat_input = change_column_order(flat_input, 'PHIHOX', 17)
flat_input = change_column_order(flat_input, 'BDRICM', 18)
flat_input = change_column_order(flat_input, 'BLD', 19)
flat_input = change_column_order(flat_input, 'CLYPPT', 20)
flat_input = change_column_order(flat_input, 'TEXMHT', 21)
flat_input = change_column_order(flat_input, 'ORCDRC', 22)
flat_input = change_column_order(flat_input, 'DRAINFAO', 23)

flat_input = change_column_order(flat_input, 'modis-evi', 24)
flat_input = change_column_order(flat_input, 'modis-ndvi', 25)

flat_input = change_column_order(flat_input, 'MODIS-Band2 (1)', 26)
flat_input = change_column_order(flat_input, 'SRTM-elevation', 27)
flat_input = change_column_order(flat_input, 'landcover', 28)
flat_input = change_column_order(flat_input, 'GFSAD-landcover', 28)

In [85]:
flat_input.head()

Unnamed: 0.1,alloc_key,lon,lat,Unnamed: 0,country,c_code,district_area_ha,admin2,Harvest_Area_By_District,iso3,...,modis-evi,modis-ndvi,MODIS-Band2 (1),SRTM-elevation,GFSAD-landcover,landcover,cell5m,harea_spam_ha,shareofsum,shareofdistrict


In [86]:
flat_input.columns

Index(['alloc_key', 'lon', 'lat', 'Unnamed: 0', 'country', 'c_code',
       'district_area_ha', 'admin2', 'Harvest_Area_By_District', 'iso3',
       'harv_area_ha', 'prod_level', 'prod_tonnes', 'tavg', 'srad', 'prec',
       'wind', 'PHIHOX', 'BDRICM', 'BLD', 'CLYPPT', 'TEXMHT', 'ORCDRC',
       'DRAINFAO', 'modis-evi', 'modis-ndvi', 'MODIS-Band2 (1)',
       'SRTM-elevation', 'GFSAD-landcover', 'landcover', 'cell5m',
       'harea_spam_ha', 'shareofsum', 'shareofdistrict'],
      dtype='object')

#### Final column fixing

In [91]:
# give crop name
crop_modelled ="Maize"

# Dropping columns
flat_input = flat_input.drop(["country"], axis=1)
flat_input = flat_input.drop(["c_code"], axis=1)
flat_input = flat_input.drop(["year"], axis=1)
flat_input = flat_input.drop(["crop"], axis=1)
flat_input = flat_input.drop(["yield"], axis=1)
flat_input = flat_input.drop(["prduction_ha"], axis=1)
flat_input = flat_input.drop(["landcover"], axis=1)
flat_input = flat_input.drop(["harea_2000"], axis=1)
flat_input = flat_input.drop(["shareofsum"], axis=1)  
flat_input = flat_input.drop(["district_a"], axis=1)  
flat_input = flat_input.drop(["shareofdistrict"], axis=1)

#Remaning columns
flat_input.rename(columns={'state': 'NAME'}, inplace=True)
flat_input.rename(columns={'statearea_ha': 'statearea'}, inplace=True)
flat_input.rename(columns={'harv_area_ha': 'crop_modelled_area'}, inplace=True)


KeyError: "['c_code'] not found in axis"

In [92]:
flat_input

Unnamed: 0.1,alloc_key,lon,lat,Unnamed: 0,district_area_ha,admin2,Harvest_Area_By_District,iso3,harv_area_ha,prod_level,...,modis-evi,modis-ndvi,MODIS-Band2 (1),SRTM-elevation,GFSAD-landcover,landcover,cell5m,harea_spam_ha,shareofsum,shareofdistrict


In [None]:
flat_input.columns

In [93]:
# This part prints full results

#path = r"N:\Agrodem\Downscaling\Output_Data\FLAT_input"
path =r"C:\Oluchi\Downscaling\Output_Data\FLAT_Input"
name_of_flat_input_file = "flat_input_Maize_10km"

flat_input.to_csv(os.path.join(path,"{c}.csv".format(c=name_of_flat_input_file)), index=False)