# Creating input data

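Here we import and filter the SPAM dataset to retrieve the 10x10 km harvested-area allocation of maize for a region (Benin in this example).
Note that the "spam2017V1r1_SSA_gr_H_TA.csv" file is the one loaded below - other options are available (e.g. the corresponding "_TR" file if only rainfed maize is wanted).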

In [1]:
import os
import geopandas as gpd
import pandas as pd
# Import data
#
# `path` is used as the folder that contains the SPAM csv files.
# NOTE(review): the folder name itself ends in ".csv" - presumably this is the
# extracted download folder; confirm it is a directory and not a file.
# Alternative location: N:\Agrodem\spam2010v1r1_global_harv_area.csv
path = r"C:\Users\oluchi\Downloads\spam2017v1r1_ssa_harv_area.csv"
name_of_file = "spam2017V1r1_SSA_gr_H_TA.csv"

# Import csv as pandas dataframe.
# os.path.join is used (as in the export step at the end of this notebook)
# instead of concatenating a hard-coded "\\" separator.
SPAM_database_df = pd.read_csv(os.path.join(path, name_of_file), encoding='latin1')

In [2]:
# Display the column names of the loaded SPAM table
SPAM_database_df.columns

Index(['iso3', 'prod_level', 'alloc_key', 'cell5m', 'x', 'y', 'rec_type',
       'tech_type', 'unit', 'whea_a', 'rice_a', 'maiz_a', 'barl_a', 'pmil_a',
       'smil_a', 'sorg_a', 'ocer_a', 'pota_a', 'swpo_a', 'yams_a', 'cass_a',
       'orts_a', 'bean_a', 'chic_a', 'cowp_a', 'pige_a', 'lent_a', 'opul_a',
       'soyb_a', 'grou_a', 'cnut_a', 'oilp_a', 'sunf_a', 'rape_a', 'sesa_a',
       'ooil_a', 'sugc_a', 'sugb_a', 'cott_a', 'ofib_a', 'acof_a', 'rcof_a',
       'coco_a', 'teas_a', 'toba_a', 'bana_a', 'plnt_a', 'trof_a', 'temf_a',
       'vege_a', 'rest_a', 'total_a', 'cere_a', 'root_a', 'puls_a', 'oilc_a',
       'mill_a', 'coff_a', 'fruit_a', 'banpl_a', 'crea_date', 'year_data',
       'source', 'name_cntr', 'name_adm1', 'name_adm2'],
      dtype='object')

In [3]:
# Restrict the SPAM table to the rows whose country name is "Benin"
is_benin = SPAM_database_df["name_cntr"] == "Benin"
BEN_SPAM_df = SPAM_database_df[is_benin]

In [5]:
# Keep only the rows that report a maize harvested area ('maiz_a').
# A comparison against NaN evaluates to False, so `>= 0` also filters out
# missing values; no separate notnull() pass is required.
# NOTE(review): rows with an area of exactly 0 are retained, as before -
# confirm whether `> 0` is what the downstream process actually needs.
has_maize_area = BEN_SPAM_df["maiz_a"] >= 0

# Select the columns that are needed rather than dropping every other column
# by name: the drop list raised a KeyError as soon as one listed column was
# absent from the csv, and silently kept any column that was not listed.
columns_to_keep = ["alloc_key", "x", "y", "maiz_a", "name_adm2"]

# .copy() yields an independent dataframe, so the columns added and renamed
# in the following cells do not act on a slice of BEN_SPAM_df.
Maize_BEN_SPAM_df = BEN_SPAM_df.loc[has_maize_area, columns_to_keep].copy()

In [6]:
# Preview the first four rows of the filtered table
Maize_BEN_SPAM_df.head(4)

Unnamed: 0,alloc_key,x,y,maiz_a,name_adm2
14249,9322194,2.791667,12.375,144.5,Karimama
14250,9322195,2.875,12.375,370.9,Karimama
14731,9332194,2.791667,12.291667,92.5,Karimama
14732,9332195,2.875,12.291667,49.6,Karimama


In [7]:
# Attach two constant-valued columns ("Crop" and "Fraction") to every row
extra_columns = {"Crop": "Maize", "Fraction": 1}
for column_name, constant in extra_columns.items():
    Maize_BEN_SPAM_df[column_name] = constant

In [11]:
# Translate the SPAM column names into the names used by the agrodem process
agrodem_column_names = {
    "alloc_key": "Pixel",
    "x": "lon",
    "y": "lat",
    "maiz_a": "CropArea",
    "name_adm2": "State",
}

# Rename in place so the existing dataframe object is modified
Maize_BEN_SPAM_df.rename(columns=agrodem_column_names, inplace=True)

In [12]:
# Display the column names after adding and renaming columns
Maize_BEN_SPAM_df.columns

Index(['Pixel', 'lon', 'lat', 'CropArea', 'State', 'Crop', 'Fraction'], dtype='object')

In [14]:
# Destination folder and file name for the exported table
# (alternative location: N:\Agrodem\Irrigation_model\Input_data)
path = r"C:\Benin\agrodem_preprocessing\QGIS_input_SPAM"
csvname = "SPAM_Benin_Maize_Harv_2017_admin2_10km.csv"

# Write the maize table to csv, leaving out the dataframe index
output_file = os.path.join(path, csvname)
Maize_BEN_SPAM_df.to_csv(output_file, index=False)

In [None]:
## Create a sample dataset
#sample_crop_df = sample_crop_df.sample(1000)