In [1]:
import os, glob, sys
import pandas as pd
import geopandas as gpd
import fiona
from shapely.geometry import Polygon, mapping
import matplotlib.pyplot as plt
#import folium
import rasterio
import rasterio.mask
import rasterio.plot
import numpy as np
#import plotly.express as px
#from plotly.subplots import make_subplots

# Never truncate DataFrames when they are displayed: show every column and every row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Folder that is added to the module search path below (machine-specific location).
#tools_path = '/export/miro/ndeffense/LBRAT2104/GIT/eo-toolbox/tools/'
tools_path = '/Users/Nicolas/Documents/Git/eo-toolbox/tools/'

# Put that folder first on sys.path; the two modules imported below are
# presumably resolved from it, so this must run before the imports.
sys.path.insert(0, tools_path)

#import folium_def
import remove_overlap
import crop_distribution

# Input / Output

## Input

In [4]:
# Root folder of the project; every input file below is located under it.
path = '/export/projects/FAO-EOStat-Senegal/'

# Excel workbook of the GPS survey database
gps_xlsx = path + 'GPS/base_nioro.xlsx'

# Shapefile of the non-crop polygons (Sophie Lox)
sophie_shp = path + 'Noncrop_SophieL/noncrop_tot_senegal_calibration_nioro.shp'

# ODK form exports: cropland form and non-cropland form
odk_crop_csv     = path + 'ODK/FormSenegal05.csv'
odk_non_crop_csv = path + 'ODK/FormSenegal07.csv'

# Region-of-interest shapefile
roi_shp = path + 'ROI/Senegal_DepartementNioro.shp'

# Look-up tables
lut_crop_xlsx         = path + 'LUT/LUT_crop.xlsx'
lut_non_crop_xlsx     = path + 'LUT/LUT_non_crop.xlsx'
lut_non_crop_lox_xlsx = path + 'LUT/LUT_non_crop_lox.xlsx'

# Crop dictionary workbook
crop_dictionary_xlsx  = path + 'LUT/crop_dictionary_new.xlsx'

## Output

In [5]:
# Intermediate shapefiles, all under <path>shp_temp/.
# In the cells shown here only `lox_non_crop_shp` is actually written;
# the other `to_file` calls are commented out.
gps_inner_shp    = path + 'shp_temp/gps_with_data_inner.shp'
gps_left_shp     = path + 'shp_temp/gps_with_data_left.shp'
odk_crop_shp     = path + 'shp_temp/odk_crop_with_data.shp'
odk_crop_dup_shp = path + 'shp_temp/odk_crop_with_data_duplicates.shp'
odk_non_crop_shp = path + 'shp_temp/odk_non_crop_with_data.shp'
lox_non_crop_shp = path + 'shp_temp/lox_non_crop_with_data.shp'

# Final in-situ shapefile
full_shp_output = path + 'Sen4Stat/IN_SITU/SITE_41/SEN_2021_SITE_41_ori.shp'

#map_html_output = f'{path}shp_temp/odk_gps_with_data.html'

# Parameters

In [6]:
# Geometry cleaning thresholds.
buf_size = -10        # meter (negative buffer; only referenced by commented-out steps in the cells shown here)
min_area = 500        # square meter: polygons below this area are discarded
max_area = 100_000    # square meter: polygons at or above this area are discarded

# Look-Up-Table

In [7]:
# Read the three look-up tables and cast their `sub_nb` column to integers.
lut_crop_df         = pd.read_excel(lut_crop_xlsx).astype({"sub_nb": int})
lut_non_crop_df     = pd.read_excel(lut_non_crop_xlsx).astype({"sub_nb": int})
lut_non_crop_lox_df = pd.read_excel(lut_non_crop_lox_xlsx).astype({"sub_nb": int})

# Show both non-crop tables in full, each followed by a separator line,
# then finish with a preview of the crop table.
for _lut_df in (lut_non_crop_df, lut_non_crop_lox_df):
    display(_lut_df)
    print('-------------------')

lut_crop_df.head()

Unnamed: 0,lc_code,lc_name,sub_nb,sub
0,1,Prairie,3199,Grassland and meadows
1,2,Jachère,4111,Fallows 1 year
2,3,Broussailes,5111,Shrub land
3,4,Forêt,6999,Forest
4,5,Sol nu,7211,Bare soils
5,6,Zone bâtie,8111,Urban
6,7,Plan d'eau,9199,Water bodies


-------------------


Unnamed: 0,lc_code,lc_name,sub_nb,sub
0,4001,zone arbustive,5111,Shrub land
1,4002,zone arbustive arboree,5111,Shrub land
2,7001,construction,8111,Urban
3,8001,eau de surface,9199,Water bodies
4,9001,zone humide,9211,"Non-current inland waters (reservoirs, swamps,..."


-------------------


Unnamed: 0,crop_code,crop_name,sub_nb,sub
0,1,Arachide,1421,Groundnuts
1,2,Aubergine,1222,Eggplants (aubergines)
2,3,Béréf,1229,Other fruit-bearing vegetables
3,4,Bissap,1941,Flowers crops
4,5,Coton,1921,Cotton


# Geometries : retrieval & cleaning

## 1. GPS

In [8]:
# List the GPX files once and sort them: glob.glob() returns files in arbitrary
# order and drop_duplicates() below keeps the first record of each ID, so sorting
# makes the retained duplicate reproducible from one run to the next.
gpx_files = sorted(glob.glob(f'{path}GPS/*/*.gpx'))

dict_gpx_list = []

for gpx_file in gpx_files:

    filename = os.path.basename(gpx_file)

    # The ID is the part of the file name located after "Piste" (last 4 characters,
    # i.e. the extension, removed). Blanks and repeated dashes are normalised to a
    # single dash before the ID is split into its parts.
    pid = filename[filename.find("Piste")+5:-4].lstrip('_ ')
    pid = pid.replace(' ','-')
    pid = pid.replace('---','-')
    pid = pid.replace('--','-')
    pid = pid.split('-')

    # An ID made of more than 5 parts is flagged as invalid (valid_id = 0)
    valid_id = 1 if len(pid) <= 5 else 0

    pid = ''.join(pid)

    # Context manager: the layer is closed as soon as its content has been read,
    # instead of leaving one open handle per GPX file.
    with fiona.open(gpx_file, layer='tracks') as layer:
        crs = layer.crs

        if crs['init'] == 'epsg:4326':

            # Only the first part of the first track feature is kept as polygon outline
            dict_gpx = {'id_gps': int(pid),
                        'valid_id' : valid_id,
                        'gpx' : str(gpx_file),
                        'coordinates': layer[0]['geometry']['coordinates'][0]}

            dict_gpx_list.append(dict_gpx)

        else:
            print("Wrong CRS !")


df = pd.DataFrame.from_dict(dict_gpx_list).drop_duplicates('id_gps')

print(f'Number of GPX files : {len(gpx_files)} --> {len(df)} (without duplicates)')
print(f'---- Good PID (whitout duplicates)  : {len(df.loc[df["valid_id"] == 1])}')
print(f'---- Wrong PID (whitout duplicates) : {len(df.loc[df["valid_id"] == 0])}')

# Close each track into a polygon
df['geometry'] = df.coordinates.apply(Polygon)

df = df.drop('coordinates', axis=1)

# Reproject from EPSG:4326 to EPSG:32628 so that areas are computed in projected units
gps_gdf = gpd.GeoDataFrame(df, crs='epsg:4326', geometry='geometry').to_crs(epsg=32628).sort_values(by=['valid_id','id_gps'])

####################
##### CLEANING #####
####################

gps_gdf['area'] = gps_gdf['geometry'].area.round(2)

gps_gdf = gps_gdf.loc[gps_gdf['area'] < max_area]

print(f'After removing polygons where area is greater than {max_area} square meters : {len(gps_gdf)}')

# .copy(): the filtered frame gets a new column below, so make it an independent object
gps_gdf = gps_gdf.loc[gps_gdf['area'] >= min_area].copy()

print(f'After removing polygons where area is less than {min_area} square meters : {len(gps_gdf)}')

#gps_gdf = remove_overlap.remove_overlap(gps_gdf, id_column='id_gps')

#print(f'After removing overlapping parts of polygons : {len(gps_gdf)}')

#gps_gdf['geometry'] = gps_gdf.buffer(buf_size)

#gps_gdf['area_buf'] = gps_gdf['geometry'].area.round(2)

#gps_gdf = gps_gdf.loc[gps_gdf['area_buf'] >= min_area]

#print(f'After removing polygons where area (after negative buffer) is less than {min_area} square meters : {len(gps_gdf)}')

# Tag the origin of these polygons
gps_gdf['collect'] = 'GPS'

print(f'--> There are {len(gps_gdf)} polygons obtained with the GPS')

#gps_gdf.to_file(f'{path}shp_temp/gps_ori.shp')

gps_gdf.head()

Number of GPX files : 900 --> 349 (without duplicates)
---- Good PID (whitout duplicates)  : 335
---- Wrong PID (whitout duplicates) : 14
After removing polygons where area is greater than 100000 square meters : 349
After removing polygons where area is less than 500 square meters : 349
--> There are 349 polygons obtained with the GPS


Unnamed: 0,id_gps,valid_id,gpx,geometry,area,collect
317,6220101142541,0,/export/projects/FAO-EOStat-Senegal/GPS/NIOR_0...,"POLYGON ((437993.758 1522537.691, 437992.288 1...",21397.46,GPS
299,6220103386239,0,/export/projects/FAO-EOStat-Senegal/GPS/nioro2...,"POLYGON ((447853.968 1520918.558, 447855.165 1...",6768.25,GPS
300,6220103862311,0,/export/projects/FAO-EOStat-Senegal/GPS/nioro2...,"POLYGON ((449591.030 1522133.396, 449616.400 1...",19402.26,GPS
301,6220103862312,0,/export/projects/FAO-EOStat-Senegal/GPS/nioro2...,"POLYGON ((446453.730 1521363.001, 446453.585 1...",26921.28,GPS
304,6220103862331,0,/export/projects/FAO-EOStat-Senegal/GPS/nioro2...,"POLYGON ((446248.504 1521250.578, 446248.468 1...",9191.36,GPS


## 2. ODK (tablet) - cropland

- Remove the following record from the CSV: its ID is duplicated and the duplicated polygons overlap
    - ID : 622020557134
    - area : 8226.20 m²

In [9]:
df = pd.read_csv(odk_crop_csv)

dict_odk_list = []

for _idx, row in df.iterrows():

    # The ID is the instance name without underscores and blanks
    pid  = row['meta-instanceName'].replace('_', '').replace(' ', '')
    culture = row['Culture']
    surface = row['Surface']

    # `polygone` holds the vertices separated by ';'. In each vertex the first
    # two blank-separated fields are read as latitude then longitude; any
    # further field is ignored.
    coords = []

    for point in row['polygone'].split(';'):

        # split() without argument ignores leading and repeated blanks
        fields = point.split()

        # Both values are converted before anything is stored, so a malformed
        # vertex is skipped as a whole and latitudes/longitudes stay paired.
        try:
            lat_point = float(fields[0])
            lon_point = float(fields[1])
        except (IndexError, ValueError):
            print(f"error for polygon : {pid}")
            continue

        coords.append((lon_point, lat_point))

    # Vertices are given as (x, y) = (longitude, latitude)
    polygon_geom = Polygon(coords)

    dict_odk = {'id_odk': int(pid),
                'crop_code': culture,
                'area_db' : round(surface, 2),
                'geometry': polygon_geom}

    dict_odk_list.append(dict_odk)


odk_df = pd.DataFrame.from_dict(dict_odk_list)

# Reproject from EPSG:4326 to EPSG:32628 so that areas are computed in projected units
odk_gdf = gpd.GeoDataFrame(odk_df, crs='epsg:4326', geometry='geometry').to_crs(epsg=32628)

print(f'Number of ODK polygons : {len(odk_gdf)} --> {len(odk_gdf.drop_duplicates("id_odk"))} (without duplicates)')

####################
##### CLEANING #####
####################

# Default (inner) merge: rows whose crop_code is not in the LUT are dropped
odk_gdf = odk_gdf.merge(lut_crop_df, on='crop_code')

#odk_gdf = odk_gdf.drop_duplicates('id_odk')  --> Duplicates have not always the same geometry !!

# Every row whose ID appears more than once, kept aside for inspection
odk_dup_gdf = odk_gdf[odk_gdf.duplicated('id_odk', keep=False)]

odk_gdf['area'] = odk_gdf['geometry'].area.round(2)

odk_gdf = odk_gdf.loc[odk_gdf['area'] < max_area]

print(f'After removing polygons where area is greater than {max_area} square meters : {len(odk_gdf)}')

odk_gdf = odk_gdf.loc[odk_gdf['area'] >= min_area]

print(f'After removing polygons where area is less than {min_area} square meters : {len(odk_gdf)}')

odk_gdf = remove_overlap.remove_overlap(odk_gdf, id_column='id_odk')

print(f'After removing overlapping parts of polygons : {len(odk_gdf)}')

#odk_gdf['geometry'] = odk_gdf.buffer(buf_size)

#odk_gdf['area_buf'] = odk_gdf['geometry'].area.round(2)

#odk_gdf = odk_gdf.loc[odk_gdf['area'] >= min_area]

#print(f'After removing polygons where area (after negative buffer) is less than {min_area} square meters : {len(odk_gdf)}')

# .copy(): the column subset gets a new column below, so make it an independent object
odk_gdf = odk_gdf[['id_odk','sub_nb','sub','area_db','area','geometry']].copy()

# Tag the origin of these polygons
odk_gdf['collect'] = 'ODK_polygon'


print(f'There are {len(odk_gdf)} polygons obtained with the tablet (ODK)')
#odk_gdf[['area','area_db']].sort_values('area_db')

#odk_dup_gdf.to_file(odk_crop_dup_shp)

#odk_gdf.to_file(odk_crop_shp)

odk_gdf.head()


Number of ODK polygons : 234 --> 231 (without duplicates)
After removing polygons where area is greater than 100000 square meters : 233
After removing polygons where area is less than 500 square meters : 230
After removing overlapping parts of polygons : 230
There are 230 polygons obtained with the tablet (ODK)


Unnamed: 0,id_odk,sub_nb,sub,area_db,area,geometry,collect
1,6220206179224,1121,Maize,689.47,684.93,"POLYGON ((427630.218 1526789.410, 427541.345 1...",ODK_polygon
2,6220103302531,1121,Maize,2234.3,2219.4,"POLYGON ((446760.480 1506615.871, 446759.522 1...",ODK_polygon
3,6220101154733,1121,Maize,2287.83,2272.69,"POLYGON ((433447.203 1520075.541, 433416.557 1...",ODK_polygon
4,62201022414,1121,Maize,3493.89,3470.8,"POLYGON ((430058.979 1516286.785, 430021.502 1...",ODK_polygon
5,6220102112741,1121,Maize,3903.75,3877.83,"POLYGON ((438788.002 1513436.858, 438786.885 1...",ODK_polygon


## 3. ODK (tablet) - non cropland

In [10]:
# Keep the land-cover answers and the start-geopoint coordinates, with shorter column names
odk_nc_df = (
    pd.read_csv(odk_non_crop_csv)
    .loc[:, ['Picture-LC_Class','Picture-Other_Non_Cropland','start-geopoint-Latitude','start-geopoint-Longitude']]
    .rename(columns={"start-geopoint-Longitude": "longitude", "start-geopoint-Latitude": "latitude", "Picture-LC_Class": 'lc_code'})
)

# Build points in EPSG:4326, reproject them to EPSG:32628, then drop the raw coordinates
point_geoms = gpd.points_from_xy(odk_nc_df.longitude, odk_nc_df.latitude, crs="EPSG:4326")

odk_nc_gdf = (
    gpd.GeoDataFrame(odk_nc_df, geometry=point_geoms)
    .to_crs(epsg=32628)
    .drop(columns=['longitude', 'latitude'])
)

# Add positive buffer 15 meters (turns every point into a polygon)
odk_nc_gdf['geometry'] = odk_nc_gdf.buffer(15, resolution=24)

# Join type used with the non-crop LUT; also echoed in the two messages below
join = 'inner'

print(f'---- Before {join} join with LUT {len(odk_nc_gdf)}')

odk_nc_gdf = odk_nc_gdf.merge(lut_non_crop_df, on='lc_code', how=join)

print(f'---- After {join} join with LUT {len(odk_nc_gdf)}')


# Area of the buffered points, then keep only the columns needed downstream
odk_nc_gdf = odk_nc_gdf.assign(area=odk_nc_gdf['geometry'].area.round(2))

odk_nc_gdf = odk_nc_gdf[['sub_nb','sub','area','geometry']]

# Tag the origin of these geometries
odk_nc_gdf['collect'] = 'ODK_point'

print(f'There are {len(odk_nc_gdf)} points obtained with the tablet (ODK)')

#odk_nc_gdf.to_file(odk_non_crop_shp)

odk_nc_gdf.head()

#odk_nc_gdf.sort_values('lc_code')

---- Before inner join with LUT 71
---- After inner join with LUT 52
There are 52 points obtained with the tablet (ODK)


Unnamed: 0,sub_nb,sub,area,geometry,collect
0,7211,Bare soils,706.35,"POLYGON ((394718.679 1527943.790, 394718.647 1...",ODK_point
1,7211,Bare soils,706.35,"POLYGON ((393932.658 1529546.910, 393932.626 1...",ODK_point
2,7211,Bare soils,706.35,"POLYGON ((394294.803 1529607.426, 394294.771 1...",ODK_point
3,7211,Bare soils,706.35,"POLYGON ((440162.486 1521520.015, 440162.454 1...",ODK_point
4,7211,Bare soils,706.35,"POLYGON ((451203.027 1521623.032, 451202.995 1...",ODK_point


## 4. From Sophie Lox - non cropland

In [9]:
# Non-crop polygons read from the shapefile
lox_nc_gdf = gpd.read_file(sophie_shp)

print(f'There are {len(lox_nc_gdf)} polygons obtained by Sophie Lox')

# Cast the code to integer, compute the polygon area (in the CRS of the file,
# no reprojection is done here) and attach the LUT rows whose `lc_code`
# matches `CODE` (inner join).
lox_nc_gdf = (
    lox_nc_gdf
    .astype({"CODE": int})
    .assign(area=lambda gdf: gdf['geometry'].area.round(2))
    .merge(lut_non_crop_lox_df, left_on='CODE', right_on='lc_code', how='inner')
)

# Keep only the columns needed downstream
lox_nc_gdf = lox_nc_gdf[['sub_nb','sub','area','geometry']]

# Written to disk: this is the only intermediate shapefile saved in the cells shown here
lox_nc_gdf.to_file(lox_non_crop_shp)

lox_nc_gdf

There are 6 polygons obtained by Sophie Lox


Unnamed: 0,sub_nb,sub,area,geometry
0,5111,Shrub land,70385.53,"POLYGON ((386401.119 1533683.130, 386416.324 1..."
1,8111,Urban,48556.14,"POLYGON ((385682.455 1533328.758, 385769.254 1..."
2,5111,Shrub land,3445454.51,"POLYGON ((379015.176 1510958.073, 380000.040 1..."
3,9211,"Non-current inland waters (reservoirs, swamps,...",943069.37,"POLYGON ((381794.852 1502816.303, 381750.519 1..."
4,9199,Water bodies,34539.72,"POLYGON ((382164.839 1504892.789, 382178.269 1..."
5,9199,Water bodies,3302743.89,"POLYGON ((424992.319 1509531.441, 425885.208 1..."


# Load GPS data

### 1.1. Extract GPS data

In [11]:
# Load the GPS survey database from the Excel workbook into a DataFrame.
gps_data_df = pd.read_excel(gps_xlsx)

# Preview of the first rows.
# NOTE(review): the rows displayed below contain respondent names and other
# personal details — consider showing only the needed columns before sharing.
gps_data_df.head()

Unnamed: 0,interview__key,interview__id,id_reg,id_dep,id_arr,nom_commune,village,id_dr,id_con,id_nomCC,id_men,id_nomCM,Q1_1_1__1,Q1_1_1__2,Q1_1_1a,Q1_1_1c,Q1_1_1d,Q1_1_1d_aut,Q1_1_2a,Q1_1_2c,Q1_1_2d,Q1_1_2d_aut,consentement,Q1_1_4,Q1_1_5__Latitude,Q1_1_5__Longitude,Q1_1_5__Accuracy,Q1_1_5__Altitude,Q1_1_5__Timestamp,repondant,strate,PARCELLE__id,Q1_3a_1,Q1_3a_2,Q1_3a_3,Q1_3a_4,Q1_3a_5a,Q1_3a_5aut,Q1_3a_5c,Q1_3a_5b,Q1_3a_7,Q1_3a_8a__1,Q1_3a_8a__4,Q1_3a_8a__2,Q1_3a_8a__3,Q1_3a_8b,Q1_3a_9,Q1_3a_10a__1,Q1_3a_10a__4,Q1_3a_10a__2,Q1_3a_10a__3,Q1_3a_10b,Q1_3a_12,Q1_3a_14,Q1_3a_15,Q1_3a_16,Q1_3a_17,Q1_3a_18,Q1_4a_01,Q1_4a_02,Q1_4a_03,Q1_4a_0,Q1_4a_2,Q1_4a_3a__1,Q1_4a_3a__2,Q1_4a_3a__3,Q1_4a_3a__4,Q1_4a_3a__5,Q1_4a_3a__6,Q1_4a_3a__7,Q1_4a_3a__8,Q1_4a_3a__9,Q1_4a_3a__10,Q1_4a_7__1,Q1_4a_7__3,Q1_4a_7__4,Q1_4a_7__2,Q1_4a_7__5,Q1_4a_7__6,Q1_4a_3b,Q1_4a_4__1,Q1_4a_4__2,Q1_4a_4__3,Q1_4a_5,Q1_4a_6,sup_est,dose_semence,Q1_4a_8,Q1_4a_9a,Q1_4a_9b,Q1_4a_9c,Q1_4a_10,Q1_4a_10a,Q1_4a_10b,Q1_4a_10c,Q1_4a_11,Q1_4a_12,Q1_4a_13__1,Q1_4a_13__2,Q1_4a_13__3,Q1_4a_14,Q1_4a_15,Q1_4a_16a,Q1_4a_16b,Q1_4a_16c,Q1_4a_17a,Q1_4a_17b,Q1_4a_17c,Q1_4a_18,dateSemis,Q1_4a1_2,Q1_4a1_3a__1,Q1_4a1_3a__2,Q1_4a1_3a__3,Q1_4a1_3a__4,Q1_4a1_3a__5,Q1_4a1_3a__6,Q1_4a1_3a__7,Q1_4a1_3a__8,Q1_4a1_3a__9,Q1_4a1_3a__10,Q1_4a1_7__1,Q1_4a1_7__3,Q1_4a1_7__4,Q1_4a1_7__2,Q1_4a1_7__5,Q1_4a1_7__6,Q1_4a1_3b,Q1_4a1_4__1,Q1_4a1_4__2,Q1_4a1_4__3,Q1_4a1_5,Q1_4a1_6,sup_est1,dose_semence1,Q1_4a1_8,Q1_4a1_9a,Q1_4a1_9b,Q1_4a1_9c,Q1_4a1_10,Q1_4a1_10a,Q1_4a1_10b,Q1_4a1_10c,Q1_4a1_11,Q1_4a1_12,Q1_4a1_13__1,Q1_4a1_13__2,Q1_4a1_13__3,Q1_4a1_14,Q1_4a1_15,Q1_4a1_16a,Q1_4a1_16b,Q1_4a1_16c,Q1_4a1_17a,Q1_4a1_17b,Q1_4a1_17c,Q1_4a1_18,dateSemis1,Q1_4b_1,Q1_4b_2__1,Q1_4b_2__2,Q1_4b_2__3,Q1_4b_2__4,Q1_4b_3,Q1_4b_4__1,Q1_4b_4__2,Q1_4b_4__3,Q1_4b_5__1,Q1_4b_5__2,Q1_4b_5__3,Q1_4b_5a,Q1_4b_5b,Q1_4b_5c,Q1_4b_6__1,Q1_4b_6__2,Q1_4b_6__3,Q1_4b_6a,Q1_4b_6b,Q1_4b_6c,quant_npk,dose_npk,Q1_4b_7__1,Q1_4b_7__2,Q1_4b_7__3,Q1_4b_7a,Q1_4b_7b,Q1_4b_7c,quant_uree,dose_uree
,Q1_4b_8,Q1_4b_8a__1,Q1_4b_8a__2,Q1_4b_8b,quant_npk_tot,dose_npk_epan,Q1_4b_8c,quant_uree_tot,dose_uree_epan,Q1_4b_9a,Q1_4b_9b__1,Q1_4b_9b__2,Q1_4b_9b__3,Q1_4b_9b__5,Q1_4b_9b__6,Q1_4b_9b__7,Q1_4b_9b__4,Q1_4b_10,Q1_4b_11__1,Q1_4b_11__2,Q1_4b_11__3,Q1_4b_11__4,Q1_4b_11__5,Q1_4b_11__6,Q1_4b_11__7,Q1_4b_11__8,Q1_4b_11__9,Q1_4b_11__10,Q1_4b_11__11,Q1_4b_11__12,Q1_4b_11__13,Q1_4b_11__14,Q1_4b_11__15,Q1_4b_11__16,Q1_4b_11__17,Q1_4b_11__18,Q1_4b_11__19,Q1_4b_11__20,Q1_4b_11__21,Q1_4b_11__22,Q1_4b_11__23,Q1_4b_11__24,Q1_4b_11__25,Q1_4b_11__26,Q1_4b_11__27,Q1_4b_11__28,Q1_4b_11__29,Q1_4b_11__30,Q1_4b_11__31,Q1_4b_11__32,Q1_4b_11__33,Q1_4c_1,Q1_4c_2,Q1_4c_3__1,Q1_4c_3__2,Q1_4c_3__3,Q1_4c_3__4,Q1_4c_3__5,Q1_4c_3__6,Q1_4c_4__1,Q1_4c_4__2,Q1_4c_4__3,Q1_4c_4__4,Q1_4c_4__5,Q1_4c_5__1,Q1_4c_5__2,Q1_4c_5__3,Q1_4c_5__4,Q1_4c_5__5,Q1_4c_5__7,Q1_4c_5__8,Q1_4c_5__9,Q1_4c_5__10,Q1_4c_5__11,Q1_4c_5__12,Q1_4c_5__14,Q1_4c_5__15,Q1_4c_5__13,Q1_4d_1__1,Q1_4d_1__2,Q1_4d_1__3,Q1_4d_2__1,Q1_4d_2__2,Q1_4d_2__3,Q1_4d_3__1,Q1_4d_3__2,Q1_4d_3__3,Q1_4d_4__1,Q1_4d_4__2,Q1_4d_4__3,Q1_5_0__Latitude,Q1_5_0__Longitude,Q1_5_0__Accuracy,Q1_5_0__Altitude,Q1_5_0__Timestamp,Q1_5_1,Q1_5_2a,sup_ha,Q1_5_2b,Q1_6_2c,idTraceParcelle,rendement1,rendement2,rendement3,rendement4,rendement5,rendement6,rendement7,id_com,Q1_2_1__0,Q1_2_1__1,Q1_2_1__2,Q1_2_1__3,Q1_2_1__4,Q1_2_1__5,Q1_2_1__6,Q1_2_1__7,Q1_2_1__8,Q1_2_1__9,Q1_2_1__10,Q1_2_1__11,Q1_2_1__12,Q1_2_1__13,Q1_2_1__14,Q1_2_1__15,Q1_2_1__16,Q1_2_1__17,Q1_2_1__18,Q1_2_1__19,Q1_2_1__20,Q1_2_1__21,Q1_2_1__22,Q1_2_1__23,Q1_2_1__24,Q1_2_1__25,Q1_2_1__26,Q1_2_1__27,Q1_2_1__28,Q1_2_1__29,Q1_2_1__30,Q1_2_1__31,Q1_2_1__32,Q1_2_1__33,Q1_2_1__34,Q1_2_1__35,Q1_2_1__36,Q1_2_1__37,Q1_2_1__38,Q1_2_1__39,Q1_2_1__40,Q1_2_1__41,Q1_2_1__42,Q1_2_1__43,Q1_2_1__44,Q1_2_1__45,Q1_2_1__46,Q1_2_1__47,Q1_2_1__48,Q1_2_1__49,Q1_2_1__50,Q1_2_1__51,Q1_2_1__52,Q1_2_1__53,Q1_2_1__54,Q1_2_1__55,Q1_2_1__56,Q1_2_1__57,Q1_2_1__58,Q1_2_1__59,Q1_2_10,Q1_3_0a,Q1_3a_0a,Q1_3a_1__0,Q1_3a_1__1,Q1_3a_1__2,Q1_3a_1
__3,Q1_3a_1__4,Q1_3a_1__5,Q1_3a_1__6,Q1_3a_1__7,Q1_3a_1__8,Q1_3a_1__9,Q1_3a_1__10,Q1_3a_1__11,Q1_3a_1__12,Q1_3a_1__13,Q1_3a_1__14,Q1_3a_1__15,Q1_3a_1__16,Q1_3a_1__17,Q1_3a_1__18,Q1_3a_1__19,Q1_3a_1__20,Q1_3a_1__21,Q1_3a_1__22,Q1_3a_1__23,Q1_3a_1__24,Q1_3a_1__25,Q1_3a_1__26,Q1_3a_1__27,Q1_3a_1__28,Q1_3a_1__29,Q1_3a_1__30,Q1_3a_1__31,Q1_3a_1__32,Q1_3a_1__33,Q1_3a_1__34,Q1_3a_1__35,Q1_3a_1__36,Q1_3a_1__37,Q1_3a_1__38,Q1_3a_1__39,Q1_3a_1__40,Q1_3a_1__41,Q1_3a_1__42,Q1_3a_1__43,Q1_3a_1__44,Q1_3a_1__45,Q1_3a_1__46,Q1_3a_1__47,Q1_3a_1__48,Q1_3a_1__49,Q1_3b_0a,Q1_3b_1__0,Q1_3b_1__1,Q1_3b_1__2,Q1_3b_1__3,Q1_3b_1__4,Q1_3b_1__5,Q1_3b_1__6,Q1_3b_1__7,Q1_3b_1__8,Q1_3b_1__9,Q1_3b_1__10,Q1_3b_1__11,Q1_3b_1__12,Q1_3b_1__13,Q1_3b_1__14,Q1_3b_1__15,Q1_3b_1__16,Q1_3b_1__17,Q1_3b_1__18,Q1_3b_1__19,Q1_3b_1__20,Q1_3b_1__21,Q1_3b_1__22,Q1_3b_1__23,Q1_3b_1__24,Q1_3b_1__25,Q1_3b_1__26,Q1_3b_1__27,Q1_3b_1__28,Q1_3b_1__29,Q1_3b_1__30,Q1_3b_1__31,Q1_3b_1__32,Q1_3b_1__33,Q1_3b_1__34,Q1_3b_1__35,Q1_3b_1__36,Q1_3b_1__37,Q1_3b_1__38,Q1_3b_1__39,Q1_3b_1__40,Q1_3b_1__41,Q1_3b_1__42,Q1_3b_1__43,Q1_3b_1__44,Q1_3b_1__45,Q1_3b_1__46,Q1_3b_1__47,Q1_3b_1__48,Q1_3b_1__49,Q1_3c_0a,Q1_3c_0aa__0,Q1_3c_0aa__1,Q1_3c_0aa__2,Q1_3c_0aa__3,Q1_3c_0aa__4,Q1_3c_0aa__5,Q1_3c_0aa__6,Q1_3c_0aa__7,Q1_3c_0aa__8,Q1_3c_0aa__9,Q1_3c_0aa__10,Q1_3c_0aa__11,Q1_3c_0aa__12,Q1_3c_0aa__13,Q1_3c_0aa__14,Q1_3c_0aa__15,Q1_3c_0aa__16,Q1_3c_0aa__17,Q1_3c_0aa__18,Q1_3c_0aa__19,Q1_3c_0aa__20,Q1_3c_0aa__21,Q1_3c_0aa__22,Q1_3c_0aa__23,Q1_3c_0aa__24,Q1_3c_0aa__25,Q1_3c_0aa__26,Q1_3c_0aa__27,Q1_3c_0aa__28,Q1_3c_0aa__29,Q1_3c_0aa__30,Q1_3c_0aa__31,Q1_3c_0aa__32,Q1_3c_0aa__33,Q1_3c_0aa__34,Q1_3c_0aa__35,Q1_3c_0aa__36,Q1_3c_0aa__37,Q1_3c_0aa__38,Q1_3c_0aa__39,Q1_3c_0aa__40,Q1_3c_0aa__41,Q1_3c_0aa__42,Q1_3c_0aa__43,Q1_3c_0aa__44,Q1_3c_0aa__45,Q1_3c_0aa__46,Q1_3c_0aa__47,Q1_3c_0aa__48,Q1_3c_0aa__49,Q1_3a_24,Q1_3b_24,Q1_3c_24b,Q1_6_0_0,Q1_6_0_1,Q1_6_3_1,Q1_6_5_1,Q1_6_6_1,Q1_6_7_1,poids_moy_ara,Q1_6_8_1,poids_recolte_ara,Q1_6_9_1,rend_ara_cal,Q1_6_1_
0,Q1_6_0_2,Q1_6_1_2,Q1_6_3_2,Q1_6_4_2,Q1_6_5_2,Q1_6_6_2,Q1_6_7_2,poids_moy_mil,Q1_6_8_2,poids_recolte_mil,Q1_6_9_2,rend_mil_cal,Q1_6_2_0,Q1_6_0_3,Q1_6_3_3,Q1_6_3a_3,Q1_6_3b_3,Q1_6_4_3,Q1_6_5_3,Q1_6_6_3,Q1_6_7_3,poids_moy_niebe,Q1_6_8_3,poids_recolte_niebe,Q1_6_9_3,rend_niebe_cal,Q1_6_3_0,Q1_6_0_4,Q1_6_1_4,Q1_6_3_4,Q1_6_4_4,Q1_6_5_4,Q1_6_6_4,Q1_6_7_4,poids_moy_mais,Q1_6_8_4,poids_recolte_mais,Q1_6_9_4,rend_mais_cal,Q1_6_4_0,Q1_6_0_5,Q1_6_3_5,Q1_6_4_5,Q1_6_5_5,Q1_6_6_5,Q1_6_7_5,poids_moy_sor,Q1_6_8_5,poids_recolte_sor,Q1_6_9_5,rend_sor_cal,Q1_6_5_0,Q1_6_0_6,Q1_6_8_6,Q1_6_9_6,rend_fonio_cal,Q1_6_6_0,Q1_6_0_7,Q1_6_3_7,Q1_6_4_7,Q1_6_5_7,Q1_6_6_7,Q1_6_7_7,poids_moy_rizirr,Q1_6_8_7,poids_recolte_rizirr,Q1_6_9_7,rend_rizirr_cal,Q1_6_7_0,Q1_6_0_8,Q1_6_1_8,Q1_6_3_8,Q1_6_4_8,Q1_6_5_8,Q1_6_6_8,Q1_6_7_8,poids_moy_rizpluv,Q1_6_8_8,poids_recolte_rizpluv,Q1_6_9_8,rend_rizpluv_cal,Q1_7_1,Q1_7_2__9,Q1_7_2__12,Q1_7_2__19,Q1_7_2__34,Q1_7_2a,Q1_7_3,Q1_7_5,Q1_7_6,Q1_7_7,Q1_7_8,Q1_7_9__1,Q1_7_9__2,Q1_7_9__3,Q1_7_9__4,Q1_7_9__5,Q1_7_9__6,Q1_8_1,Q1_8_2,Q1_8_2_autre,sssys_irnd,has__errors,interview__status,assignment__id,responsible,interviewers,rejections__sup,rejections__hq,entities__errors,questions__comments,interview__duration,enqueteur,_merge
0,09-15-37-52,58fd33e0b0454a5c86100e95b5902e68,KAOLACK,NIORO DU RIP,MEDINA-SABAKH,MEDINA-SABAKH,MEDINA SABAKH,28,28,KHADY BA,Ménage5,AÏSSATOU DIALLO,1,0,2021-09-22T14:59:31,Oui,,,,,,,Oui,Logement du ménage,13.596947,-15.578123,5.36,68.765015,2021-09-22T14:59:18,Aïssatou Ndoura Diallo,Horticole et autre,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Non,Non,Non,Non,Non,Non,Non,MEDINA-SABAKH,Aïssatou Ndoura Diallo,Khadi Ba,pape diallo,Arame Diallo,Madior Soiré,Fanta Ndiaye,Gueda Bâ,Samba Diallo,Bamba Diallo,Papa Ndiaye,Aly moussa Diallo,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,11,Non,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Non,Oui,Non,Non,,,,,,,778864182.0,Entretien terminé,,0.084727,0,Completed,46038,Enqueteurnior2,1,1,0,0,0,00.00:26:15,Enqueteurnior2,only in using data
1,12-14-51-85,e6387fcb9bf74820976e676437368dd0,KAOLACK,NIORO DU RIP,MEDINA-SABAKH,MEDINA-SABAKH,KEUR AYIB,32,37,DEGUENE KANE,Ménage6,AÏDA KANE,1,0,2021-09-23T12:35:27,Oui,,,,,,,Oui,Logement du ménage,13.592716,-15.60837,3.216,82.673645,2021-09-23T12:23:04,Mariama Samb,Uniquement élevage,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Non,Non,Non,Non,Non,Non,Non,MEDINA-SABAKH,Aïda Kane,Mariama Samb,Baye Ass Niang,Mbaye Niang,Nogoye Niang,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,5,Non,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Oui,,Non,,,,,,,775228239.0,Entretien terminé,,0.375155,0,Completed,46029,Enqueteurnior2,1,1,0,0,0,00.00:13:29,Enqueteurnior2,only in using data
2,14-03-78-66,bb4c70682d0c4f0ba27d87bbd71a820e,KAOLACK,NIORO DU RIP,MEDINA-SABAKH,MEDINA-SABAKH,MEDINA SABAKH,28,51,ELY MENDY,Ménage4,ELY MENDY,1,0,2021-09-22T14:43:44,Oui,,,,,,,Oui,Logement du ménage,13.596244,-15.576275,6.432,78.591736,2021-09-22T14:46:40,Ely Mendy,Uniquement élevage,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Non,Non,Non,Non,Non,Non,Non,MEDINA-SABAKH,Rozé mendy,Alphonse mendy,Néné mendy,Fatou mendy,polonais mendy,pitrou mendy,moussé mendy,mama mendy,Michel mendy,Jean mendy,Ely mendy,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,11,Non,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Oui,,Non,,,,,,,776930814.0,Entretien terminé,,0.595556,0,Completed,46049,Enqueteurnior2,1,1,0,0,0,00.00:39:18,Enqueteurnior2,only in using data
3,15-19-72-37,adf89cefc3a949289732bef4cb8197ef,KAOLACK,NIORO DU RIP,MEDINA-SABAKH,MEDINA-SABAKH,MEDINA SABAKH,25,24,EL H MBAYE DIANKHA,Ménage6,ELHADJI MBAYE DIANKHA,1,0,2021-09-21T15:52:08,Oui,,,,,,,Oui,Logement du ménage,13.600979,-15.577993,9.648001,63.409851,2021-09-21T15:53:14,Elhadji Mbaye Diankha,Uniquement élevage,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Non,Non,Non,Non,Non,Non,Non,MEDINA-SABAKH,Elhadji mbayediankha,seydina omar diankha,keba seck,El .baye beye,mouhamed lo,mouhamed arabe beye,mouhamed lamine beye,ndiambe gueye,seynabou diankha,fatou lo,ndague thiam,awa diankha,rock niass,ndeye fatou gueye,cheikh baye diankha,aly diankha,Mariama Diankha,Ibrahim sall,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,18,Non,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Oui,,Non,,,,,,,776520802.0,Entretien terminé,,0.953138,0,Completed,46042,Enqueteurnior2,1,1,0,0,0,00.00:41:08,Enqueteurnior2,only in using data
4,15-45-11-43,8e30af33429c468da46a01dff39b290a,KAOLACK,NIORO DU RIP,PAOSKOTO,GAINTE KAYE,THIOUBENE,17,28,GORGUI SOW,Ménage5,GORGUI SOW,1,1,2021-10-12T11:14:46,Oui,,,2021-10-12T11:14:54,Oui,,,Oui,Logement du ménage,13.815419,-15.975391,17.152,69.670105,2021-10-12T11:15:04,Gorgui Sow,Uniquement élevage,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Non,Non,Non,Non,Non,Non,Non,GAINTE KAYE,Gorgui Sow,Maguette Sow,Sira Sow,Demba Wagne,Penda Wagne,Baye Niass Sow,Moussa Sow,Seynabou Sow,Amy Sow,Djiby Sow,Sidy Sow,Babacar Sow,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,12,Oui,Non,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Non,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Oui,Thioubene Gorgui Sow 1ha,"Thioubene Gorgui Sow 1,5ha",##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,##N/A##,,,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Oui,,Non,,,,,,,768784517.0,Entretien terminé,,0.036137,0,Completed,46109,Enqueteurnior3,1,0,0,0,0,00.01:34:51,Enqueteurnior3,only in using data


### 1.2. Clean GPS data

In [12]:
print(f'Initial rows {len(gps_data_df)}')

# Keep the parcel ID, crop name and area columns under shorter names,
# then discard rows that lack an ID or a crop name.
clean_gps_data_df = (
    gps_data_df[['idTraceParcelle','Q1_3a_14','Q1_5_2a']]
    .rename(columns={"idTraceParcelle": "id_gps", "Q1_3a_14": "crop_name_2021", "Q1_5_2a": "area_db"})
    .dropna(subset=['id_gps','crop_name_2021'])
)

print(f'--> After removing rows without ID and crop name : {len(clean_gps_data_df)}')


# Turn the ID into an integer (underscores stripped first) and keep one row per ID
clean_gps_data_df = (
    clean_gps_data_df
    .assign(id_gps=lambda table: table['id_gps'].str.replace('_', ''))
    .astype({"id_gps": int})
    .drop_duplicates('id_gps')
)

print(f'--> After removing duplicates ID : {len(clean_gps_data_df)}')

# Attach the LUT row whose `crop_name` equals the surveyed crop name (inner join)
clean_gps_data_df = clean_gps_data_df.merge(lut_crop_df, left_on='crop_name_2021', right_on='crop_name', how='inner')

print(f'--> After inner join with LUT : {len(clean_gps_data_df)}')

# Keep only the columns needed downstream
clean_gps_data_df = clean_gps_data_df[['id_gps','sub_nb','sub', 'area_db']]

clean_gps_data_df.head()

Initial rows 396
--> After removing rows without ID and crop name : 367
--> After removing duplicates ID : 367
--> After inner join with LUT : 367


Unnamed: 0,id_gps,sub_nb,sub,area_db
0,622020420631,1421,Groundnuts,5470.8
1,6220205110041,1421,Groundnuts,10930.0
2,62201023262102,1421,Groundnuts,9849.0
3,62201023262101,1421,Groundnuts,15642.0
4,622020673414,1421,Groundnuts,9156.0


# Merge GPS polygons & GPS database

In [13]:
# Sizes of the two tables being linked, computed once and reused in the messages
n_polygons = len(gps_gdf)
n_db_rows  = len(clean_gps_data_df)

print(f'Number of polygon from GPX files   : {n_polygons}')
print(f'Number of row data in GPS database : {n_db_rows}')

# Inner join: only polygons that have a matching database row
gps_inner_gdf = gps_gdf.merge(clean_gps_data_df, on='id_gps', how='inner')

# Left join: every polygon, with database columns filled in where a row matches
gps_left_gdf = gps_gdf.merge(clean_gps_data_df, on='id_gps', how='left')

n_linked = len(gps_inner_gdf)


print(f'On the {n_polygons} GPS polygons, there are {n_linked} GPS polygons link to a row in the DB.')
print(f'--> {n_polygons-n_linked} GPS polygons are not linked to a row in the DB!')
print(f'--> {n_db_rows-n_linked} rows in the DB are not linked to a GPS polygon!')

print(f'left join : {len(gps_left_gdf)}')

#gps_crops_all_gdf = gps_crops_all_gdf.merge(odk_data_df, left_on='id_gps', right_on='id_odk', how='inner')

#gps_inner_gdf.to_file(gps_inner_shp)
#gps_left_gdf.to_file(gps_left_shp)

gps_inner_gdf.head()


#gps_crops_all_gdf.to_excel(f'{path}Output_Nico/gps_data_outer_join.xlsx')

#display(gps_crops_all_gdf.sort_values(by='valid_id'))

Number of polygon from GPX files   : 349
Number of row data in GPS database : 367
On the 349 GPS polygons, there are 317 GPS polygons link to a row in the DB.
--> 32 GPS polygons are not linked to a row in the DB!
--> 50 rows in the DB are not linked to a GPS polygon!
left join : 349


Unnamed: 0,id_gps,valid_id,gpx,geometry,area,collect,sub_nb,sub,area_db
0,6220301171211,0,/export/projects/FAO-EOStat-Senegal/GPS/nior5_...,"POLYGON ((396701.304 1505979.583, 396700.974 1...",14995.33,GPS,1421,Groundnuts,15093.0
1,62201022411,1,/export/projects/FAO-EOStat-Senegal/GPS/NIOR1_...,"POLYGON ((429548.498 1516286.092, 429548.825 1...",8842.45,GPS,1421,Groundnuts,8901.4
2,62201022412,1,/export/projects/FAO-EOStat-Senegal/GPS/NIOR1_...,"POLYGON ((429863.904 1516202.422, 429863.342 1...",17477.66,GPS,1181,Millets,17754.0
3,62201022413,1,/export/projects/FAO-EOStat-Senegal/GPS/NIOR1_...,"POLYGON ((429972.245 1516435.077, 429973.528 1...",9808.82,GPS,1181,Millets,9874.2
4,62201022414,1,/export/projects/FAO-EOStat-Senegal/GPS/NIOR1_...,"POLYGON ((429974.731 1516287.245, 429976.051 1...",5276.49,GPS,1121,Maize,5311.7


# Distribution figures

In [3]:
import importlib
#sys.path.insert(0, '/export/miro/ndeffense/LBRAT2104/GIT/eo-toolbox/tools/')

# Pick up local edits to the plotting helpers without restarting the kernel.
importlib.reload(crop_distribution)

#display(lox_nc_gdf)

# Working directory of this section. It is a local copy of the intermediate
# shapefiles, so it gets its own name: rebinding the project-wide `path`,
# `crop_dictionary_xlsx` or `gps_gdf` here would silently change what the
# earlier cells do when they are re-run.
# NOTE(review): absolute, user-specific path — consider moving it to the
# Input section next to `path`.
distrib_path = '/Users/Nicolas/Documents/EOStat_senegal/'

distrib_crop_dictionary_xlsx = f'{distrib_path}crop_dictionary_new.xlsx'

# Output files of the (currently disabled) matplotlib figures
prop_csv      = f'{distrib_path}histograms/histo_odk_prop.csv'
histo_odk_png = f'{distrib_path}histograms/histo_odk_with_cumsum.png'
pie_png       = f'{distrib_path}histograms/pie_1.png'

# Output files of the plotly figures
pie_plotly_filename  = f'{distrib_path}histograms/pie_chart_plotly'
bar_plotly_html      = f'{distrib_path}histograms/bar_chart_plotly.html'
plotly_2_bar_html    = f'{distrib_path}histograms/2_bar_plot.html'
plotly_2_bar_nc_html = f'{distrib_path}histograms/2_bar_plot_nc.html'

# Reload the intermediate layers from disk
gps_distrib_gdf = gpd.read_file(f'{distrib_path}gps_with_data_inner.shp')

odk_gdf = gpd.read_file(f'{distrib_path}odk_crop_with_data.shp')

odk_nc_gdf = gpd.read_file(f'{distrib_path}odk_non_crop_with_data.shp')

lox_nc_gdf = gpd.read_file(f'{distrib_path}lox_non_crop_with_data.shp')

#lox_nc_gdf
#odk_nc_gdf
#odk_gdf
#gps_inner_gdf

# Name of the crop-dictionary level handed to every `crop_distribution` call
# below ('lc' presumably stands for land cover — confirm in crop_distribution).
level = 'lc'

# One plotting table per data source; the last argument is the source label.
gps_df_plot = crop_distribution.prepare_dataframe(gps_distrib_gdf, distrib_crop_dictionary_xlsx, level, 'GPS')

odk_df_plot = crop_distribution.prepare_dataframe(odk_gdf, distrib_crop_dictionary_xlsx, level, 'ODK')

#odk_nc_df_plot = crop_distribution.prepare_dataframe(odk_nc_gdf, distrib_crop_dictionary_xlsx, level, 'ODK')

lox_df_plot = crop_distribution.prepare_dataframe(lox_nc_gdf, distrib_crop_dictionary_xlsx, level, 'LOX')

#display(gps_df_plot)

display(lox_df_plot)

# Plotly
# ------

crop_distribution.pie_chart_plotly(lox_df_plot, pie_plotly_filename, level)

crop_distribution.bar_chart_plotly(lox_df_plot, bar_plotly_html, level)

#crop_distribution.grouped_bar_chart_plotly(gps_df_plot, odk_df_plot, plotly_2_bar_html, level)

#crop_distribution.grouped_bar_chart_plotly(odk_nc_df_plot, lox_df_plot, plotly_2_bar_nc_html, level)


# Matplotlib
# ----------

#crop_distribution.build_histogram_matplotlib(lox_df_plot, histo_odk_png, level, distribution='area', cumsum=False)

#crop_distribution.get_proportion_lc(lox_df_plot, pie_png, level)

Unnamed: 0,lc_nb,area,count,lc,grp_nb,grp,class_nb,class,sub_nb,sub,grp_1_nb,grp_1,pr_5_nb,pr_5,pr_6_nb,pr_6,pr_61_nb,pr_61,pr_7_nb,pr_7,pr_71_nb,pr_71,pr_8_nb,pr_8,grp_A_nb,grp_A,ratio,cumsum,cumsum_ratio,color,name
2,9,428.04,3,Water bodies,91,Water bodies,911,Sea and coastal lagoons,9111,Sea and coastal lagoons,9,Water bodies,2,Non cropland,0,Remove,3,Non cropland,3,Non cropland,3,Non cropland,3,Non cropland,9,Water bodies,54.56,428.04,54.56,#0046c8,LOX
0,5,351.58,2,Shrub land,51,Shrub land,511,Shrub land,5111,Shrub land,5,Shrub land,2,Non cropland,0,Remove,3,Non cropland,3,Non cropland,3,Non cropland,3,Non cropland,5,Shrub land,44.82,779.62,99.38,#966400,LOX
1,8,4.86,1,Build-up surface,81,Urban,811,Urban,8111,Urban,81,Urban,2,Non cropland,0,Remove,3,Non cropland,3,Non cropland,3,Non cropland,3,Non cropland,8,Build-up surface,0.62,784.48,100.0,#c31400,LOX


# Concatenate ODK polygons / ODK points / GPS

In [None]:
# Layers to stack, in output order: ODK cropland, ODK non-cropland, GPS.
gdf_list = [odk_crops_all_gdf, odk_nc_all_gdf, gps_crops_all_gdf]

# Stack the layers row-wise with a fresh 0..n-1 index, number every feature
# with a running `gid`, then reproject the result to EPSG:32628
# (WGS 84 / UTM zone 28N).
full_polygons_gdf = (
    pd.concat(gdf_list, axis=0, ignore_index=True)
      .assign(gid=lambda stacked: np.arange(stacked.shape[0]))
      .to_crs(epsg=32628)
)

full_polygons_gdf.head()


Unnamed: 0,id,crop_code,geometry,area,collect,crop_name,lc_code,Picture-Other_Non_Cropland,lc_name,crop_name_2021,Informations_champ-Culture,Informations_champ-Surface,gid
0,6220201174341,1.0,"POLYGON ((393917.866 1528882.489, 393901.785 1...",24112.55,ODK_polygon,Arachide,,,,,,,0
1,6220201171611,1.0,"POLYGON ((394069.867 1529538.943, 394187.774 1...",12635.46,ODK_polygon,Arachide,,,,,,,1
2,6220201171631,1.0,"POLYGON ((393943.188 1529558.847, 393938.597 1...",6242.19,ODK_polygon,Arachide,,,,,,,2
3,6220201171661,1.0,"POLYGON ((393939.067 1529497.632, 393938.507 1...",3883.07,ODK_polygon,Arachide,,,,,,,3
4,6220201173221,1.0,"POLYGON ((394240.275 1528421.642, 394234.092 1...",2927.02,ODK_polygon,Arachide,,,,,,,4


# Interactive plot with `folium`

https://leafletjs.com/reference-1.6.0.html#path-option

https://python-visualization.github.io/folium/quickstart.html

https://geopandas.org/gallery/polygon_plotting_with_folium.html

https://bikeshbade.com.np/tutorials/Detail/?title=Beginner+guide+to+python+Folium+module+to+integrate+google+earth+engine&code=8

https://nbviewer.org/github/python-visualization/folium/blob/master/examples/ImageOverlay.ipynb

In [None]:
import html as html_lib

# `folium` and the local `folium_def` helper are commented out in the import
# cell at the top of the notebook, so they are imported here to keep this
# cell runnable on a fresh kernel (`folium_def` is found through the
# `tools_path` entry added to `sys.path` in that same cell).
import folium
import folium_def

# Tolerance given to `GeoSeries.simplify` AFTER reprojection to EPSG:4326,
# i.e. expressed in degrees.
# NOTE(review): 0.001 degree is on the order of 100 m, which looks coarse
# compared with the field sizes in this dataset — confirm that the drawn
# outlines are still faithful enough.
SIMPLIFY_TOLERANCE_DEG = 0.001


def _escape(value):
    """Return `value` as text that is safe to embed in the popup HTML."""
    return html_lib.escape(str(value))


def _crop_popup_html(source_label, row):
    """Build the popup HTML of one cropland polygon.

    `source_label` is the bold title of the popup; `row` must provide the
    'id', 'crop_code', 'crop_name' and 'area' fields.
    """
    return f'''<b>{source_label}</b><br>
    ID : {_escape(row['id'])}<br>
    Crop type : {_escape(row['crop_code'])} - {_escape(row['crop_name'])}<br>
    Area : {_escape(row['area'])}
    '''


def _non_crop_popup_html(row):
    """Build the popup HTML of one ODK non-cropland feature.

    `row` must provide the 'lc_code', 'lc_name' and
    'Picture-Other_Non_Cropland' fields; the latter is a free-text comment,
    hence the escaping.
    """
    return f'''<b>ODK - Non cropland</b><br>
    Land Cover : {_escape(row['lc_code'])} - {_escape(row['lc_name'])}<br>
    Comment : {_escape(row['Picture-Other_Non_Cropland'])}<br>
    '''


def _add_features(folium_map, gdf, color, popup_html, simplify_tolerance=None):
    """Draw every geometry of `gdf` on `folium_map` with an HTML popup.

    Parameters
    ----------
    folium_map : folium.Map
        Map that receives the features (modified in place).
    gdf : geopandas.GeoDataFrame
        Features to draw; reprojected to EPSG:4326 before drawing.
    color : str
        Outline colour of the features (they are drawn without fill).
    popup_html : callable
        Called with one row of `gdf`, returns the popup HTML string.
    simplify_tolerance : float, optional
        Tolerance in degrees for `GeoSeries.simplify`; `None` keeps the
        geometries untouched.
    """
    for _, row in gdf.to_crs(epsg=4326).iterrows():
        shape = gpd.GeoSeries(row['geometry'])
        if simplify_tolerance is not None:
            shape = shape.simplify(tolerance=simplify_tolerance)

        feature = folium.GeoJson(
            data=shape.to_json(),
            style_function=lambda _feature: {'fillOpacity': 0, 'color': color},
        )

        iframe = folium.IFrame(popup_html(row), width=250, height=150)
        folium.Popup(iframe).add_to(feature)
        feature.add_to(folium_map)


# Base map with the Google Satellite layer provided by `folium_def`
m = folium.Map(location=[13.743747099563299, -15.772308355932424], zoom_start=11.5)

basemap_dict = folium_def.get_basemap()

basemap_dict['Google Satellite'].add_to(m)

# Plot ROI (disabled)

#roi_gdf = gpd.read_file(roi_shp)

#sim_geo = gpd.GeoSeries(roi_gdf['geometry']).simplify(tolerance=0.001)
#geo_j = sim_geo.to_json()
#geo_j = folium.GeoJson(data=geo_j,
#                       style_function=lambda x: {'fillOpacity': 0, 'color': 'black'})
#geo_j.add_to(m)

# Plot GPX polygons in red
_add_features(m, gps_crops_all_gdf, 'red',
              lambda row: _crop_popup_html('GPS', row),
              simplify_tolerance=SIMPLIFY_TOLERANCE_DEG)

# Plot ODK cropland polygons in magenta
_add_features(m, odk_crops_all_gdf, 'magenta',
              lambda row: _crop_popup_html('ODK - Cropland', row),
              simplify_tolerance=SIMPLIFY_TOLERANCE_DEG)

# Plot ODK non-cropland points in blue (geometries are not simplified)
_add_features(m, odk_nc_all_gdf, 'blue', _non_crop_popup_html)

m = folium_def.add_categorical_legend(m, 'Legend',
                             colors = ['red','magenta','blue'],
                             labels = ['GPX', 'ODK - cropland','ODK - non cropland'])


#m.save(map_html_output)

m