In [1]:
import pandas as pd
import json

In [2]:
# Values of the 'lc_code1' column that later cells treat as rice / wheat rows.
RICE_IDS = ['B101', 'B102', 'B103']
WHEAT_IDS = ['B104']
# Names of the columns holding each row's x / y coordinate; they are read by
# filter_na_coords and are the only columns exported to the JSON file.
# NOTE(review): presumably longitude / latitude of the observation - confirm units.
X_COORD = 'coord_obs_x'
Y_COORD = 'coord_obs_y'

In [3]:
def filter_na_coords(df):
    """Return the rows of ``df`` that have both coordinates present.

    A row is dropped when *either* the ``X_COORD`` or the ``Y_COORD`` column
    is missing (NaN/None). A point with only one coordinate cannot be placed,
    and keeping it would leak NaN values into the exported coordinate lists.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the ``X_COORD`` and ``Y_COORD`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with the incomplete rows removed; ``df`` itself is not
        modified and the surviving rows keep their original index labels.

    Raises
    ------
    KeyError
        If either coordinate column is absent from ``df``.
    """
    # how='any' (the default): one missing coordinate is enough to drop a row.
    return df.dropna(subset=[X_COORD, Y_COORD])

In [4]:
# Load the point-survey spreadsheet (path is relative to the notebook's
# working directory) into a DataFrame.
data = pd.read_excel('data/point_survey_v1.xlsx')

In [5]:
# Preview the first rows of the loaded survey table.
data.head()

Unnamed: 0,district,segment_id,point_id,point_code,su_id,su_date,su_date_year,su_date_month,su_date_day,start_time,...,harvest,harvest_qualifier,d_harv,irrig,lc_remark,photo,remarks,entry_loc,entry_loc_qualifier,end_time
0,42,621,1,42-621-1,D2,2016-11-10,2016,11,10,15:40:00,...,2.0,,2017-02-15 00:00:00,2.0,,5402_37.jpg,,1,,15:43:00
1,42,621,2,42-621-2,D2,2016-11-10,2016,11,10,15:50:00,...,,,null/null/null,,River (flooded area),5442_37.jpg,River bank,1,,15:52:00
2,42,621,3,42-621-3,D1,2016-11-10,2016,11,10,15:35:00,...,1.0,,2016-10-25 00:00:00,1.0,,5159_37.jpg,,1,,15:37:00
3,42,621,4,42-621-4,D1,2016-11-10,2016,11,10,15:42:00,...,,,null/null/null,,River,5199_37.jpg,,1,,15:44:00
4,42,645,1,42-645-1,D2,2016-11-10,2016,11,10,14:15:00,...,,,null/null/null,,,5322_37.jpg,,1,,14:17:00


In [6]:
# (row count, column count) of the loaded survey table.
data.shape

(1585, 59)

In [7]:
# Partition the survey rows by their 'lc_code1' value: rice, wheat, and
# everything else (including rows whose code is missing).
lc_code = data['lc_code1']
is_rice = lc_code.isin(RICE_IDS)
is_wheat = lc_code.isin(WHEAT_IDS)

rice_rows = data[is_rice]
wheat_rows = data[is_wheat]
other_rows = data[~(is_rice | is_wheat)]

In [8]:
# Number of rice / wheat / other rows before any coordinate filtering.
len(rice_rows), len(wheat_rows), len(other_rows)

(392, 20, 1173)

In [9]:
# For each group, apply filter_na_coords and keep only the two coordinate
# columns (x first, then y).
coord_columns = [X_COORD, Y_COORD]
rice_filtered_coords, wheat_filtered_coords, other_filtered_coords = (
    filter_na_coords(group_rows)[coord_columns]
    for group_rows in (rice_rows, wheat_rows, other_rows)
)

In [10]:
# Number of rice / wheat / other rows remaining after filter_na_coords.
len(rice_filtered_coords), len(wheat_filtered_coords), len(other_filtered_coords)

(389, 18, 1162)

In [11]:
# Build the export payload: group label -> list of [x, y] pairs taken from
# the filtered coordinate frames (plain Python lists so json can serialise them).
points_map = {
    label: coords.values.tolist()
    for label, coords in (
        ('rice', rice_filtered_coords),
        ('wheat', wheat_filtered_coords),
        ('other', other_filtered_coords),
    )
}

In [12]:
# Write the grouped coordinate lists to disk as JSON. The context manager
# guarantees the file is flushed and closed even if serialisation fails,
# instead of relying on garbage collection of an anonymous file handle.
with open('nepal_points.json', 'w') as points_file:
    json.dump(points_map, points_file)