# Package and authentication dependency

In [6]:
!pip install earthengine-api



In [None]:
#Please replace the code below with the code you get from running 
#!earthengine authenticate
!earthengine authenticate --authorization-code 4/bwHUKK309MnLK3v90DWJmU1hyG8EWDPvDII2KaP_eEjhxmPGSXszuYY

In [1]:
import pandas as pd
import ee
from collections import defaultdict
import numpy as np
import datetime
# Initialize the Earth Engine client library
# (presumably using the credentials saved by the authentication cell above - TODO confirm).
ee.Initialize()

# Parameters

In [6]:
# LOCATION OF CSV FILE
# Relative path of the directory that contains point_survey_v1.csv
DATA = '../../task4_labeling_preprocessing/data_analysis/'

# Download only pictures of rice and wheat
# When True, rows whose crop type is 'other' are dropped before the download
DOWNLOAD_ONLY_POSITIVE_EXAMPLE = True

# NUMBER OF FILES TO DOWNLOAD
# Set to None to download all files
DOWNLOAD_NB_FILES = 500

# RADIUS AROUND COORD IN METERS
# Number of meters around the point coordinate to include in the picture
RADIUS_AROUND = 200

# RANGES FOR BANDS
# Passed as min/max when the image is rendered for export.
# Values tested on different locations in Nepal.
# Hypothesis: should be the same for all images
RANGE_MIN = 0
RANGE_MAX = 2000

# RANGE FOR DATES
# The CSV gives one date per observation.
# However, several pictures are needed so that cloud-free pixels can be selected,
# so images are collected from RANDE_DATE weeks before to RANDE_DATE weeks after that date.
# NOTE(review): the name looks like a typo for RANGE_DATE; it is kept because other cells use it.
RANDE_DATE = 4

# GDRIVE or GCS
# 1 exports to Google Drive; any other value exports to a Google Cloud Storage bucket.
# As we move forward we might use a GCS bucket for keeping temp data/tf data etc.
GDRIVE = 0
# GOOGLE DRIVE FOLDER
GDRIVE_FOLDER = 'wfp'

# GCS BUCKET
# Replace the bucket name with one you own
GCS_BUCKET  = "satimages" 

# Import CSV data and remove NaNs

In [7]:
# Load the point-survey observations from the data directory
survey_csv = DATA + 'point_survey_v1.csv'
df = pd.read_csv(survey_csv)
print(f'Len of input data: {len(df)}')

Len of input data: 1585


In [8]:
# Turn the row index into a regular 'index' column; it is selected below and
# later becomes part of each exported file name
df = df.reset_index()

# Columns kept for the download step
columns_suggestion = [
    'index',
    'coord_obs_x',
    'coord_obs_y',
    'lc_code1',
    'su_date',
]

# Codes of the monitored crops (values found in lc_code1)
codes_monitored_crops = ['B101', 'B102', 'B103', 'B104']

In [9]:
# Keep only the columns needed for the download step
df = df.loc[:, columns_suggestion]

In [10]:
# Report the frame size and the number of missing values in each kept column
print(f'size df {len(df)}')
nan_per_column = {col: df[col].isna().sum() for col in columns_suggestion}
for col, nan_count in nan_per_column.items():
    print(f'Number of NaN for {col}: {nan_count}')

size df 1585
Number of NaN for index: 0
Number of NaN for coord_obs_x: 16
Number of NaN for coord_obs_y: 16
Number of NaN for lc_code1: 136
Number of NaN for su_date: 0


In [11]:
# Drop every row that has a missing value in any of the kept columns.
# The result is rebound instead of using inplace=True, so the cell does not
# mutate in place a frame that was itself derived by column selection.
df = df.dropna()

In [12]:
# Re-check the frame size and the per-column count of missing values
print(f'size df {len(df)}')
remaining_nan = {col: df[col].isna().sum() for col in columns_suggestion}
for col, nan_count in remaining_nan.items():
    print(f'Number of NaN for {col}: {nan_count}')

size df 1433
Number of NaN for index: 0
Number of NaN for coord_obs_x: 0
Number of NaN for coord_obs_y: 0
Number of NaN for lc_code1: 0
Number of NaN for su_date: 0


In [13]:

# Crop label for each lc_code1 value; any code not listed falls back to 'other'
name_map = defaultdict(
    lambda: 'other',
    {'B104': 'wheat', 'B101': 'rice', 'B102': 'rice'},
)

# Label every observation with its crop type
df['type'] = df['lc_code1'].apply(lambda code: name_map[code])

# Optionally keep only the rice / wheat observations
if DOWNLOAD_ONLY_POSITIVE_EXAMPLE:
    is_monitored_crop = df['type'] != 'other'
    df = df[is_monitored_crop]

In [14]:
# Number of observations left after filtering
n_pictures = len(df)
print(f'Will download {n_pictures} pictures')

Will download 407 pictures


In [15]:
# Count the remaining observations per crop type
df['type'].value_counts()

rice     389
wheat     18
Name: type, dtype: int64

# Google Earth Engine download


In [16]:
# COPERNICUS/S2 image collection limited to 2016-10-01 .. 2016-11-30 and to
# bands B4, B3 and B2.
# NOTE(review): `dataset` is not referenced by the cells visible below, which
# build their own collection - confirm whether this cell is still needed.
dataset = (
    ee.ImageCollection('COPERNICUS/S2')
    .filterDate('2016-10-01', '2016-11-30')
    .select(['B4', 'B3', 'B2'])
)

In [17]:
# Build one tuple per observation: (file name, x, y, date).
# The file name is "<index>-<lc_code1>". Rows are read as plain tuples and
# unpacked by column order, which avoids integer-position lookups on a
# label-indexed Series (deprecated in recent pandas releases).
point_columns = ['index', 'coord_obs_x', 'coord_obs_y', 'lc_code1', 'su_date']
coords = [
    (f'{idx}-{code}', x, y, date)
    for idx, x, y, code, date in df[point_columns].itertuples(index=False, name=None)
]




In [18]:
# Inspect the first entry: (file name, x, y, date)
coords[0]

('2-B102', 84.28578211, 27.42716335, '10/11/2016')

In [21]:
# Bounding rectangle of the circle of radius RADIUS_AROUND drawn around a coordinate
def get_geometry_radius(geometry_point):
    """Return the axis-aligned rectangle that encloses a geometry.

    Args:
        geometry_point: object whose ``getInfo()`` returns a dict with a
            'coordinates' entry; the first element of that entry is read as a
            sequence of (x, y) pairs. In this notebook the caller passes the
            buffered point, not the point itself.

    Returns:
        An ``ee.Geometry.Rectangle`` built from
        ``[min x, min y, max x, max y]`` of those pairs.
    """
    outer_ring = np.array(geometry_point.getInfo()['coordinates'][0])
    xs = outer_ring[:, 0]
    ys = outer_ring[:, 1]
    return ee.Geometry.Rectangle([xs.min(), ys.min(), xs.max(), ys.max()])

# Date window to search around an observation date
def date_range_to_collect(input_date):
    """Return the (start, end) dates surrounding an observation date.

    Args:
        input_date: date string in 'day/month/year' form, e.g. '10/11/2016'.

    Returns:
        A tuple of two ``datetime.date`` objects: the observation date minus
        RANDE_DATE weeks and the observation date plus RANDE_DATE weeks.

    Raises:
        ValueError: if the string does not split into three integer parts or
            the parts do not form a valid calendar date.
    """
    day, month, year = (int(part) for part in input_date.split('/'))
    observed_on = datetime.date(year, month, day)
    half_window = datetime.timedelta(weeks=RANDE_DATE)
    return observed_on - half_window, observed_on + half_window

def generate_image(image_collection, x, y, date, image_name, debug=True):
    """Start an Earth Engine export task for the area around one observation.

    The collection is filtered to images that cover the point and fall inside
    the date window returned by ``date_range_to_collect``. The per-pixel
    minimum of those images is rendered with bands B4/B3/B2 and exported over
    the rectangle returned by ``get_geometry_radius``.

    Args:
        image_collection: Earth Engine image collection to draw images from.
        x: first coordinate passed to ``ee.Geometry.Point``.
        y: second coordinate passed to ``ee.Geometry.Point``.
        date: observation date string in 'day/month/year' form.
        image_name: used as the description of the export task.
        debug: when True, print the point and the date window being processed.

    Returns:
        None. The export task is started as a side effect; it goes to Google
        Drive when GDRIVE == 1 and to the GCS bucket otherwise.
    """
    if debug:
        print(f'Working on {image_name}: ({x}, {y}) on {date}')

    geo = ee.Geometry.Point(x, y)
    radius = geo.buffer(RADIUS_AROUND)
    geometry_radius = get_geometry_radius(radius)

    spatial_filtered = image_collection.filterBounds(geo)

    start_date, end_date = date_range_to_collect(date)
    if debug:
        # Separate the two dates; they were previously printed run together.
        print(f'date range: {start_date} to {end_date}')
    temporal_filtered = spatial_filtered.filterDate(str(start_date), str(end_date))

    # The darker pixels have the least clouds
    least_clouds = temporal_filtered.min()

    testimg = least_clouds.visualize(bands=['B4', 'B3', 'B2'], min=RANGE_MIN, max=RANGE_MAX)

    # Both export targets use the same region, so resolve it once.
    export_region = geometry_radius.getInfo()['coordinates'][0]
    if GDRIVE == 1:
        task = ee.batch.Export.image.toDrive(
            testimg,
            folder=GDRIVE_FOLDER,
            region=export_region,
            description=image_name,
            scale=10,
        )
    else:
        task = ee.batch.Export.image.toCloudStorage(
            testimg,
            bucket=GCS_BUCKET,
            region=export_region,
            description=image_name,
            scale=10,
        )
    task.start()


In [22]:
# Collection the exported pictures are drawn from
dataset_collection = ee.ImageCollection('COPERNICUS/S2')

# Start one export task per point, stopping after DOWNLOAD_NB_FILES points
# (None means no limit). The comparison is >= because i is zero-based: with
# `>` one point more than requested would be processed.
for i, point in enumerate(coords):
    if DOWNLOAD_NB_FILES is not None and i >= DOWNLOAD_NB_FILES:
        break
    file_name, x, y, date = point
    generate_image(dataset_collection, x, y, date, file_name)

Working on 2-B102: (84.28578211, 27.42716335) on 10/11/2016
date range:2016-10-132016-12-08
Working on 6-B102: (84.40694692, 27.42943646) on 10/11/2016
date range:2016-10-132016-12-08
Working on 7-B102: (84.40851276, 27.42944815) on 10/11/2016
date range:2016-10-132016-12-08
Working on 12-B102: (84.31585037, 27.43806162) on 09/11/2016
date range:2016-10-122016-12-07
Working on 13-B102: (84.31737319, 27.43811606) on 09/11/2016
date range:2016-10-122016-12-07
Working on 14-B102: (84.31589526, 27.43682279) on 09/11/2016
date range:2016-10-122016-12-07
Working on 17-B102: (84.33761965, 27.43853205) on 10/11/2016
date range:2016-10-132016-12-08
Working on 19-B102: (84.33767689, 27.43715581) on 10/11/2016
date range:2016-10-132016-12-08
Working on 20-B102: (84.3410999, 27.438616999999997) on 10/11/2016
date range:2016-10-132016-12-08
Working on 21-B102: (84.30068409999998, 27.477949900000002) on 10/11/2016
date range:2016-10-132016-12-08
Working on 23-B102: (84.29545423, 27.44221338) on 09/1