In [147]:
import ee
import geemap
import ipywidgets as widgets
from ipyleaflet import WidgetControl
import os
import csv
import pandas as pd
#
import geopandas as gpd
#For min/max in geopanda
from shapely.geometry import Polygon


In [173]:
#This inits EE
ee.Initialize()

In [655]:
#File with GBIF Data
CVS_FN=r"C:\Users\mike\Documents\OSU\GEOG581\Project\gbif\TreesInSf\sfTrees.csv"
#PRES_LIST = ["Quercus agrifolia", "Heteromeles arbutifolia", "Baccharis pilularis", "Frangula californica"]
PRES_LIST = ["Cupressus macrocarpa", "Eucalyptus globulus", "Eucalyptus polyanthemos"]

#Path to the map
GBIF_MAP_PATH="users/karasofm/geog581_proj/treeDetection_grey"
GBIF_MASK_PATH="users/karasofm/geog581_proj/trainingMaskPoly"

UNCERT_LBL="coordinateUncertaintyInMeters"
SPECIES_LBL="species"
GBIF_LAT_LBL="decimalLatitude"
GBIF_LONG_LBL="decimalLongitude"

LAT_LBL="latitude"
LONG_LBL="longitude"

#Absense data is more prevelent, so be more difficult about uncertainty.
MAX_ABS_UNCERT = 5
MAX_PRES_UNCERT = 10

In [659]:
treesDf=pd.read_csv(CVS_FN)

##Pick points that have uncertainty as defined above.
treesCloseDf = treesDf[(treesDf[SPECIES_LBL].isin(PRES_LIST)) & 
                    (treesDf[UNCERT_LBL] <=  MAX_PRES_UNCERT)]\
                [[SPECIES_LBL, GBIF_LAT_LBL, GBIF_LONG_LBL, UNCERT_LBL]]

treesCloseDf = treesCloseDf.append(treesDf[(~treesDf[SPECIES_LBL].isin(PRES_LIST)) & 
                    (treesDf[UNCERT_LBL] <=  MAX_ABS_UNCERT)]\
                [[SPECIES_LBL, GBIF_LAT_LBL, GBIF_LONG_LBL, UNCERT_LBL]])

treesCloseDf = treesCloseDf.rename(columns={GBIF_LAT_LBL: LAT_LBL, GBIF_LONG_LBL: LONG_LBL})

#Write out the file.  This will then be sent to GEE for elimination by polygon.
closeFn = "%s_premasked_training.csv" % os.path.splitext(CVS_FN)[0]
treesCloseDf.to_csv(closeFn, index_label="id")
print(closeFn)
treesCloseDf

C:\Users\mike\Documents\OSU\GEOG581\Project\gbif\TreesInSf\sfTrees_premasked_training.csv


Unnamed: 0,species,latitude,longitude,coordinateUncertaintyInMeters
2157,Eucalyptus globulus,37.813109,-122.370659,5.0
2159,Eucalyptus globulus,37.700589,-122.436828,6.0
2163,Eucalyptus globulus,37.662696,-122.413695,10.0
2165,Eucalyptus globulus,37.789433,-122.472078,6.0
2166,Eucalyptus globulus,37.806261,-122.428567,2.0
...,...,...,...,...
4412,Quercus agrifolia,37.787387,-122.482388,5.0
4413,Quercus agrifolia,37.790149,-122.473586,5.0
4418,Quercus agrifolia,37.774206,-122.458054,5.0
4421,Quercus agrifolia,37.720545,-122.417872,5.0


In [660]:
###Above just generates a CSV file for GEE
###This file then get shipped to GEE, and filtered against a polygon to get rid of points not near trees
###Next it a re-download and conversion to gdf
###It shouldn't be this way, but geemap seems buggy.

filtered = ee.FeatureCollection('users/karasofm/geog581_proj/maskedTraining')
treesGdf = geemap.ee_to_geopandas(filtered)
treesGdf

Unnamed: 0,coordinate,species,id,geometry
0,4.0,Olea europaea,3350,POINT (-122.40856 37.80823)
1,5.0,Eucalyptus polyanthemos,2390,POINT (-122.47192 37.79496)
2,5.0,Eucalyptus polyanthemos,2394,POINT (-122.46738 37.74653)
3,5.0,Pittosporum crassifolium,3363,POINT (-122.48343 37.67931)
4,5.0,Prunus cerasifera,3379,POINT (-122.47134 37.63048)
...,...,...,...,...
607,5.0,Leptospermum laevigatum,3345,POINT (-122.50280 37.71747)
608,5.0,Umbellularia californica,3920,POINT (-122.43838 37.76592)
609,5.0,Umbellularia californica,3933,POINT (-122.41106 37.68176)
610,5.0,Umbellularia californica,3936,POINT (-122.46336 37.76944)


In [661]:
#treesGdf = gpd.GeoDataFrame(treesCloseDf, 
#                    geometry=gpd.points_from_xy(treesCloseDf[LAT_LBL], treesCloseDf[LONG_LBL]), 
#                    crs='EPSG:4326')
boundingPoly = Polygon(treesGdf["geometry"]).bounds
minLong = boundingPoly[0]
minLat = boundingPoly[1]
maxLong = boundingPoly[2]
maxLat = boundingPoly[3]

#Get ceneter
center_lat = minLat + (maxLat-minLat)/2 
center_long = minLong + (maxLong-minLong)/2  

#Reorder for distance from maxLat and maxLong, set a spatial index
#treesGdf = treesGdf.reindex(treesGdf.distance(Point(maxLat,maxLong)).sort_values().index)



In [662]:
#Kind of a poor human's KNN.  This just iterative finds the closest point to the last point
# and adds that point to a new df.  It also remove duplicates :)
treesCompareGdf = treesGdf

if 'spatialIdx' not in treesCompareGdf:
    treesCompareGdf.insert(0, 'spatialIdx', range(0, len(treesCompareGdf)))

treesOrderedGdf = treesCompareGdf.iloc[0:0]
compPt = Point(0,0)

spatialIdx = 0

while treesCompareGdf.count()[0] > 1:
    treesCompareGdf = treesCompareGdf.reindex(treesCompareGdf.distance(compPt).sort_values().index)
    topObs = treesCompareGdf.iloc[0]
    topPt = topObs['geometry']
    #print(spatialIdx, topPt, compPt, treesGdf.distance(compPt).sort_values()[0:5], treesCompareGdf.head(5))
    if topPt != compPt:
        topObs.at['spatialIdx']=spatialIdx
        spatialIdx = spatialIdx+1
        treesOrderedGdf = treesOrderedGdf.append(topObs)
        compPt = topPt
    treesCompareGdf = treesCompareGdf.iloc[1:]
    print(treesOrderedGdf.count()[0], treesCompareGdf.count()[0])



1 611
2 610
3 609
4 608
5 607
6 606
7 605
7 604
8 603
8 602
9 601
10 600
11 599
12 598
13 597
14 596
15 595
15 594
16 593
17 592
18 591
19 590
20 589
21 588
22 587
23 586
24 585
25 584
26 583
27 582
28 581
29 580
30 579
31 578
32 577
32 576
33 575
34 574
35 573
35 572
36 571
37 570
38 569
38 568
39 567
39 566
40 565
41 564
42 563
43 562
44 561
44 560
45 559
46 558
47 557
48 556
49 555
50 554
50 553
51 552
52 551
52 550
53 549
54 548
54 547
55 546
56 545
57 544
58 543
59 542
60 541
61 540
61 539
62 538
63 537
63 536
64 535
64 534
65 533
65 532
66 531
66 530
67 529
68 528
69 527
69 526
70 525
71 524
72 523
73 522
73 521
74 520
75 519
75 518
76 517
77 516
78 515
79 514
80 513
80 512
81 511
82 510
83 509
84 508
85 507
86 506
87 505
88 504
89 503
90 502
91 501
92 500
93 499
94 498
95 497
95 496
96 495
97 494
98 493
99 492
100 491
100 490
101 489
102 488
103 487
104 486
104 485
105 484
105 483
106 482
106 481
107 480
108 479
109 478
110 477
111 476
112 475
113 474
114 473
115 472
116 471
117

In [663]:
absDf = treesOrderedGdf[~treesOrderedGdf[SPECIES_LBL].isin(PRES_LIST)]
presDf = treesOrderedGdf[treesOrderedGdf[SPECIES_LBL].isin(PRES_LIST)] 
treesPresOrderedGdf = treesOrderedGdf.iloc[0:0]

for species in PRES_LIST:
    treesPresOrderedGdf = \
        treesPresOrderedGdf.append(
            treesOrderedGdf[treesOrderedGdf[SPECIES_LBL] == species])

treesPresOrderedGdf = treesPresOrderedGdf.append(absDf)
treesPresOrderedGdf = treesPresOrderedGdf.drop(columns=['spatialIdx'])
treesPresOrderedGdf.insert(0, 'spatialIdx', range(0, len(treesPresOrderedGdf)))

In [664]:
absDf.head(60)

Unnamed: 0,spatialIdx,coordinate,species,id,geometry
568,0,5.0,Heteromeles arbutifolia,3125,POINT (-122.38804 37.66515)
556,1,5.0,Heteromeles arbutifolia,2995,POINT (-122.38836 37.66522)
61,3,5.0,Prunus ilicifolia,3509,POINT (-122.40581 37.67762)
579,4,5.0,Heteromeles arbutifolia,3209,POINT (-122.40593 37.67825)
439,5,5.0,Frangula californica,2730,POINT (-122.40951 37.67744)
481,6,5.0,Ceanothus thyrsiflorus,1813,POINT (-122.41089 37.67992)
495,7,5.0,Ceanothus thyrsiflorus,1852,POINT (-122.41144 37.68088)
609,8,5.0,Umbellularia californica,3933,POINT (-122.41106 37.68176)
539,9,4.0,Heteromeles arbutifolia,2921,POINT (-122.41194 37.68359)
447,10,5.0,Frangula californica,2399,POINT (-122.41133 37.68444)


In [665]:
presDf.head(100)

Unnamed: 0,spatialIdx,coordinate,species,id,geometry
321,2,5.0,Eucalyptus globulus,2252,POINT (-122.39223 37.66865)
394,15,5.0,Cupressus macrocarpa,4584,POINT (-122.42435 37.71176)
345,16,9.0,Cupressus macrocarpa,4432,POINT (-122.42539 37.71283)
317,18,5.0,Eucalyptus globulus,2236,POINT (-122.42281 37.71709)
324,19,5.0,Eucalyptus globulus,2269,POINT (-122.42290 37.71718)
...,...,...,...,...,...
284,327,10.0,Eucalyptus globulus,2268,POINT (-122.45916 37.77305)
350,329,10.0,Cupressus macrocarpa,4456,POINT (-122.45702 37.77336)
333,334,5.0,Eucalyptus globulus,2385,POINT (-122.46123 37.77051)
359,337,10.0,Cupressus macrocarpa,4573,POINT (-122.45170 37.77188)


In [666]:
treesPresOrderedGdf.head(60)

Unnamed: 0,spatialIdx,coordinate,species,id,geometry
394,0,5.0,Cupressus macrocarpa,4584,POINT (-122.42435 37.71176)
345,1,9.0,Cupressus macrocarpa,4432,POINT (-122.42539 37.71283)
396,2,5.0,Cupressus macrocarpa,4623,POINT (-122.42024 37.71967)
365,3,10.0,Cupressus macrocarpa,4649,POINT (-122.41933 37.72087)
401,4,5.0,Cupressus macrocarpa,4667,POINT (-122.38212 37.70918)
388,5,5.0,Cupressus macrocarpa,4522,POINT (-122.38001 37.71049)
390,6,5.0,Cupressus macrocarpa,4524,POINT (-122.37945 37.71268)
389,7,5.0,Cupressus macrocarpa,4523,POINT (-122.37486 37.70891)
407,8,5.0,Cupressus macrocarpa,4671,POINT (-122.44662 37.75430)
414,9,6.0,Cupressus macrocarpa,4757,POINT (-122.44702 37.76068)


In [667]:
absFc = geemap.geopandas_to_ee(absDf)
presFc = geemap.geopandas_to_ee(presDf)

In [677]:
Map = geemap.Map()
#naip_url = 'https://services.nationalmap.gov/arcgis/services/USGSNAIPImagery/ImageServer/WMSServer?'
#Map.add_wms_layer(url=naip_url, layers='0', name='NAIP Imagery', format='image/png', shown=True)

gbifAreaMap = ee.Image(GBIF_MAP_PATH)
Map.addLayer(gbifAreaMap);
Map.addLayer(absFc, {'color': 'red'}, 'Absense');
Map.addLayer(presFc, {'color': 'green'}, 'Presense');

Map.setCenter(center_long, center_lat, 10000)
Map

Map(center=[37.71340008804914, -122.44662285650482], controls=(WidgetControl(options=['position'], widget=HBox…

In [678]:
allObs = []
allObs.append(treesPresOrderedGdf)

currObs = []
currObs.append(allObs[0][allObs[0]['spatialIdx']==0])
selectedDf = currObs[0]

selectedDf = selectedDf.iloc[0:0]
fixedDfObs = []
fixedDfObs.append(selectedDf)

# Add an output widget to the map
output_widget = widgets.Output(layout={'border': '1px solid black'})
output_control = WidgetControl(widget=output_widget, position='topleft')
Map.add_control(output_control)

keepBtn = widgets.Button(description="Keep")
keepBtn_control = WidgetControl(widget=keepBtn, position='topleft')
Map.add_control(keepBtn_control)

backBtn = widgets.Button(description="Back")
backBtn_control = WidgetControl(widget=backBtn, position='bottomright')
Map.add_control(backBtn_control)

skipBtn = widgets.Button(description="Skip")
skipBtn_control = WidgetControl(widget=skipBtn, position='topleft')
Map.add_control(skipBtn_control)

def show_coord(obs):
    with output_widget:
        output_widget.clear_output()
        print(obs)
        
    long = float(obs['geometry'].x)
    lat  = float(obs['geometry'].y)
   
    Map.addLayer(ee.Geometry.Point(long, lat), 
                 {'color': 'yellow', 'pointRadius' : 3}, 
                 'Select')
    Map.setCenter(long, lat, 30)

def next_obs():
    if not currObs[0].empty:
        nextSpIdx = currObs[0]['spatialIdx'].values[0] + 1
        currObs[0] = allObs[0][allObs[0]['spatialIdx']== nextSpIdx]
    return currObs[0]

def last_obs():
    nextSpIdx = currObs[0]['spatialIdx'].values[0] - 1
    nextObs = allObs[0][allObs[0]['spatialIdx']== nextSpIdx]
    
    if not nextObs.empty:
        currObs[0] = nextObs
        
    return currObs[0]

def get_obs():
    return currObs[0]

def on_skip_button_clicked(b):
    obs = next_obs()
    if(obs.empty):
        print("At End.")
    else:
        show_coord(obs)

def on_keep_button_clicked(b):
    obs = get_obs()
    fixedDfObs[0].append(obs)
    obs = next_obs()
    if(obs.empty):
        print("At End.")
    else:
        show_coord(obs)
        
def on_back_button_clicked(b):
    obs = last_obs()
    try:
        #If spatial index has been added to fixedDF, remove
        fixedDfRow = fixedDfObs[0]\
            [fixedDfObs[0]['spatialIdx'] == int(obs['spatialIdx'])]
        fixedDfObs[0].drop(fixedDfRow.index[0])
    except IndexError:
        pass

    show_coord(obs)

skipBtn.on_click(on_skip_button_clicked)
backBtn.on_click(on_back_button_clicked)
keepBtn.on_click(on_keep_button_clicked)

# Capture user interaction with the map
def handle_interaction(**kwargs):
    latlon = kwargs.get('coordinates')
    if kwargs.get('type') == 'click':
        Map.default_style = {'cursor': 'wait'}
        
        obs = get_obs()
        obs['geometry']=Point(latlon[1],latlon[0])
        fixedDfObs[0] = fixedDfObs[0].append(obs)
        
        obs = next_obs()

        if(obs.empty):
            with output_widget:
                output_widget.clear_output()
                print("At End.")
        else:
            show_coord(obs)



    Map.default_style = {'cursor': 'pointer'}

Map.on_interaction(handle_interaction)

show_coord(currObs[0])


In [672]:
fixedDfObs[0]

Unnamed: 0,spatialIdx,coordinate,species,id,geometry
345,1,9.0,Cupressus macrocarpa,4432,POINT (-122.42423 37.71168)
396,2,5.0,Cupressus macrocarpa,4623,POINT (-122.41996 37.71975)
390,6,5.0,Cupressus macrocarpa,4524,POINT (-122.37975 37.71271)
389,7,5.0,Cupressus macrocarpa,4523,POINT (-122.37482 37.70902)
407,8,5.0,Cupressus macrocarpa,4671,POINT (-122.44641 37.75432)
...,...,...,...,...,...
256,483,5.0,Baccharis pilularis,716,POINT (-122.44581 37.69757)
5,484,5.0,Quercus suber,3525,POINT (-122.44805 37.67616)
113,493,5.0,Quercus agrifolia,346,POINT (-122.37190 37.81082)
35,494,5.0,Salix lasiolepis,3592,POINT (-122.37188 37.81089)


In [673]:
#Fix up for GEE
selDf = fixedDfObs[0]
selDf.index.name = 'ID'

selDf['logitude'] = float(selDf['geometry'].x.values[0])
selDf['latitude'] = float(selDf['geometry'].y.values[0])
selDf

Unnamed: 0_level_0,spatialIdx,coordinate,species,id,geometry,logitude,latitude
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
345,1,9.0,Cupressus macrocarpa,4432,POINT (-122.42423 37.71168),-122.424233,37.71168
396,2,5.0,Cupressus macrocarpa,4623,POINT (-122.41996 37.71975),-122.424233,37.71168
390,6,5.0,Cupressus macrocarpa,4524,POINT (-122.37975 37.71271),-122.424233,37.71168
389,7,5.0,Cupressus macrocarpa,4523,POINT (-122.37482 37.70902),-122.424233,37.71168
407,8,5.0,Cupressus macrocarpa,4671,POINT (-122.44641 37.75432),-122.424233,37.71168
...,...,...,...,...,...,...,...
256,483,5.0,Baccharis pilularis,716,POINT (-122.44581 37.69757),-122.424233,37.71168
5,484,5.0,Quercus suber,3525,POINT (-122.44805 37.67616),-122.424233,37.71168
113,493,5.0,Quercus agrifolia,346,POINT (-122.37190 37.81082),-122.424233,37.71168
35,494,5.0,Salix lasiolepis,3592,POINT (-122.37188 37.81089),-122.424233,37.71168


In [674]:
fnPath = os.path.splitext(CVS_FN)[0]

for pres in PRES_LIST:
    presSelDf = selDf[selDf[SPECIES_LBL] == pres]
    selectedFn = str("%s_%s_sel.csv" % (fnPath, pres)).replace(" ", "_")
    print(selectedFn)
    presSelDf.to_csv(selectedFn)

C:\Users\mike\Documents\OSU\GEOG581\Project\gbif\TreesInSf\sfTrees_Cupressus_macrocarpa_sel.csv
C:\Users\mike\Documents\OSU\GEOG581\Project\gbif\TreesInSf\sfTrees_Eucalyptus_globulus_sel.csv
C:\Users\mike\Documents\OSU\GEOG581\Project\gbif\TreesInSf\sfTrees_Eucalyptus_polyanthemos_sel.csv


In [675]:
absSelDf = selDf[~selDf[SPECIES_LBL].isin(PRES_LIST)]
absFn = str("%s_absense.csv" % (fnPath)).replace(" ", "_")
absSelDf.to_csv(absFn)

In [676]:
allFn = "%s_sel.csv" % (fnPath)
selDf.to_csv(allFn)