In [2]:
#before running the code, it's important that you have downloaded the google earth enging api to python or r
import ee
import pandas as pd

# Trigger the authentication flow. This only needs to be done once,
# and ensures that you have access to google earth engine through your google account
# ee.Authenticate()

# Initialize the library.
# ee.Initialize()

In [31]:
#read in the latlong csv
latlong = pd.read_csv('ACL data/latlong.csv')
latlong = latlong.drop_duplicates().reset_index().drop(columns=['index'])

In [32]:
latlong.size

19422

In [33]:
#get the frequency of each species in the latlong dataframe, if you want to see
count = latlong['scientificName'].value_counts().rename_axis('scientificName').to_frame('frequency')
# count.to_csv('ACL Data/count.csv')
#make sure you disregard the sandflies with less than 5 points
sandfly_names = count[count['frequency'] > 4].reset_index()['scientificName']
latlong = latlong[latlong.scientificName.isin(sandfly_names)]

In [34]:
latlong.size

18717

In [10]:
#tester point!
point = ee.Geometry.Point(18.5672, -88.2569)

# TEMPERATURE

In [62]:
#TEMPERATURE

#read the latlong into a dataframe
meantemp = latlong
#load the gee image collection
#filter it by the dates you want, and the data you are looking for
temp = ee.ImageCollection("ECMWF/ERA5/MONTHLY").filterDate('2009-01-01', '2019-01-01').select('mean_2m_air_temperature')

#create a for loop that gets the mean temperature across the chosen time for each lat long point
for x in range(len(meantemp)):
    #create a point in the earth engine api: check the documentation if you have any issues with this
    point = ee.Geometry.Point(latlong['decimalLongitude'][x], latlong['decimalLatitude'][x])
    #get the mean temp and put it in the dataframe under the column "mean monthly temp"
    meantemp.loc[x, "temp"] = temp.mean().sample(point, 1).first().get('mean_2m_air_temperature').getInfo()

#convert to celcius
meantemp['temp'] = meantemp['temp']-273.15


In [63]:
#print the first 30 rows of this dataframe to check if its all good
meantemp.head()
#take the average temperature for each species of sandfly
latlongtemp = meantemp.groupby("scientificName").mean()
#export the csv to your folder(might have to directly put the folder location)
latlongtemp.to_csv('final2/latlongtemp.csv')

# PRECIPITATION

In [64]:
#PRECIPITATION

#load latlongs into a dataframe
precip = latlong
#filter it by the dates you want, and the data you are looking for
#in this case we want the precipitation, so we .select(total_precipitation)
prec = ee.ImageCollection("ECMWF/ERA5/MONTHLY").filterDate('2009-01-01', '2019-01-01').select('total_precipitation')

#create a for loop that gets the mean rainfall across the chosen time for each lat long point
for x in range(len(precip)):
    #create a point in the earth engine api: check the documentation if you have any issues with this
    point = ee.Geometry.Point(latlong['decimalLongitude'][x], latlong['decimalLatitude'][x])
    #get the mean rainfall and put it in the dataframe under the column "rainfall"
    precip.loc[x, "rainfall"] = prec.mean().sample(point, 1).first().get('total_precipitation').getInfo()

In [65]:
#convert rainfall from meters to millimeters
precip['rainfall'] = precip['rainfall']*1000
#take the average temperature for each species of sandfly
latlongprecip = precip.groupby("scientificName").mean()
#export the csv to your folder(might have to directly put the folder location)
latlongprecip.to_csv('final2/latlongprecip.csv')
#check
latlongprecip.head()

Unnamed: 0_level_0,decimalLatitude,decimalLongitude,temp,rainfall
scientificName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bichromomyia flaviscutellata,-6.897675,-54.948463,26.013084,170.290238
Bichromomyia inornata,-10.7828,-65.3394,26.093103,165.437773
Bichromomyia reducta,-11.96475,-69.162833,25.324915,174.914315
Brumptomyia avellari,-18.277705,-53.41456,24.529622,115.744536
Brumptomyia beaupertuyi,5.81185,-75.79975,20.326776,235.551447


# WIND

In [66]:
#WIND

#filter it by the dates you want, and the data you are looking for
#in this case we want the wind speed, so we .select(u_component_of_wind_10m) and .select(v_component_of_wind_10m)
u_wind = ee.ImageCollection("ECMWF/ERA5/MONTHLY").filterDate('2009-01-01', '2019-01-01').select('u_component_of_wind_10m')
v_wind = ee.ImageCollection("ECMWF/ERA5/MONTHLY").filterDate('2009-01-01', '2019-01-01').select('v_component_of_wind_10m')

#load in latlong
wind = latlong

#create a for loop that gets the mean windspeed across the chosen time for each lat long point
for x in range(len(latlong)):
    #create a point in the earth engine api: check the documentation if you have any issues with this
    point = ee.Geometry.Point(latlong['decimalLongitude'][x], latlong['decimalLatitude'][x])
    #for simplicity, store the data in variables
    u = u_wind.mean().sample(point, 1).first().get('u_component_of_wind_10m').getInfo()
    v = v_wind.mean().sample(point, 1).first().get('v_component_of_wind_10m').getInfo()
    #get the mean wind speed and put it in the dataframe under the column "wind.speed"
    wind.loc[x, "wind.speed.u"] = u
    wind.loc[x, "wind.speed.v"] = v

#outside of the for loop, do the calculations for the total
wind['wind.speed'] = (wind['wind.speed.u']**2 + wind["wind.speed.v"]**2)**0.5

In [67]:
#take the average temperature for each species of sandfly
latlongwind = wind.groupby("scientificName").mean()
#export the csv to your folder(might have to directly put the folder location)
latlongwind.to_csv('final2/latlongwind.csv')

# TEMPERATURE RANGE

In [68]:
#TEMPERATURE RANGE

#load the latlong csv
temprange = latlong
#filter the google earth engine datasets by the dates you want, and the data you are looking for
#in this case we want the max and min temps, so we select those
mintemp = ee.ImageCollection("ECMWF/ERA5/MONTHLY").filterDate('2009-01-01', '2019-01-01').select('minimum_2m_air_temperature')
maxtemp = ee.ImageCollection("ECMWF/ERA5/MONTHLY").filterDate('2009-01-01', '2019-01-01').select('maximum_2m_air_temperature')

#MEAN MONTHLY TEMP MAX AND MIN
#create a for loop that gets the mean value across the chosen time for each lat long point
for x in range(len(temprange)):
    #create a point in the earth engine api: check the documentation if you have any issues with this
    point = ee.Geometry.Point(temprange['decimalLongitude'][x], temprange['decimalLatitude'][x])
    #get the mean range and put it in the dataframe 
    temprange.loc[x, "temp.rangemax"] = maxtemp.mean().sample(point, 1).first().get('maximum_2m_air_temperature').getInfo()
    temprange.loc[x, "temp.rangemin"] = mintemp.mean().sample(point, 1).first().get('minimum_2m_air_temperature').getInfo()

In [69]:
#get the range by doing max minus min and put it into a new column
temprange['temp.range'] = temprange['temp.rangemax'] - temprange['temp.rangemin']
#check it before exporting
temprange.head()

Unnamed: 0,scientificName,decimalLatitude,decimalLongitude,temp,rainfall,wind.speed.u,wind.speed.v,wind.speed,temp.rangemax,temp.rangemin,temp.range
0,Warileya rotundipennis,5.8137,-75.8225,20.326776,235.551447,0.053705,0.036583,0.064981,301.598511,287.460388,14.138123
1,Warileya rotundipennis,5.81,-75.82,20.326776,235.551447,0.053705,0.036583,0.064981,301.598511,287.460388,14.138123
2,Brumptomyia avellari,-14.858692,-44.229808,25.611108,61.875254,-1.736552,0.320977,1.765967,307.954071,291.199158,16.754913
3,Brumptomyia avellari,-14.863325,-44.231039,25.611108,61.875254,-1.736552,0.320977,1.765967,307.954071,291.199158,16.754913
4,Brumptomyia avellari,-14.845041,-44.218608,25.611108,61.875254,-1.736552,0.320977,1.765967,307.954071,291.199158,16.754913


In [70]:
#take the average range for each species of sandfly
latlongtemprange = temprange.groupby("scientificName").mean()
#export it to your computer
latlongtemprange.to_csv('final2/latlongtemprange.csv')

# ELEVATION

In [72]:
#ELEVATION

#create a dataframe that we will put elevation values into
elevation = latlong
#load the gee image collection for elevation
#filter it by the dates you want, and the data you are looking for
elev = ee.Image("NOAA/NGDC/ETOPO1").select('bedrock')

#create a for loop that gets the elevation across the chosen time for each lat long point
for x in range(len(elevation)):
    #create a point in the earth engine api: check the documentation if you have any issues with this
    point = ee.Geometry.Point(elevation['decimalLongitude'][x], elevation['decimalLatitude'][x])
    #get the elevation and put it in the dataframe under the column "elevation"
    elevation.loc[x, "elevation"] = elev.sample(point, 1).first().get('bedrock').getInfo()


In [73]:
#take the average range for each species of sandfly
latlongelevation = elevation.groupby("scientificName").mean()
#export it to your computer
latlongelevation.to_csv('final2/latlongelevation.csv')

# FOREST LANDSCAPE INTEGRITY INDEX

In [74]:
#FOREST LANDSCAPE INTEGRITY INDEX

#create a dataframe that we will put flii values into
latlongflii = latlong
#load the gee image collection for flii (READ BELOW)
#for these images specifically, you need to upload them to google earth engine as an *asset*
#when you go to https://code.earthengine.google.com/ and login to your account that has access
#to google earth engine, and you see the code editor, go to the assets tab and upload your image/tiff files there
#otherwise, you will not be able to access the data in the tiff files
flii = ee.Image("users/ironicallycallidus/flii_earth")

#create a for loop that gets the flii for each lat long point
for x in range(len(latlongflii)):
    #create a point in the earth engine api: check the documentation if you have any issues with this
    point = ee.Geometry.Point(latlongflii['decimalLongitude'][x], latlongflii['decimalLatitude'][x])
    #get the flii and put it in the dataframe under the column "flii"
    #here we use an if statement because for some latlongs, there doesn't exist a flii number, so we get the ones that exist
    #this can get quite finnicky, so play around with the functions until it works, check the documentation as well
    if flii.sample(point, 1).first().getInfo() != None:
        latlongflii.loc[x, "flii"] = flii.sample(point, 1).first().get('b1').getInfo()


In [348]:
#testing a specific random point for the flii index
flii = ee.Image("users/ironicallycallidus/flii_earth")
point = ee.Geometry.Point(18.5672, -8.2569)
flii.sample(point, 1).first().get('b1').getInfo()

9684

In [75]:
#take the average range for each species of sandfly
latlongflii1 = latlongflii.groupby("scientificName").mean()

#this makes sure all negative values are rewritten as 0s
latlongflii1 = latlongflii1['flii'].clip(lower=0)
#export it to your computer
latlongflii1.to_csv('final2/latlongflii.csv')

# TEMPERATURE VARIANCE

In [20]:
import numpy as np
#create empty 
latlong = latlong
for x in range(len(latlong)):
    #create empty list
    variance = []
    point = ee.Geometry.Point(latlong['decimalLongitude'][x], latlong['decimalLatitude'][x])
    
    for j in range(10):
        #get 10 columns for the years 
        #get variance for the values in each column
        #get mean monthly temp for months in a year
        era = ee.ImageCollection("ECMWF/ERA5/MONTHLY").filterDate(str(j + 2009) + '-01-01', str(j + 2010) + '-01-01').select('mean_2m_air_temperature')
        #get the variance of each 10 years
        erareduced = era.reduce(ee.Reducer.variance())
        #append each of the 10 variances to a list
        variance.append(erareduced.sample(point, 1).first().get('mean_2m_air_temperature_variance').getInfo())
    #take mean of the variances and put it into the latlong csv
    latlong.loc[x, "temp.var"] = np.mean(variance)


In [6]:
#check
latlong

Unnamed: 0,scientificName,decimalLatitude,decimalLongitude,temp.var
0,Warileya rotundipennis,5.813700,-75.822500,0.575591
1,Warileya rotundipennis,5.810000,-75.820000,0.575591
2,Brumptomyia avellari,-14.858692,-44.229808,1.986265
3,Brumptomyia avellari,-14.863325,-44.231039,1.986265
4,Brumptomyia avellari,-14.845041,-44.218608,1.986265
...,...,...,...,...
6469,Micropygomyia (Sauromyia) trinidadensis,-0.084444,-79.278028,
6470,Lutzomyia (Helcocyrtomyia) hartmanni,-0.084444,-79.278028,
6471,Pressatia triacantha,-0.084444,-79.278028,
6472,Migonemyia (Migonemyia) migonei,-31.393889,-64.076667,


In [21]:
#take the average temperature for each species of sandfly
latlongtempvar = latlong.groupby("scientificName").mean()
#export the csv to your folder(might have to directly put the folder location)
latlongtempvar.to_csv('final2/latlongtempvar.csv')

# CANOPY HEIGHT

In [8]:
#CANOPY HEIGHT
latlong = pd.read_csv('ACL data/latlong.csv')
latlong = latlong.drop_duplicates().reset_index().drop(columns=['index'])
canopy = ee.Image("NASA/JPL/global_forest_canopy_height_2005")

#create a for loop that gets the data for each lat long point
for x in range(len(latlong)):
    #create a point in the earth engine api: check the documentation if you have any issues with this
    point = ee.Geometry.Point(latlong['decimalLongitude'][x], latlong['decimalLatitude'][x])
    buffer = point.buffer(distance = 1000)
    #put value for each latlong in the latlong dataframe
    latlong.loc[x, "canopy"] = canopy.sample(buffer).first().get('1').getInfo()

In [9]:
#check
latlong.tail(40)
#take the average for each species of sandfly
latlongcanopy = latlong.groupby("scientificName").mean()
#export the csv to your folder(might have to directly put the folder location)
latlongcanopy.to_csv('final2/latlongcanopy.csv')

# GLOBAL HUMAN MODIFICATION

In [93]:
ghm = ee.ImageCollection('CSP/HM/GlobalHumanModification').select('gHM')
point = ee.Geometry.Point(-5.35782111, 8.874290794)

buffer = point.buffer(distance= 1000)
print(ghm.mean().sample(buffer, 1000).first().getInfo())

{'type': 'Feature', 'geometry': None, 'id': '0', 'properties': {'gHM': 0.5898281335830688}}


In [94]:
#GHM
latlong = pd.read_csv('ACL data/latlong.csv')
latlong = latlong.drop_duplicates().reset_index().drop(columns=['index'])

ghm = ee.ImageCollection('CSP/HM/GlobalHumanModification').select('gHM')

#create a for loop that gets the data for each lat long point
for x in range(len(latlong)):
    #create a point in the earth engine api: check the documentation if you have any issues with this
    point = ee.Geometry.Point(latlong['decimalLongitude'][x], latlong['decimalLatitude'][x])
    buffer = point.buffer(distance= 1000)
    #put value for each latlong in the latlong dataframe
    if ghm.mean().sample(buffer, 1000).first().getInfo() != None:
        latlong.loc[x, "ghm"] = ghm.mean().sample(buffer, 1000).first().get('gHM').getInfo()


In [95]:
#take the average for each species of sandfly
latlongghm = latlong.groupby("scientificName").mean()
#export the csv to your folder(might have to directly put the folder location)
latlongghm.to_csv('final2/latlongghm.csv')

# TREE COVER

In [3]:
#TREE COVER
latlong = pd.read_csv('ACL data/latlong.csv')
latlong = latlong.drop_duplicates().reset_index().drop(columns=['index'])
coper = ee.ImageCollection("COPERNICUS/Landcover/100m/Proba-V-C3/Global").select('tree-coverfraction')


#create a for loop that gets the data for each lat long point
for x in range(len(latlong)):
    #create a point in the earth engine api: check the documentation if you have any issues with this
    point = ee.Geometry.Point(latlong['decimalLongitude'][x], latlong['decimalLatitude'][x])
    buffer = point.buffer(distance= 1000)
    #put value for each latlong in the latlong dataframe
    if coper.mean().sample(buffer, 100).first().getInfo() != None:
        latlong.loc[x, "tree.cover"] = coper.mean().sample(buffer, 100).first().get('tree-coverfraction').getInfo()

In [4]:
#take the average for each species of sandfly
latlongtreecover = latlong.groupby("scientificName").mean()
#export the csv to your folder(might have to directly put the folder location)
latlongtreecover.to_csv('final2/latlongtreecover.csv')

# CROPS COVER

In [5]:
#CROPS COVER
latlong = pd.read_csv('ACL data/latlong.csv')
latlong = latlong.drop_duplicates().reset_index().drop(columns=['index'])
coper = ee.ImageCollection("COPERNICUS/Landcover/100m/Proba-V-C3/Global").select('crops-coverfraction')

#create a for loop that gets the data for each lat long point
for x in range(len(latlong)):
    #create a point in the earth engine api: check the documentation if you have any issues with this
    point = ee.Geometry.Point(latlong['decimalLongitude'][x], latlong['decimalLatitude'][x])
    buffer = point.buffer(distance= 1000)
    #put value for each latlong in the latlong dataframe
    if coper.mean().sample(buffer, 100).first().getInfo() != None:
        latlong.loc[x, "crops.cover"] = coper.mean().sample(buffer, 100).first().get('crops-coverfraction').getInfo()

In [6]:
#take the average for each species of sandfly
latlongcropscover = latlong.groupby("scientificName").mean()
#export the csv to your folder(might have to directly put the folder location)
latlongcropscover.to_csv('final2/latlongcropscover.csv')

# GRASS COVER

In [7]:
#GRASS COVER
latlong = pd.read_csv('ACL data/latlong.csv')
latlong = latlong.drop_duplicates().reset_index().drop(columns=['index'])
coper = ee.ImageCollection("COPERNICUS/Landcover/100m/Proba-V-C3/Global").select('grass-coverfraction')

#create a for loop that gets the data for each lat long point
for x in range(len(latlong)):
    #create a point in the earth engine api: check the documentation if you have any issues with this
    point = ee.Geometry.Point(latlong['decimalLongitude'][x], latlong['decimalLatitude'][x])
    buffer = point.buffer(distance= 1000)
    #put value for each latlong in the latlong dataframe
    if coper.mean().sample(buffer, 100).first().getInfo() != None:
        latlong.loc[x, "grass.cover"] = coper.mean().sample(buffer, 100).first().get('grass-coverfraction').getInfo()

In [8]:
#take the average for each species of sandfly
latlonggrasscover = latlong.groupby("scientificName").mean()
#export the csv to your folder(might have to directly put the folder location)
latlonggrasscover.to_csv('final2/latlonggrasscover.csv')

# SHRUB COVER

In [9]:
#SHRUB COVER
latlong = pd.read_csv('ACL data/latlong.csv')
latlong = latlong.drop_duplicates().reset_index().drop(columns=['index'])
coper = ee.ImageCollection("COPERNICUS/Landcover/100m/Proba-V-C3/Global").select('shrub-coverfraction')

#create a for loop that gets the data for each lat long point
for x in range(len(latlong)):
    #create a point in the earth engine api: check the documentation if you have any issues with this
    point = ee.Geometry.Point(latlong['decimalLongitude'][x], latlong['decimalLatitude'][x])
    buffer = point.buffer(distance= 1000)
    #put value for each latlong in the latlong dataframe
    if coper.mean().sample(buffer, 100).first().getInfo() != None:
        latlong.loc[x, "shrub.cover"] = coper.mean().sample(buffer, 100).first().get('shrub-coverfraction').getInfo()

In [10]:
#take the average for each species of sandfly
latlongshrubcover = latlong.groupby("scientificName").mean()
#export the csv to your folder(might have to directly put the folder location)
latlongshrubcover.to_csv('final2/latlongshrubcover.csv')

# URBAN COVER

In [11]:
#URBAN COVER
latlong = pd.read_csv('ACL data/latlong.csv')
latlong = latlong.drop_duplicates().reset_index().drop(columns=['index'])
coper = ee.ImageCollection("COPERNICUS/Landcover/100m/Proba-V-C3/Global").select('urban-coverfraction')

#create a for loop that gets the data for each lat long point
for x in range(len(latlong)):
    #create a point in the earth engine api: check the documentation if you have any issues with this
    point = ee.Geometry.Point(latlong['decimalLongitude'][x], latlong['decimalLatitude'][x])
    buffer = point.buffer(distance= 1000)
    #put value for each latlong in the latlong dataframe
    if coper.mean().sample(buffer, 100).first().getInfo() != None:
        latlong.loc[x, "urban.cover"] = coper.mean().sample(buffer, 100).first().get('urban-coverfraction').getInfo()

In [12]:
#take the average for each species of sandfly
latlongurbancover = latlong.groupby("scientificName").mean()
#export the csv to your folder(might have to directly put the folder location)
latlongurbancover.to_csv('final2/latlongurbancover.csv')

# WATER PERMANENET COVER

In [13]:
#WATER PERMANENT COVER
latlong = pd.read_csv('ACL data/latlong.csv')
latlong = latlong.drop_duplicates().reset_index().drop(columns=['index'])
coper = ee.ImageCollection("COPERNICUS/Landcover/100m/Proba-V-C3/Global").select('water-permanent-coverfraction')

#create a for loop that gets the data for each lat long point
for x in range(len(latlong)):
    #create a point in the earth engine api: check the documentation if you have any issues with this
    point = ee.Geometry.Point(latlong['decimalLongitude'][x], latlong['decimalLatitude'][x])
    buffer = point.buffer(distance= 1000)
    #put value for each latlong in the latlong dataframe
    if coper.mean().sample(buffer, 100).first().getInfo() != None:
        latlong.loc[x, "water.permanent.cover"] = coper.mean().sample(buffer, 100).first().get('water-permanent-coverfraction').getInfo()

In [14]:
#take the average for each species of sandfly
latlongwaterpermcover = latlong.groupby("scientificName").mean()
#export the csv to your folder(might have to directly put the folder location)
latlongwaterpermcover.to_csv('final2/latlongwaterpermcover.csv')

# WATER SEASONAL COVER

In [15]:
#WATER SEASONAL COVER
latlong = pd.read_csv('ACL data/latlong.csv')
latlong = latlong.drop_duplicates().reset_index().drop(columns=['index'])
coper = ee.ImageCollection("COPERNICUS/Landcover/100m/Proba-V-C3/Global").select('water-seasonal-coverfraction')

#create a for loop that gets the data for each lat long point
for x in range(len(latlong)):
    #create a point in the earth engine api: check the documentation if you have any issues with this
    point = ee.Geometry.Point(latlong['decimalLongitude'][x], latlong['decimalLatitude'][x])
    buffer = point.buffer(distance= 1000)
    #put value for each latlong in the latlong dataframe
    if coper.mean().sample(buffer, 100).first().getInfo() != None:
        latlong.loc[x, "water.seasonal.cover"] = coper.mean().sample(buffer, 100).first().get('water-seasonal-coverfraction').getInfo()

In [16]:
#take the average for each species of sandfly
latlongwaterseascover = latlong.groupby("scientificName").mean()
#export the csv to your folder(might have to directly put the folder location)
latlongwaterseascover.to_csv('final2/latlongwaterseascover.csv')

# ENHANCED VEGETATION INDEX

In [18]:
#WATER SEASONAL COVER
latlong = pd.read_csv('ACL data/latlong.csv')
latlong = latlong.drop_duplicates().reset_index().drop(columns=['index'])
vegetation = ee.ImageCollection("MODIS/006/MOD13A2").filterDate('2009-01-01', '2019-01-01').select('EVI')

#create a for loop that gets the data for each lat long point
for x in range(len(latlong)):
    #create a point in the earth engine api: check the documentation if you have any issues with this
    point = ee.Geometry.Point(latlong['decimalLongitude'][x], latlong['decimalLatitude'][x])
    buffer = point.buffer(distance= 1000)
    #put value for each latlong in the latlong dataframe
    if vegetation.mean().sample(buffer, 100).first().getInfo() != None:
        latlong.loc[x, "evi"] = vegetation.mean().sample(point, 1).first().get('EVI').getInfo()

In [19]:
#take the average for each species of sandfly
latlongevi = latlong.groupby("scientificName").mean()
#export the csv to your folder(might have to directly put the folder location)
latlongevi.to_csv('final2/latlongevi.csv')

# ECOREGIONS AND BIOMES

In [22]:
#ECOREGIONS AND BIOMES
latlong = pd.read_csv('ACL data/latlong.csv')
latlong = latlong.drop_duplicates().reset_index().drop(columns=['index'])

#read in the feature collection of ecoregions
ecoregions = ee.FeatureCollection("RESOLVE/ECOREGIONS/2017")

#convert the ecoregions feature collection into an image that contains the eco id numbers
ecoimage = ee.Image().paint(ecoregions, 'ECO_ID')
bioimage = ee.Image().paint(ecoregions, 'BIOME_NUM')

#tester point, can ignore
point = ee.Geometry.Point(-88.2569, 18.5672)


#create a for loop that gets the eco id for each lat long point
for x in range(len(latlong)):
    #create a point in the earth engine api: check the documentation if you have any issues with this
    point = ee.Geometry.Point(latlong['decimalLongitude'][x], latlong['decimalLatitude'][x])
    #get the eco id and put it in the dataframe under the column "eco id"
    if ecoimage.sample(point, 1).first().getInfo() != None:
        latlong.loc[x, "eco id"] = ecoimage.sample(point, 1).first().get('constant').getInfo()
    #get the bio me id and put it in the dataframe under the column "bio id"
    if bioimage.sample(point, 1).first().getInfo() != None:
        latlong.loc[x, "bio id"] = bioimage.sample(point, 1).first().get('constant').getInfo()



In [None]:
#specific code for getting the biome number from the feature collection
#not necessary to run, as it's incorporated into the previous cell's for loop
ecoimage = ee.Image().paint(ecoregions, 'BIOME_NUM')
for x in range(len(latlong)):
    #create a point in the earth engine api: check the documentation if you have any issues with this
    point = ee.Geometry.Point(latlong['decimalLongitude'][x], latlong['decimalLatitude'][x])
    #get the eco id and put it in the dataframe under the column "eco id"
    if ecoimage.sample(point, 1).first().getInfo() != None:
        latlong.loc[x, "bio id"] = ecoimage.sample(point, 1).first().get('constant').getInfo()


In [23]:
#fill in which biomes and ecoregions correspond to which eco and bio id
index = pd.read_csv('biome_eco_index.csv')
#match them for each lat long point
for i in range(len(latlong)):
    for j in range(len(index)):
        if latlong.loc[i, 'eco id'] == index.loc[j, 'ecoid']:
            latlong.loc[i, 'eco name'] = index.loc[j, 'econame']
        if latlong.loc[i, 'bio id'] == index.loc[j, 'bioid']:
            latlong.loc[i, 'biome name'] = index.loc[j, 'bioname']

In [24]:
#look through the first 60 rows to check
latlong.head(60)
#save as a dataframe
latlong.to_csv('final2/latlongbioeco.csv')

# FINAL

In [52]:
#BIOME BINARY FILL IN
#load in traits and bioeco data
traits = pd.read_csv('final2/vector traits.csv')
bioeco = pd.read_csv('final2/latlongbioeco.csv')
#list of all sandflies
species = traits['species']
#reset the index for easier indexing in the for loop
traits = traits.set_index('species')
#get all the unique biome names and filter out the nans
biome_unique = bioeco['biome name'].unique()
biome_unique = [biome for biome in biome_unique if pd.notnull(biome)]
#get the list of what the biome columns are in the trait csv, these are sorted according to the order in biome_unique
vector_biome_title = ['b.tsmbf', 'b.tsdbf', 'b.tsgss', 'b.mangrove', 'b.fgs', 'b.dxs', 'b.tscf', 'b.tgss', 'b.tcforest', 'b.tbmforest', 'b.montane']
#list of sandfly names that have biome data
names_unique = bioeco['scientificName'].unique()

#fill the cells with 0s for all names in names_unqiue
for x in range(len(names_unique)):
    for y in range(len(traits)):
        if names_unique[x] == species[y]:
            traits.loc[species[y], vector_biome_title] = 0

#fill in the cells with 1s where that biome ocurrs for the sandfly
for i in range(len(bioeco)):
    for j in range(len(biome_unique)):
        if bioeco.loc[i, 'biome name'] == biome_unique[j]:
            traits.loc[bioeco.loc[i, 'scientificName'], vector_biome_title[j]] = 1
            
#one weird outlier that I fill in by hand
traits.loc['Pintomyia (Pifanomyia) suapiensis', vector_biome_title] = 0
traits.loc['Pintomyia (Pifanomyia) suapiensis', 'b.tsmbf'] = 1
traits = traits.drop('Lutzomyia suapiensis')

#export to csv
traits.to_csv('final2/vector traits with biome.csv')            

In [96]:
#PUT THEM ALL TOGETHER
#concatenate the traits from the csvs into one file with all the traits

#load the csvs
temp = pd.read_csv('final2/latlongtemp.csv')
temprange = pd.read_csv('final2/latlongtemprange.csv')
elevation = pd.read_csv('final2/latlongelevation.csv')
rainfall = pd.read_csv('final2/latlongprecip.csv')
wind = pd.read_csv('final2/latlongwind.csv')
flii = pd.read_csv('final2/latlongflii.csv')
tempvar = pd.read_csv('final2/latlongtempvar.csv')
canopy = pd.read_csv('final2/latlongcanopy.csv')
ghm = pd.read_csv('final2/latlongghm.csv')
treecover = pd.read_csv('final2/latlongtreecover.csv')
cropscover = pd.read_csv('final2/latlongcropscover.csv')
grasscover = pd.read_csv('final2/latlonggrasscover.csv')
shrubcover = pd.read_csv('final2/latlongshrubcover.csv')
urbancover = pd.read_csv('final2/latlongurbancover.csv')
waterpermcover = pd.read_csv('final2/latlongwaterpermcover.csv')
waterseascover = pd.read_csv('final2/latlongwaterseascover.csv')
evi = pd.read_csv('final2/latlongevi.csv')


#read the csv with the biome variable filled in
traits = pd.read_csv('final2/vector traits with biome.csv')

#create a new dataframe that has the name, lat, long, and temp
latlong_5 = temp
#then add the traits from the loaded csvs into new columns in latlong_5
#now latlong5 has all the biogeography traits
latlong_5['temp.range'] = temprange['temp.range']
latlong_5['elevation'] = elevation['elevation']
latlong_5['rainfall'] = rainfall['rainfall']
latlong_5['wind.speed'] = wind['wind.speed']
latlong_5['flii'] = flii['flii']
latlong_5['temp.var'] = tempvar['temp.var']
latlong_5['canopy'] = canopy['canopy']
latlong_5['ghm'] = ghm['ghm']
latlong_5['tree.cover'] = treecover['tree.cover']
latlong_5['crops.cover'] = cropscover['crops.cover']
latlong_5['grass.cover'] = grasscover['grass.cover']
latlong_5['shrub.cover'] = shrubcover['shrub.cover']
latlong_5['urban.cover'] = urbancover['urban.cover']
latlong_5['water.perm.cover'] = waterpermcover['water.permanent.cover']
latlong_5['water.seas.cover'] = waterseascover['water.seasonal.cover']
latlong_5['evi'] = evi['evi']


#make an array of the variables
biogeo = ['temp', 'temp.range', 'elevation', 'rainfall', 'wind.speed', 'flii', 'temp.var', 'canopy', 'ghm', 'tree.cover', 'crops.cover', 'grass.cover', 'shrub.cover', 'urban.cover', 'water.perm.cover', 'water.seas.cover', 'evi']

#match the scientific names of the biogeo dataframe with the whole traits dataframe and write in the data into the traits df
for x in range(len(latlong_5)):
    for y in range(len(traits)):
        for z in range(len(biogeo)):
            if latlong_5.loc[x, 'scientificName'] == traits.loc[y, 'species']:
                traits.loc[y, biogeo[z]] = latlong_5.loc[x, biogeo[z]]

#export the dataframe to your computer
traits.to_csv('final2/vector traits with biogeo and biome.csv')

In [97]:
#COUNT ECOREGIONS
bioeco = pd.read_csv('final2/latlongbioeco.csv')
#array of sandfly names that have biome data
names_unique = bioeco['scientificName'].unique()
#turn the array into a dataframe and rename the column
names_df = pd.DataFrame(bioeco['scientificName'].unique())
names_df.columns = ['scientificName']

#create a for loop that counts how many different eco regions each sandfly appears in
for i in range(len(names_unique)):
    names_df.loc[i, 'ecoregion.breadth'] = len(bioeco[bioeco['scientificName'] == names_unique[i]]['eco id'].unique())

#read in the traits csv
traits = pd.read_csv('final2/vector traits with biogeo and biome.csv')
#match the count to the proper name in the traits csv and create a column for 'ecoregion.breadth' in traits for it
for x in range(len(names_df)):
    for y in range(len(traits)):
        if names_df.loc[x, 'scientificName'] == traits.loc[y, 'species']:
            traits.loc[y, 'ecoregion.breadth'] = names_df.loc[x, 'ecoregion.breadth']
            
#export to computer  
traits.to_csv('final2/vector trait data final.csv')