# Join GBIF occurrence data with human-interference metrics

In [1]:
import pandas as pd
import numpy as np
import os
from pygbif import species as species
from pygbif import occurrences as occ
from human_interference import HumanInterference

In [2]:
# Load the invasive-species table and keep only the rows whose
# `invasiveness` column equals 'Invasive'.
df_inv = (
    pd.read_csv('../data/invasive_species.csv')
    .loc[lambda frame: frame['invasiveness'] == 'Invasive']
    .copy()
)

# Helper object used further down for the radiance / urbanisation lookups.
hum_int = HumanInterference()

In [3]:
# Observation years requested from GBIF.
years = [2021, 2022]

# Species queried for each country (scientific names).
brazil_species = [
    'Chrysocyon brachyurus',
    'Callithrix jacchus',
    'Euphractus sexcinctus',
    'Nasua nasua',
    'Hydrochoerus hydrochaeris',
]
uk_species = [
    'Erinaceus roumanicus',
    'Vulpes vulpes',
    'Phoca vitulina',
    'Lutra lutra',
    'Branta canadensis',
]

# Occurrence fields retained from the GBIF search results.
cols_of_interest = [
    'key',
    'publishingCountry',
    'basisOfRecord',
    'scientificName',
    'species',
    'genericName',
    'iucnRedListCategory',
    'decimalLongitude',
    'decimalLatitude',
    'coordinateUncertaintyInMeters',
    'eventDate',
    'recordedBy',
    'country',
    'countryCode',
]

# Two-letter country code -> de-duplicated list of invasive scientific names.
# `df_inv` identifies countries by three-letter code ('BRA'/'GBR'), hence the
# explicit pairing of the two code styles.
inv_dict = {
    alpha2: list(set(df_inv[df_inv.countryCode == alpha3].scientificName.to_list()))
    for alpha2, alpha3 in (('BR', 'BRA'), ('GB', 'GBR'))
}

In [4]:
def _fetch_gbif_observations(species_names, country, search_years, limit=300, **filters):
    """Download human-observation occurrence records from GBIF.

    One ``occ.search`` request is issued per (year, species) pair and the
    ``'results'`` payload of every response is turned into a DataFrame. All
    frames are combined with a single ``pd.concat`` instead of re-concatenating
    inside the loop, which copied the accumulated data on every iteration.

    Parameters
    ----------
    species_names : iterable of str
        Values passed as ``scientificName``.
    country : str
        Value passed as ``country``.
    search_years : iterable of int
        Values passed as ``year``.
    limit : int, default 300
        Value passed as ``limit`` for each request.
    **filters
        Extra keyword arguments forwarded unchanged to ``occ.search``.

    Returns
    -------
    pandas.DataFrame
        Concatenation of every response; the row index is not reset, so index
        labels repeat across requests (as in the previous implementation).
    """
    frames = [
        pd.DataFrame(
            occ.search(scientificName=name, country=country, limit=limit,
                       basisOfRecord='HUMAN_OBSERVATION',
                       year=year,
                       **filters)['results']
        )
        for year in search_years
        for name in species_names
    ]
    # pd.concat raises on an empty list, so fall back to an empty frame.
    return pd.concat(frames) if frames else pd.DataFrame()


df_brazil = _fetch_gbif_observations(brazil_species, 'BR', years)

# NOTE(review): only the UK query excludes records flagged with geospatial
# issues; the Brazil query above does not. Kept as-is — confirm whether the
# two queries are meant to differ.
df_uk = _fetch_gbif_observations(uk_species, 'GB', years, hasGeospatialIssue=False)

In [5]:
# Trim both country frames to the shared set of columns.
df_brazil = df_brazil.loc[:, cols_of_interest].copy()
df_uk = df_uk.loc[:, cols_of_interest].copy()

# Stack the two countries, then derive:
#   date     - first 10 characters of `eventDate`
#   invasive - whether the scientific name is listed in `inv_dict` for the
#              row's country code
df_world = pd.concat([df_brazil, df_uk]).assign(
    date=lambda frame: frame['eventDate'].str.slice(stop=10),
    invasive=lambda frame: frame[['countryCode', 'scientificName']].apply(
        lambda row: row['scientificName'] in inv_dict[row['countryCode']],
        axis=1,
    ),
)

In [6]:
# Display the combined Brazil + UK frame, including the derived `date` and
# `invasive` columns.
df_world

Unnamed: 0,key,publishingCountry,basisOfRecord,scientificName,species,genericName,iucnRedListCategory,decimalLongitude,decimalLatitude,coordinateUncertaintyInMeters,eventDate,recordedBy,country,countryCode,date,invasive
0,3031987049,US,HUMAN_OBSERVATION,"Chrysocyon brachyurus (Illiger, 1815)",Chrysocyon brachyurus,Chrysocyon,NT,-47.491164,-22.258903,30298.0,2021-01-13T05:07:00,registroscomunitariosderioclarosp,Brazil,BR,2021-01-13,False
1,3307456876,US,HUMAN_OBSERVATION,"Chrysocyon brachyurus (Illiger, 1815)",Chrysocyon brachyurus,Chrysocyon,NT,-43.628159,-17.983999,587863.0,2021-06-17T13:21:01,Enrico A. R. Tosto,Brazil,BR,2021-06-17,False
2,3328022769,US,HUMAN_OBSERVATION,"Chrysocyon brachyurus (Illiger, 1815)",Chrysocyon brachyurus,Chrysocyon,NT,-47.509019,-22.525994,30278.0,2021-07-09T17:33:00,mariosoares,Brazil,BR,2021-07-09,False
3,3335201455,US,HUMAN_OBSERVATION,"Chrysocyon brachyurus (Illiger, 1815)",Chrysocyon brachyurus,Chrysocyon,NT,-46.934391,-21.306569,30396.0,2021-07-18T06:34:00,CRISTINA RAPPA,Brazil,BR,2021-07-18,False
4,3773393229,US,HUMAN_OBSERVATION,"Chrysocyon brachyurus (Illiger, 1815)",Chrysocyon brachyurus,Chrysocyon,NT,-46.874809,-21.219875,30396.0,2021-07-18T19:34:00,ademircarosia,Brazil,BR,2021-07-18,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,3759867011,US,HUMAN_OBSERVATION,"Branta canadensis (Linnaeus, 1758)",Branta canadensis,Branta,LC,-1.973953,52.640345,,2022-03-26T13:32:00,toad64,United Kingdom of Great Britain and Northern I...,GB,2022-03-26,True
296,3759869101,GB,HUMAN_OBSERVATION,"Branta canadensis (Linnaeus, 1758)",Branta canadensis,Branta,LC,-2.363292,53.547544,,2022-03-27T12:22:18,u45d,United Kingdom of Great Britain and Northern I...,GB,2022-03-27,True
297,3759881281,GB,HUMAN_OBSERVATION,"Branta canadensis (Linnaeus, 1758)",Branta canadensis,Branta,LC,-5.126466,50.157655,213.0,2022-03-28T15:20:07,duckindisguise,United Kingdom of Great Britain and Northern I...,GB,2022-03-28,True
298,3759925409,US,HUMAN_OBSERVATION,"Branta canadensis (Linnaeus, 1758)",Branta canadensis,Branta,LC,-1.613479,53.194290,152.0,2022-03-29T12:27:00,Trevor Marshall,United Kingdom of Great Britain and Northern I...,GB,2022-03-29,True


In [8]:
# Average radiance per observation: looked up from latitude, longitude and the
# observation date via `hum_int.get_avg_radiance`.
df_world['avg_radiance'] = (
    df_world[['decimalLatitude', 'decimalLongitude', 'date']]
    .apply(
        lambda row: hum_int.get_avg_radiance(
            row['decimalLatitude'], row['decimalLongitude'], row['date']
        ),
        axis=1,
    )
)

# Average degree of urbanisation per observation: looked up from latitude and
# longitude only via `hum_int.get_avg_deg_urban`.
df_world['avg_deg_urban'] = (
    df_world[['decimalLatitude', 'decimalLongitude']]
    .apply(
        lambda row: hum_int.get_avg_deg_urban(row.decimalLatitude, row.decimalLongitude),
        axis=1,
    )
)

In [10]:
# Write the enriched frame to a CSV in the working directory; the row index is
# omitted from the file.
df_world.to_csv('human_interference_sample.csv', index=False)

## Load the consolidated UK and Brazil occurrences from BigQuery

In [25]:
%load_ext google.cloud.bigquery
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '../molten-kit-354506-57defdcd41ff.json'

The google.cloud.bigquery extension is already loaded. To reload it, use:
  %reload_ext google.cloud.bigquery


In [32]:
%%bigquery df_uk_occ
-- Fetch every column of the consolidated UK 2022 table; the %%bigquery magic
-- stores the result in the DataFrame named on the first line (df_uk_occ).
SELECT
    *
FROM `molten-kit-354506.sample_gbif_climate.consolidated_UK_2022`

Query complete after 0.00s: 100%|█████████████| 1/1 [00:00<00:00, 176.87query/s]
Downloading: 100%|██████████████████████| 2511/2511 [00:01<00:00, 2033.01rows/s]


In [33]:
%%bigquery df_br_occ
-- Fetch every column of the consolidated 2022 table for Brazil; the result is
-- stored in the DataFrame named on the first line (df_br_occ).
-- NOTE(review): the table suffix is "BZ" while the rest of the notebook uses
-- "BR" for Brazil - confirm this is the intended table.
SELECT
    *
FROM `molten-kit-354506.sample_gbif_climate.consolidated_BZ_2022`

Query complete after 0.00s: 100%|█████████████| 1/1 [00:00<00:00, 460.15query/s]
Downloading: 100%|█████████████████████████| 198/198 [00:01<00:00, 183.37rows/s]


In [34]:
# Display the UK occurrences as loaded from BigQuery.
df_uk_occ

Unnamed: 0,closest_station,species,speciesKey,scientificName,decimalLatitude,decimalLongitude,coordinateUncertaintyInMeters,countryCode,year,month,...,station_lat_deg,station_lon_deg,altitude,mean_air_temp_C,mean_dew_point,wind_speed_mps,wind_direction_deg,sea_level_pressure_hPa,visibility_m,last_temp_recorded_days
0,Rhyl,Branta canadensis,5232437,"Branta canadensis (Linnaeus, 1758)",53.278920,-3.805963,190,GB,2022,3,...,53.25,-3.50,76.0,6.73157894736842,3.23157894736842,4.53684210526316,218.421052631579,1020.55263157895,37789.4736842105,0
1,Rhyl,Branta canadensis,5232437,"Branta canadensis (Linnaeus, 1758)",53.278617,-3.803989,2,GB,2022,3,...,53.25,-3.50,76.0,9.60434782608696,7.02608695652174,1.89130434782609,231.739130434783,1022.60869565217,,0
2,Culdrose,Branta canadensis,5232437,"Branta canadensis (Linnaeus, 1758)",50.157655,-5.126466,213,GB,2022,3,...,50.08,-5.25,84.0,9.58888888888889,8.7,7.56111111111111,70.5555555555556,1018.58333333333,2911.11111111111,0
3,Cranwell,Branta canadensis,5232437,"Branta canadensis (Linnaeus, 1758)",52.650413,-0.696958,70.7,GB,2022,3,...,53.03,-0.50,62.0,7.49166666666667,2.75833333333333,5.27916666666667,239.166666666667,1021.225,47833.4444444444,0
4,Cranwell,Branta canadensis,5232437,"Branta canadensis (Linnaeus, 1758)",52.643223,-0.697172,70.7,GB,2022,3,...,53.03,-0.50,62.0,7.49166666666667,2.75833333333333,5.27916666666667,239.166666666667,1021.225,47833.4444444444,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2506,Edinburgh Gogarbank,Phoca vitulina,2434793,"Phoca vitulina Linnaeus, 1758",55.977520,-3.146340,50,GB,2022,5,...,55.93,-3.35,57.0,9.54583333333333,7.2625,2.1,138.333333333333,1020.49166666667,18000,0
2507,Saint Bees Head,Phoca vitulina,2434793,"Phoca vitulina Linnaeus, 1758",54.400000,-3.400000,7071.1,GB,2022,3,...,54.52,-3.60,123.0,5.12083333333333,0.2875,12.525,143.75,1009.45,13379.1666666667,0
2508,Tulloch Bridge,Phoca vitulina,2434793,"Phoca vitulina Linnaeus, 1758",56.416857,-5.474380,7,GB,2022,1,...,56.87,-4.70,249.0,-0.3375,-1.02083333333333,1.72916666666667,252.083333333333,1000.70416666667,22266.6666666667,0
2509,Saint Catherine's Point,Phoca vitulina,2434793,"Phoca vitulina Linnaeus, 1758",50.752862,-1.363461,7071.1,GB,2022,2,...,50.58,-1.30,24.0,7.8625,5.4875,6.14583333333333,275.833333333333,1026.9375,,0


In [41]:
# Enrich the UK occurrences with the invasive flag and the two
# human-interference metrics. A temporary `date` column (first 10 characters
# of the stringified `eventDate`) feeds the radiance lookup and is dropped
# again at the end.
df_uk_occ = (
    df_uk_occ
    .assign(
        date=lambda frame: frame['eventDate'].astype(str).str.slice(stop=10),
        invasive=lambda frame: frame[['countryCode', 'scientificName']].apply(
            lambda row: row['scientificName'] in inv_dict[row['countryCode']],
            axis=1,
        ),
        avg_radiance=lambda frame: frame[['decimalLatitude', 'decimalLongitude', 'date']].apply(
            lambda row: hum_int.get_avg_radiance(
                row['decimalLatitude'], row['decimalLongitude'], row['date']
            ),
            axis=1,
        ),
        avg_deg_urban=lambda frame: frame[['decimalLatitude', 'decimalLongitude']].apply(
            lambda row: hum_int.get_avg_deg_urban(row.decimalLatitude, row.decimalLongitude),
            axis=1,
        ),
    )
    .drop(columns='date')
)

In [38]:
# Enrich the Brazil occurrences with the invasive flag and the two
# human-interference metrics. A temporary `date` column (first 10 characters
# of the stringified `eventDate`) feeds the radiance lookup and is dropped
# again at the end.
df_br_occ = (
    df_br_occ
    .assign(
        date=lambda frame: frame['eventDate'].astype(str).str.slice(stop=10),
        invasive=lambda frame: frame[['countryCode', 'scientificName']].apply(
            lambda row: row['scientificName'] in inv_dict[row['countryCode']],
            axis=1,
        ),
        avg_radiance=lambda frame: frame[['decimalLatitude', 'decimalLongitude', 'date']].apply(
            lambda row: hum_int.get_avg_radiance(
                row['decimalLatitude'], row['decimalLongitude'], row['date']
            ),
            axis=1,
        ),
        avg_deg_urban=lambda frame: frame[['decimalLatitude', 'decimalLongitude']].apply(
            lambda row: hum_int.get_avg_deg_urban(row.decimalLatitude, row.decimalLongitude),
            axis=1,
        ),
    )
    .drop(columns='date')
)

In [43]:
# Columns written to the per-country export files.
cols = [
    'scientificName',
    'decimalLatitude',
    'decimalLongitude',
    'countryCode',
    'eventDate',
    'invasive',
    'avg_radiance',
    'avg_deg_urban',
]

In [44]:
# Export the selected columns of the Brazil frame to CSV, without the row index.
df_br_occ[cols].to_csv('gbif_human_BR_2022.csv', index=False)

In [45]:
# Export the selected columns of the UK frame to CSV, without the row index.
df_uk_occ[cols].to_csv('gbif_human_UK_2022.csv', index=False)