# Join GBIF Occurrence Data with Human Interference Data

In [3]:
import pandas as pd
from pygbif import species as species
from pygbif import occurrences as occ
from human_interference import HumanInterference

In [4]:
# Load the species list and keep only the rows flagged as 'Invasive'.
species_table = pd.read_csv('../data/invasive_species.csv')
is_invasive = species_table['invasiveness'] == 'Invasive'
df_inv = species_table.loc[is_invasive].copy()

# Helper object used later for the radiance / urbanisation lookups.
hum_int = HumanInterference()

In [4]:
# Observation years requested from GBIF.
years = [2021, 2022]

# Species queried for each country.
brazil_species = [
    'Chrysocyon brachyurus',
    'Callithrix jacchus',
    'Euphractus sexcinctus',
    'Nasua nasua',
    'Hydrochoerus hydrochaeris',
]
uk_species = [
    'Erinaceus roumanicus',
    'Vulpes vulpes',
    'Phoca vitulina',
    'Lutra lutra',
    'Branta canadensis',
]

# Columns retained from the raw GBIF occurrence records.
cols_of_interest = [
    'key',
    'publishingCountry',
    'basisOfRecord',
    'scientificName',
    'species',
    'genericName',
    'iucnRedListCategory',
    'decimalLongitude',
    'decimalLatitude',
    'coordinateUncertaintyInMeters',
    'eventDate',
    'recordedBy',
    'country',
    'countryCode',
]

# De-duplicated invasive scientific names per country. Keys are the two-letter
# codes ('BR', 'GB'); `df_inv` is filtered on the three-letter codes.
inv_dict = {
    two_letter: list(set(
        df_inv.loc[df_inv['countryCode'] == three_letter, 'scientificName'].to_list()
    ))
    for two_letter, three_letter in (('BR', 'BRA'), ('GB', 'GBR'))
}

In [5]:
def fetch_occurrences(species_names, country, years, **filters):
    """Download GBIF human-observation records for several species and years.

    One `occ.search` request is issued per (year, species) pair. The result
    tables are collected in a list and combined with a single `pd.concat`,
    instead of re-concatenating a growing frame on every iteration.

    Parameters
    ----------
    species_names : iterable of str
        Names passed to `occ.search` as `scientificName`.
    country : str
        Value passed to `occ.search` as `country`.
    years : iterable of int
        Values passed to `occ.search` as `year`.
    **filters
        Extra keyword arguments forwarded unchanged to `occ.search`.

    Returns
    -------
    pandas.DataFrame
        All returned records with a fresh 0..n-1 index, or an empty frame
        when no request returned any record.
    """
    frames = []
    for year in years:
        for name in species_names:
            # Each request asks for at most 300 records and no paging is done,
            # so any further matches are not fetched.
            response = occ.search(
                scientificName=name,
                country=country,
                limit=300,
                basisOfRecord='HUMAN_OBSERVATION',
                year=year,
                **filters,
            )
            records = pd.DataFrame(response['results'])
            if not records.empty:
                frames.append(records)

    # pd.concat raises on an empty list, so handle "nothing found" explicitly.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


df_brazil = fetch_occurrences(brazil_species, 'BR', years)

# NOTE(review): only the UK query filters on hasGeospatialIssue; the Brazil
# query does not. Kept as in the original -- confirm whether this is intended.
df_uk = fetch_occurrences(uk_species, 'GB', years, hasGeospatialIssue=False)

In [6]:
# Keep only the columns of interest from each country's records.
df_brazil = df_brazil[cols_of_interest].copy()
df_uk = df_uk[cols_of_interest].copy()

# ignore_index gives every row a unique label, so the column assignments
# below never have to align on repeated index values.
df_world = pd.concat([df_brazil, df_uk], ignore_index=True)

# First 10 characters of eventDate (presumably the YYYY-MM-DD part -- confirm).
df_world['date'] = df_world['eventDate'].str[:10]

# A record is invasive when its scientificName is listed in inv_dict under the
# record's own countryCode. Built from vectorised masks, one per country, so a
# countryCode absent from inv_dict yields False instead of raising KeyError.
# NOTE(review): this is an exact string match on scientificName -- confirm the
# invasive list uses the same name format as the GBIF records.
invasive_mask = pd.Series(False, index=df_world.index)
for country_code, invasive_names in inv_dict.items():
    in_country = df_world['countryCode'].eq(country_code)
    is_listed = df_world['scientificName'].isin(invasive_names)
    invasive_mask |= in_country & is_listed
df_world['invasive'] = invasive_mask

In [8]:
def _row_avg_radiance(row):
    """Call hum_int.get_avg_radiance with one row's latitude, longitude and date."""
    return hum_int.get_avg_radiance(row['decimalLatitude'],
                                    row['decimalLongitude'],
                                    row['date'])


def _row_avg_deg_urban(row):
    """Call hum_int.get_avg_deg_urban with one row's latitude and longitude."""
    return hum_int.get_avg_deg_urban(row['decimalLatitude'],
                                     row['decimalLongitude'])


# One lookup per occurrence row; each result is stored in its own column.
radiance_inputs = df_world[['decimalLatitude', 'decimalLongitude', 'date']]
df_world['avg_radiance'] = radiance_inputs.apply(_row_avg_radiance, axis=1)

urban_inputs = df_world[['decimalLatitude', 'decimalLongitude']]
df_world['avg_deg_urban'] = urban_inputs.apply(_row_avg_deg_urban, axis=1)

In [10]:
# Write the combined table, without the row index, to a relative path.
output_csv = 'human_interference_sample.csv'
df_world.to_csv(output_csv, index=False)