In [104]:
!pip install --upgrade gspread

from bs4 import BeautifulSoup
import requests
import pandas as pd
import gspread
from google.colab import auth

# Authenticate the Colab session first; the credential lookup below runs after it
auth.authenticate_user()

# Fetch the default Google credentials and hand them to gspread
from google.auth import default
creds, _ = default()
gs = gspread.authorize(creds)

# Open the spreadsheet by title and keep one handle per worksheet used later on
sh = gs.open("Dados compilados sobre aves catalogadas no Brasil")
sh_regions, sh_birds, sh_birds_states, sh_brazil_states = (
    sh.worksheet(title)
    for title in (
        "Regiões com espécies em comum com o Brasil",
        "Espécies de aves no Brasil",
        "Espécies de aves por estado do Brasil",
        "Estados",
    )
)

Collecting gspread
  Downloading gspread-6.0.2-py3-none-any.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.9/53.9 kB[0m [31m968.1 kB/s[0m eta [36m0:00:00[0m
Collecting StrEnum==0.4.15 (from gspread)
  Downloading StrEnum-0.4.15-py3-none-any.whl (8.9 kB)
Installing collected packages: StrEnum, gspread
  Attempting uninstall: gspread
    Found existing installation: gspread 3.4.2
    Uninstalling gspread-3.4.2:
      Successfully uninstalled gspread-3.4.2
Successfully installed StrEnum-0.4.15 gspread-6.0.2


# **Regiões do mundo com espécies em comum com o Brasil**

In [105]:
# Download the Avibase region-comparison page for Brazil (region=BR) and parse it
page_regions = requests.get(
    'https://avibase.bsc-eoc.org/compare_regions.jsp?region=BR&list=clements',
    timeout=30,  # seconds; without a timeout a stalled server would hang the cell
)
# Stop here on an HTTP error rather than parsing an error page in the next cell
page_regions.raise_for_status()
bs = BeautifulSoup(page_regions.text, 'html.parser')

In [106]:
# Turn the #compareRegions HTML table into a list of rows (header row first)
regions_table = bs.find(attrs={'id':'compareRegions'})

# Body: one list of cell strings per <tr> of the first <tbody>.
# Note: `.string` is None for a cell that holds more than one child node.
body_rows = [
    [td.string for td in tr.find_all('td')]
    for tr in regions_table.find_all('tbody')[0].find_all('tr')
]

# Header: the <th> labels of the first row inside <thead>
tb_regions_headers = [
    th.string for th in regions_table.find('thead').find('tr').find_all('th')
]

tb_regions = [tb_regions_headers] + body_rows

In [120]:
# Build the regions table for the sheet: keep four columns, make the three
# count/percentage columns numeric, and rename everything to Portuguese headers.
numeric_cols = ['Shared','% shared/region1','%shared/region2']
sheet_headers = {
    'Region2': 'Região',
    'Shared': 'Aves em comum',
    '% shared/region1': '% Aves em comum/Total do Brasil',
    '%shared/region2': '% Aves em comum/Total da Região',
}

# First scraped row is the header, the rest are data rows
regions_raw = pd.DataFrame(tb_regions[1:], columns=tb_regions[0])

# Work on an independent copy of the selected columns
df = regions_raw[['Region2'] + numeric_cols].copy()
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric)
df = df.rename(columns=sheet_headers)

# List-of-lists payload: header row followed by the values, with missing cells as -1
header_row = df.columns.values.tolist()
data = [header_row] + df.fillna(-1).values.tolist()

In [121]:
# Size of the table being written (the header row is included in `data`)
num_lines = len(data)
num_cols = len(data[0])

# A1 range covering the whole table. rowcol_to_a1 gives the bottom-right cell
# label and, unlike chr(ord("A") + n), stays correct past column Z (AA, AB, ...).
range_sheet = f'A1:{gspread.utils.rowcol_to_a1(num_lines, num_cols)}'

# Replace the worksheet contents. Keyword arguments are used because the
# positional order of update() differs between gspread releases
# (older: range first; 6.x: values first).
sh_regions.clear()
sh_regions.update(range_name=range_sheet, values=data)

{'spreadsheetId': '1jkUNttrcJa_U75cVSiNhHXGSfWZbFmdK8E5CcJb7XMM',
 'updatedRange': "'Regiões com espécies em comum com o Brasil'!A1:D243",
 'updatedRows': 243,
 'updatedColumns': 4,
 'updatedCells': 972}

# **Espécies de aves por estado do Brasil**

In [117]:
# Column 1 (header dropped): the value stored in the 'Estado' field of each row
states = sh_brazil_states.col_values(1)[1:]
# Column 4 (header dropped): the suffix appended to "BR" in the Avibase region code
states_codes = sh_brazil_states.col_values(4)[1:]

# Every state needs a code; fail early with a clear message instead of an
# IndexError halfway through the downloads.
if len(states_codes) < len(states):
  raise ValueError(
      f"'Estados' sheet has {len(states)} states but only {len(states_codes)} codes"
  )

birds = [['Nome científico', 'Nome popular', 'Família', 'Situação', 'Estado']]


def _scrape_state_checklist(state, state_code, n_cells, timeout=30):
  """Download one state's Avibase checklist and return its rows.

  Args:
    state: value appended as the last field of every returned row.
    state_code: suffix appended to "BR" to form the `region` query parameter.
    n_cells: number of <td> cells a data row must have to be kept.
    timeout: seconds to wait for the HTTP response.

  Returns:
    A list of rows shaped [cell0, cell1, family, cell2, ..., state]; an empty
    list when the page contains no <table>.

  Raises:
    requests.HTTPError: when the server answers with an error status.
  """
  page = requests.get(
      f"https://avibase.bsc-eoc.org/checklist.jsp?region=BR{state_code}",
      timeout=timeout,
  )
  page.raise_for_status()

  # Local name on purpose: the notebook-level `bs` (regions page) is not rebound
  table = BeautifulSoup(page.text, 'html.parser').find('table')
  if table is None:
    return []

  rows = []
  family = ''
  for row in table.find_all('tr'):
    cells = row.find_all('td')
    if row.has_attr('valign'):
      # Rows carrying a `valign` attribute hold the family heading for the
      # data rows that follow them.
      if cells:
        family = cells[0].get_text().strip()
      continue
    if len(cells) != n_cells:
      # Header/separator rows would otherwise produce records whose fields
      # do not line up with the column names in birds[0].
      continue
    texts = [cell.get_text().strip() for cell in cells]
    rows.append(texts[:2] + [family] + texts[2:] + [state])
  return rows


# Family and state are added by the scraper; the remaining columns come from the page
cells_per_row = len(birds[0]) - 2

for state, state_code in zip(states, states_codes):
  birds.extend(_scrape_state_checklist(state, state_code, cells_per_row))

In [118]:
# Split the scraped rows into the two tables written to the spreadsheet.
# birds[0] is the header row, the rest are data rows.
df = pd.DataFrame(birds[1:], columns=birds[0])

# One row per distinct (scientific name, popular name, family) combination
df_birds = df[['Nome científico', 'Nome popular', 'Família']].drop_duplicates()
# One row per distinct (scientific name, state, status) combination
df_birds_states = df[['Nome científico','Estado','Situação']].drop_duplicates()

# List-of-lists payloads: header row followed by the values, with missing cells as -1
data_birds = [df_birds.columns.values.tolist()] + df_birds.fillna(-1).values.tolist()
data_birds_states = [df_birds_states.columns.values.tolist()] + df_birds_states.fillna(-1).values.tolist()

In [119]:
def _write_table(worksheet, rows):
  """Replace the contents of `worksheet` with `rows`, anchored at cell A1.

  Args:
    worksheet: gspread worksheet to overwrite.
    rows: non-empty list of equally long lists; the first one is the header.

  Returns:
    Whatever `worksheet.update` returns (the API response).
  """
  num_lines = len(rows)
  num_cols = len(rows[0])

  # rowcol_to_a1 gives the bottom-right cell label and, unlike
  # chr(ord("A") + n), stays correct past column Z.
  range_sheet = f'A1:{gspread.utils.rowcol_to_a1(num_lines, num_cols)}'

  worksheet.clear()
  # Keyword arguments: the positional order of update() differs between
  # gspread releases (older: range first; 6.x: values first).
  return worksheet.update(range_name=range_sheet, values=rows)


# Species table
_write_table(sh_birds, data_birds)

# Species-per-state table (last expression, so its response is the cell output)
_write_table(sh_birds_states, data_birds_states)

{'spreadsheetId': '1jkUNttrcJa_U75cVSiNhHXGSfWZbFmdK8E5CcJb7XMM',
 'updatedRange': "'Espécies de aves por estado do Brasil'!A1:C22366",
 'updatedRows': 22366,
 'updatedColumns': 3,
 'updatedCells': 67098}