In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [3]:
# --- Atlas open-data tables ---
dists = pd.read_csv(
    filepath_or_buffer="data/atlas_open_data_files/distributions.csv"
)
dist_changes = pd.read_csv(
    filepath_or_buffer="data/atlas_open_data_files/distribution_changes.csv"
)
grid_coords = pd.read_csv(
    filepath_or_buffer="data/atlas_open_data_files/grid_square_coordinates_lookup.csv"
)
pct_bench_species = pd.read_csv(
    filepath_or_buffer="data/atlas_open_data_files/percent_benchmark_species_detected.csv"
)
# The species lookup is decoded as ISO-8859-1 rather than the default encoding.
species_lookup = pd.read_csv(
    filepath_or_buffer="data/atlas_open_data_files/species_lookup_nocase.csv",
    encoding="ISO-8859-1",
)

# --- Conservation status list ---
conservation_status = pd.read_csv(
    filepath_or_buffer="data/conservation/conservation_status.csv"
)

#### Misc stuff

In [4]:
# Distinct values of the `period` column, i.e. the atlas periods present in
# the distributions table (the length of this array is the number of periods).
pd.unique(dists["period"])

array(['1968-72', '2008-11', '2007/08-10/11', '1980/81-1982/83',
       '1988-91'], dtype=object)

In [5]:
# The ten species recorded most often in the RSPB Big Garden Birdwatch (2023):
# https://www.rspb.org.uk/whats-happening/big-garden-birdwatch
# NOTE(review): the integers are presumably `speccode` values from the atlas
# species lookup -- confirm against `species_lookup` before relying on them.
HOUSE_SPARROW = 459
BLUE_TIT = 436
STARLING = 457
WOODPIGEON = 270
# Original (misspelled) name, kept as an alias so existing references still work.
WOODPIDGEON = WOODPIGEON
BLACKBIRD = 371
ROBIN = 345
GOLDFINCH = 471
GREAT_TIT = 437
MAGPIE = 450
LONG_TAILED_TIT = 431

# Ideas
- Regression of distribution changes of common garden birds?
- Classifying birds that need special concern?
- To what extent does the presence/movement of passerines affect the movement of birds of prey?
    - Regression where y = bird of prey and x = passerine

- **Feed the data into a neural network (NN) that predicts whether a bird will be on the latest Red List**

# Predicting conservation status using citizen science bird atlases

[Classifications are taken from the BTO's Birds of Conservation Concern](https://www.bto.org/our-science/publications/birds-conservation-concern)

### Data preprocessing

- Atlas data obtained from Gillings et al.
- Conservation status obtained from the BTO
- Only birds that have data in the atlas are included in the study
- Saved the conservation statuses to a CSV keyed by the atlas species lookup codes
- Corrected records that had no species code filled in; these were caused by differences in bird names between the two lists
- The Great Auk went extinct in the 19th century, so there is no data for it in the atlases
    - This record has been removed from the former breeding birds list
- Removed species aggregates from the lookup

In [6]:
# Sanity check: list any conservation-status rows that have no species code.
conservation_status.loc[conservation_status["speccode"].isna()]

Unnamed: 0,speccode,english_name,status


In [7]:
# Find entries of the conservation list with no exact (speccode, english_name)
# match in the atlas species lookup. The outer merge keeps every row from both
# tables and `indicator=True` adds a `_merge` column recording where each came from.
merged_df = conservation_status.merge(
    species_lookup,
    how='outer',
    on=['speccode', 'english_name'],
    indicator=True,
)

# Rows flagged 'left_only' exist in the conservation list but not in the lookup.
is_conservation_only = merged_df['_merge'] == 'left_only'
values_only_in_df1 = merged_df.loc[is_conservation_only, ['speccode', 'english_name']]
values_only_in_df1

Unnamed: 0,speccode,english_name
92,107,montagu’s harrier
113,23,leach's storm-petrel
168,49,bean goose
189,320,shorelark
212,410,common whitethroat
265,353,common redstart
290,22,european storm-petrel
295,12,northern fulmar


In [12]:
# Drop the "species aggregate" rows from the lookup (removes 6 entries).
# The filtered frame is bound to its own name: a bare filter expression is only
# displayed and then discarded, so nothing would actually be removed.
# `species_lookup` itself is left unchanged, which keeps this cell idempotent.
species_lookup_no_aggregates = species_lookup[
    species_lookup["taxonomic_rank"] != "species aggregate"
]
species_lookup_no_aggregates

Unnamed: 0,speccode,english_name,scientific_name,taxonomic_rank
0,1,red-throated diver,gavia stellata,species
1,2,black-throated diver,gavia arctica,species
2,3,great northern diver,gavia immer,species
3,4,white-billed diver,gavia adamsii,species
4,5,pied-billed grebe,podilymbus podiceps,species
...,...,...,...,...
459,1582,grey parrot,psittacus erithacus,species
461,1584,white-cheeked turaco,tauraco leucotis,species
462,1586,violet turaco,musophaga violacea,species
463,1603,house crow,corvus splendens,species
