In [61]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [62]:
# --- Atlas open-data tables (all live under data/atlas_open_data_files/) ---
dists = pd.read_csv(
    filepath_or_buffer="data/atlas_open_data_files/distributions.csv",
)
dist_changes = pd.read_csv(
    filepath_or_buffer="data/atlas_open_data_files/distribution_changes.csv",
)
grid_coords = pd.read_csv(
    filepath_or_buffer="data/atlas_open_data_files/grid_square_coordinates_lookup.csv",
)
pct_bench_species = pd.read_csv(
    filepath_or_buffer="data/atlas_open_data_files/percent_benchmark_species_detected.csv",
)
# The species lookup is not UTF-8; it is decoded as ISO-8859-1 (Latin-1).
species_lookup = pd.read_csv(
    filepath_or_buffer="data/atlas_open_data_files/species_lookup_nocase.csv",
    encoding="ISO-8859-1",
)

# --- Conservation status table (kept in a separate data/conservation/ folder) ---
conservation_status = pd.read_csv(
    filepath_or_buffer="data/conservation/conservation_status.csv",
)

#### Misc stuff

In [63]:
# Distinct labels in the `period` column of the distributions table,
# i.e. which atlas periods the dataset covers (one label per atlas).
pd.unique(dists["period"])

array(['1968-72', '2008-11', '2007/08-10/11', '1980/81-1982/83',
       '1988-91'], dtype=object)

In [64]:
# The ten birds spotted most often in the Big Garden Birdwatch (2023):
# https://www.rspb.org.uk/whats-happening/big-garden-birdwatch
# NOTE(review): the integers are presumably `speccode` values from the atlas
# species lookup - confirm against species_lookup before relying on them.
HOUSE_SPARROW = 459
BLUE_TIT = 436
STARLING = 457
WOODPIGEON = 270
# Misspelled original name, kept as an alias so existing references still work.
WOODPIDGEON = WOODPIGEON
BLACKBIRD = 371
ROBIN = 345
GOLDFINCH = 471
GREAT_TIT = 437
MAGPIE = 450
LONG_TAILED_TIT = 431

# Ideas
- Regression of distribution changes of common garden birds?
- Classifying birds that need special concern?
- To what extent does the presence/movement of passerines affect the movement of birds of prey?
    - Regression where y = bird of prey and x = passerine

- **Feed the data into a neural network that predicts whether a bird will be on the latest Red List**

# Predicting conservation status using citizen science bird atlases

[Classifications are from the BTO Birds of Conservation Concern](https://www.bto.org/our-science/publications/birds-conservation-concern)

### Data preprocessing

- Obtained the atlas data from Gillings et al.
- Obtained the conservation statuses from the BTO
- Formatted the conservation statuses as a CSV that includes the atlas species lookup codes
- Corrected records that were missing a species code because the bird's name differed between the two lists

In [65]:
# Stack the status-bearing rows on top of the atlas species lookup, then keep
# one row per English name. `keep="first"` retains the earliest occurrence, so
# a row from `red_amber` wins over the lookup row with the same name.
# `drop_duplicates` returns a new frame (it is not in-place), so its result
# must be assigned - otherwise the duplicates silently remain.
# NOTE(review): `red_amber` is not defined in any cell visible here; make sure
# the cell that creates it runs before this one on a fresh kernel.
all_species = pd.concat(
    [red_amber, species_lookup[["speccode", "english_name"]]]
).drop_duplicates(subset="english_name", keep="first")
all_species

Unnamed: 0,speccode,english_name,prev_status,status
0,131.0,grey partridge,nc,r
1,171.0,lapwing,nc,r
2,384.0,grasshopper warbler,nc,r
3,126.0,ptarmigan,g,r
4,202.0,whimbrel,nc,r
...,...,...,...,...
460,1583.0,rock/water pipit,,
461,1584.0,white-cheeked turaco,,
462,1586.0,violet turaco,,
463,1603.0,house crow,,


In [67]:
# Show the conservation-status rows whose `speccode` is missing (NaN).
conservation_status.loc[lambda frame: frame["speccode"].isna()]

Unnamed: 0,speccode,english_name,status
56,,redpoll,r
