In [11]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [12]:
# Atlas open-data tables (all under data/atlas_open_data_files/).
dists = pd.read_csv(
    "data/atlas_open_data_files/distributions.csv"
)
dist_changes = pd.read_csv(
    "data/atlas_open_data_files/distribution_changes.csv"
)
grid_coords = pd.read_csv(
    "data/atlas_open_data_files/grid_square_coordinates_lookup.csv"
)
pct_bench_species = pd.read_csv(
    "data/atlas_open_data_files/percent_benchmark_species_detected.csv"
)
# The species lookup is decoded as ISO-8859-1 rather than the default UTF-8.
species_lookup = pd.read_csv(
    "data/atlas_open_data_files/species_lookup_nocase.csv",
    encoding="ISO-8859-1",
)

# Conservation lists (red and amber, 2021 editions per the file names).
redlist = pd.read_csv(
    "data/conservation/redlist_2021.csv"
)
amberlist = pd.read_csv(
    "data/conservation/amberlist_2021.csv"
)

#### Misc stuff

In [13]:
# Distinct survey periods present in the distributions table
# (each distinct value is one atlas period; the array length is their count).
pd.unique(dists["period"])

array(['1968-72', '2008-11', '2007/08-10/11', '1980/81-1982/83',
       '1988-91'], dtype=object)

In [14]:
# The ten birds spotted most often in the Big Garden Birdwatch (2023):
# https://www.rspb.org.uk/whats-happening/big-garden-birdwatch
# NOTE(review): the integers are presumably `speccode` identifiers from the
# atlas species lookup -- confirm against species_lookup before relying on them.
HOUSE_SPARROW = 459
BLUE_TIT = 436
STARLING = 457
WOODPIGEON = 270
BLACKBIRD = 371
ROBIN = 345
GOLDFINCH = 471
GREAT_TIT = 437
MAGPIE = 450
LONG_TAILED_TIT = 431

# Backward-compatible alias: the constant was originally misspelled.
WOODPIDGEON = WOODPIGEON

# Ideas
- Regression of distribution changes of common garden birds?
- Classifying birds that need special concern?
- To what extent does the presence/movement of passerines affect the movement of birds of prey?
    - Regression where y = bird of prey and x = passerine

- **Feed the data into a neural network (NN) that predicts whether a bird will be on the latest Red List or not**

# Predicting conservation status using citizen science bird atlases

[Conservation classifications are taken from the BTO's Birds of Conservation Concern assessment](https://www.bto.org/our-science/publications/birds-conservation-concern)

In [15]:
redlist

Unnamed: 0,speccode,english_name,prev_status
0,131.0,grey partridge,nc
1,171.0,lapwing,nc
2,384.0,grasshopper warbler,nc
3,126.0,ptarmigan,g
4,202.0,whimbrel,nc
...,...,...,...
65,514.0,yellowhammer,nc
66,166.0,dotterel,nc
67,392.0,marsh warbler,nc
68,160.0,ringed plover,nc


In [16]:
amberlist

Unnamed: 0,speccode,english_name,prev_status
0,132.0,quail,nc
1,155.0,stone-curlew,nc
2,285.0,tawny owl,nc
3,48.0,whooper swan,nc
4,152.0,oystercatcher,nc
...,...,...,...
98,524.0,reed bunting,nc
99,38.0,great white egret,na
100,260.0,guillemot,nc
101,25.0,gannet,nc


In [19]:
# Tag each list with its conservation status: "r" = red, "a" = amber
# (green is filled in later for every species on neither list).
#
# `.assign` returns a NEW frame. A plain `redlist_prime = redlist` would only
# create a second name for the same object, so adding the column would also
# mutate the raw `redlist` / `amberlist` frames loaded from CSV and make this
# cell's effect leak into anything that reads them afterwards.
redlist_prime = redlist.assign(status="r")

amberlist_prime = amberlist.assign(status="a")

In [20]:
# Stack the red-list rows on top of the amber-list rows. Each frame keeps its
# own row labels, so index values repeat across the two halves.
red_amber = pd.concat(
    objs=[redlist_prime, amberlist_prime],
    axis=0,
    join="outer",
)
red_amber

Unnamed: 0,speccode,english_name,prev_status,status
0,131.0,grey partridge,nc,r
1,171.0,lapwing,nc,r
2,384.0,grasshopper warbler,nc,r
3,126.0,ptarmigan,g,r
4,202.0,whimbrel,nc,r
...,...,...,...,...
98,524.0,reed bunting,nc,a
99,38.0,great white egret,na,a
100,260.0,guillemot,nc,a
101,25.0,gannet,nc,a


In [26]:
# Append every species from the atlas lookup beneath the red/amber rows, then
# keep one row per English name.
#
# `drop_duplicates` returns a new frame (it does not modify in place), so its
# result must be assigned -- otherwise every red/amber species would also
# remain as a second, status-less lookup row.
# `keep="first"` makes the red/amber row win, because those rows come first in
# the concatenation.
# NOTE(review): matching is by exact `english_name` string; this assumes the
# conservation lists and the lookup spell and case names identically -- confirm.
all_species = pd.concat(
    [red_amber, species_lookup[["speccode", "english_name"]]]
).drop_duplicates(subset="english_name", keep="first")
all_species

Unnamed: 0,speccode,english_name,prev_status,status
0,131.0,grey partridge,nc,r
1,171.0,lapwing,nc,r
2,384.0,grasshopper warbler,nc,r
3,126.0,ptarmigan,g,r
4,202.0,whimbrel,nc,r
...,...,...,...,...
460,1583.0,rock/water pipit,,
461,1584.0,white-cheeked turaco,,
462,1586.0,violet turaco,,
463,1603.0,house crow,,


In [28]:
# Any species with no red/amber status is treated as green ("g").
#
# Build a new frame with `.assign` rather than aliasing `all_species` and
# writing into it: the alias would mutate `all_species` itself, leaving the
# output of the cell that displayed it stale.
all_species_p = all_species.assign(
    status=all_species["status"].fillna(value="g")
)
all_species_p[["speccode", "english_name", "status"]]

Unnamed: 0,speccode,english_name,status
0,131.0,grey partridge,r
1,171.0,lapwing,r
2,384.0,grasshopper warbler,r
3,126.0,ptarmigan,r
4,202.0,whimbrel,r
...,...,...,...
460,1583.0,rock/water pipit,g
461,1584.0,white-cheeked turaco,g
462,1586.0,violet turaco,g
463,1603.0,house crow,g


In [30]:
# Persist the labelled species table; the row index is not written.
# NOTE(review): every column of `all_species_p` is exported, not only the
# speccode / english_name / status subset displayed above -- confirm intended.
all_species_p.to_csv(
    path_or_buf="data/conservation/conservation_status.csv",
    index=False,
)