Find and print the list of species seen by only one birder.

Before running this, the following must be true:
* `update_list.ipynb` has been run
* All `.csv` files in `csv_dir_processed` are named with a hyphen surrounded by spaces (` - `) before the birder's name, e.g. `ebird_life_list - Tessa Rhinehart.csv`

In [None]:
import pandas as pd
import os

In [None]:
# Directory that is scanned for the processed per-birder `.csv` lists
csv_dir_processed = 'lists_processed/'

# Path of the completed seen list (read with pandas further down)
current_list = 'seen_list.csv'
# Column of the seen list that is compared against 0 and 1 to find species seen
# at all / seen by exactly one person
name_of_seen_column = 'number of people seeing species'

Collect the list of species seen by each person, and cross-reference it against the list of species seen by only one person

In [None]:
# Load the completed seen list into a DataFrame; later cells read its
# 'English name', 'scientific name' and `name_of_seen_column` columns
all_sightings = pd.read_csv(current_list)

In [None]:
# Paths of every processed per-birder list. `os.listdir` returns entries in
# arbitrary order, so sort them to process birders in a reproducible order.
csvs_to_examine = [
    os.path.join(csv_dir_processed, file)
    for file in sorted(os.listdir(csv_dir_processed))
    if file.endswith('.csv')
]

# Empty frame with a single 'scientific name' column. (Passing the set literal
# {'scientific name'} does not create a column: depending on the pandas version
# it either raises a TypeError or produces a one-cell frame holding the string.)
all_data = pd.DataFrame(columns=['scientific name'])

print(f'Total number of birders: {len(csvs_to_examine)}')

In [None]:
# Species that at least one contributor has reported
been_sighted = all_sightings.loc[all_sightings[name_of_seen_column] > 0]
print(f'Total size of cumulative list: {len(been_sighted)}')

# Species that exactly one contributor has reported
single_sightings = all_sightings.loc[all_sightings[name_of_seen_column] == 1]
print(f'Total number of unique contributions: {len(single_sightings)}')

In [None]:
print()
print()
individual_uniques = {}  # Maps each birder's name to their number of unique sightings
for csv in csvs_to_examine:
    # File names look like '<list name> - <birder>.csv'. Drop the directory and
    # only the final extension, so a birder name containing a period stays whole.
    birder = os.path.splitext(os.path.basename(csv))[0].split(' - ')[-1]
    print('Birder:', birder)

    their_data = pd.read_csv(csv)
    their_species = their_data.Species.values

    if len(their_species) == 0:
        # An empty list cannot contain any unique species
        uniques = []
    else:
        # Judge the naming format of the whole list from its first entry
        first_species = their_species[0]

        # 'Species' column of .csv is in "Common name - scientific name" format
        if ' - ' in first_species:
            name_column = 'scientific name'
            names = {name.split(' - ')[-1] for name in their_species}

        # 'Species' column of .csv is in English name only format
        elif first_species in all_sightings['English name'].values:
            name_column = 'English name'
            names = set(their_species)

        # 'Species' column of .csv is in scientific name only format
        elif first_species in all_sightings['scientific name'].values:
            name_column = 'scientific name'
            names = set(their_species)

        # Unknown format: report it instead of failing on a membership test
        # against a missing name list
        else:
            print(f'Could not recognize the species name format of {csv}; skipping')
            print()
            print()
            continue

        if name_column == 'scientific name':
            print('Using scientific')
        else:
            print('Using common')

        # Collect the rows of single_sightings whose name appears in this
        # birder's list; `names` is a set so each lookup is constant-time
        uniques = [
            single_sightings[single_sightings[name_column] == species].values
            for species in single_sightings[name_column]
            if species in names
        ]

    # Record how many species only this birder has seen
    individual_uniques[birder] = len(uniques)

    print(f'Species seen only by this birder: {len(uniques)}')
    for sighting in uniques:
        # Prints columns 1 and 0 of the first matching row -- presumably the
        # two name columns of the seen list; confirm against seen_list.csv
        print(f'{sighting[0][1]} ({sighting[0][0]})')

    print()
    print()

In [None]:
# Display the birder name -> count of species seen only by that birder mapping
individual_uniques