In [1]:
import csv
import pandas as pd
import numpy as np
from vincenty import vincenty
import datetime

# Import data

In [2]:
rough_df = pd.DataFrame.from_csv('MyEBirdData3.csv', index_col=None)

# Modify dataframe

Add location row

In [3]:
rough_df['Location'] = rough_df.apply(lambda row: (row['Latitude'], row['Longitude']), axis=1)

Remove "sp.", slashes, and hybrids

In [4]:
rough_df = rough_df.loc[
    ~(
        rough_df['Scientific Name'].str.contains('\.')
    ) &
    ~(
        (
            rough_df['Scientific Name'].str.contains('/')
        ) & 
        (
            ~rough_df['Common Name'].str.contains('\(') 
            # Required because some subspecific designations (i.e. those with
            # parentheses in the common name) have slashes in the scientific name
            # (and in the common name!)
            # This isn't perfect because of Traill's and Western species groups
            # (and potentially some others), which is taken care of later.
        )
    ) &
    ~(
        rough_df['Common Name'].str.contains('hybrid')
    )
]

Remove subspecific designations

In [5]:
def remove_subspp(string):
    # Common names sometimes have a parenthetical comment
    # about supspecific designation at the end of the 
    # species name. Returns the first part of the name
    # before the open parenthesis (with ' ' stripped off)
    return string.split('(')[0].strip()

rough_df['Common Name'] = rough_df['Common Name'].apply(remove_subspp)

In [6]:
# Remove those pesky species slashes like Pac-Slope/Cordy (Western)
rough_df = rough_df.loc[~rough_df['Common Name'].str.contains('/')]

Check out the common names we came up with

In [39]:
def show_unique(series):
    '''
    Return a list of the unique
    elements in a series
    '''
    
    all_elements = list(series)
    seen = set()
    seen_add = seen.add
    unique_elements = [x for x in all_elements if not (x in seen or seen_add(x))]

    return unique_elements

In [40]:
show_unique(rough_df['Common Name'])

['Black-bellied Whistling-Duck',
 'Fulvous Whistling-Duck',
 'Snow Goose',
 "Ross's Goose",
 'Greater White-fronted Goose',
 'Brant',
 'Cackling Goose',
 'Canada Goose',
 'Mute Swan',
 'Trumpeter Swan',
 'Tundra Swan',
 'Muscovy Duck',
 'Wood Duck',
 'Blue-winged Teal',
 'Cinnamon Teal',
 'Northern Shoveler',
 'Gadwall',
 'American Wigeon',
 'Mallard',
 'American Black Duck',
 'Mottled Duck',
 'Northern Pintail',
 'Green-winged Teal',
 'Canvasback',
 'Redhead',
 'Ring-necked Duck',
 'Greater Scaup',
 'Lesser Scaup',
 'Common Eider',
 'Harlequin Duck',
 'Surf Scoter',
 'White-winged Scoter',
 'Black Scoter',
 'Long-tailed Duck',
 'Bufflehead',
 'Common Goldeneye',
 "Barrow's Goldeneye",
 'Hooded Merganser',
 'Common Merganser',
 'Red-breasted Merganser',
 'Ruddy Duck',
 'Plain Chachalaca',
 'Northern Bobwhite',
 'Scaled Quail',
 'California Quail',
 "Gambel's Quail",
 'Montezuma Quail',
 'Ring-necked Pheasant',
 'Ruffed Grouse',
 'White-tailed Ptarmigan',
 'Dusky Grouse',
 'Sooty Grouse

Select the necessary columns

In [8]:
fine_df = rough_df[['Submission ID', 'Common Name', 'Count', 'Location', 'Date']]

Replace Xs with NaNs

In [9]:
fine_df.reset_index(drop=True, inplace=True) # Drop current index (don't incorporate into DF)
df = fine_df.replace(to_replace='X', value=1)
df['Count'] = pd.to_numeric(df['Count']) #Coerce 1s from strs to numeric
df['Date'] = pd.to_datetime(df['Date']).dt.date # Convert to datetime.date

# Identify points around centers

Create list of centers

In [41]:
# Create a list of all unique points to use for our centers
all_points = show_unique(df['Location'])
centers = all_points

Create a dictionary with keys=centers, values=list of all close points

In [11]:
radius_miles = 5

my_dict = {}
centers = all_points
for center in centers:
    close_points = []
    for point in all_points:
        distance = vincenty(center, point, miles=True)
        if distance < radius_miles:
            close_points.append(point)

    my_dict[center] = close_points

# For each center, create collapsed DF of sightings

Select locations based on list of points close to desired center

In [12]:
center_sightings = df[df['Location'].isin(my_dict[(26.137485899999998, -97.1735573)])]
center_sightings

Unnamed: 0,Submission ID,Common Name,Count,Location,Date
1,S37351021,Black-bellied Whistling-Duck,9,"(26.137485899999998, -97.1735573)",2017-06-03
2,S37374846,Black-bellied Whistling-Duck,4,"(26.140091199999997, -97.1748233)",2017-06-04
3,S37403270,Black-bellied Whistling-Duck,3,"(26.140091199999997, -97.1748233)",2017-06-05
483,S37351021,Blue-winged Teal,1,"(26.137485899999998, -97.1735573)",2017-06-03
857,S37351021,Mottled Duck,11,"(26.137485899999998, -97.1735573)",2017-06-03
858,S37374846,Mottled Duck,9,"(26.140091199999997, -97.1748233)",2017-06-04
859,S37403270,Mottled Duck,2,"(26.140091199999997, -97.1748233)",2017-06-05
955,S17105353,Redhead,2,"(26.0780372, -97.16895509999999)",2014-02-15
956,S17105484,Redhead,3,"(26.0780372, -97.16895509999999)",2014-02-15
957,S17105720,Redhead,3,"(26.0780372, -97.16895509999999)",2014-02-16


Calculate how many days there are between the desired date and the date of a sighting

In [13]:
def interval_without_year(date1, date2):
    '''
    Returns interval of days between two points regardless of year. 
    For instance, January 1, 2019 and January 4, 1999 are considered 
    3 days apart with this method, no matter which one is provided
    as day1 or day2.'''
    
    delta = date1-date2
    way1 = delta.days % 365
    way2 = 365-way1
    
    if way1 < way2: return way1
    else: return way2


In [14]:
desired_date=datetime.date(year=2019,month=6,day=4)
center_sightings['Interval'] = center_sightings['Date'].apply(interval_without_year, date2=desired_date)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Select only sightings within the desired date range

In [15]:
interval = 7
selected_sightings = center_sightings.loc[center_sightings['Interval'] <= interval]

# Create score

Find number of checklists total

In [56]:
len(checklists)

12

In [44]:
checklists = show_unique(center_sightings['Submission ID'])

Remove species that are already on life list (important that this comes AFTER the checklist total)

In [47]:
with open('lifelist.csv') as f:
    reader = csv.reader(f)
    for line in reader:
        life_list.append(line[0].lower())

#center_sightings = center_sightings.reset_index() #Make common names searchable
missing_sp = center_sightings[~center_sightings['Common Name'].str.lower().isin(life_list)]

For each species, find the number of checklists it was reported on

In [69]:
missing_sp

Unnamed: 0,Submission ID,Common Name,Count,Location,Date,Interval
483,S37351021,Blue-winged Teal,1,"(26.137485899999998, -97.1735573)",2017-06-03,1
857,S37351021,Mottled Duck,11,"(26.137485899999998, -97.1735573)",2017-06-03,1
858,S37374846,Mottled Duck,9,"(26.140091199999997, -97.1748233)",2017-06-04,0
859,S37403270,Mottled Duck,2,"(26.140091199999997, -97.1748233)",2017-06-05,1
955,S17105353,Redhead,2,"(26.0780372, -97.16895509999999)",2014-02-15,110
956,S17105484,Redhead,3,"(26.0780372, -97.16895509999999)",2014-02-15,110
957,S17105720,Redhead,3,"(26.0780372, -97.16895509999999)",2014-02-16,109
1268,S17105484,Red-breasted Merganser,1,"(26.0780372, -97.16895509999999)",2014-02-15,110
1269,S37374846,Red-breasted Merganser,1,"(26.140091199999997, -97.1748233)",2017-06-04,0
1501,S37351021,Pied-billed Grebe,3,"(26.137485899999998, -97.1735573)",2017-06-03,1


In [65]:
missing_sp['Common Name'].value_counts()

Brown Pelican                 8
Willet                        7
Laughing Gull                 6
Great Blue Heron              5
Reddish Egret                 4
Black-necked Stilt            4
Least Tern                    4
White Ibis                    4
Black-crowned Night-Heron     4
Red-winged Blackbird          4
Snowy Egret                   4
Killdeer                      4
Tricolored Heron              4
Great-tailed Grackle          4
Clapper Rail                  3
Neotropic Cormorant           3
Wilson's Plover               3
White-winged Dove             3
Mottled Duck                  3
Sandwich Tern                 3
Eurasian Collared-Dove        3
Common Gallinule              3
Barn Swallow                  3
Mourning Dove                 3
Osprey                        3
Black Skimmer                 3
Black-bellied Plover          3
Redhead                       3
House Sparrow                 3
Great Egret                   3
                             ..
Common N

In [61]:
sum(missing_sp['Common Name'].value_counts())

172

Make dict of (center, score) : species/percentage dataframe

Sort dict by key[1]

In [16]:
pivoted_df = pivoted_df = selected_sightings.pivot_table(values='Count',
                   columns=['Common Name'],
                   aggfunc=np.sum)

For all centers:
- create this table and combine it:
number of checklists, altamira oriole, american coot, bank swallow, ...,

In [28]:
pivoted_df.set_index('Common Name').T

Common Name,Altamira Oriole,American Coot,Bank Swallow,Barn Swallow,Black Skimmer,Black-bellied Plover,Black-bellied Whistling-Duck,Black-chinned Hummingbird,Black-crowned Night-Heron,Black-necked Stilt,...,Semipalmated Plover,Snowy Egret,Tricolored Heron,White Ibis,White-faced Ibis,White-winged Dove,Willet,Wilson's Plover,Yellow-billed Cuckoo,Yellow-crowned Night-Heron
Count,1,12,1,13,19,1,16,1,3,12,...,20,1,17,19,44,3,27,10,1,1


In [70]:
import collections

In [76]:
my_dict = dict({'center1':1, 'center2':100000, 'center3':5})

In [77]:
collections.OrderedDict(sorted(my_dict.values()))

TypeError: 'int' object is not iterable

# Add statistics about species and center

# Create list of species not on life list

Read species from .csv

In [20]:
import csv
life_list = []

Remove species that are already on life list

Unnamed: 0,Common Name,Count
2,Bank Swallow,1
3,Barn Swallow,13
4,Black Skimmer,19
5,Black-bellied Plover,1
6,Black-bellied Whistling-Duck,16
7,Black-chinned Hummingbird,1
8,Black-crowned Night-Heron,3
9,Black-necked Stilt,12
10,Blue-winged Teal,1
11,Bronzed Cowbird,1


# TODO: add % of time observed for each species, and add # of checklists for each location?