In [1]:
import csv
import pandas as pd
import numpy as np
from vincenty import vincenty

# Import our data

Load the eBird CSV export and keep only the columns we need.

In [2]:
# pd.DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0.
# pd.read_csv is the supported reader and uses no index column by default.
rough_df = pd.read_csv('MyEBirdData3.csv')
# Pair each row's coordinates into a single hashable (latitude, longitude) tuple.
rough_df['Location'] = list(zip(rough_df['Latitude'], rough_df['Longitude']))
# .copy() makes fine_df an independent frame, so later column assignments
# do not act on a view of rough_df (avoids SettingWithCopyWarning).
fine_df = rough_df[['Submission ID', 'Common Name', 'Count', 'Location', 'Date']].copy()
fine_df

Unnamed: 0,Submission ID,Common Name,Count,Location,Date
0,S37335810,Black-bellied Whistling-Duck,3,"(26.022082, -97.510919)",06-02-2017
1,S37351021,Black-bellied Whistling-Duck,9,"(26.137485899999998, -97.1735573)",06-03-2017
2,S37374846,Black-bellied Whistling-Duck,4,"(26.140091199999997, -97.1748233)",06-04-2017
3,S37403270,Black-bellied Whistling-Duck,3,"(26.140091199999997, -97.1748233)",06-05-2017
4,S32402100,Black-bellied Whistling-Duck,8,"(26.187509, -97.689038)",11-04-2016
5,S39452117,Black-bellied Whistling-Duck,5,"(30.409033, -91.1761025)",09-29-2017
6,S39205747,Black-bellied Whistling-Duck,17,"(30.408501, -91.172635)",09-16-2017
7,S39436325,Black-bellied Whistling-Duck,16,"(30.408501, -91.172635)",09-28-2017
8,S40010884,Black-bellied Whistling-Duck,2,"(30.408501, -91.172635)",10-19-2017
9,S32382837,Black-bellied Whistling-Duck,52,"(26.126030399999998, -97.9559173)",11-05-2016


# Modify dataframe

## Small test

### Create a small test DF

In [3]:
# Pick a handful of rows by label, then renumber them from 0.
# drop=True discards the old labels instead of keeping them as a column.
sample_rows = [0, 1, 25, 500, 501]
small_df = fine_df.loc[sample_rows].reset_index(drop=True)
small_df

Unnamed: 0,Submission ID,Common Name,Count,Location,Date
0,S37335810,Black-bellied Whistling-Duck,3,"(26.022082, -97.510919)",06-02-2017
1,S37351021,Black-bellied Whistling-Duck,9,"(26.137485899999998, -97.1735573)",06-03-2017
2,S33643267,Snow Goose,8000,"(39.25133710000001, -94.4367209)",01-13-2017
3,S18697501,Blue-winged Teal,X,"(40.068893, -95.23534000000001)",05-17-2014
4,S22478416,Blue-winged Teal,200,"(40.068893, -95.23534000000001)",03-22-2015


### Replace "X"s with 1s

In [31]:
# Swap every 'X' placeholder for 1, then parse the whole Count column
# (which was read as strings) into numbers.
no_x_df = (
    small_df
    .replace('X', 1)
    .assign(Count=lambda frame: pd.to_numeric(frame['Count']))
)
no_x_df

Unnamed: 0,Submission ID,Common Name,Count,Location,Date
0,S37335810,Black-bellied Whistling-Duck,3,"(26.022082, -97.510919)",06-02-2017
1,S37351021,Black-bellied Whistling-Duck,9,"(26.137485899999998, -97.1735573)",06-03-2017
2,S33643267,Snow Goose,8000,"(39.25133710000001, -94.4367209)",01-13-2017
3,S18697501,Blue-winged Teal,1,"(40.068893, -95.23534000000001)",05-17-2014
4,S22478416,Blue-winged Teal,200,"(40.068893, -95.23534000000001)",03-22-2015


## Full-scale

In [5]:
# Same clean-up as the small test, applied to the full frame:
# renumber rows from 0, turn 'X' placeholders into 1, make Count numeric.
fine_df = (
    fine_df
    .reset_index(drop=True)
    .replace('X', 1)
    .assign(Count=lambda frame: pd.to_numeric(frame['Count']))
)
fine_df

Unnamed: 0,Submission ID,Common Name,Count,Location,Date
0,S37335810,Black-bellied Whistling-Duck,3,"(26.022082, -97.510919)",06-02-2017
1,S37351021,Black-bellied Whistling-Duck,9,"(26.137485899999998, -97.1735573)",06-03-2017
2,S37374846,Black-bellied Whistling-Duck,4,"(26.140091199999997, -97.1748233)",06-04-2017
3,S37403270,Black-bellied Whistling-Duck,3,"(26.140091199999997, -97.1748233)",06-05-2017
4,S32402100,Black-bellied Whistling-Duck,8,"(26.187509, -97.689038)",11-04-2016
5,S39452117,Black-bellied Whistling-Duck,5,"(30.409033, -91.1761025)",09-29-2017
6,S39205747,Black-bellied Whistling-Duck,17,"(30.408501, -91.172635)",09-16-2017
7,S39436325,Black-bellied Whistling-Duck,16,"(30.408501, -91.172635)",09-28-2017
8,S40010884,Black-bellied Whistling-Duck,2,"(30.408501, -91.172635)",10-19-2017
9,S32382837,Black-bellied Whistling-Duck,52,"(26.126030399999998, -97.9559173)",11-05-2016


# Create a pivot table

## Small example

In [6]:
# Total the counts for each (species, location, date) combination.
# aggfunc is passed as the string 'sum': handing pandas the np.sum callable
# is deprecated (FutureWarning since pandas 2.1) and 'sum' is the documented
# way to keep the same aggregation.
pivoted_small = no_x_df.pivot_table(values='Count',
                   columns=['Common Name', 'Location', 'Date'],
                   aggfunc='sum')
pivoted_small

Common Name                   Location                           Date      
Black-bellied Whistling-Duck  (26.022082, -97.510919)            06-02-2017       3
                              (26.137485899999998, -97.1735573)  06-03-2017       9
Blue-winged Teal              (40.068893, -95.23534000000001)    03-22-2015     200
                                                                 05-17-2014       1
Snow Goose                    (39.25133710000001, -94.4367209)   01-13-2017    8000
dtype: int64

## Full scale

In [7]:
# Total the counts for each (species, location, date) combination across
# the full data set. The string 'sum' replaces the np.sum callable, which
# pandas deprecated as an aggfunc (FutureWarning since pandas 2.1).
pivoted_df = fine_df.pivot_table(values='Count',
                   columns=['Common Name', 'Location', 'Date'],
                   aggfunc='sum')
pivoted_df

Common Name                  Location                                   Date      
Abert's Towhee               (31.4724011, -110.9867749)                 08-08-2014     1
                             (31.5080086, -110.80229979999999)          08-08-2014     1
                             (31.528000000000002, -110.775)             08-08-2014     3
                             (31.5390131, -110.7606337)                 08-08-2014     2
                             (31.547899199999996, -110.14089969999999)  08-06-2014     8
                             (31.87029, -109.0351)                      08-02-2014     1
                             (32.1812, -110.932)                        08-09-2014     1
                             (32.2806015, -110.7303009)                 08-01-2014     1
                             (32.283798, -110.738203)                   08-01-2014     1
                             (33.3614502, -111.7339478)                 03-23-2017     7
                           

# Identify points around centers

In [8]:
# Collect every distinct location, in order of first appearance, to use as centers.
# dict keys are unique and keep insertion order, so this de-duplicates
# without reordering.
locs = list(fine_df['Location'])
all_points = list(dict.fromkeys(locs))

## Small-scale test

In [9]:
# Use the first unique location as the center for the small-scale check.
test_center = all_points[0]
test_center

(26.022082, -97.510919)

In [34]:
radius_miles = 5

# Keep every known location whose distance from test_center (in miles)
# is strictly below the radius.
my_points = [
    candidate
    for candidate in all_points
    if vincenty(test_center, candidate, miles=True) < radius_miles
]

my_points

[(26.022082, -97.510919),
 (25.9859009, -97.56199649999999),
 (26.071054500000002, -97.4693613)]

## For all points

Create a dictionary with key=center, value=list of all close points

In [19]:
radius_miles = 5

# Map each center to the locations strictly within radius_miles of it.
# Every location is also used as a center, so this compares all pairs.
centers = all_points
my_dict = {}
for center in centers:
    # The name close_points is kept deliberately: a later cell reads the
    # value left over from the final iteration of this loop.
    close_points = [
        candidate
        for candidate in all_points
        if vincenty(center, candidate, miles=True) < radius_miles
    ]
    my_dict[center] = close_points

[(39.2306038, -94.3923202), (39.25133710000001, -94.4367209), (39.283559600000004, -94.4340563), (39.249902299999995, -94.39508909999999), (39.240491999999996, -94.45496240000001), (39.251194, -94.434709), (39.22660820000001, -94.3878682), (39.2645338, -94.4554871), (39.20927929999999, -94.4452572), (39.23322720000001, -94.3838025), (39.249292700000005, -94.4343324), (39.2308511, -94.38752629999999), (39.23351629999999, -94.38315870000001), (39.245025, -94.451484), (39.244521999999996, -94.486933), (39.2119724, -94.5527458), (39.247688000000004, -94.434405), (39.262057, -94.438191), (39.254134, -94.43710300000001), (39.225928, -94.468834), (39.168200399999996, -94.5050383), (39.263837, -94.445541), (39.251436, -94.441424), (39.251439000000005, -94.43744000000001), (39.2499701, -94.4389767), (39.241769, -94.456953), (39.247762, -94.44601999999999), (39.255282, -94.442065)]


# Create collapsed DF of sightings for each center

## A small example

In [32]:
# Index the small frame by Location so rows can be selected by coordinate tuple.
# Guarded so that re-running this cell is a no-op: after the first run
# 'Location' is the index rather than a column, and an unguarded
# set_index('Location') would raise KeyError.
if 'Location' in no_x_df.columns:
    no_x_df = no_x_df.set_index('Location')
no_x_df

Unnamed: 0_level_0,Submission ID,Common Name,Count,Date
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"(26.022082, -97.510919)",S37335810,Black-bellied Whistling-Duck,3,06-02-2017
"(26.137485899999998, -97.1735573)",S37351021,Black-bellied Whistling-Duck,9,06-03-2017
"(39.25133710000001, -94.4367209)",S33643267,Snow Goose,8000,01-13-2017
"(40.068893, -95.23534000000001)",S18697501,Blue-winged Teal,1,05-17-2014
"(40.068893, -95.23534000000001)",S22478416,Blue-winged Teal,200,03-22-2015


In [36]:
# Select every sighting recorded at one of the points near test_center.
# Looking rows up by coordinate tuple needs a Location index, but fine_df
# only gets one in a cell further down; index it here when necessary so the
# notebook also works when run top to bottom on a fresh kernel.
by_location = fine_df if fine_df.index.name == 'Location' else fine_df.set_index('Location')
close_df = by_location.loc[my_points]
close_df

Unnamed: 0_level_0,Submission ID,Common Name,Count,Date
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"(26.022082, -97.510919)",S37335810,Black-bellied Whistling-Duck,3,06-02-2017
"(26.022082, -97.510919)",S37335810,nighthawk sp.,2,06-02-2017
"(26.022082, -97.510919)",S37335810,Tropical/Couch's Kingbird,1,06-02-2017
"(26.022082, -97.510919)",S37335810,Scissor-tailed Flycatcher,1,06-02-2017
"(25.9859009, -97.56199649999999)",S32400061,Greater White-fronted Goose,8,11-06-2016
"(25.9859009, -97.56199649999999)",S32400061,Blue-winged Teal,6,11-06-2016
"(25.9859009, -97.56199649999999)",S32400061,Gadwall,3,11-06-2016
"(25.9859009, -97.56199649999999)",S32400061,Mottled Duck,4,11-06-2016
"(25.9859009, -97.56199649999999)",S32400061,Plain Chachalaca,12,11-06-2016
"(25.9859009, -97.56199649999999)",S32400061,Least Grebe,14,11-06-2016


In [11]:

# Index the full frame by Location so sightings can be looked up by coordinate tuple.
# Guarded so that re-running this cell does not raise KeyError once
# 'Location' has already been moved from the columns into the index.
if 'Location' in fine_df.columns:
    fine_df = fine_df.set_index('Location')
# close_points is whatever the center loop assigned last, i.e. the
# neighbours of the final center it visited.
close_df = fine_df.loc[close_points]
fine_df

Unnamed: 0_level_0,Submission ID,Common Name,Count,Date
Location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"(26.022082, -97.510919)",S37335810,Black-bellied Whistling-Duck,3,06-02-2017
"(26.137485899999998, -97.1735573)",S37351021,Black-bellied Whistling-Duck,9,06-03-2017
"(26.140091199999997, -97.1748233)",S37374846,Black-bellied Whistling-Duck,4,06-04-2017
"(26.140091199999997, -97.1748233)",S37403270,Black-bellied Whistling-Duck,3,06-05-2017
"(26.187509, -97.689038)",S32402100,Black-bellied Whistling-Duck,8,11-04-2016
"(30.409033, -91.1761025)",S39452117,Black-bellied Whistling-Duck,5,09-29-2017
"(30.408501, -91.172635)",S39205747,Black-bellied Whistling-Duck,17,09-16-2017
"(30.408501, -91.172635)",S39436325,Black-bellied Whistling-Duck,16,09-28-2017
"(30.408501, -91.172635)",S40010884,Black-bellied Whistling-Duck,2,10-19-2017
"(26.126030399999998, -97.9559173)",S32382837,Black-bellied Whistling-Duck,52,11-05-2016


In [12]:
# One row per checklist (Submission ID), one column per species, cells hold
# the summed count; species absent from a checklist come out as NaN.
# The string 'sum' replaces the np.sum callable, which pandas deprecated as
# an aggfunc (FutureWarning since pandas 2.1).
pivoted_close_df = close_df.pivot_table(values='Count',
                   index=['Submission ID'],
                   columns='Common Name',
                   aggfunc='sum')
pivoted_close_df

Common Name,Altamira Oriole,American Pipit,American White Pelican,Barn Swallow,Bewick's Wren,Black Vulture,Black-bellied Whistling-Duck,Black-crested Titmouse,Blue-gray Gnatcatcher,Blue-winged Teal,...,Tropical/Couch's Kingbird,Turkey Vulture,Verdin,White Ibis,White-eyed Vireo,White-tailed Kite,White-tipped Dove,White-winged Dove,Wilson's Warbler,nighthawk sp.
Submission ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
S17106156,,,,,,,,,,,...,,,,,,,,,,
S17106199,,,,,,,,,,,...,,,,,,,,,,
S32400061,2.0,2.0,43.0,2.0,2.0,5.0,,2.0,7.0,6.0,...,,7.0,1.0,33.0,3.0,2.0,4.0,2.0,1.0,
S37335810,,,,,,,3.0,,,,...,1.0,,,,,,,,,2.0


# Create collapsed DF

Create DF containing only the found locations

Collapse new DF into percentages

In [13]:
import csv

def main(
        source_file,
        seen_file,
        percentage,
        number_weighted,
        start_month,
        start_week,
        radius_size,
        end_month,
        end_week):
    '''
    Work-in-progress entry point for the collapsed-sightings pipeline.

    At present it only parses source_file; the remaining steps are
    outlined as TODO comments and nothing is returned.

    Inputs:
        source_file: data for all sightings from location
        seen_file: data for birds already seen
        percentage: lowest allowable seen percentage on checklists
        number_weighted: should it be weighted by number of individuals seen
        start_month, start_week, radius_size, end_month, end_week:
            accepted but not used yet
    '''

    # source_file includes but not limited to:
    # submission_id, common_name, count, lat, long, date
    # newline='' is what the csv module documents for files handed to a
    # reader, so quoted fields with embedded newlines are parsed correctly.
    with open(source_file, 'r', newline='') as s:
        # The csv module has no `read` function (the old call raised
        # AttributeError); csv.reader is the row parser. It is lazy, so the
        # rows are materialised while the file is still open.
        source_rows = list(csv.reader(s))  # TODO: feed into the DF built below

    # Create DF from source_file:
    # lat, long, checklist_id, sp1, sp2, sp3, ..., spX

    # Identify centers

    # For each center:
        # Create new DF including only locations within radius of that center

        # Collapse new DF into percentages
