In [1]:
import pandas as pd
import matplotlib as pyplot
import seaborn as sns

In [2]:
# Load the raw observation records from the CSV export into a DataFrame.
df = pd.read_csv(filepath_or_buffer="mbird_data.csv")

In [3]:
# Parse the 'Date' column into pandas datetimes so date-based grouping and
# resampling work further down.
df = df.assign(Date=pd.to_datetime(df['Date']))

In [4]:
# Common names in the data set that are NOT birds (mammals, reptiles and
# amphibians). Spellings are kept exactly as written so they keep matching
# the values in the 'Common Name' column.
non_bird = [
    "Eastern Gray Squirrel",
    "Roof Rat",
    "Raccoon",
    "Muskrat",
    "Long-tailed Weasel",
    "Northwestern Garter Snake",
    "Bullfrog",
    "Red-eared Slider",
    "Coyote",
    "American Beaver",
    "Western Toad",
    "River Otter",
    "Painted Turtle",
    "Virginia Opossum",
    "Pacific Tree Frog",
    "Mule Deer",
    "Big Brown Bat",
    "Douglas Squirrel",
    "North American Deer Mouse",
    "Bobcat",
    "Eastern Cottontail",
    "White-tailed Deer",
    "Townsend's Mole",
    "Northern Flying Squirrel",
    "Townsend's Chipmonk",
    "Black-tailed Jack Rabbit",
    "Mink",
    "Long-tailed Vole",
    "Mountain Beaver",
    "Long-toed Salamander",
    "Coypu",
    "Black Bear",
    "Northwestern Salamander",
    "Little Brown Myotis",
    "Water Vole",
    "Northern Leopard Frog",
]

In [5]:
# Every distinct common name in the data that is not on the non-bird list,
# in order of first appearance.
bird = [name for name in df["Common Name"].unique() if name not in non_bird]

In [6]:
# Columns carried forward into the bird table.
columns_keep = [
    'Common Name',
    'Scientific Name',
    'Date',
    'Number',
    'Uncertain ID',
    'Uncountable',
]

# Keep only rows whose common name is in the bird list, then take an
# independent copy of the selected columns so later edits do not touch `df`.
birds_df = df.loc[df['Common Name'].isin(bird)]
bt = birds_df.loc[:, columns_keep].copy()

In [7]:
# Discard every row that has a missing value in any of the kept columns.
bt = bt.dropna(axis=0, how='any')

In [8]:
# Preview the first five rows of the cleaned bird table.
bt.head(n=5)

Unnamed: 0,Common Name,Scientific Name,Date,Number,Uncertain ID,Uncountable
0,Least Flycatcher,Empidonax minimus,1983-06-05,1.0,False,False
1,Canada Goose,Branta canadensis,1990-04-18,1.0,False,False
2,Gadwall,Mareca strepera,1990-04-18,0.0,False,False
3,Mallard,Anas platyrhynchos,1990-04-18,4.0,False,False
4,Green-winged Teal,Anas crecca,1990-04-18,10.0,False,False


In [74]:
# Small working sample: an independent copy of the first ten rows of `bt`,
# used to prototype the date handling below.
test_bt = bt.iloc[:10].copy()
test_bt

Unnamed: 0,Common Name,Scientific Name,Date,Number,Uncertain ID,Uncountable
0,Least Flycatcher,Empidonax minimus,1983-06-05,1.0,False,False
1,Canada Goose,Branta canadensis,1990-04-18,1.0,False,False
2,Gadwall,Mareca strepera,1990-04-18,0.0,False,False
3,Mallard,Anas platyrhynchos,1990-04-18,4.0,False,False
4,Green-winged Teal,Anas crecca,1990-04-18,10.0,False,False
5,Common Merganser,Mergus merganser,1990-04-18,10.0,False,False
6,Rock Pigeon,Columba livia,1990-04-18,2.0,False,False
7,American Coot,Fulica americana,1990-04-18,1.0,False,False
8,Killdeer,Charadrius vociferus,1990-04-18,1.0,False,False
9,Western Sandpiper,Calidris mauri,1990-04-18,0.0,True,False


For birds, it is important to know how far into the year an observation was made. Dates are often binned into week-long increments, because day-to-day variation is difficult to track when observations are typically made on a weekly basis; under this scheme the first week of the year is January 1st to 7th. Fifty-two 7-day weeks cover only 364 days, so there is an extra day in the year (two in a leap year), and some groups add it onto the last week of the year. Others, such as eBird (the biggest community science birding platform), instead bin by quarter month: each month is divided into 4 periods, the first 3 of which are 7 days long, with the last "week" holding 7, 8, 9, or 10 days.

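The week function in Python follows the ISO week date system, which means that some years have 52 weeks and some have 53. That doesn't make sense for this analysis, because the same week number would not cover the same calendar dates from year to year.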

In [86]:
# Quick check of the leap-year flag on a Timestamp for 2 January 2000.
pd.Timestamp(year=2000, month=1, day=2).is_leap_year

True

In [None]:
def time_tag(dt):
    """Return the start date of the quarter-month period that contains ``dt``.

    Each month is split into four periods: days 1-7, 8-14, 15-21, and 22
    through the end of the month (so the last period holds 7 to 10 days).
    The result is a timestamp at midnight on the first day of the period,
    which gives every observation in the same period an identical tag that
    can be used as a grouping key.

    Parameters
    ----------
    dt : pandas.Timestamp or anything ``pd.Timestamp`` accepts
        The observation date (a time-of-day component is ignored).

    Returns
    -------
    pandas.Timestamp
        Midnight on day 1, 8, 15 or 22 of ``dt``'s month, keeping ``dt``'s
        time zone if it has one. ``pd.NaT`` is returned for a missing date.
    """
    dt = pd.Timestamp(dt)
    if pd.isna(dt):
        # Missing dates have no period; propagate the missing marker.
        return pd.NaT

    # Zero-based period index; days 29-31 would give 4, so cap at 3 to fold
    # them into the final (longer) period of the month.
    period = min((dt.day - 1) // 7, 3)
    return dt.normalize().replace(day=7 * period + 1)

In [59]:
# Number of rows recorded on each date in the sample, stored in a
# 'spec_count' column (presumably one row per species per date; verify
# against the data).
test_spec_count = test_bt.groupby('Date').size().reset_index(name='spec_count')

In [38]:
# Display the per-date row counts built from `test_bt`.
test_spec_count

Unnamed: 0,Date,spec_count
0,1983-06-05,1
1,1990-04-18,9


In [63]:
# Attach each date's row count to every observation row for that date
# (inner join on the shared 'Date' column).
test_df = pd.merge(test_bt, test_spec_count, how='inner', on='Date')

In [69]:
# Move 'Date' into the index so the frame can be resampled by time.
test_df = test_df.set_index('Date')

In [70]:
# Preview the first five rows of the date-indexed sample.
test_df.head(n=5)

Unnamed: 0_level_0,Common Name,Scientific Name,Number,Uncertain ID,Uncountable,spec_count
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1983-06-05,Least Flycatcher,Empidonax minimus,1.0,False,False,1
1990-04-18,Canada Goose,Branta canadensis,1.0,False,False,9
1990-04-18,Gadwall,Mareca strepera,0.0,False,False,9
1990-04-18,Mallard,Anas platyrhynchos,4.0,False,False,9
1990-04-18,Green-winged Teal,Anas crecca,10.0,False,False,9


In [73]:
# Count the non-null 'spec_count' entries falling in each weekly ('W') bin,
# then turn the date index back into a regular column for display.
test_df.resample('W')['spec_count'].count().reset_index()

Unnamed: 0,Date,spec_count
0,1983-06-05,1
1,1983-06-12,0
2,1983-06-19,0
3,1983-06-26,0
4,1983-07-03,0
5,1983-07-10,0
6,1983-07-17,0
7,1983-07-24,0
8,1983-07-31,0
9,1983-08-07,0
