In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as pl
from datetime import time
%matplotlib inline

In [2]:
# Load the raw ruru observation export (path is relative to the notebook's working directory)
obs = pd.read_csv('data/ruru_obs.csv')

In [3]:
obs.head()

Unnamed: 0,id,observed_on,time_observed_at,user_id,description,place_guess,latitude,longitude,field:is this observation part of your 1 hour survey?,field:ruru call type,field:direction to call,field:repeat observation (animal)
0,191655352,2023-11-21,2023-11-21 08:10:47 UTC,3612063,Poneke gives a hoot project,"Khandallah Park, Wellington, Wellington, NZ",-41.24165,174.787957,yes,,,
1,191655600,2023-11-21,2023-11-21 08:10:27 UTC,117691,,"Khandallah, Wellington 6035, New Zealand",-41.241592,174.787875,yes,,,
2,191655883,2023-11-21,2023-11-21 08:27:05 UTC,3612063,,"Khandallah Park, Wellington, Wellington, NZ",-41.241673,174.787947,yes,,,
3,191656729,2023-11-21,2023-11-21 08:48:51 UTC,117691,,"Khandallah, Wellington 6035, New Zealand",-41.241593,174.787876,yes,,,
4,191656804,2023-11-21,2023-11-21 08:51:45 UTC,7614482,"Heard more pork call\nFar away, direction hard...","Station Road (near 7), Khandallah, Wellington ...",-41.241913,174.793068,yes,Morepork,N,yes


In [4]:
# Swap the long "field:..." headers for short snake_case column names
short_names = {
    'field:is this observation part of your 1 hour survey?': 'survey_observation',
    'field:ruru call type': 'call_type',
    'field:direction to call': 'direction_to_call',
    'field:repeat observation (animal)': 'repeat_observation',
}
obs = obs.rename(columns=short_names)

In [5]:
obs.dtypes

id                      int64
observed_on            object
time_observed_at       object
user_id                 int64
description            object
place_guess            object
latitude              float64
longitude             float64
survey_observation     object
call_type              object
direction_to_call      object
repeat_observation     object
dtype: object

In [6]:
# columns with nan values
obs.columns[obs.isna().any()]

Index(['time_observed_at', 'description', 'survey_observation', 'call_type',
       'direction_to_call', 'repeat_observation'],
      dtype='object')

In [7]:
# Parse the timestamp strings into timezone-aware datetimes.
# errors='coerce' turns unparseable or missing values into NaT (Not a Time) instead of raising;
# utc=True guarantees a tz-aware UTC result, which the later .dt.tz_convert() call requires.
obs['time_observed_at'] = pd.to_datetime(obs['time_observed_at'], errors='coerce', utc=True)

In [8]:
obs

Unnamed: 0,id,observed_on,time_observed_at,user_id,description,place_guess,latitude,longitude,survey_observation,call_type,direction_to_call,repeat_observation
0,191655352,2023-11-21,2023-11-21 08:10:47+00:00,3612063,Poneke gives a hoot project,"Khandallah Park, Wellington, Wellington, NZ",-41.24165,174.787957,yes,,,
1,191655600,2023-11-21,2023-11-21 08:10:27+00:00,117691,,"Khandallah, Wellington 6035, New Zealand",-41.241592,174.787875,yes,,,
2,191655883,2023-11-21,2023-11-21 08:27:05+00:00,3612063,,"Khandallah Park, Wellington, Wellington, NZ",-41.241673,174.787947,yes,,,
3,191656729,2023-11-21,2023-11-21 08:48:51+00:00,117691,,"Khandallah, Wellington 6035, New Zealand",-41.241593,174.787876,yes,,,
4,191656804,2023-11-21,2023-11-21 08:51:45+00:00,7614482,"Heard more pork call\nFar away, direction hard...","Station Road (near 7), Khandallah, Wellington ...",-41.241913,174.793068,yes,Morepork,N,yes
5,191743943,2023-11-22,2023-11-22 08:07:00+00:00,57754,calling in the distance,"Roseneath, Wellington, New Zealand",-41.294902,174.798935,no,,,
6,192178950,2023-11-26,2023-11-26 09:02:00+00:00,762458,X 2 individual birds,"Te Ika-a-Māui/North Island, Paraparaumu, Welli...",-40.921486,175.005007,no,Morepork,Not recorded,
7,192534631,2023-11-29,2023-11-29 09:38:53+00:00,6686,,"Te Ika-a-Māui/North Island, Wellington, Wellin...",-41.264182,174.764985,,Morepork,W,no
8,193366756,2023-12-08,2023-12-08 08:22:26+00:00,7636785,,"Huntleigh Park, Wellington, Wellington, NZ",-41.253368,174.764102,yes,Morepork,W,no
9,193367043,2023-12-08,2023-12-08 08:29:42+00:00,57754,,"Thane Road at Robieson Street (near 17), Rosen...",-41.294687,174.799116,no,,,


In [9]:
# convert UTC timezone to NZ timezone
# ('Pacific/Auckland'), so the clock times extracted afterwards are local wall-clock times
obs['time_observed_at'] = obs['time_observed_at'].dt.tz_convert('Pacific/Auckland')

In [10]:
# keep only time
# (the column now holds datetime.time objects; the calendar date stays in 'observed_on')
obs['time_observed_at'] = obs['time_observed_at'].dt.time

In [11]:
obs

Unnamed: 0,id,observed_on,time_observed_at,user_id,description,place_guess,latitude,longitude,survey_observation,call_type,direction_to_call,repeat_observation
0,191655352,2023-11-21,21:10:47,3612063,Poneke gives a hoot project,"Khandallah Park, Wellington, Wellington, NZ",-41.24165,174.787957,yes,,,
1,191655600,2023-11-21,21:10:27,117691,,"Khandallah, Wellington 6035, New Zealand",-41.241592,174.787875,yes,,,
2,191655883,2023-11-21,21:27:05,3612063,,"Khandallah Park, Wellington, Wellington, NZ",-41.241673,174.787947,yes,,,
3,191656729,2023-11-21,21:48:51,117691,,"Khandallah, Wellington 6035, New Zealand",-41.241593,174.787876,yes,,,
4,191656804,2023-11-21,21:51:45,7614482,"Heard more pork call\nFar away, direction hard...","Station Road (near 7), Khandallah, Wellington ...",-41.241913,174.793068,yes,Morepork,N,yes
5,191743943,2023-11-22,21:07:00,57754,calling in the distance,"Roseneath, Wellington, New Zealand",-41.294902,174.798935,no,,,
6,192178950,2023-11-26,22:02:00,762458,X 2 individual birds,"Te Ika-a-Māui/North Island, Paraparaumu, Welli...",-40.921486,175.005007,no,Morepork,Not recorded,
7,192534631,2023-11-29,22:38:53,6686,,"Te Ika-a-Māui/North Island, Wellington, Wellin...",-41.264182,174.764985,,Morepork,W,no
8,193366756,2023-12-08,21:22:26,7636785,,"Huntleigh Park, Wellington, Wellington, NZ",-41.253368,174.764102,yes,Morepork,W,no
9,193367043,2023-12-08,21:29:42,57754,,"Thane Road at Robieson Street (near 17), Rosen...",-41.294687,174.799116,no,,,


In [12]:
# Rows whose observation time is missing (NaT after the coerced parse above)
nan_indices = obs['time_observed_at'].isna()

# Impute the missing times with random offsets inside the 9pm-10pm survey hour
# (21:00:00 to 21:59:59, at one-second resolution). A fixed seed keeps the imputed values,
# and therefore the exported CSVs, identical on every Restart & Run All.
# NOTE(review): imputed times always land inside the survey window, so rows without a
# recorded time can never be classified as outside 9-10pm -- confirm this is intended.
rng = np.random.default_rng(42)
random_times = pd.to_timedelta(
    rng.integers(21 * 60 * 60, 22 * 60 * 60, size=int(nan_indices.sum())),
    unit='s',
)

In [13]:
# Fill NaN values with random times
# (only rows flagged in nan_indices are written; the values are Timedelta offsets,
# which the following cells turn back into clock times)
obs.loc[nan_indices, 'time_observed_at'] = random_times

In [14]:
# Normalise every entry to a clock-time string. str() of a time object is already a single
# token, while str() of an imputed Timedelta looks like '0 days 21:15:30', so taking the last
# whitespace-separated token yields the clock part in both cases.
obs['time_observed_at'] = [str(entry).split()[-1] for entry in obs['time_observed_at']]

In [15]:
# Convert 'observed_on' and 'time_observed_at' to datetime format
# ('observed_on' is parsed from 'YYYY-MM-DD' text into datetimes; 'time_observed_at' is parsed
# from its 'HH:MM:SS' strings and then reduced to datetime.time objects via .dt.time)
obs['observed_on'] = pd.to_datetime(obs['observed_on'], format='%Y-%m-%d')
obs['time_observed_at'] = pd.to_datetime(obs['time_observed_at'], format='%H:%M:%S').dt.time

In [16]:
# Derive temporary year / month / day columns from the parsed observation date
observed = obs['observed_on']
obs = obs.assign(
    year=observed.dt.year,
    month=observed.dt.month,
    day=observed.dt.day,
)

In [17]:
# Sort the DataFrame chronologically: by year, month, and day, then by time of day
# within each date, all in ascending order
obs = obs.sort_values(by=['year', 'month', 'day', 'time_observed_at'], ascending=True)

In [18]:
# convert to MM/DD/YYYY format
# (strftime returns strings, so 'observed_on' holds text rather than datetimes after this cell)
obs['observed_on'] = obs['observed_on'].dt.strftime('%m/%d/%Y')

In [19]:
# Drop the intermediate year / month / day columns
obs = obs.drop(columns=['year', 'month', 'day'])

In [20]:
obs

Unnamed: 0,id,observed_on,time_observed_at,user_id,description,place_guess,latitude,longitude,survey_observation,call_type,direction_to_call,repeat_observation
14,193639074,05/18/2023,07:20:00,3785171,,"Te Ika-a-Māui/North Island, Wellington, Wellin...",-41.31272,174.791891,no,Not recorded,Not recorded,yes
1,191655600,11/21/2023,21:10:27,117691,,"Khandallah, Wellington 6035, New Zealand",-41.241592,174.787875,yes,,,
0,191655352,11/21/2023,21:10:47,3612063,Poneke gives a hoot project,"Khandallah Park, Wellington, Wellington, NZ",-41.24165,174.787957,yes,,,
2,191655883,11/21/2023,21:27:05,3612063,,"Khandallah Park, Wellington, Wellington, NZ",-41.241673,174.787947,yes,,,
3,191656729,11/21/2023,21:48:51,117691,,"Khandallah, Wellington 6035, New Zealand",-41.241593,174.787876,yes,,,
4,191656804,11/21/2023,21:51:45,7614482,"Heard more pork call\nFar away, direction hard...","Station Road (near 7), Khandallah, Wellington ...",-41.241913,174.793068,yes,Morepork,N,yes
5,191743943,11/22/2023,21:07:00,57754,calling in the distance,"Roseneath, Wellington, New Zealand",-41.294902,174.798935,no,,,
6,192178950,11/26/2023,22:02:00,762458,X 2 individual birds,"Te Ika-a-Māui/North Island, Paraparaumu, Welli...",-40.921486,175.005007,no,Morepork,Not recorded,
7,192534631,11/29/2023,22:38:53,6686,,"Te Ika-a-Māui/North Island, Wellington, Wellin...",-41.264182,174.764985,,Morepork,W,no
8,193366756,12/08/2023,21:22:26,7636785,,"Huntleigh Park, Wellington, Wellington, NZ",-41.253368,174.764102,yes,Morepork,W,no


In [21]:
# Re-parse the times into datetime.time objects, then keep only rows that are flagged as
# survey observations ("yes") and fall within 9-10pm (both ends inclusive)
parsed_times = pd.to_datetime(obs['time_observed_at'], format='%H:%M:%S')
obs['time_observed_at'] = parsed_times.dt.time

window_start, window_end = time(21, 0), time(22, 0)
within_window = obs['time_observed_at'].map(lambda t: window_start <= t <= window_end)
marked_survey = obs['survey_observation'].eq('yes')
surveyed_obs = obs.loc[within_window & marked_survey]

In [22]:
surveyed_obs

Unnamed: 0,id,observed_on,time_observed_at,user_id,description,place_guess,latitude,longitude,survey_observation,call_type,direction_to_call,repeat_observation
1,191655600,11/21/2023,21:10:27,117691,,"Khandallah, Wellington 6035, New Zealand",-41.241592,174.787875,yes,,,
0,191655352,11/21/2023,21:10:47,3612063,Poneke gives a hoot project,"Khandallah Park, Wellington, Wellington, NZ",-41.24165,174.787957,yes,,,
2,191655883,11/21/2023,21:27:05,3612063,,"Khandallah Park, Wellington, Wellington, NZ",-41.241673,174.787947,yes,,,
3,191656729,11/21/2023,21:48:51,117691,,"Khandallah, Wellington 6035, New Zealand",-41.241593,174.787876,yes,,,
4,191656804,11/21/2023,21:51:45,7614482,"Heard more pork call\nFar away, direction hard...","Station Road (near 7), Khandallah, Wellington ...",-41.241913,174.793068,yes,Morepork,N,yes
8,193366756,12/08/2023,21:22:26,7636785,,"Huntleigh Park, Wellington, Wellington, NZ",-41.253368,174.764102,yes,Morepork,W,no
10,193367376,12/08/2023,21:33:00,7636785,,"Huntleigh Park, Wellington, Wellington, NZ",-41.25335,174.764131,yes,Morepork,S,yes
16,194318735,12/18/2023,21:25:00,3785171,,"Hataitai Park, Wellington, Wellington, NZ",-41.309925,174.789332,yes,Morepork,E,yes
17,194343572,12/19/2023,21:46:57,605867,Albemarle Reserve,"Northland, Wellington 6012, New Zealand",-41.27675,174.757751,yes,Not recorded,Not recorded,yes
18,194416519,12/20/2023,21:37:02,5866007,Observed call 37 minutes into listening hour f...,"Khandallah, Wellington 6035, New Zealand",-41.252639,174.800385,yes,Morepork,N,no


In [23]:
print(len(obs), len(surveyed_obs))

54 27


In [24]:
# export cleaned data
# (the filtered survey observations only; index=False leaves the row index out of the file)
surveyed_obs.to_csv('data/surveyed_obs_cleaned.csv', index=False)

In [25]:
# Export the full cleaned and sorted observation table as well, again without the row index
obs.to_csv('data/obs_cleaned.csv', index=False)

### Was iNaturalist a suitable platform for data collection and reporting?
- Did people follow instructions and how many actually did? 
    - i.e., observations must be between 9-10pm, survey_observation == yes, repeated the survey over 3 nights at the same place
   
- How many casual observations were reported?
    - i.e., observations not between 9-10pm or survey_observation == no
    
- How many did a survey but never repeated it more than 3 times?
    - i.e., observations between 9-10pm, survey_observation == yes, but did not have more than 3 observations reported at the same place

In [26]:
obs = obs.copy()

In [27]:
len(obs)

54

In [28]:
# Restore proper date / time types: 'observed_on' is parsed from MM/DD/YYYY text and the
# times are re-parsed into datetime.time objects
obs['observed_on'] = pd.to_datetime(obs['observed_on'], format='%m/%d/%Y')
parsed_times = pd.to_datetime(obs['time_observed_at'], format='%H:%M:%S')
obs['time_observed_at'] = parsed_times.dt.time

# Build a combined 'datetime' column by joining the date and time text and parsing the result
date_text = obs['observed_on'].astype(str)
time_text = obs['time_observed_at'].astype(str)
obs['datetime'] = pd.to_datetime(date_text + ' ' + time_text)

# A "casual" observation is one made outside 9-10 PM (inclusive bounds) or one explicitly
# flagged survey_observation == 'no'
# NOTE(review): a row inside the window whose flag is missing is neither casual here nor
# a survey observation under the == 'yes' filter -- confirm that is intended.
window_start, window_end = time(21, 0), time(22, 0)
inside_window = obs['time_observed_at'].map(lambda t: window_start <= t <= window_end)
flagged_no = obs['survey_observation'].eq('no')
casual_observations = obs.loc[~inside_window | flagged_no]

# Get the count of casual observations
casual_observation_count = len(casual_observations)

print("Number of casual observations reported:", casual_observation_count)

Number of casual observations reported: 27


In [29]:
casual_observations

Unnamed: 0,id,observed_on,time_observed_at,user_id,description,place_guess,latitude,longitude,survey_observation,call_type,direction_to_call,repeat_observation,datetime
14,193639074,2023-05-18,07:20:00,3785171,,"Te Ika-a-Māui/North Island, Wellington, Wellin...",-41.31272,174.791891,no,Not recorded,Not recorded,yes,2023-05-18 07:20:00
5,191743943,2023-11-22,21:07:00,57754,calling in the distance,"Roseneath, Wellington, New Zealand",-41.294902,174.798935,no,,,,2023-11-22 21:07:00
6,192178950,2023-11-26,22:02:00,762458,X 2 individual birds,"Te Ika-a-Māui/North Island, Paraparaumu, Welli...",-40.921486,175.005007,no,Morepork,Not recorded,,2023-11-26 22:02:00
7,192534631,2023-11-29,22:38:53,6686,,"Te Ika-a-Māui/North Island, Wellington, Wellin...",-41.264182,174.764985,,Morepork,W,no,2023-11-29 22:38:53
9,193367043,2023-12-08,21:29:42,57754,,"Thane Road at Robieson Street (near 17), Rosen...",-41.294687,174.799116,no,,,,2023-12-08 21:29:42
11,193373197,2023-12-09,21:29:58,3629246,,476 horokiwi road,-41.197917,174.85316,no,Morepork,W,yes,2023-12-09 21:29:58
13,193609071,2023-12-09,22:10:00,3785171,,"Te Ika-a-Māui/North Island, Wellington, Wellin...",-41.312818,174.791841,no,Trill,E,yes,2023-12-09 22:10:00
12,193608979,2023-12-10,22:10:00,3785171,,"Te Ika-a-Māui/North Island, Wellington, Wellin...",-41.312833,174.791848,no,Low trill,E,yes,2023-12-10 22:10:00
15,194304113,2023-12-18,22:00:00,7682958,Regularly hear morepork call from home in evening,"127 Grafton Road, Wellington",-41.295368,174.801504,no,Morepork,Not recorded,yes,2023-12-18 22:00:00
21,194494713,2023-12-21,22:53:25,451919,,"4018841, Mākara, Wellington 6972, New Zealand",-41.295638,174.688985,yes,Morepork,W,maybe,2023-12-21 22:53:25


In [30]:
# Count casual observations per user, observation date, and place
group_keys = ['user_id', 'observed_on', 'place_guess']
grouped_casual_observations = (
    casual_observations
    .groupby(group_keys)
    .size()
    .reset_index(name='count')
)
grouped_casual_observations

Unnamed: 0,user_id,observed_on,place_guess,count
0,6686,2023-11-29,"Te Ika-a-Māui/North Island, Wellington, Wellin...",1
1,57754,2023-11-22,"Roseneath, Wellington, New Zealand",1
2,57754,2023-12-08,"Thane Road at Robieson Street (near 17), Rosen...",1
3,58187,2024-01-22,"Te Ika-a-Māui/North Island, Wellington, Wellin...",1
4,59165,2024-01-02,"Wellington, NZ",1
5,373980,2023-12-31,"South Wairarapa, NZ-WG, NZ",1
6,451919,2023-12-21,"4018841, Mākara, Wellington 6972, New Zealand",1
7,451919,2023-12-26,"4018841, Mākara, Wellington 6972, New Zealand",1
8,451919,2023-12-31,"4018841, Mākara, Wellington 6972, New Zealand",1
9,451919,2024-01-05,"4018841, Mākara, Wellington 6972, New Zealand",1


In [31]:
len(grouped_casual_observations)

27

In [32]:
# Survey observations: flagged 'yes' and made between 9 and 10 PM (both ends inclusive)
window_start, window_end = time(21, 0), time(22, 0)
inside_window = obs['time_observed_at'].map(lambda t: window_start <= t <= window_end)
flagged_yes = obs['survey_observation'].eq('yes')
survey_observations = obs.loc[inside_window & flagged_yes]

# Count the survey observations per user, observation date, and place
group_keys = ['user_id', 'observed_on', 'place_guess']
grouped_observations = (
    survey_observations
    .groupby(group_keys)
    .size()
    .reset_index(name='count')
)


In [33]:
survey_observations

Unnamed: 0,id,observed_on,time_observed_at,user_id,description,place_guess,latitude,longitude,survey_observation,call_type,direction_to_call,repeat_observation,datetime
1,191655600,2023-11-21,21:10:27,117691,,"Khandallah, Wellington 6035, New Zealand",-41.241592,174.787875,yes,,,,2023-11-21 21:10:27
0,191655352,2023-11-21,21:10:47,3612063,Poneke gives a hoot project,"Khandallah Park, Wellington, Wellington, NZ",-41.24165,174.787957,yes,,,,2023-11-21 21:10:47
2,191655883,2023-11-21,21:27:05,3612063,,"Khandallah Park, Wellington, Wellington, NZ",-41.241673,174.787947,yes,,,,2023-11-21 21:27:05
3,191656729,2023-11-21,21:48:51,117691,,"Khandallah, Wellington 6035, New Zealand",-41.241593,174.787876,yes,,,,2023-11-21 21:48:51
4,191656804,2023-11-21,21:51:45,7614482,"Heard more pork call\nFar away, direction hard...","Station Road (near 7), Khandallah, Wellington ...",-41.241913,174.793068,yes,Morepork,N,yes,2023-11-21 21:51:45
8,193366756,2023-12-08,21:22:26,7636785,,"Huntleigh Park, Wellington, Wellington, NZ",-41.253368,174.764102,yes,Morepork,W,no,2023-12-08 21:22:26
10,193367376,2023-12-08,21:33:00,7636785,,"Huntleigh Park, Wellington, Wellington, NZ",-41.25335,174.764131,yes,Morepork,S,yes,2023-12-08 21:33:00
16,194318735,2023-12-18,21:25:00,3785171,,"Hataitai Park, Wellington, Wellington, NZ",-41.309925,174.789332,yes,Morepork,E,yes,2023-12-18 21:25:00
17,194343572,2023-12-19,21:46:57,605867,Albemarle Reserve,"Northland, Wellington 6012, New Zealand",-41.27675,174.757751,yes,Not recorded,Not recorded,yes,2023-12-19 21:46:57
18,194416519,2023-12-20,21:37:02,5866007,Observed call 37 minutes into listening hour f...,"Khandallah, Wellington 6035, New Zealand",-41.252639,174.800385,yes,Morepork,N,no,2023-12-20 21:37:02


In [34]:
len(survey_observations)

27

In [35]:
grouped_observations.sort_values(by='count')

Unnamed: 0,user_id,observed_on,place_guess,count
9,1226581,2024-01-02,"West Taratahi, New Zealand",1
16,7614482,2023-11-21,"Station Road (near 7), Khandallah, Wellington ...",1
14,3785171,2023-12-18,"Hataitai Park, Wellington, Wellington, NZ",1
12,1226581,2024-01-24,"Carterton 5791, New Zealand",1
10,1226581,2024-01-03,"Tararua Forest Park, NZ-WG-CR, NZ-WG, NZ",1
18,7684918,2023-12-23,"Te Ika-a-Māui/North Island, Wellington, Wellin...",1
8,1226581,2023-12-27,"West Taratahi, New Zealand",1
19,7706104,2024-01-03,"Khandallah Park, Wellington, Wellington, NZ",1
6,760161,2023-12-31,"Wellington, NZ-WG, NZ",1
5,760161,2023-12-28,"Miramar, Wellington 6022, New Zealand",1


In [36]:
len(grouped_observations)

20