In [1]:
# Imports

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from urllib.request import urlopen
from bs4 import BeautifulSoup
import io
import datetime
import matplotlib.dates as mdates

In [2]:
# Data location: choose the input/output folders for the machine this runs on.
# sys.platform is "linux" on Linux and "win32" on every Windows build
# (32- and 64-bit alike), so prefix checks cover each case without relying
# on values such as "linux2" or "win64".
from sys import platform

if platform.startswith("linux"):
    data_string = "/home/maria/Documents/data/pied_piper/green/"
    output_string = "/home/maria/Documents/output/pied_piper/green/"
elif platform.startswith("win"):
    data_string = "C:/Users/maria/OneDrive/Documents/data/pied_piper/green/"
    output_string = "C:/Users/maria/OneDrive/Documents/output/pied_piper/green/"
else:
    # Every other platform (e.g. macOS, where sys.platform is "darwin").
    data_string = "/Users/erikamiller/Desktop/Pied Piper/Data/Green/"
    output_string = "/Users/erikamiller/Desktop/Pied Piper/Output/Green/"

In [3]:
# Read the selected year's CSV from the data folder into df.
# The first row of the file supplies the column names (header=0).
year = 2019
csv_name = f"{data_string}{year}_green_all_R_new.csv"
df = pd.read_csv(csv_name, header=0)

In [4]:
# Parse the timestamp columns; values that cannot be parsed become NaT
# because of errors='coerce'.
for stamp_col in ('midpoint', 'start_datetime', 'end_datetime'):
    df[stamp_col] = pd.to_datetime(df[stamp_col], errors='coerce')

# 'Date' is the calendar date of start_datetime (time of day dropped), stored
# as a datetime column; 'doy' is that date's day-of-year number.
start_dates = pd.to_datetime(df['start_datetime']).dt.date
df['Date'] = pd.to_datetime(start_dates)
df['doy'] = df['Date'].dt.dayofyear

In [5]:
# Label a timestamp (here: the midpoint of a sampling interval) as day or night.
def categorize_daytime(dt):
    """Classify a timestamp as 'Day' or 'Night' by its hour.

    Parameters
    ----------
    dt : timestamp-like with an ``hour`` attribute, or a missing value (NaT/NaN/None)

    Returns
    -------
    'Day' when 06:00 <= time < 18:00, 'Night' for any other hour, and None
    when ``dt`` is missing. A missing timestamp is deliberately left
    unlabelled: NaT.hour is NaN, which fails every comparison, so without this
    guard a missing midpoint would be reported as 'Night'.
    """
    if pd.isna(dt):
        return None
    return 'Day' if 6 <= dt.hour < 18 else 'Night'
# Label every row by the time of day of its interval midpoint.
df['daytime_category'] = df['midpoint'].map(categorize_daytime)

In [6]:
# Column names currently in df, as a plain list.
df.columns.tolist()

['Unnamed: 0',
 'start_datetime',
 'end_datetime',
 'In',
 'midpoint',
 'StartDate',
 'EndDate',
 'chum0_mixed_num',
 'steelheadsmolt_wild_num',
 'steelheadsmolt_hatchery_num',
 'coho0_wild_num',
 'coho1_mixed_num',
 'coho1_hatchery_num',
 'chinook1_wild_num',
 'chinook1_hatchery_num',
 'chinook0_wild_num',
 'chinook0_hatchery_num',
 'Date',
 'doy',
 'daytime_category']

In [7]:
# Calculate CPUE (Catch Per Unit Effort): every <group>_num count column is
# divided by 'In' and stored as <group>_perhour.
# The groups are listed in the order their *_perhour columns are appended.
# coho1_wild and pink0_wild are not computed here; neither appears as a
# *_num column in this file's column listing.
cpue_groups = [
    'chum0_mixed',
    'steelheadsmolt_wild',
    'steelheadsmolt_hatchery',
    'coho1_mixed',
    'coho1_hatchery',
    'coho0_wild',
    'chinook1_wild',
    'chinook1_hatchery',
    'chinook0_wild',
    'chinook0_hatchery',
]

for group in cpue_groups:
    df[group + '_perhour'] = df[group + '_num'] / df['In']

In [8]:
# Split the rows into two frames according to their Day/Night label.
category = df['daytime_category']
df_day = df[category == "Day"]
df_night = df[category == "Night"]

In [9]:
# Check for NaN or missing values in the 'midpoint' column of df_day and df_night.
# A notebook cell only renders its last bare expression, so both checks are
# passed to display() explicitly; otherwise the df_day result is never shown.
display(df_day[df_day['midpoint'].isnull()],
        df_night[df_night['midpoint'].isnull()])

## May not need

Unnamed: 0.1,Unnamed: 0,start_datetime,end_datetime,In,midpoint,StartDate,EndDate,chum0_mixed_num,steelheadsmolt_wild_num,steelheadsmolt_hatchery_num,...,chum0_mixed_perhour,steelheadsmolt_wild_perhour,steelheadsmolt_hatchery_perhour,coho1_mixed_perhour,coho1_hatchery_perhour,coho0_wild_perhour,chinook1_wild_perhour,chinook1_hatchery_perhour,chinook0_wild_perhour,chinook0_hatchery_perhour


In [10]:
# Render df with pandas' row and column truncation switched off; the options
# are only changed inside the with-block.
# (Pass df_day or df_night to display() instead to inspect one subset.)
full_view = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*full_view):
    display(df)

Unnamed: 0.1,Unnamed: 0,start_datetime,end_datetime,In,midpoint,StartDate,EndDate,chum0_mixed_num,steelheadsmolt_wild_num,steelheadsmolt_hatchery_num,coho0_wild_num,coho1_mixed_num,coho1_hatchery_num,chinook1_wild_num,chinook1_hatchery_num,chinook0_wild_num,chinook0_hatchery_num,Date,doy,daytime_category,chum0_mixed_perhour,steelheadsmolt_wild_perhour,steelheadsmolt_hatchery_perhour,coho1_mixed_perhour,coho1_hatchery_perhour,coho0_wild_perhour,chinook1_wild_perhour,chinook1_hatchery_perhour,chinook0_wild_perhour,chinook0_hatchery_perhour
0,1,2019-01-23 15:00:00,2019-01-24 08:30:00,17.5,2019-01-23 23:45:00,2019-01-23,2019-01-24,0,0,0,0,0,0,0,0,698,0,2019-01-23,23,Night,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,39.885714,0.0
1,2,2019-01-24 08:30:00,2019-01-24 16:00:00,7.5,2019-01-24 12:15:00,2019-01-24,2019-01-24,0,0,0,0,0,0,0,0,93,0,2019-01-24,24,Day,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.4,0.0
2,3,2019-01-24 16:00:00,2019-01-25 08:00:00,16.0,2019-01-25 00:00:00,2019-01-24,2019-01-25,2,0,0,0,0,2,0,0,272,0,2019-01-24,24,Night,0.125,0.0,0.0,0.0,0.125,0.0,0.0,0.0,17.0,0.0
3,4,2019-01-25 08:00:00,2019-01-25 16:00:00,8.0,2019-01-25 12:00:00,2019-01-25,2019-01-25,0,0,0,0,0,0,0,0,33,0,2019-01-25,25,Day,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.125,0.0
4,5,2019-01-25 16:00:00,2019-01-26 09:20:00,17.333333,2019-01-26 00:40:00,2019-01-25,2019-01-26,0,0,0,1,0,0,0,0,171,0,2019-01-25,25,Night,0.0,0.0,0.0,0.0,0.0,0.057692,0.0,0.0,9.865385,0.0
5,6,2019-01-26 09:20:00,2019-01-26 16:00:00,6.666667,2019-01-26 12:40:00,2019-01-26,2019-01-26,0,0,0,0,0,0,0,0,30,0,2019-01-26,26,Day,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.5,0.0
6,7,2019-01-26 16:00:00,2019-01-27 08:30:00,16.5,2019-01-27 00:15:00,2019-01-26,2019-01-27,0,0,0,0,0,0,0,0,106,0,2019-01-26,26,Night,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.424242,0.0
7,8,2019-01-27 08:30:00,2019-01-27 16:00:00,7.5,2019-01-27 12:15:00,2019-01-27,2019-01-27,0,0,0,0,0,0,0,0,17,0,2019-01-27,27,Day,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.266667,0.0
8,9,2019-01-27 16:00:00,2019-01-28 08:15:00,16.25,2019-01-28 00:07:30,2019-01-27,2019-01-28,2,0,0,2,0,0,0,0,69,0,2019-01-27,27,Night,0.123077,0.0,0.0,0.0,0.0,0.123077,0.0,0.0,4.246154,0.0
9,10,2019-01-28 08:15:00,2019-01-28 16:00:00,7.75,2019-01-28 12:07:30,2019-01-28,2019-01-28,0,0,0,0,0,0,0,0,9,0,2019-01-28,28,Day,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.16129,0.0


In [11]:
# Need one row for every date, including dates that have no matching record.
def _daily_grid(start, end):
    """Return a one-column frame ('date_range') holding one timestamp per day
    from ``start`` to ``end`` inclusive, each at ``start``'s time of day."""
    return pd.DataFrame({'date_range': pd.date_range(start=start, end=end)})


def _nearest_record_per_day(grid, records):
    """Attach to each grid timestamp the record whose 'midpoint' is nearest.

    Records further than 12 hours from a grid timestamp are not matched, and
    that grid row is filled with NaN/NaT instead.

    merge_asof raises if the right-hand key contains nulls or is not sorted,
    so rows without a midpoint are dropped (they cannot be placed on the
    timeline) and the remainder is stable-sorted by midpoint first.
    """
    usable = (records
              .dropna(subset=['midpoint'])
              .sort_values('midpoint', kind='stable'))
    return pd.merge_asof(grid, usable,
                         left_on='date_range', right_on='midpoint',
                         direction='nearest',
                         tolerance=pd.Timedelta(hours=12))


# Day records are matched against noon of each date, night records against
# midnight.
df_dates_day = _daily_grid('2019-01-23 12:00:00', '2019-07-22 12:00:00')
df_dates_night = _daily_grid('2019-01-23 00:00:00', '2019-07-22 00:00:00')

df_merged_day = _nearest_record_per_day(df_dates_day, df_day)
df_merged_night = _nearest_record_per_day(df_dates_night, df_night)

In [12]:
# Grid dates without a matching record come out of the merge with NaN in
# 'doy' and 'daytime_category', and neither column may be NaN.
# The record's own day of year is preserved as 'midpoint_doy'; 'doy' and
# 'year' are then rebuilt from the grid date, and the category is set to a
# constant label for the whole frame.
for merged, label in ((df_merged_day, 'day'), (df_merged_night, 'night')):
    grid_dates = merged['date_range'].dt
    merged['midpoint_doy'] = merged['doy']
    merged['doy'] = grid_dates.dayofyear
    merged['year'] = grid_dates.year
    merged['daytime_category'] = label

In [13]:
# Render the merged day table with row and column truncation switched off.
# (Pass df_merged_night to display() instead to inspect the night table.)
no_truncation = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*no_truncation):
    display(df_merged_day)

Unnamed: 0.1,date_range,Unnamed: 0,start_datetime,end_datetime,In,midpoint,StartDate,EndDate,chum0_mixed_num,steelheadsmolt_wild_num,steelheadsmolt_hatchery_num,coho0_wild_num,coho1_mixed_num,coho1_hatchery_num,chinook1_wild_num,chinook1_hatchery_num,chinook0_wild_num,chinook0_hatchery_num,Date,doy,daytime_category,chum0_mixed_perhour,steelheadsmolt_wild_perhour,steelheadsmolt_hatchery_perhour,coho1_mixed_perhour,coho1_hatchery_perhour,coho0_wild_perhour,chinook1_wild_perhour,chinook1_hatchery_perhour,chinook0_wild_perhour,chinook0_hatchery_perhour,midpoint_doy,year
0,2019-01-23 12:00:00,,NaT,NaT,,NaT,,,,,,,,,,,,,NaT,23,day,,,,,,,,,,,,2019
1,2019-01-24 12:00:00,2.0,2019-01-24 08:30:00,2019-01-24 16:00:00,7.5,2019-01-24 12:15:00,2019-01-24,2019-01-24,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,93.0,0.0,2019-01-24,24,day,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.4,0.0,24.0,2019
2,2019-01-25 12:00:00,4.0,2019-01-25 08:00:00,2019-01-25 16:00:00,8.0,2019-01-25 12:00:00,2019-01-25,2019-01-25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,33.0,0.0,2019-01-25,25,day,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.125,0.0,25.0,2019
3,2019-01-26 12:00:00,6.0,2019-01-26 09:20:00,2019-01-26 16:00:00,6.666667,2019-01-26 12:40:00,2019-01-26,2019-01-26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30.0,0.0,2019-01-26,26,day,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.5,0.0,26.0,2019
4,2019-01-27 12:00:00,8.0,2019-01-27 08:30:00,2019-01-27 16:00:00,7.5,2019-01-27 12:15:00,2019-01-27,2019-01-27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.0,0.0,2019-01-27,27,day,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.266667,0.0,27.0,2019
5,2019-01-28 12:00:00,10.0,2019-01-28 08:15:00,2019-01-28 16:00:00,7.75,2019-01-28 12:07:30,2019-01-28,2019-01-28,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,2019-01-28,28,day,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.16129,0.0,28.0,2019
6,2019-01-29 12:00:00,12.0,2019-01-29 07:55:00,2019-01-29 16:00:00,8.083333,2019-01-29 11:57:30,2019-01-29,2019-01-29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,2019-01-29,29,day,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.618557,0.0,29.0,2019
7,2019-01-30 12:00:00,14.0,2019-01-30 08:05:00,2019-01-30 16:05:00,8.0,2019-01-30 12:05:00,2019-01-30,2019-01-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,2019-01-30,30,day,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.25,0.0,30.0,2019
8,2019-01-31 12:00:00,16.0,2019-01-31 08:00:00,2019-01-31 16:15:00,8.25,2019-01-31 12:07:30,2019-01-31,2019-01-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,2019-01-31,31,day,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.333333,0.0,31.0,2019
9,2019-02-01 12:00:00,18.0,2019-02-01 08:12:00,2019-02-01 16:05:00,7.883333,2019-02-01 12:08:30,2019-02-01,2019-02-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.0,0.0,2019-02-01,32,day,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.790698,0.0,32.0,2019


In [15]:
# Save the day and night tables as CSV files in the data folder.
# The year in each file name comes from `year` (set where the input CSV is
# read) rather than a hard-coded 2019, so output names follow the input file.
df_merged_day.to_csv(data_string + "green_" + str(year) + "_day.csv")
df_merged_night.to_csv(data_string + "green_" + str(year) + "_night.csv")