In [125]:
from scipy.stats import gamma
import datetime as dt
import pandas as pd
import numpy as np
import datetime

from bokeh.plotting import figure, show, output_notebook
output_notebook()


# extract raw

In [12]:
def extract_feedings(path='./data.csv'):
    """Load the activity export and keep only completed nursing sessions.

    A little transformation happens on top of the raw extract: the columns
    are renamed to short names, both timestamps are parsed, and sessions
    without a recorded end time are dropped.

    Parameters
    ----------
    path : str or path-like, default './data.csv'
        CSV file to read. It must contain the columns 'Activity',
        'Date and Time', 'End Time', 'Duration (min)' and 'Text'.

    Returns
    -------
    pandas.DataFrame
        Columns ``start`` and ``end`` (datetimes), ``duration`` (int) and
        ``notes``. Row labels are the ones the rows had in the CSV.
    """
    raw = pd.read_csv(path)

    column_names = {
        'Date and Time': 'start',
        'End Time': 'end',
        'Duration (min)': 'duration',
        'Text': 'notes',
    }

    # .copy() makes the selection an independent frame, so the column
    # assignments below never write into a slice of `raw`.
    feedings = (
        raw.loc[raw['Activity'] == 'Nursing', list(column_names)]
           .rename(columns=column_names)
           .copy()
    )

    feedings['start'] = pd.to_datetime(feedings['start'])
    feedings['end'] = pd.to_datetime(feedings['end'])

    # Drop sessions with no end time *before* casting the duration to int:
    # the cast would fail on a missing value.
    feedings = feedings.loc[feedings['end'].notna()].copy()
    feedings['duration'] = feedings['duration'].astype(int)

    return feedings

# astronomical data

Planned inputs: sunrise, sunset, moonrise, moonset, and moon phase. Only sunrise and sunset are loaded below; the moon data is still a TODO.

In [137]:
# Location used for the sunrise/sunset lookup: Emeryville, CA
lat = 37.831318
lon = -122.28524

def get_url(lat, lon, year):
    """Return the NOAA solar-calculator table URL for a location and year.

    ``lat``, ``lon`` and ``year`` are interpolated into the query string
    exactly as given.
    """
    url = f'https://www.esrl.noaa.gov/gmd/grad/solcalc/table.php?lat={lat}&lon={lon}&year={year}'
    return url

def make_date(year, day, month):
    """Join a year string, a month abbreviation and a day into 'Y-M-D'.

    ``month`` is looked up in the module-level ``month_dict`` to get its
    number. ``year`` must already be a string; neither the month nor the
    day is zero-padded.
    """
    pieces = [year, str(month_dict[month]), str(day)]
    return '-'.join(pieces)

# Each year's page yields three tables: sunrise, sunset and solar noon.
# The third (solar noon) is not used, so it is unpacked into `_`.
url_2020 = get_url(lat, lon, 2020)
sunrise_2020, sunset_2020, _ = pd.read_html(url_2020)

# same again for 2019
url_2019 = get_url(lat, lon, 2019)
sunrise_2019, sunset_2019, _ = pd.read_html(url_2019)


# Month abbreviations in calendar order; reshape_make_date melts the
# columns with these names.
months = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]
month_numbers = list(range(1, len(months) + 1))

# abbreviation -> month number (1-based)
month_dict = dict(zip(months, month_numbers))


def reshape_make_date(raw_input, year, sunrise=False):
    """Turn a wide Day x month table into one row per calendar date.

    Parameters
    ----------
    raw_input : pandas.DataFrame
        Table with a 'Day' column and one column per entry of ``months``.
    year : str
        Year the table belongs to; passed to ``make_date`` unchanged.
    sunrise : bool, default False
        Name the value column 'sunrise' when True, 'sunset' otherwise.

    Returns
    -------
    pandas.DataFrame
        Two columns: the value column and ``date``. Day/month pairs that do
        not form a real date are kept, with ``date`` coerced to NaT.
    """
    value_name = 'sunrise' if sunrise else 'sunset'

    long_form = raw_input.melt(
        id_vars='Day',
        value_vars=months,
        var_name='month',
        value_name=value_name,
    )
    long_form['year'] = year

    # build a 'Y-M-D' string per row, then let pandas parse it
    date_strings = long_form[['year', 'Day', 'month']].apply(
        lambda row: make_date(row.year, row.Day, row.month), axis=1
    )
    long_form['date'] = pd.to_datetime(date_strings, errors='coerce')

    # the helper columns have served their purpose
    return long_form.drop(columns=['Day', 'month', 'year'])


# Tidy each year's tables into one row per date ...
sr_2019 = reshape_make_date(sunrise_2019, year='2019', sunrise=True)
sr_2020 = reshape_make_date(sunrise_2020, year='2020', sunrise=True)
ss_2019 = reshape_make_date(sunset_2019, year='2019')
ss_2020 = reshape_make_date(sunset_2020, year='2020')

# ... then stack the two years, 2019 first
sunrise = pd.concat([sr_2019, sr_2020])
sunset = pd.concat([ss_2019, ss_2020])



# Clean Sierra data

In [22]:
# Load the nursing sessions; the rest of this section mutates `df` in place.
df = extract_feedings()


def _side_tokens(note):
    """Return ``{side: token}`` for each side named in the note's parentheses.

    The token is the word immediately before 'left' / 'right' once commas
    and 'm' characters have been stripped, e.g. '(6m right, 4m left)' gives
    ``{'left': '4', 'right': '6'}``. Anything that cannot be parsed yields
    an empty dict.
    """
    if not isinstance(note, str):
        # e.g. NaN for a missing note
        return {}

    parts = note.split('(')
    if len(parts) < 2:
        # spurious rows like 'Sierra Marie Guignard nursed' have no breakdown
        return {}

    tokens = (
        parts[1].split(')')[0]
                .replace(',', '')
                .replace('m', '')
                .split(' ')
    )

    found = {}
    for side in ('left', 'right'):
        if side not in tokens:
            continue
        position = tokens.index(side)
        # With nothing in front of the side word there is no duration to
        # read; `position - 1` would wrap around to the last token.
        if position > 0:
            found[side] = tokens[position - 1]
    return found


def clean_strings(df):
    """Add 'left' and 'right' columns parsed from ``df['notes']``, in place.

    Each cell holds the raw token found before the side word (a string such
    as '4' or '1h5'), or the integer 0 when the note names no duration for
    that side. The tokens still have to be converted to minutes afterwards.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'notes' column. It is modified in place.

    Returns
    -------
    None
    """
    parsed = [_side_tokens(note) for note in df['notes']]

    # Assign each column in one go. Writing string tokens cell by cell into
    # a column created as int 0 relies on silent dtype upcasting.
    for side in ('left', 'right'):
        df[side] = [tokens.get(side, 0) for tokens in parsed]

In [24]:
def hour_splitter(string):
    """Convert an '<hours>h<minutes>' token to total minutes.

    Values that are already ``int`` and strings without an 'h' are returned
    unchanged, so a plain-minute string such as '7' stays a string. A
    missing minute part ('2h') counts as zero minutes.
    """
    if type(string) is int or 'h' not in string:
        return string

    hours, minutes = string.split('h')
    minutes = minutes or 0
    return int(hours) * 60 + int(minutes)


# Parse the per-side tokens out of the notes first. This call creates the
# 'left' and 'right' columns; without it the conversions below fail with a
# KeyError on a fresh kernel.
clean_strings(df)

# tokens like '1h5' become minutes, then everything is cast to int
for side in ('left', 'right'):
    df[side] = df[side].apply(hour_splitter).astype(int)

# calendar day of each feeding, as a datetime so it can be merged on
df['date'] = pd.to_datetime(df['start'].dt.date)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1053 entries, 0 to 1414
Data columns (total 7 columns):
start       1053 non-null datetime64[ns]
end         1053 non-null datetime64[ns]
duration    1053 non-null int64
notes       1053 non-null object
left        1053 non-null int64
right       1053 non-null int64
date        1053 non-null datetime64[ns]
dtypes: datetime64[ns](3), int64(3), object(1)
memory usage: 105.8+ KB


In [140]:
# Features wanted per feeding: moon phase, moon rise, sunrise, sunset,
# time of day (feeding). Only sunrise and sunset are merged in here.
#
# The join key is spelled out: without `on`, merge joins on every column
# the two frames share, so the key would change if this cell were re-run
# or a column were added to either frame.
df = (
    df.merge(sunrise, how='left', on='date')
      .merge(sunset, how='left', on='date')
)

In [142]:
# create days_old and weeks_old

birthday = pd.to_datetime(datetime.date(2019, 10, 18))
age = df['start'] - birthday

df['days_old'] = age.dt.days             # whole days since the birthday
df['weeks_old'] = df['days_old'] // 7    # completed weeks

In [143]:
df

Unnamed: 0,start,end,duration,notes,left,right,date,sunrise,sunset,days_old,weeks_old
0,2020-03-25 14:02:00,2020-03-25 14:12:00,10,"Sierra Marie Guignard nursed (6m right, 4m left)",4,6,2020-03-25,07:04,19:26,159,22
1,2020-03-25 10:48:00,2020-03-25 10:59:00,10,"Sierra Marie Guignard nursed (5m right, 5m left)",5,5,2020-03-25,07:04,19:26,159,22
2,2020-03-25 07:39:00,2020-03-25 08:14:00,35,"Sierra Marie Guignard nursed (25m left, 10m ri...",25,10,2020-03-25,07:04,19:26,159,22
3,2020-03-24 19:27:00,2020-03-24 19:38:00,11,"Sierra Marie Guignard nursed (5m right, 6m left)",6,5,2020-03-24,07:06,19:25,158,22
4,2020-03-24 15:51:00,2020-03-24 15:59:00,8,"Sierra Marie Guignard nursed (5m left, 3m right)",5,3,2020-03-24,07:06,19:25,158,22
...,...,...,...,...,...,...,...,...,...,...,...
1048,2019-10-18 19:12:00,2019-10-18 19:32:00,20,"Baby nursed (10m right, 10m left)",10,10,2019-10-18,07:20,18:28,0,0
1049,2019-10-18 15:54:00,2019-10-18 16:26:00,32,"Baby nursed (22m right, 10m left)",10,22,2019-10-18,07:20,18:28,0,0
1050,2019-10-18 13:22:00,2019-10-18 13:24:00,2,Baby nursed (2m right),0,2,2019-10-18,07:20,18:28,0,0
1051,2019-10-18 10:45:00,2019-10-18 10:55:00,10,Baby nursed (10m left),10,0,2019-10-18,07:20,18:28,0,0


Example of a d3 chart that uses the same time format as this data: https://gist.github.com/d3netxer/10a28b7aee406f4e7fce



# TODO:

- moonrise/moonset/moonphase

In [144]:
# Save the cleaned feedings; index=False keeps the row index out of the file.
df.to_csv('./data_clean.csv', index=False)

# Simulate pulsar data from Feedings

The goal of this project is to make a Joy Division *Unknown Pleasures*-style t-shirt design based on Sierra's feedings.

## Steps:

1. Reshape feeding data to be like pulsar data

- target shape: an array of ~80 rows, each ~300 samples long
- values should be ~ [-1, 1]
- turn all feeding data into minutes since epoch and then binarize into 1=feeding, 0 = not feeding
- find midpoint of feedings and set this to 1, all else to zero
- add Gaussian noise (tentative)
- apply a low-pass filter (LPF) to spread the peaks

In [68]:
feedings = extract_feedings()
feedings['date'] = pd.to_datetime(feedings['start'].dt.date)

minutes_in_day = 1440 # we will split array to sizes of day, probably want to downsample to get to ~300 points per day

# Express both ends of every feeding as minutes since the Unix epoch, so
# that one day spans exactly `minutes_in_day` units.
epoch = dt.datetime(1970, 1, 1)
feedings['ep_start'] = (feedings['start'] - epoch).dt.total_seconds() / 60
feedings['ep_end'] = (feedings['end'] - epoch).dt.total_seconds() / 60

feedings = feedings.sort_values('date')

In [63]:
feedings.head()

Unnamed: 0,start,end,duration,notes,date,ep_start,ep_end,midnight_left,midnight_right
1414,2019-10-18 08:00:00,2019-10-18 08:10:00,10,Baby nursed (10m right),2019-10-18,26189760.0,26189770.0,1571357000.0,1571443000.0
1400,2019-10-18 21:25:00,2019-10-18 22:14:00,49,"Sierra Marie Guignard nursed (23m right, 26m l...",2019-10-18,26190565.0,26190614.0,1571357000.0,1571443000.0
1402,2019-10-18 19:34:00,2019-10-18 19:51:00,17,"Baby nursed (8m right, 9m left)",2019-10-18,26190454.0,26190471.0,1571357000.0,1571443000.0
1403,2019-10-18 19:25:00,2019-10-18 19:45:00,20,"Baby nursed (10m right, 10m left)",2019-10-18,26190445.0,26190465.0,1571357000.0,1571443000.0
1413,2019-10-18 10:45:00,2019-10-18 10:55:00,10,Baby nursed (10m left),2019-10-18,26189925.0,26189935.0,1571357000.0,1571443000.0


In [199]:

# distinct calendar days that have at least one feeding
dates = feedings['date'].unique()

In [200]:
# scratch check: `days` is a list of per-day lists
days = [[3, 4, 5]]

In [201]:
days

[[3, 4, 5]]

In [320]:
# Build one smoothed, noisy trace per sampled day and write them out as the
# rows of a CSV.

DAY_STRIDE = 3      # use every third unique day (~160 unique days in total)
down_sample = 5     # minutes per sample: 1440 / 5 = 288 samples per day
LPF_WINDOW = 17     # samples in the Blackman-Harris smoothing window
NOISE_SIGMA = 0.05  # standard deviation of the added Gaussian noise (1 / 20)
NOISE_SEED = 0      # fixed seed so re-running the notebook rewrites the same file

rng = np.random.default_rng(NOISE_SEED)
epoch = dt.datetime(1970, 1, 1)

days = []

for date in dates[::DAY_STRIDE]:

    feedings_of_the_day = feedings.loc[feedings['date'] == date].copy()

    # Minutes from the epoch to this day's midnight. Subtracting it turns
    # the epoch-minute columns into minute-of-day offsets.
    midnight_left = (feedings_of_the_day['date'] - epoch).dt.total_seconds().iloc[0] / 60
    for column in ('ep_start', 'ep_end'):
        feedings_of_the_day[column] = (
            feedings_of_the_day[column] - midnight_left
        ).astype(int)

    day_arr = np.zeros(minutes_in_day)

    # Mark every feeding: +1 from its start onwards, -1 from its end onwards.
    for first_minute, last_minute in zip(
        feedings_of_the_day['ep_start'], feedings_of_the_day['ep_end']
    ):
        day_arr[first_minute:] += 1
        day_arr[last_minute:] -= 1

    # Overlapping feedings count above 1 (and reversed ones below 0); clip
    # back to a 0/1 "feeding in progress" signal.
    day_arr = np.clip(day_arr, 0, 1)

    # Decimate to get 288 samples per day.
    # NOTE(review): striding keeps only every `down_sample`-th minute, so a
    # feeding shorter than that which misses a sampled minute disappears.
    # Confirm this is acceptable for the design.
    day_arr = day_arr[::down_sample]

    # Low-pass filter to spread the peaks. The first LPF_WINDOW - 1 outputs
    # of the rolling window are NaN and are dropped.
    filt = pd.Series(day_arr).rolling(LPF_WINDOW, win_type='blackmanharris').sum()
    smoothed = filt.dropna().to_numpy()

    # Add some noise. A new array is built instead of updating `smoothed`
    # in place, since to_numpy() may return a view.
    day_arr = smoothed + rng.normal(scale=NOISE_SIGMA, size=len(smoothed))

    # Experiments left switched off: a second low-pass pass
    # (rolling(4, win_type='blackmanharris').sum()) and peak crushing with
    # np.power(day_arr, 1/2). A bokeh scatter of `day_arr`
    # (p.circle(y=day_arr, x=range(len(day_arr)))) was used for inspection.

    days.append(day_arr.tolist())

days = pd.DataFrame(days).round(3)

days.to_csv('./jd_feedings.csv', index=False, header=False)

In [287]:
days.max().max()

1.397

In [288]:
days.min().min()

0.514