## airtable_tools_v2

In [1]:
import os, logging
import numpy as np

from tqdm import tqdm
import pandas as pd
from pyairtable import Table
from dotenv import load_dotenv
from datetime import datetime
import pytz

In [2]:
logging.basicConfig(format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
                    datefmt='%m-%d %H:%M:%S',level='INFO')
logger=logging.getLogger(__name__)
logger.setLevel('INFO')

In [3]:
load_dotenv()
api_key=os.getenv('AIRTABLE_API_KEY')
base_id='appDTdyxcShkSR3oF'

## Logs

In [4]:
dst_table_name='[A]Logs'

In [5]:
logger.info('Connecting to Log and Habits table...')
log_table=Table(api_key, base_id, 'Log')
habit_table=Table(api_key, base_id, 'Habits')

logger.info('Connecting to %s table...', dst_table_name)
a=Table(api_key, base_id, dst_table_name)

[08-21 09:41:20] p6837 {2472340047.py:1} INFO - Connecting to Log and Habits table...
[08-21 09:41:20] p6837 {2472340047.py:5} INFO - Connecting to [A]Logs table...


In [6]:
logger.info('Finding Log records with "Log" in the Habit name...')
res=[]
for o in tqdm(log_table.all()):
    if 'fields' not in o: continue
    if 'Habit' not in o['fields']: continue
    if 'Score' not in o['fields']: continue
    if len(o['fields']['Habit'])>0:
        habit=habit_table.get(o['fields']['Habit'][0])
        if 'Log' in habit['fields']['Habit']:
            res.append({'Date':o['fields']['Date'], 'Score':o['fields']['Score'], 'Log': habit['fields']['Habit'].strip('Log: ')})

logger.info('Converting to dataframe and resampling...')
df=pd.DataFrame(res)
df.index=pd.DatetimeIndex(df['Date'])

logger.info('Deleting existing %s data..', dst_table_name)
r=a.all()
ids=[o['id'] for o in r]
res=a.batch_delete(ids)

logger.info('Uploading new analytics data...')
for l in list(df['Log'].unique()):
    dfs=df[df['Log']==l]
    dfs=dfs.sort_index()
    dfr=dfs.resample('D').mean() #Resample
    
    res=[]
    for d in dfr.index:
        if np.isnan(dfr.loc[d]['Score']): continue
        res.append({'Date': str(d.date()),
                 'Score': int(dfr.loc[d]['Score']),
                 'Metric': l})
    res=a.batch_create(res)
logger.info('Completed uploading %i records. ', len(res))


[08-21 09:41:20] p6837 {882991070.py:1} INFO - Finding Log records with "Log" in the Habit name...
100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1050/1050 [00:13<00:00, 80.37it/s]
[08-21 09:41:37] p6837 {882991070.py:12} INFO - Converting to dataframe and resampling...
[08-21 09:41:37] p6837 {882991070.py:16} INFO - Deleting existing [A]Logs data..
[08-21 09:41:37] p6837 {882991070.py:21} INFO - Uploading new analytics data...
[08-21 09:41:43] p6837 {882991070.py:34} INFO - Completed uploading 76 records. 


## Health Habits

In [7]:
dst_table_name='[A]HealthHabits'
non_weight_volume_mult=50

In [8]:
logger.info('Connecting to Log, Habits, and Goal table...')
log_table=Table(api_key, base_id, 'Log')
habit_table=Table(api_key, base_id, 'Habits')
goal_table=Table(api_key, base_id, 'Goals')

logger.info('Connecting to %s table...', dst_table_name)
a=Table(api_key, base_id, dst_table_name)

[08-21 09:41:43] p6837 {4205170511.py:1} INFO - Connecting to Log, Habits, and Goal table...
[08-21 09:41:43] p6837 {4205170511.py:6} INFO - Connecting to [A]HealthHabits table...


In [9]:
logger.info('Finding Exercise Logs')
res=[]
for o in tqdm(log_table.all()):
    if 'fields' not in o: continue
    if 'Habit' not in o['fields']: continue
    if 'Goal 2' not in o['fields']: continue
    if 'Category' not in o['fields']: continue
    if len(o['fields']['Goal 2'])>0:
        goal=goal_table.get(o['fields']['Goal 2'][0])
        habit=habit_table.get(o['fields']['Habit'][0])['fields']['Habit']
        if 'fields' not in goal: continue
        if 'Goal' not in goal['fields']: continue
        if goal['fields']['Goal']=="Health":
            if 'Reps' in o['fields'] and 'Weight' in o['fields'] and 'Sets' in o['fields']:
                volume=float(o['fields']['Reps'])*float(o['fields']['Sets'])*float(o['fields']['Weight'])
            elif 'Minutes' in o['fields']: 
                volume=non_weight_volume_mult*o['fields']['Minutes']
            else:
                volume=100
                
            if len(o['fields']['Category'])>0: category=o['fields']['Category'][0]
            else: category='Unknown'
            res.append({'Date':o['fields']['Date'], 
                        'Volume':volume, 
                        'Habit':habit,
                        'Category': category})
            
logger.info('Converting to dataframe and resampling...')
df=pd.DataFrame(res)
df.index=pd.DatetimeIndex(df['Date'])

logger.info('Deleting existing %s data..', dst_table_name)
r=a.all()
ids=[o['id'] for o in r]
res=a.batch_delete(ids)

[08-21 09:41:44] p6837 {1923462214.py:1} INFO - Finding Exercise Logs
100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1053/1053 [00:36<00:00, 29.18it/s]
[08-21 09:42:23] p6837 {1923462214.py:28} INFO - Converting to dataframe and resampling...
[08-21 09:42:23] p6837 {1923462214.py:32} INFO - Deleting existing [A]HealthHabits data..


In [10]:
df.head()

Unnamed: 0_level_0,Date,Volume,Habit,Category
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-06-29,2022-06-29,500.0,Peloton,Cardio
2022-05-18,2022-05-18,1320.0,Overhead Press,Shoulders
2022-05-30,2022-05-30,3000.0,Bench Press,Chest
2022-05-16,2022-05-16,500.0,Peloton,Cardio
2022-07-08,2022-07-08,100.0,Lunge,Legs


In [11]:
df=df.sort_index()
dfr=df.resample('D').sum() #Resample

In [12]:
# dfr.loc[pd.datetime.strptime('2021-4-20', '%Y-%m-%d')]=0 #Working test case
end_date=pd.datetime.now()
if end_date not in dfr.index: #Do we have data from today?
    dfr.loc[end_date]=0 #Add 0s at todays date
    dfr=dfr.resample('D').sum()

  end_date=pd.datetime.now()


In [13]:
logger.info('Computing cumulative sums by type...')

for s in df['Category'].unique(): dfr['Volume_'+s]=df[df['Category']==s]['Volume'].resample('D').sum()
dfr=dfr.replace(np.NaN, 0)
for s in df['Category'].unique(): dfr['Volume_Cumulative_'+s]=dfr['Volume_'+s].cumsum()

[08-21 09:42:49] p6837 {1019186728.py:1} INFO - Computing cumulative sums by type...


In [14]:
dfr

Unnamed: 0_level_0,Volume,Volume_Chest,Volume_Back,Volume_Arms,Volume_Legs,Volume_Cardio,Volume_Abs,Volume_Shoulders,Volume_Yoga/Stretch,Volume_Cumulative_Chest,Volume_Cumulative_Back,Volume_Cumulative_Arms,Volume_Cumulative_Legs,Volume_Cumulative_Cardio,Volume_Cumulative_Abs,Volume_Cumulative_Shoulders,Volume_Cumulative_Yoga/Stretch
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2022-05-09,6380.0,2380.0,100.0,1350.0,2050.0,500.0,0.0,0.0,0.0,2380.0,100.0,1350.0,2050.0,500.0,0.0,0.0,0.0
2022-05-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2380.0,100.0,1350.0,2050.0,500.0,0.0,0.0,0.0
2022-05-11,7525.0,100.0,1440.0,960.0,2865.0,500.0,100.0,1560.0,0.0,2480.0,1540.0,2310.0,4915.0,1000.0,100.0,1560.0,0.0
2022-05-12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2480.0,1540.0,2310.0,4915.0,1000.0,100.0,1560.0,0.0
2022-05-13,5870.0,1920.0,280.0,0.0,3120.0,250.0,0.0,300.0,0.0,4400.0,1820.0,2310.0,8035.0,1250.0,100.0,1860.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-08-17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45000.0,26148.0,22800.0,103065.0,27900.0,600.0,20440.0,6750.0
2022-08-18,8320.0,4060.0,100.0,0.0,3660.0,500.0,0.0,0.0,0.0,49060.0,26248.0,22800.0,106725.0,28400.0,600.0,20440.0,6750.0
2022-08-19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,49060.0,26248.0,22800.0,106725.0,28400.0,600.0,20440.0,6750.0
2022-08-20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,49060.0,26248.0,22800.0,106725.0,28400.0,600.0,20440.0,6750.0


In [15]:
logger.info('Deleting old Analytics data..')
r=a.all()
ids=[o['id'] for o in r]
res=a.batch_delete(ids)

[08-21 09:42:49] p6837 {604491207.py:1} INFO - Deleting old Analytics data..


In [16]:
logger.info('Uploading new analytics data...')
l=[]
for s in df['Category'].unique():
    for d in dfr.index:
        l.append({'Date': str(d.date()),
                 'Exercise Volume': dfr.loc[d]['Volume_Cumulative_'+s],
                 'Category': s})
res=a.batch_create(l)

[08-21 09:42:49] p6837 {3056668839.py:1} INFO - Uploading new analytics data...


## General Actions Bro

In [17]:
dst_table_name='[A]Actions'
logger.info('Deleting existing %s data..', dst_table_name)

logger.info('Connecting to %s table...', dst_table_name)
a=Table(api_key, base_id, dst_table_name)
r=a.all()
ids=[o['id'] for o in r]
res=a.batch_delete(ids)

[08-21 09:43:20] p6837 {3412329302.py:2} INFO - Deleting existing [A]Actions data..
[08-21 09:43:20] p6837 {3412329302.py:4} INFO - Connecting to [A]Actions table...


In [18]:
logger.info('Connecting to Log, Habits, and Goal table...')
log_table=Table(api_key, base_id, 'Log')
habit_table=Table(api_key, base_id, 'Habits')
goal_table=Table(api_key, base_id, 'Goals')

[08-21 09:43:57] p6837 {823660372.py:1} INFO - Connecting to Log, Habits, and Goal table...


In [19]:
res=[]
for o in tqdm(log_table.all()):
    if 'fields' not in o: continue
    if 'Action' not in o['fields']: continue
    if 'Goal' not in o['fields']: continue
    res.append({'Date': o['fields']['Date'], 
                'Action': o['fields']['Action'],
                'Goal': o['fields']['Goal']})

logger.info('Converting to dataframe and resampling...')
df=pd.DataFrame(res)
df.index=pd.DatetimeIndex(df['Date'])
df=df.sort_index()

100%|███████████████████████████████████████████████████████████████████████████████████████████████| 1056/1056 [00:00<00:00, 440233.08it/s]
[08-21 09:44:00] p6837 {2819157270.py:10} INFO - Converting to dataframe and resampling...


In [20]:
for g in df['Goal'].unique():
    df['Goal_'+g]=(df['Goal']==g)
dfr=df.resample('D').sum()
dfr=dfr.replace(np.NaN, 0)
for s in df['Goal'].unique(): dfr['Goal_Cumulative_'+s]=dfr['Goal_'+s].cumsum()

In [21]:
dfr.head()

Unnamed: 0_level_0,Goal_Invisible: General,Goal_Productivity,Goal_Chores,Goal_Personal Development,Goal_Ivy,Goal_Birthdays,Goal_Finance - $2.5M,Goal_Fun,Goal_House,Goal_Giving,...,Goal_Cumulative_Chores,Goal_Cumulative_Personal Development,Goal_Cumulative_Ivy,Goal_Cumulative_Birthdays,Goal_Cumulative_Finance - $2.5M,Goal_Cumulative_Fun,Goal_Cumulative_House,Goal_Cumulative_Giving,Goal_Cumulative_Health,Goal_Cumulative_Veritasium: Analog
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-05-09,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2022-05-10,11,1,1,1,0,0,0,0,0,0,...,1,1,0,0,0,0,0,0,0,0
2022-05-11,7,0,8,0,1,0,0,0,0,0,...,9,1,1,0,0,0,0,0,0,0
2022-05-12,2,0,0,0,0,2,1,0,0,0,...,9,1,1,2,1,0,0,0,0,0
2022-05-13,2,0,1,0,1,0,0,0,0,0,...,10,1,2,2,1,0,0,0,0,0


In [22]:
logger.info('Uploading new actions data...')
l=[]
for s in df['Goal'].unique():
    for d in dfr.index:
        l.append({'Date': str(d.date()),
                  'Actions Completed': int(dfr.loc[d]['Goal_'+s]),
                  'Actions Completed Cumulative': int(dfr.loc[d]['Goal_Cumulative_'+s]),
                  'Goal': s})
res=a.batch_create(l)

[08-21 09:44:00] p6837 {3245747300.py:1} INFO - Uploading new actions data...


## Goal/Habit Tracking
- Can I make this super general so I can visualize any goal over time?
- Pausing here for now -> key pieces are deployed, can contineue to improve over time, but let's get this thing live!!

In [23]:
dst_table_name='[A]Habits'

In [24]:
logger.info('Connecting to Log, Habits, and Goal table...')
log_table=Table(api_key, base_id, 'Log')
habit_table=Table(api_key, base_id, 'Habits')
goal_table=Table(api_key, base_id, 'Goals')

logger.info('Connecting to %s table...', dst_table_name)
a=Table(api_key, base_id, dst_table_name)

[08-21 09:44:49] p6837 {4205170511.py:1} INFO - Connecting to Log, Habits, and Goal table...
[08-21 09:44:49] p6837 {4205170511.py:6} INFO - Connecting to [A]Habits table...


In [25]:
logger.info('Finding Personal Development Logs without "Log" in name')
res=[]
for o in tqdm(log_table.all()):
    if 'fields' not in o: continue
    if 'Habit' not in o['fields']: continue
    if 'Goal 2' not in o['fields']: continue
    if len(o['fields']['Goal 2'])>0:
        goal=goal_table.get(o['fields']['Goal 2'][0])
        habit=habit_table.get(o['fields']['Habit'][0])
        if 'fields' not in goal: continue
        if 'Goal' not in goal['fields']: continue
        if 'Log' in habit['fields']['Habit']: continue
        d={'Date':o['fields']['Date'], 'Habit': habit['fields']['Habit'], 'Goal':goal['fields']['Goal']}
        if 'Minutes' in o['fields']: d['Minutes']=o['fields']['Minutes']
        res.append(d)

[08-21 09:44:49] p6837 {751767185.py:1} INFO - Finding Personal Development Logs without "Log" in name
100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 1056/1056 [01:12<00:00, 14.49it/s]


In [26]:
logger.info('Converting to dataframe and resampling...')
df=pd.DataFrame(res)
df.index=pd.DatetimeIndex(df['Date'])
df=df.sort_index()
dfr=df.resample('D').sum() #Resample

logger.info('Deleting existing %s data..', dst_table_name)
r=a.all()
ids=[o['id'] for o in r]
res=a.batch_delete(ids)

[08-21 09:46:05] p6837 {3975660076.py:1} INFO - Converting to dataframe and resampling...
[08-21 09:46:05] p6837 {3975660076.py:7} INFO - Deleting existing [A]Habits data..


In [27]:
end_date=pd.datetime.now()
if end_date not in dfr.index: #Do we have data from today?
    dfr.loc[end_date]=0 #Add 0s at todays date
    dfr=dfr.resample('D').sum()

  end_date=pd.datetime.now()


In [28]:
logger.info('Computing cumulative sums by type...')

for s in df['Habit'].unique(): dfr['Minutes_'+s]=df[df['Habit']==s]['Minutes'].resample('D').sum()
dfr=dfr.replace(np.NaN, 0)
for s in df['Habit'].unique(): dfr['Minutes_Cumulative_'+s]=dfr['Minutes_'+s].cumsum()

[08-21 09:47:59] p6837 {3856466161.py:1} INFO - Computing cumulative sums by type...


In [29]:
logger.info('Uploading new analytics data...')
l=[]
for s in df['Habit'].unique():
    for d in dfr.index:
        l.append({'Date': str(d.date()),
                  'Goal': df[df['Habit']==s]['Goal'].iloc[0],
                  'Minutes': dfr.loc[d]['Minutes_'+s],
                  'Minutes Cumulative': dfr.loc[d]['Minutes_Cumulative_'+s],
                  'Habit': s})
res=a.batch_create(l)

[08-21 09:47:59] p6837 {1596327494.py:1} INFO - Uploading new analytics data...


## References

In [30]:
dst_table_name='[A]References'

In [31]:
logger.info('Connecting to Log, Habits, and Goal table...')
references_table=Table(api_key, base_id, 'References')

logger.info('Connecting to %s table...', dst_table_name)
a=Table(api_key, base_id, dst_table_name)

[08-21 09:50:17] p6837 {1003154639.py:1} INFO - Connecting to Log, Habits, and Goal table...
[08-21 09:50:17] p6837 {1003154639.py:4} INFO - Connecting to [A]References table...


In [33]:
logger.info('Deleting existing %s data..', dst_table_name)
r=a.all()
ids=[o['id'] for o in r]
res=a.batch_delete(ids)

[08-21 10:00:46] p6837 {1240893064.py:1} INFO - Deleting existing [A]References data..


In [34]:
logger.info('Finding references data...')
res=[]
for o in tqdm(references_table.all()):
    res.append({'Date':o['createdTime'], 'Refs':1})

[08-21 10:00:56] p6837 {3809918773.py:1} INFO - Finding references data...
100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 223/223 [00:00<00:00, 616565.45it/s]


In [35]:
df=pd.DataFrame(res)
df.index=pd.DatetimeIndex(df['Date'])
df=df.sort_index()
dfr=df.resample('D').sum() #Resample

In [36]:
end_date=pd.datetime.now(tz=pytz.timezone('UTC'))
if end_date not in dfr.index: #Do we have data from today?
    dfr.loc[end_date]=0 #Add 0s at todays date
    dfr=dfr.resample('D').sum()

  end_date=pd.datetime.now(tz=pytz.timezone('UTC'))


In [37]:
dfr['Refs Cumulative']=dfr['Refs'].cumsum()
dfr['Date']=[str(dfr.index[i].date()) for i in range(len(dfr))]

In [38]:
res=a.batch_create(dfr.to_dict(orient='Records'))