In [1]:
%load_ext autoreload
%autoreload 2

import os
import glob
import json
import time
import pickle
import numpy as np
import pandas as pd
import geopy.distance as gd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.preprocessing import MinMaxScaler
import datetime

from scipy import sparse
from collections import Counter
from sklearn.metrics.pairwise import haversine_distances

import helper_methods_for_aggregate_data_analysis as helper
import covid_constants_and_util as cu

Setting numpy cores to 1
Running code on rambo.stanford.edu; at Stanford=True


  import pandas.util.testing as tm
Importing plotly failed. Interactive plots will not work.


# Prep `visits`, `index`, and `cbg_device_counts` for entire US (run this once)

In [2]:
# Columns to load from SafeGraph Weekly Patterns.
# 'median_dwell' must be listed: the weekly-loading loop reads
# `weekly_df.median_dwell` and then drops that column, so it has to be present
# in every frame returned by the loader.
cols_to_keep_weekly = [
    'safegraph_place_id',
    'visitor_home_cbgs',
    'date_range_start',
    'poi_cbg',
    'median_dwell',
]

# Columns to load from SafeGraph Core Places.
cols_to_keep_places = [
    'safegraph_place_id',
    'city',
    'region',
    'top_category',
    'sub_category',
]

# The three Weekly Patterns delivery "periods"; each has its own directory layout
# (the 'weekly_pre' one is flat, the 'weekly_post' ones are nested four levels deep).
path_1 = os.path.join(cu.CURRENT_DATA_DIR, 'weekly_pre_20200615/main-file/')
path_2 = os.path.join(cu.CURRENT_DATA_DIR, 'weekly_post_20200615/patterns/')
path_3 = os.path.join(cu.CURRENT_DATA_DIR, 'weekly_post_20201130/patterns/')

In [3]:
# Load SafeGraph Core Places, passing the list of columns to keep.
# NOTE(review): later cells use `places.index` as the POI id, so the helper
# presumably indexes the frame by safegraph_place_id -- confirm in helper.
places = helper.load_core_places_data(cols_to_keep_places)
# Load ACS data; its `census_block_group` column is what later defines the CBG index.
acs_d = helper.load_and_reconcile_multiple_acs_data()

Loading /dfs/project/safegraph-homes/all_aggregate_data/raw_safegraph_data/core_places/2020/10/core_poi-part1.csv.gz
[########################################] | 100% Completed |  9.6s
Loading /dfs/project/safegraph-homes/all_aggregate_data/raw_safegraph_data/core_places/2020/10/core_poi-part2.csv.gz
[########################################] | 100% Completed |  9.7s
Loading /dfs/project/safegraph-homes/all_aggregate_data/raw_safegraph_data/core_places/2020/10/core_poi-part3.csv.gz
[########################################] | 100% Completed |  8.6s
Loading /dfs/project/safegraph-homes/all_aggregate_data/raw_safegraph_data/core_places/2020/10/core_poi-part4.csv.gz
[########################################] | 100% Completed | 10.8s
Loading /dfs/project/safegraph-homes/all_aggregate_data/raw_safegraph_data/core_places/2020/10/core_poi-part5.csv.gz
[########################################] | 100% Completed | 11.8s
Loading core places info for 5333501 POIs
220333 rows of 2018 1-year ACS da

In [4]:
# Build one wide DataFrame with a '<week>.visitor_home_cbgs' column per week,
# and collect each week's POI CBG and median dwell as side series.
correct_column_dates = {}  # '<week_str>.visitor_home_cbgs' -> '<date_range_start day>.visitor_home_cbgs'
weekly_dfs = []
poi_cbg_sers = []
poi_dwell_sers = []
for path in [path_1, path_2, path_3]:  # the three data collection "periods"
    path_is_pre = 'weekly_pre' in path
    # The pre-20200615 period is a flat directory; later periods are nested four levels deep.
    if path_is_pre:
        week_paths = glob.glob(os.path.join(path, '*'))
    else:
        week_paths = glob.glob(os.path.join(path, '*/*/*/*'))
    week_paths = sorted(week_paths)
    for week_path in week_paths:  # one entry per week of the period
        path_parts = week_path.split('/')
        if path_is_pre:
            week_str = path_parts[-1][:10]
        else:
            week_str = '-'.join(path_parts[-4:-1])
        if week_str == '2020-12-09' and path == path_3:  # duplicated in path_2 and path_3
            continue
        print(week_str)
        weekly_df = helper.load_weekly_patterns_v2_data(week_str, cols_to_keep_weekly, expand_hourly_visits=False)
        week_visits_col = week_str + '.visitor_home_cbgs'
        # week_str is not necessarily the week's start date; record the date taken
        # from date_range_start so the column can be renamed later.
        range_start_day = weekly_df.iloc[0].date_range_start.split('T')[0]
        correct_column_dates[week_visits_col] = range_start_day + '.visitor_home_cbgs'
        poi_cbg_sers.append(weekly_df.poi_cbg)  # the CBG of the POI
        poi_dwell_sers.append(weekly_df.median_dwell.rename(week_str + '.median_dwell'))
        weekly_df = weekly_df.drop(columns=['date_range_start', 'poi_cbg', 'median_dwell'])
        weekly_df = weekly_df.rename({'visitor_home_cbgs': week_visits_col}, axis=1)
        weekly_dfs.append(weekly_df)
# POIs absent in a given week get the empty-JSON string '{}'.
weekly_df = pd.concat(weekly_dfs, axis=1, sort=False).fillna('{}')

2018-12-31
2019-01-07
2019-01-14
2019-01-21
2019-01-28
2019-02-04
2019-02-11
2019-02-18
2019-02-25
2019-03-04
2019-03-11
2019-03-18
2019-03-25
2019-04-01
2019-04-08
2019-04-15
2019-04-22
2019-04-29
2019-05-06
2019-05-13
2019-05-20
2019-05-27
2019-06-03
2019-06-10
2019-06-17
2019-06-24
2019-07-01
2019-07-08
2019-07-15
2019-07-22
2019-07-29
2019-08-05
2019-08-12
2019-08-19
2019-08-26
2019-09-02
2019-09-09
2019-09-16
2019-09-23
2019-09-30
2019-10-07
2019-10-14
2019-10-21
2019-10-28
2019-11-04
2019-11-11
2019-11-18
2019-11-25
2019-12-02
2019-12-09
2019-12-16
2019-12-23
2019-12-30
2020-01-06
2020-01-13
2020-01-20
2020-01-27
2020-02-03
2020-02-10
2020-02-17
2020-02-24
2020-03-02
2020-03-09
2020-03-16
2020-03-23
2020-03-30
2020-04-06
2020-04-13
2020-04-20
2020-04-27
2020-05-04
2020-05-11
2020-05-18
2020-05-25
2020-06-01
2020-06-08
2020-06-24
2020-07-01
2020-07-08
2020-07-15
2020-07-22
2020-07-29
2020-08-05
2020-08-12
2020-08-19
2020-08-26
2020-09-02
2020-09-09
2020-09-16
2020-09-23
2020-09-30

In [11]:
# Extracting latest CBG registered for POI that is not NaN. POI CBGs are found in the weekly dataset.

def extract_cbg(ser):
    """Return the last non-NaN value in `ser`.

    `ser` is a numeric pandas Series (here: one POI's `poi_cbg` across weeks,
    in the order the weekly series were concatenated).
    Raises IndexError if every entry is NaN.
    """
    values = ser.values
    valid_positions = np.flatnonzero(~np.isnan(values))
    return values[valid_positions[-1]]

# One column per week of POI CBGs; keep POIs with at least one non-NaN CBG,
# then collapse each row to its last non-NaN value.
poi_cbgs = pd.concat(poi_cbg_sers, axis=1, sort=False)
poi_cbgs = poi_cbgs.loc[poi_cbgs.notna().any(axis=1)]
poi_cbgs = poi_cbgs.apply(extract_cbg, axis=1).rename('poi_cbg')

In [12]:
# Build the index variables: visit columns, week labels, POI ids and CBG ids.
visit_suffix = '.visitor_home_cbgs'
visit_cols = [col for col in correct_column_dates.values() if col.endswith(visit_suffix)]
# Slice the suffix off rather than calling str.strip(): strip() treats its
# argument as a *set of characters* to remove from both ends, not as a suffix,
# so it only worked because date strings contain none of those characters.
weeks = [visit_col[:-len(visit_suffix)] for visit_col in visit_cols]
# POIs present in both Core Places and the weekly data, in Core Places order.
pois = places.index[places.index.isin(weekly_df.index)].tolist()
cbgs = acs_d.census_block_group.unique()

In [13]:
# Put POI attributes, weekly visit strings and dwell times on the same POI order.
# The inner join drops POIs that have no recorded CBG, so some labels in `pois`
# are missing afterwards. .reindex() keeps them as all-NaN rows (the later
# NaN-`poi_cbg` mask filters them out); .loc with missing labels is deprecated
# and raises KeyError in newer pandas.
places = places.join(poi_cbgs, how='inner').reindex(pois)
# Every label in `pois` was taken from weekly_df.index, so .loc is safe here.
weekly_df = weekly_df.rename(correct_column_dates, axis=1).loc[pois, :]
poi_dwell = pd.concat(poi_dwell_sers, axis=1, sort=False).loc[pois,:]
poi_dwell.index.name = 'safegraph_place_id'

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  


In [22]:
# SafeGraph delivers visitor_home_cbgs as JSON strings; turn each into a Counter.
for visit_col in visit_cols:
    raw_column = weekly_df[visit_col]
    if type(raw_column.iloc[0]) == Counter:
        continue  # column already converted (e.g. the cell was run before)
    weekly_df[visit_col] = raw_column.fillna('{}').map(
        lambda raw: Counter(helper.cast_keys_to_ints(json.loads(raw))))

In [23]:
# Build one sparse (CBG x POI) visit-count matrix per week.
cbg2idx = {c:i for i, c in enumerate(cbgs)}
visits = []
total = len(cbgs) * len(pois)
for week in weeks:
    poi_idx = []
    cbg_idx = []
    visit_data = []
    # Pull the whole week's column once, ordered like `pois`, instead of doing a
    # scalar .loc lookup per POI (which dominates the runtime for millions of POIs).
    week_counters = weekly_df.loc[pois, f"{week}.visitor_home_cbgs"].to_numpy()
    for p, visits_wp in enumerate(week_counters):
        for src_cbg, num_visits in visits_wp.items():
            if src_cbg in cbg2idx:  # skip home CBGs that are not in the ACS-derived index
                poi_idx.append(p)
                cbg_idx.append(cbg2idx[src_cbg])
                visit_data.append(num_visits)
    visits.append(sparse.csr_matrix((visit_data, (cbg_idx, poi_idx)), shape=(len(cbgs), len(pois))))
    print('Found %d visits (%.3f%% of matrix)' % (len(cbg_idx), len(cbg_idx) * 100 / total))

In [49]:
# Keep only POIs that have a known CBG, then persist the US-wide artifacts.
mask = ~places.poi_cbg.isna().to_numpy()
poi_cbgs = places.loc[mask, 'poi_cbg'].astype(int).tolist()
poi_dwell = poi_dwell.loc[mask, :]
# Each visits matrix has shape (len(cbgs), len(pois)), so the POI mask must
# select columns; indexing rows with it would mismatch the CBG axis.
visits = [visit[:, mask] for visit in visits]
pois = [poi for poi, m in zip(pois, mask) if m]

with open(os.path.join(cu.PATH_TO_CBG_POI_DATA, 'US/visits.pkl'), 'wb') as f:
    pickle.dump(visits, f, protocol=pickle.HIGHEST_PROTOCOL)

with open(os.path.join(cu.PATH_TO_CBG_POI_DATA, 'US/index.pkl'), 'wb') as f:
    pickle.dump({'weeks':weeks, 'pois':pois, 'cbgs':cbgs, 'poi_cbgs':poi_cbgs}, f, protocol=pickle.HIGHEST_PROTOCOL)

poi_dwell.to_csv(os.path.join(cu.PATH_TO_CBG_POI_DATA, 'US/median_dwell.csv'))

In [None]:
# Collect, for every week, the number of devices residing in each CBG
# (rows follow the order of `cbgs`) and write the table to CSV.
results = {'census_block_group':cbgs}
week_order = []
for path in [path_1, path_2, path_3]:
    path_is_pre = 'weekly_pre' in path
    week_paths = sorted(glob.glob(os.path.join(path, '*')) if path_is_pre else glob.glob(os.path.join(path, '*/*/*/*')))
    for week_path in week_paths: # Loop over each week in the data collection period
        week_str = week_path.split('/')[-1][:10] if path_is_pre else '-'.join(week_path.split('/')[-4:-1])
        if (week_str == '2020-12-09') and (path == path_3): # Duplicates in path_2 and path_3
            continue
        home_summary_panel = helper.load_weekly_home_panel_summary(week_str)
        home_summary_panel = home_summary_panel.set_index('census_block_group')
        # CBGs absent from a week's panel are expected (they are zero-filled below),
        # so use .reindex(); .loc with missing labels raises KeyError in newer pandas.
        home_summary_panel = home_summary_panel.reindex(cbgs)
        # Read the start date from the first row that actually has one, since the
        # first CBG may be one of the missing (all-NaN) rows.
        week_start = home_summary_panel.date_range_start.dropna().iloc[0].split('T')[0]
        print(week_str, week_start)
        week_order.append(week_start)
        results[week_start] = home_summary_panel['number_devices_residing'].values

summary_df = pd.DataFrame(results)
summary_df = summary_df.set_index('census_block_group')[week_order]
summary_df = summary_df.fillna(0).astype(int)  # missing CBG/week -> 0 devices
fn = os.path.join(cu.PATH_TO_CBG_POI_DATA, 'cbg_device_counts.csv')
summary_df.to_csv(fn)

# Prep `visits`, `index`, and `poi_attrs` for California

In [17]:
# Reload the US-wide index and visit matrices written by the US prep section.
# These pickles are produced by this notebook; do not point this at untrusted files.
us_index_path = os.path.join(cu.PATH_TO_CBG_POI_DATA, 'US/index.pkl')
with open(us_index_path, 'rb') as f:
    indices = pickle.load(f)
weeks = np.array(indices['weeks'])
cbgs, pois, poi_cbgs = indices['cbgs'], indices['pois'], indices['poi_cbgs']

us_visits_path = os.path.join(cu.PATH_TO_CBG_POI_DATA, 'US/visits.pkl')
with open(us_visits_path, 'rb') as f:
    visits = pickle.load(f)

In [18]:
state_of_interest = 'CA'
state_code_of_interest = 6
# Element-wise test: does this FIPS code belong to the state of interest?
mask_fn = np.vectorize(
    lambda fips_code: helper.extract_state_code_fr_fips(fips_code) == state_code_of_interest
)

# Restrict the CBG axis (matrix rows) to the state.
mask = mask_fn(cbgs)
state_visits = [week_matrix[mask, :] for week_matrix in visits]
cbgs = [cbg for cbg, keep in zip(cbgs, mask) if keep]
print('%d CBGs found in %s' % (len(cbgs), state_of_interest))

# Restrict the POI axis (matrix columns) to POIs whose own CBG is in the state.
mask = mask_fn(poi_cbgs)
state_visits = [week_matrix[:, mask] for week_matrix in state_visits]
pois = [poi for poi, keep in zip(pois, mask) if keep]
print('%d POIs found in %s' % (len(pois), state_of_interest))
poi_cbgs = [poi_cbg for poi_cbg, keep in zip(poi_cbgs, mask) if keep]

23212 CBGs found in CA
614847 POIs found in CA


In [4]:
# Derive the set of county codes covered by the state's CBGs and save the state index.
counties = set(map(helper.extract_county_code_fr_fips, cbgs))
print('Found %d unique counties' % len(counties))
indices = dict(
    cbgs=np.array(cbgs),
    pois=np.array(pois),
    poi_cbgs=np.array(poi_cbgs),
    counties=np.array(sorted(counties)),
    weeks=np.array(indices['weeks']),  # carried over unchanged from the loaded index
)
state_index_path = os.path.join(cu.PATH_TO_CBG_POI_DATA, state_of_interest, 'index.pkl')
with open(state_index_path, 'wb') as f:
    pickle.dump(indices, f)

Found 58 unique counties
[6001 6003 6005 6007 6009 6011 6013 6015 6017 6019 6021 6023 6025 6027
 6029 6031 6033 6035 6037 6039 6041 6043 6045 6047 6049 6051 6053 6055
 6057 6059 6061 6063 6065 6067 6069 6071 6073 6075 6077 6079 6081 6083
 6085 6087 6089 6091 6093 6095 6097 6099 6101 6103 6105 6107 6109 6111
 6113 6115]


In [19]:
# Reload Core Places, this time also with location and opening-hours columns.
cols_to_keep_places = [
    'safegraph_place_id', 'city', 'region', 'top_category', 'sub_category',
] + ['latitude', 'longitude', 'open_hours']
places = helper.load_core_places_data(cols_to_keep_places)
# Attributes kept for the state's POIs, in the order of `pois`.
poi_attr_cols = ['sub_category', 'top_category', 'latitude', 'longitude']
poi_attrs = places.loc[pois, poi_attr_cols]

Loading /dfs/project/safegraph-homes/all_aggregate_data/raw_safegraph_data/core_places/2020/10/core_poi-part1.csv.gz
[########################################] | 100% Completed | 22.9s
Loading /dfs/project/safegraph-homes/all_aggregate_data/raw_safegraph_data/core_places/2020/10/core_poi-part2.csv.gz
[########################################] | 100% Completed | 15.8s
Loading /dfs/project/safegraph-homes/all_aggregate_data/raw_safegraph_data/core_places/2020/10/core_poi-part3.csv.gz
[########################################] | 100% Completed | 19.1s
Loading /dfs/project/safegraph-homes/all_aggregate_data/raw_safegraph_data/core_places/2020/10/core_poi-part4.csv.gz
[########################################] | 100% Completed | 17.4s
Loading /dfs/project/safegraph-homes/all_aggregate_data/raw_safegraph_data/core_places/2020/10/core_poi-part5.csv.gz
[########################################] | 100% Completed | 16.6s
Loading core places info for 5333501 POIs


In [26]:
# Attach each POI's footprint area; POIs without an area get the median of the known areas.
areas_table = pd.read_csv(cu.PATH_TO_SAFEGRAPH_AREAS, index_col='safegraph_place_id')
poi_areas = areas_table['area_square_feet'].reindex(pois)
missing = poi_areas.isna().sum()
print('Missing area for %d (%.2f%%) POIs' % (missing, 100. * missing / len(poi_areas)))
median_area = np.nanmedian(poi_areas)
# .values: assign positionally, since `poi_attrs` is already ordered like `pois`.
poi_attrs['area_square_feet'] = poi_areas.fillna(median_area).values

Missing area for 36889 (6.00%) POIs


In [29]:
# Persist the state's POI attributes and preview the first rows.
poi_attrs_path = os.path.join(cu.PATH_TO_CBG_POI_DATA, state_of_interest, 'poi_attrs.csv')
poi_attrs.to_csv(poi_attrs_path)
poi_attrs.head()

Unnamed: 0_level_0,sub_category,top_category,latitude,longitude,area_square_feet
safegraph_place_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
sg:00558d9ab88d4ad9998e3bb43c3f6f60,Insurance Agencies and Brokerages,"Agencies, Brokerages, and Other Insurance Rela...",33.124147,-117.275411,11220.0
sg:008eaaf6ef3a43bea9757af6a79ff239,Commercial Banking,Depository Credit Intermediation,37.736914,-122.198516,27527.0
sg:01ba472a875d402e95738e6a93cd6e2f,Snack and Nonalcoholic Beverage Bars,Restaurants and Other Eating Places,34.019287,-118.455474,5418.0
sg:01bd97e5e4d34ca5a3fa3521e515495a,Religious Organizations,Religious Organizations,33.811085,-118.057272,17667.0
sg:02b54fb3e3a14f7bab06f145c30bddb6,Furniture Stores,Furniture Stores,33.891485,-118.170562,8485.0


# Prep county-level tiers in California

In [5]:
# functions to get constructed Z variable
def get_large_Z_met(case_rate, test_pos, health_equity, vaccine_stage):  # large county
    """Largest margin of a large county's metrics over the purple/red cutoffs.

    Each metric has its cutoff subtracted (above the cutoff is purple, below is red)
    and the maximum of the three differences is returned, so a negative result
    means every metric is below its cutoff.

    case_rate:      case rate; cutoff is 7 when vaccine_stage == 0 (before statewide
                    vaccine equity goal #1 was met), otherwise 10.
    test_pos:       test positivity; cutoff 8.
    health_equity:  health equity quartile test positivity; cutoff 8.
    vaccine_stage:  0 before vaccine equity goal #1, any other value after.

    The arguments are not modified (the previous in-place `-=` would have
    altered a caller's NumPy array if one was passed).
    """
    case_cutoff = 7 if vaccine_stage == 0 else 10
    return np.max([case_rate - case_cutoff, test_pos - 8, health_equity - 8])

def get_small_Z_met(case_rate, test_pos, vaccine_stage):  # small county
    """Largest margin of a small county's metrics over the purple/red cutoffs.

    Same rule as for large counties but without the health-equity metric:
    returns max(case_rate - cutoff, test_pos - 8), where the case-rate cutoff
    is 7 when vaccine_stage == 0 and 10 otherwise. A negative result means both
    metrics are below their cutoffs.

    The arguments are not modified (the previous in-place `-=` would have
    altered a caller's NumPy array if one was passed).
    """
    case_cutoff = 7 if vaccine_stage == 0 else 10
    return np.max([case_rate - case_cutoff, test_pos - 8])

def get_Z_acc(test_pos, health_equity):
    """Largest margin over the cutoff of 5 used for the accelerated rule.

    Returns max(test_pos - 5, health_equity - 5); a negative result means both
    test positivity and health-equity positivity are below 5.

    The arguments are not modified (the previous in-place `-=` would have
    altered a caller's NumPy array if one was passed).
    """
    return np.max([test_pos - 5, health_equity - 5])

In [6]:
# Dates (MMDDYY) of the CDPH Blueprint data charts to process, in chronological order.
datestrings = ['012621', '020221', '020921', '021621', '022321', '030221', '031321',
               '031621', '032321', '033021']
# All arrays below are (number of charts, 58): one row per chart date and one
# column per county row read from the chart (the first 58 rows are used).
all_cases = np.zeros((len(datestrings), 58))
all_test_pos = np.zeros((len(datestrings), 58))
all_health_eq = np.zeros((len(datestrings), 58))
# Z_met / Z_acc: per-week margins from get_*_Z_met / get_Z_acc.
# Z_hat: margin combined over the current and previous week.
# Z_fin: running minimum of Z_hat over weeks 1..t.  T: actual final tier assignment.
Z_met = np.zeros((len(datestrings), 58))
Z_acc = np.zeros((len(datestrings), 58))
Z_hat = np.zeros((len(datestrings), 58))
Z_fin = np.zeros((len(datestrings), 58))
T = np.zeros((len(datestrings), 58))
population = None
datetimes = []
blueprint_stages = []

for t, ds in enumerate(datestrings):
    dt = datetime.datetime.strptime(ds, '%m%d%y')
    datetimes.append(dt)
    directory = os.path.join(cu.BASE_DIR, 'external_datasets_for_aggregate_analysis/blueprints_cdph')
    fn = os.path.join(directory, 'Blueprint_Data_Chart_%s.xlsx' % ds)
    # Column names are on the second row of the sheet; keep the first 58 data rows.
    df = pd.read_excel(fn, header=1)
    df = df.iloc[:58]
    # Column headers vary between charts, so find the tier columns by
    # case-insensitive substring match.
    cols = {'tier for week':None, 'previous tier assignment':None, 'final tier assignment':None}
    for orig_col in df.columns:
        for k in cols:
            if k in orig_col.lower():
                cols[k] = orig_col
    cols['population'] = 'Population' if 'Population' in df.columns else 'Population^'
    if ds == '031321':  # special case where we say updated, not final
        cols['final tier assignment'] = 'Updated Tier Assignment for 03-13-21,         03-08-21 Assessment'
    
    population = df[cols['population']].values
    cases = df['Case Rate Used for Tier Adjusted Using Linear Adjustment (7-day avg, 7-day lag)'].values
    all_cases[t] = cases
    tests = df['Test Positivity excl prisons (7-day, 7-day lag)'].values
    all_test_pos[t] = tests
    health_eq = df['Health Equity Quartile Test Positivity Excl Prison Cases (7 day, 7 day lag)'].values
    # The health-equity metric is only stored for large counties; small counties keep 0.
    all_health_eq[t, population >= cu.LARGE_COUNTY_CUTOFF] = health_eq[population >= cu.LARGE_COUNTY_CUTOFF]
    vaccine_stage = int(dt >= datetime.datetime(2021, 3, 12))  # when statewide vaccine equity goal #1 was met
    blueprint_stages.append(vaccine_stage)
    for i in range(58):
        pop = population[i]
        if pop < cu.LARGE_COUNTY_CUTOFF:  # small county rules
            Z_met[t, i] = get_small_Z_met(cases[i], tests[i], vaccine_stage)
        else:
            Z_met[t, i] = get_large_Z_met(cases[i], tests[i], health_eq[i], vaccine_stage)
            Z_acc[t, i] = get_Z_acc(tests[i], health_eq[i])
    
    if t > 0:
        # Weekday 1 is Tuesday. Per the recorded output every chart date is a
        # Tuesday except 2021-03-13, which prints weekday 5 (Saturday).
        print(dt, dt.weekday())  # all Tuesdays
        # A rule counts as satisfied only if it holds in both the previous and
        # the current week, hence the element-wise maximum of the two margins.
        Z_met_combined = np.maximum(Z_met[t-1], Z_met[t])
        Z_acc_combined = np.maximum(Z_acc[t-1], Z_acc[t])
        # large counties could meet metrics OR be accelerated
        Z_met_or_acc = np.minimum(Z_met_combined, Z_acc_combined)
        Z_hat[t, population >= cu.LARGE_COUNTY_CUTOFF] = Z_met_or_acc[population >= cu.LARGE_COUNTY_CUTOFF]
        # small counties have no accelerated option
        Z_hat[t, population < cu.LARGE_COUNTY_CUTOFF] = Z_met_combined[population < cu.LARGE_COUNTY_CUTOFF]
        # Running minimum over weeks 1..t: once Z_hat has been negative in any
        # week so far, Z_fin stays negative.
        Z_fin[t] = np.min(Z_hat[1:t+1], axis=0)
        
        # Compare the reconstructed rule (Z_fin < 0) against the published tier,
        # binarized as "tier greater than 1".
        expected_assignment = (Z_fin[t] < 0).astype(int)
        actual_assignment = df[cols['final tier assignment']].values.astype(int)
        T[t] = actual_assignment
        actual_assignment_bin = (actual_assignment > 1).astype(int)
        diff = expected_assignment - actual_assignment_bin
        total = np.sum(np.abs(diff))
        if total > 0:
            # Show only the counties where the reconstruction disagrees.
            new_df = pd.DataFrame({'county':df.County, 'expected':expected_assignment, 'actual':actual_assignment_bin,
                                   'actual_tier':actual_assignment})
            print(new_df[new_df.expected != new_df.actual])
        else:
            print('all matched!')
    
    # This runs AFTER the 030221 comparison above on purpose: the next chart
    # (031321) reads Z_met[t-1], which must then use the post-goal case cutoff.
    if ds == '030221':  # need to rewrite case-related metrics because of change in thresholds
        for i in range(58):
            pop = population[i]
            if pop < cu.LARGE_COUNTY_CUTOFF:
                Z_met[t, i] = get_small_Z_met(cases[i], tests[i], vaccine_stage=1)
            else:
                Z_met[t, i] = get_large_Z_met(cases[i], tests[i], health_eq[i], vaccine_stage=1)

  warn("""Cannot parse header or footer so it will be ignored""")
  warn("""Cannot parse header or footer so it will be ignored""")


2021-02-02 00:00:00 1
      county  expected  actual  actual_tier
21  Mariposa         0       1            2
45   Sierra*         0       1            3
2021-02-09 00:00:00 1
       county  expected  actual  actual_tier
21  Mariposa*         0       1            2
2021-02-16 00:00:00 1
      county  expected  actual  actual_tier
21  Mariposa         0       1            2
2021-02-23 00:00:00 1
all matched!
2021-03-02 00:00:00 1
all matched!
2021-03-13 00:00:00 5
        county  expected  actual  actual_tier
2     Amador**         0       1            2
34  San Benito         0       1            2
48      Sonoma         0       1            2
2021-03-16 00:00:00 1
   county  expected  actual  actual_tier
2  Amador         0       1            2
2021-03-23 00:00:00 1
           county  expected  actual  actual_tier
49  Stanislaus***         0       1            2
2021-03-30 00:00:00 1
        county  expected  actual  actual_tier
49  Stanislaus         0       1            2


In [42]:
# Bundle the county-level arrays (week 0 is dropped: it has no previous week to
# combine with) and pickle them.
# NOTE(review): `fips` is not defined anywhere in the visible notebook; per the
# recorded output it is a (58,) ndarray. It must exist in the kernel before this
# cell runs -- confirm where it comes from so Restart & Run All works.
bundle = (
    fips,
    population,
    np.array(datetimes[1:]),
    np.array(blueprint_stages[1:]),
    T[1:],
    Z_fin[1:],
)
for component in bundle:
    print(type(component), component.shape)
fn = os.path.join(cu.PATH_TO_CBG_POI_DATA, 'CA/county_dynamic_attrs_2021_t1t2.pkl')
with open(fn, 'wb') as f:
    pickle.dump(bundle, f)

<class 'numpy.ndarray'> (58,)
<class 'numpy.ndarray'> (58,)
<class 'numpy.ndarray'> (9,)
<class 'numpy.ndarray'> (9,)
<class 'numpy.ndarray'> (9, 58)
<class 'numpy.ndarray'> (9, 58)


In [15]:
# Check triggering patterns: for large counties whose reconstructed assignment
# matches the published one (tiers 1 and 2 only), record which input decided
# each max/min in the rule.
Z_met_counts = []
Z_acc_counts = []
Z_hat_counts = []
Z_fin_counts = []
for t in range(1, len(datestrings)):
    dt = datetime.datetime.strptime(datestrings[t], '%m%d%y')
    # Case-rate cutoff is 10 from 2021-03-12 on, 7 before.
    case_cutoff = 10 if dt >= datetime.datetime(2021, 3, 12) else 7
    print(dt, case_cutoff)
    for i in range(58):
        expected_assignment = int(Z_fin[t,i] < 0)
        actual_assignment_bin = int(T[t,i] > 1)
        is_large = population[i] >= cu.LARGE_COUNTY_CUTOFF
        is_match = expected_assignment == actual_assignment_bin
        if not (is_large and is_match and (T[t, i] in [1, 2])):
            continue

        # Standard metrics: current week (positions 0-2), previous week (positions 3-5).
        met_margins = [all_cases[t,i] - case_cutoff,
                       all_test_pos[t,i] - 8,
                       all_health_eq[t,i] - 8,
                       all_cases[t-1,i] - case_cutoff,
                       all_test_pos[t-1,i] - 8,
                       all_health_eq[t-1,i] - 8]
        Z_met_counts.append(np.argmax(met_margins))
        Z_met_it = np.max(met_margins)

        # Accelerated rule: current week (positions 0-1), previous week (positions 2-3).
        acc_margins = [all_test_pos[t,i] - 5,
                       all_health_eq[t,i] - 5,
                       all_test_pos[t-1,i] - 5,
                       all_health_eq[t-1,i] - 5]
        Z_acc_counts.append(np.argmax(acc_margins))
        Z_acc_it = np.max(acc_margins)

        # 0 if the standard metrics give the smaller margin, 1 if the accelerated rule does.
        rule_margins = [Z_met_it, Z_acc_it]
        Z_hat_counts.append(np.argmin(rule_margins))
        Z_hat_it = np.min(rule_margins)
        # Sanity check against the value computed in the tier-construction cell.
        assert Z_hat_it == Z_hat[t,i]
        # 1 if the current week's Z_hat is the (first) minimum over weeks 1..t, else 0.
        Z_fin_counts.append(int(np.argmin(Z_hat[1:t+1,i]) == (t-1)))

2021-02-02 00:00:00 7
2021-02-09 00:00:00 7
2021-02-16 00:00:00 7
2021-02-23 00:00:00 7
2021-03-02 00:00:00 7
2021-03-13 00:00:00 10
2021-03-16 00:00:00 10
2021-03-23 00:00:00 10
2021-03-30 00:00:00 10


In [16]:
# Tally how often each input position decided each step of the rule.
trigger_summaries = [
    ('Max over metrics', Z_met_counts),
    ('Max over accelerated', Z_acc_counts),
    ('Min over two rules', Z_hat_counts),
    ('Min over prev and curr', Z_fin_counts),
]
for label, picks in trigger_summaries:
    print(label, Counter(picks))

Max over metrics Counter({3: 244, 5: 25, 0: 15, 2: 12})
Max over accelerated Counter({3: 231, 1: 42, 2: 19, 0: 4})
Min over two rules Counter({1: 188, 0: 108})
Min over prev and curr Counter({1: 260, 0: 36})
