In [1]:
from nomad.io import base as loader
import pandas as pd
import nomad.visit_attribution as visits
import nomad.visit_attribution.home_attribution as homes
import nomad.filters as filters

# Column-name mapping passed as `traj_cols` when the stop tables are loaded:
# keys are the generic roles, values are the column names used in this dataset.
tc = {
    "latitude": "lat",
    "longitude": "lng",
    "timestamp": "event_timestamp",
    "date": "date",
    "user_id": "cuebiq_id",
}

## Read in and clean the stop data

In [2]:
# Table of sensitive-POI cells; its `h3_8` column is used to filter stops below.
m_bases = pd.read_csv(filepath_or_buffer="m_bases_cells.csv")

In [3]:
# Load the three stop tables (baseline, Jan-Feb, Feb-Apr) from parquet using the
# shared column mapping `tc`, in the same order as the original explicit calls.
base_stops, jan_feb_stops, feb_apr_stops = (
    loader.from_file(table_name, format="parquet", traj_cols=tc)
    for table_name in ("base_stops", "jan_feb_stops", "feb_apr_stops")
)

# Kept as a list so the cleaning step can treat all periods uniformly.
dfs = [base_stops, jan_feb_stops, feb_apr_stops]

In [4]:
# Clean every stop table IN PLACE: later cells keep using base_stops,
# jan_feb_stops and feb_apr_stops by name, so the frames must not be rebound.
for df in dfs:
    # Resolution-8 H3 cell of each stop.
    df["h3_cell"] = filters.to_tessellation(
        df, index="h3", res=8, longitude="lng", latitude="lat"
    )
    # Flag stops whose cell is listed in m_bases.h3_8, then
    # remove h3_cells from sensitive POIs (the sPOI column stays, all False).
    df["sPOI"] = df["h3_cell"].isin(m_bases["h3_8"])
    df.query("sPOI == False", inplace=True)
    # Start/end datetimes derived from the unix timestamps and the tz offset
    # via loader.naive_datetime_from_unix_and_offset.
    for out_col, unix_col in (
        ("start_datetime", "event_timestamp"),
        ("end_datetime", "end_timestamp"),
    ):
        df[out_col] = loader.naive_datetime_from_unix_and_offset(
            df[unix_col], df["tz_offset"]
        )

### Load California ZIP codes

In [5]:
def k_nbr_cells(cells, k):
    """Expand a collection of H3 cells by their grid-disk neighbourhoods.

    Parameters
    ----------
    cells : iterable
        H3 cell identifiers to expand.
    k : int
        Disk radius forwarded to ``h3.grid_disk`` for every input cell.

    Returns
    -------
    list
        The unique cells found in the union of all disks, in sorted order.
        Sorting makes the result reproducible across kernel restarts; the
        iteration order of the intermediate set is not.

    Notes
    -----
    ``h3`` is resolved when the function is called, so it must have been
    imported before the first call.
    """
    expanded = set()
    for cell in cells:
        expanded.update(h3.grid_disk(cell, k))
    return sorted(expanded)

In [6]:
import geopandas as gpd
import h3
# ZIP-code polygons indexed by `geography_id`.
zip_codes_ca = gpd.read_file("zip_codes_ca.geojson").set_index("geography_id")

# The two study areas; list-style .loc keeps each selection a GeoDataFrame.
altadena, palisades = (
    zip_codes_ca.loc[[geography_id]]
    for geography_id in ("US.91001", "US.90272")
)

In [7]:
# For each study area: fill the dissolved ZIP polygon with resolution-8 H3 cells
# (contain='overlap'), then pad the result with k=2 grid-disk neighbours.
# NOTE(review): `unary_union` is deprecated in geopandas >= 1.0 in favour of
# `union_all()` - confirm the installed version before switching.
altadena_cells, palisades_cells = (
    k_nbr_cells(
        h3.h3shape_to_cells_experimental(
            h3.geo_to_h3shape(area.geometry.unary_union),
            8,
            contain='overlap',
        ),
        2,
    )
    for area in (altadena, palisades)
)

In [8]:
# Lookup from H3 cell to study ZIP code. Altadena cells are inserted first; a cell
# present in both lists ends up labelled with the Palisades code.
cell_to_zip = dict.fromkeys(altadena_cells, 'US.91001')
cell_to_zip.update(dict.fromkeys(palisades_cells, 'US.90272'))

## Find baseline home H3 cell and ZIP code

In [9]:
import nomad.visit_attribution.home_attribution as homes

# Candidate home cells per user from the baseline stops, keyed on stop start time.
# NOTE(review): dawn_hour=8 / dusk_hour=17 presumably bound the time-of-day window
# used to score candidates - confirm against the nomad documentation.
cand_homes = homes.compute_candidate_homes(
    base_stops,
    user_id="cuebiq_id",
    location_id="h3_cell",
    datetime="start_datetime",
    dawn_hour=8,
    dusk_hour=17,
)

In [10]:
# Pick one home per user from the baseline candidates.
# NOTE(review): min_days / min_weeks look like minimum-presence thresholds - confirm.
home_table = homes.select_home(
    cand_homes,
    min_days=3,
    min_weeks=2,
    stops_table=base_stops,
    user_id='cuebiq_id',
    location_id="h3_cell",
)

# Attach the study ZIP of the baseline home cell (NaN when the cell is not in
# cell_to_zip), rename the cell column so other periods can sit beside it, and
# drop the `date_last_active` column.
home_table = (
    home_table
    .assign(baseline_zip=home_table["h3_cell"].map(cell_to_zip))
    .rename(columns={"h3_cell": "h3_cell_baseline"})
    .drop(columns="date_last_active")
)

### Home cells for the Jan-Feb period

In [12]:
# Candidate home cells per user from the Jan-Feb stops, with the same settings
# as the baseline period.
cand_homes_jan_feb = homes.compute_candidate_homes(
    jan_feb_stops,
    user_id="cuebiq_id",
    location_id="h3_cell",
    datetime="start_datetime",
    dawn_hour=8,
    dusk_hour=17,
)


In [13]:
# Pick one Jan-Feb home per user, using the same thresholds as the baseline.
home_table_jan_feb = homes.select_home(
    cand_homes_jan_feb,
    min_days=3,
    min_weeks=2,
    stops_table=jan_feb_stops,
    user_id='cuebiq_id',
    location_id="h3_cell",
)

# Series of home cells indexed by cuebiq_id and named for the period, ready to
# be joined onto the baseline table.
homes_jan_feb = (
    home_table_jan_feb
    .set_index("cuebiq_id")["h3_cell"]
    .rename("h3_cell_jan_feb")
)

In [14]:
# Left-join the Jan-Feb home cell onto the baseline table by user; users without
# a Jan-Feb home get NaN. Running this cell twice in a row raises, because the
# joined column would then already exist.
home_table = home_table.join(other=homes_jan_feb, on="cuebiq_id", how="left")

### Home cells for the Feb-Apr period

In [16]:
# Candidate home cells per user from the Feb-Apr stops, with the same settings
# as the baseline period.
cand_homes_feb_apr = homes.compute_candidate_homes(
    feb_apr_stops,
    user_id="cuebiq_id",
    location_id="h3_cell",
    datetime="start_datetime",
    dawn_hour=8,
    dusk_hour=17,
)

In [17]:
# Pick one Feb-Apr home per user, using the same thresholds as the baseline.
home_table_feb_apr = homes.select_home(
    cand_homes_feb_apr,
    min_days=3,
    min_weeks=2,
    stops_table=feb_apr_stops,
    user_id='cuebiq_id',
    location_id="h3_cell",
)

# Series of home cells indexed by cuebiq_id and named for the period, ready to
# be joined onto the baseline table.
homes_feb_apr = (
    home_table_feb_apr
    .set_index("cuebiq_id")["h3_cell"]
    .rename("h3_cell_feb_apr")
)

In [18]:
# Left-join the Feb-Apr home cell onto the table by user; users without a
# Feb-Apr home get NaN. Running this cell twice in a row raises, because the
# joined column would then already exist.
home_table = home_table.join(other=homes_feb_apr, on="cuebiq_id", how="left")

## Map period homes to ZIP codes and persist the home table

In [None]:
# Keep only users whose baseline home maps to one of the study ZIP codes, then
# look up the ZIP of their Jan-Feb and Feb-Apr home cells (NaN when the cell is
# missing or not in cell_to_zip).
home_table = (
    home_table[home_table["baseline_zip"].notna()]
    .assign(
        jan_feb_zip=lambda tbl: tbl["h3_cell_jan_feb"].map(cell_to_zip),
        feb_apr_zip=lambda tbl: tbl["h3_cell_feb_apr"].map(cell_to_zip),
    )
)

In [25]:
# Count of home_table rows per baseline ZIP code.
home_table["baseline_zip"].value_counts()

baseline_zip
US.91001    109
US.90272     52
Name: count, dtype: int64

In [26]:
# Count of home_table rows per Jan-Feb ZIP code (rows with no mapped ZIP are excluded).
home_table["jan_feb_zip"].value_counts()

jan_feb_zip
US.91001    29
US.90272    11
Name: count, dtype: int64

In [27]:
# Count of home_table rows per Feb-Apr ZIP code (rows with no mapped ZIP are excluded).
home_table["feb_apr_zip"].value_counts()

feb_apr_zip
US.91001    33
US.90272    12
Name: count, dtype: int64

In [28]:
# Write the per-user home cells and ZIP codes for all three periods to CSV,
# without the DataFrame index.
export_columns = [
    "cuebiq_id",
    "h3_cell_baseline",
    "h3_cell_jan_feb",
    "h3_cell_feb_apr",
    "baseline_zip",
    "jan_feb_zip",
    "feb_apr_zip",
]
home_table[export_columns].to_csv("home_table.csv", index=False)