In [2]:
import pandas as pd
import numpy as np

# Load data
blocks = pd.read_csv('..//data//census//tl_2010_06075_tabblock10.csv')
caseid_block = pd.read_csv('..//data//created//all_blocks.csv')
cases = pd.read_csv('..//data//created//311_cases_clean.csv')

# Create the date range
date_range_use = pd.date_range('2016-05-01','2018-05-01', freq = '12H')

# Create the columns
date_col = list(date_range_use)*len(blocks['GEOID10'].unique())
block_col = sorted(list(blocks['GEOID10'].unique())*len(date_range_use))

# Create the empty matrix
empty_matrix = pd.DataFrame({'Opened_rnd':date_col,
                             'block_fips':block_col})

# Get the hour
cases['hour'] = cases['Opened'].apply(lambda x:int(x[11:13]))

def bin_hours_12(row):
    """
    will group hours together in order to have bins instead of the raw timestamp
    ::input: datetime object
    ::output: datetime object rounded to the nearest 12 hour mark
    """
    if row['hour'] < 12:
        return '0' 
    else:
        return '12'

# Insert the rounded hour back into the dataframe
cases['bin_hour'] = cases.apply(bin_hours_12, axis=1)
cases['new_date'] = cases['Opened_rnd'].apply(lambda x:x[0:10])
cases['Opened_rnd'] = cases['new_date'] + ' '+ cases['bin_hour'] + ':00:00'
cases['Opened_rnd'] = pd.to_datetime(cases['Opened_rnd'])

# Select Case ID and Block ID
caseid_block = caseid_block.loc[:,[
    
    'CaseID',
    'stfid',
]]
caseid_block.columns = ['CaseID','block_fips']

cases = cases.loc[:,[
    
    'CaseID',
    'Opened_rnd',
]]

# Join tables together
case_block_date = cases.merge(caseid_block, on = 'CaseID')
model_data = empty_matrix.merge(case_block_date,
                                on = ['block_fips','Opened_rnd'],
                                how = 'left')

# Create target feature
model_data['poop'] = np.where(model_data['CaseID'].notnull(), 1, 0)
model_data = model_data.reset_index(drop = True)

# Save to hdf
model_data.to_hdf('..//data//model//model_data_12H.h5', key = 'xyz', complib = 'blosc')

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)
