In [1]:
import pandas as pd
import hillmaker as hm
from pandas import DataFrame
from pandas import Timestamp
from datetime import datetime
from datetime import timedelta
from pandas.tseries.offsets import Minute

In [2]:
import itertools

In [3]:
from hillmaker import bydatetime

In [4]:
help(bydatetime.make_bydatetime)

Help on function make_bydatetime in module hillmaker.bydatetime:

make_bydatetime(stops_df, infield, outfield, start_analysis, end_analysis, catfield, total_str='Total', bin_size_minutes=60, cat_to_exclude=None, totals=True, verbose=0)
    Create bydatetime table based on user inputs.
    
    This is the table from which summary statistics can be computed.
    
    Parameters
    ----------
    stops_df: DataFrame
        Stop data
    
    infield: string
        Name of column in stops_df to use as arrival datetime
    
    outfield: string
        Name of column in stops_df to use as departure datetime
    
    start_analysis: datetime
        Start date for the analysis
    
    end_analysis: datetime
        End date for the analysis
    
    catfield : string or List of strings, optional
        Column name(s) corresponding to the categories. If none is specified, then only overall occupancy is analyzed.
    
    total_str: string, default 'Total'
        Value to use for the to

In [8]:
file_stopdata = '../data/ShortStay2.csv'

scenario = 'iss17_test1'
in_fld_name = 'InRoomTS'
out_fld_name = 'OutRoomTS'
cat_fld_name = 'PatType'
start_analysis_dt = '1/1/1996'
end_analysis_dt = '3/30/1996 23:45'

# Optional inputs
tot_fld_name = 'SSU'
bin_size_minutes = 60

In [9]:
stops_df = pd.read_csv(file_stopdata, parse_dates=[in_fld_name, out_fld_name])

In [10]:
stops_df.tail(4)

Unnamed: 0,PatID,InRoomTS,OutRoomTS,PatType,Severity
59873,59874,1996-09-30 20:23:00,1996-09-30 21:30:00,IVT,2
59874,59875,1996-09-30 21:00:00,1996-09-30 22:45:00,CAT,1
59875,59876,1996-09-30 21:57:00,1996-09-30 22:40:00,IVT,2
59876,59877,1996-09-30 22:45:00,1996-09-30 23:35:00,CAT,1


In [11]:
catfield = ['PatType','Severity']

In [12]:
catfield[1]

'Severity'

In [13]:
len(catfield)

2

In [11]:
range(len(catfield))

range(0, 2)

In [32]:
for i in range(len(catfield)):
    print(i)

0
1


In [33]:
cats = []
cat_to_exclude = [[],[]]
for i in range(len(catfield)):
    cats.append(tuple([c for c in stops_df[catfield[i]].unique() if c not in cat_to_exclude[i]]))

In [34]:
cats

[('IVT', 'MYE', 'CAT', 'ART', 'OTH'), (1, 2)]

In [14]:
categories = []
cat_to_exclude = [[],[]]
if cat_to_exclude is not None:
        for i in range(len(catfield)):
            categories.append(tuple([c for c in stops_df[catfield[i]].unique() if c not in cat_to_exclude[i]]))
else:
        for i in range(len(catfield)):
            categories.append(tuple([c for c in stops_df[catfield[i]].unique()]))


In [15]:
categories

[('IVT', 'MYE', 'CAT', 'ART', 'OTH'), (1, 2)]

In [15]:
n_catfields = len(catfield)

In [16]:
cols = []
measures = ['datetime', 'arrivals', 'departures', 'occupancy']
for cat in catfield:
    cols.append(cat)
cols.extend(measures)
cols

['PatType', 'Severity', 'datetime', 'arrivals', 'departures', 'occupancy']

In [17]:
# Create a list of column names for the by datetime table and then an empty data frame based on these columns.

bydt_df = pd.DataFrame(columns=cols)

In [44]:
bydt_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 0 entries
Data columns (total 6 columns):
PatType       0 non-null object
Severity      0 non-null object
datetime      0 non-null object
arrivals      0 non-null object
departures    0 non-null object
occupancy     0 non-null object
dtypes: object(6)
memory usage: 0.0+ bytes


In [18]:
for z in zip([c for c in categories]):
    print(z)

(('IVT', 'MYE', 'CAT', 'ART', 'OTH'),)
((1, 2),)


In [51]:
itertools.product(('IVT', 'MYE', 'CAT', 'ART', 'OTH'),(1,2))

<itertools.product at 0x7f3f8fff9d38>

In [56]:
for p in itertools.product(['IVT', 'MYE', 'CAT', 'ART', 'OTH'],[1,2]):
    print(p)

('IVT', 1)
('IVT', 2)
('MYE', 1)
('MYE', 2)
('CAT', 1)
('CAT', 2)
('ART', 1)
('ART', 2)
('OTH', 1)
('OTH', 2)


In [21]:
for p in itertools.product(*categories):
    print(p)
    print(*p)

('IVT', 1)
IVT 1
('IVT', 2)
IVT 2
('MYE', 1)
MYE 1
('MYE', 2)
MYE 2
('CAT', 1)
CAT 1
('CAT', 2)
CAT 2
('ART', 1)
ART 1
('ART', 2)
ART 2
('OTH', 1)
OTH 1
('OTH', 2)
OTH 2


In [23]:
print(*categories)

('IVT', 'MYE', 'CAT', 'ART', 'OTH') (1, 2)


In [36]:
for p in itertools.product(('IVT', 'MYE', 'CAT', 'ART', 'OTH'),(1,2)):
    print([*p],p[0],p[1])

['IVT', 1] IVT 1
['IVT', 2] IVT 2
['MYE', 1] MYE 1
['MYE', 2] MYE 2
['CAT', 1] CAT 1
['CAT', 2] CAT 2
['ART', 1] ART 1
['ART', 2] ART 2
['OTH', 1] OTH 1
['OTH', 2] OTH 2


In [19]:
df1 = pd.DataFrame({'B': ['B0', 'B1', 'B2', 'B3'],
   ...:                     'D': ['D0', 'D1', 'D2', 'D3']},
   ...:                     index=[0, 1, 2, 3])

In [19]:
df1 = pd.DataFrame({'B': ['B0', 'B1', 'B2', 'B3'],
   ...:                     'D': ['D0', 'D1', 'D2', 'D3']},
   ...:                     index=[0, 1, 2, 3])

In [20]:
df1

Unnamed: 0,B,D
0,B0,D0
1,B1,D1
2,B2,D2
3,B3,D3


In [21]:
df2 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
   ...:                     'C': ['C0', 'C1', 'C2', 'C3']},               
   ...:                     index=[0, 1, 2, 3])

In [22]:
df2

Unnamed: 0,A,C
0,A0,C0
1,A1,C1
2,A2,C2
3,A3,C3


In [23]:
pd.concat([df1,df2],axis=1)

Unnamed: 0,B,D,A,C
0,B0,D0,A0,C0
1,B1,D1,A1,C1
2,B2,D2,A2,C2
3,B3,D3,A3,C3


In [32]:
print(catfield)

['PatType', 'Severity']


In [31]:
rng_bydt = pd.date_range(start_analysis_dt, end_analysis_dt, freq=Minute(bin_size_minutes))

In [39]:
itertools.product(*categories)

<itertools.product at 0x7fb68b660c60>

In [None]:
len_bydt = len(rng_bydt)
for p in itertools.product(*categories):
        i=0
        cat_df = DataFrame(columns=catfield)
        for c in [*p]:
            print(c)
            bydt_catdata = {catfield[i]: [c] * len_bydt}
            cat_df_cat = DataFrame(bydt_catdata, columns=[catfield[i]])
            cat_df_cat.info()
            #cat_df = pd.concat([cat_df, cat_df_cat],axis=1)
            i+=1

In [35]:
cat_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2160 entries, 0 to 2159
Data columns (total 4 columns):
PatType                      0 non-null object
Severity                     0 non-null object
(IVT, MYE, CAT, ART, OTH)    2160 non-null object
(1, 2)                       2160 non-null int64
dtypes: int64(1), object(3)
memory usage: 84.4+ KB
