In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [40]:
# Read the cleaned bed-flow extract into memory, then report its
# (rows, columns) shape and list its column labels as a quick sanity check.
bedflow_2000 = pd.read_csv('bedflow_2000_cleaned.csv')
print(bedflow_2000.shape)
bedflow_2000.columns

(2234899, 7)


Index(['hospitalid', 'patientid', 'admissionid', 'bedflowid', 'arrivaltime',
       'dismissaltime', 'nhsnunitid'],
      dtype='object')

In [41]:
# Summarise column dtypes and memory use of the raw frame; the two time
# columns are still plain `object` (string) columns at this point.
bedflow_2000.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2234899 entries, 0 to 2234898
Data columns (total 7 columns):
 #   Column         Dtype 
---  ------         ----- 
 0   hospitalid     int64 
 1   patientid      int64 
 2   admissionid    int64 
 3   bedflowid      int64 
 4   arrivaltime    object
 5   dismissaltime  object
 6   nhsnunitid     int64 
dtypes: int64(5), object(2)
memory usage: 119.4+ MB


In [42]:
# Columns carried into the analysis: every loaded column except 'hospitalid'.
bf_working_cols = [
    'patientid',
    'admissionid',
    'bedflowid',
    'arrivaltime',
    'dismissaltime',
    'nhsnunitid',
]
# Take an independent copy so later edits to `bf` never touch the raw frame.
bf = bedflow_2000.loc[:, bf_working_cols].copy()

In [5]:
# Parse both timestamp columns (loaded from CSV as strings) into datetime64
# so that the `.dt` accessor can be used on them later, then confirm dtypes.
for ts_col in ('arrivaltime', 'dismissaltime'):
    bf[ts_col] = pd.to_datetime(bf[ts_col])
bf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2234899 entries, 0 to 2234898
Data columns (total 6 columns):
 #   Column         Dtype         
---  ------         -----         
 0   patientid      int64         
 1   admissionid    int64         
 2   bedflowid      int64         
 3   arrivaltime    datetime64[ns]
 4   dismissaltime  datetime64[ns]
 5   nhsnunitid     int64         
dtypes: datetime64[ns](2), int64(4)
memory usage: 102.3 MB


In [43]:
# Order the rows so that, within each (patientid, admissionid), consecutive
# rows are consecutive unit visits; the index is rebuilt as 0..n-1.
# NOTE(review): 'bedflowid' is sorted before 'arrivaltime', so arrival time
# only breaks ties between equal bedflowid values — confirm that bedflowid
# increases chronologically within an admission.
sort_keys = ['patientid', 'admissionid', 'bedflowid', 'arrivaltime']
bf_sorted = bf.sort_values(by=sort_keys).reset_index(drop=True)

In [44]:
# Derive, for every bed-flow row, the unit visited immediately before and
# immediately after it within the same admission. 'nhsnunitid' is the current
# unit, so a transfer is the edge  nhsnunitid -> next_unit.
#
# A row with no predecessor / successor inside its (patientid, admissionid)
# group is that admission's entry / exit point and receives a dummy unit id:
#   1000 : entry
#   1001 : exit
ENTRY_UNIT_ID = 1000
EXIT_UNIT_ID = 1001

# Build the grouping once and reuse it for both shifts.
units_by_admission = bf_sorted.groupby(['patientid', 'admissionid'])['nhsnunitid']

# shift() leaves NaN at group boundaries (which forces float64). Fill those
# gaps directly with the dummy id rather than routing through 0, so a genuine
# unit id of 0 can never be mistaken for an entry/exit marker; then cast back
# to integers. Both shifted columns are computed before either is attached.
prev_unit_ids = units_by_admission.shift(1).fillna(ENTRY_UNIT_ID).astype(int)
next_unit_ids = units_by_admission.shift(-1).fillna(EXIT_UNIT_ID).astype(int)
bf_sorted['prev_unit'] = prev_unit_ids
bf_sorted['next_unit'] = next_unit_ids

print(bf_sorted.shape)
bf_sorted.columns

(2234899, 8)


Index(['patientid', 'admissionid', 'bedflowid', 'arrivaltime', 'dismissaltime',
       'nhsnunitid', 'prev_unit', 'next_unit'],
      dtype='object')

In [45]:
# functions for the edgelist calculation

def get_monthly_bedflow(df, year=None, month=None):
    """Return the rows of ``df`` whose ``dismissaltime`` falls in a given year and/or month.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime-typed ``dismissaltime`` column (the ``.dt``
        accessor is used on it). Rows whose ``dismissaltime`` is NaT never
        match a year or month filter.
    year : int, optional
        Calendar year to keep. ``None`` (default) applies no year filter.
    month : int, optional
        Calendar month (1-12) to keep. ``None`` (default) applies no month
        filter.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the matching rows; ``df`` itself is not modified.
    """
    # No filter requested: hand back an independent copy of everything.
    if year is None and month is None:
        return df.copy()

    # Build one boolean mask and copy only the surviving rows, instead of
    # deep-copying the whole (potentially multi-million-row) frame up front.
    dismissed = df['dismissaltime'].dt
    keep = None
    if year is not None:
        keep = dismissed.year == year
    if month is not None:
        in_month = dismissed.month == month
        keep = in_month if keep is None else (keep & in_month)

    return df[keep].copy()

def generate_edgelist(bf_month, source, dest, source_rename, dest_rename, self_edge=True):
    """Count how often each (source, dest) value pair occurs in ``bf_month``.

    Parameters
    ----------
    bf_month : pandas.DataFrame
        Rows to aggregate; must contain the ``source`` and ``dest`` columns.
    source, dest : str
        Names of the columns holding the edge's origin and target.
    source_rename, dest_rename : str
        Names given to those two columns in the returned edge list.
    self_edge : bool, default True
        When exactly ``False``, rows whose origin equals their target are
        removed (and a notice is printed).

    Returns
    -------
    pandas.DataFrame
        Columns ``[source_rename, dest_rename, 'count']``, one row per pair.
    """
    pair_counts = bf_month.groupby([source, dest]).size()
    edges = pair_counts.reset_index(name='count').rename(
        columns={source: source_rename, dest: dest_rename}
    )

    # Explicit comparison kept on purpose: only a value equal to False
    # (not None or other falsy objects) triggers self-edge removal.
    if self_edge == False:  # noqa: E712
        print('need to remove self-edge i.e. source and dest are same')
        is_transfer = edges[source_rename] != edges[dest_rename]
        edges = edges[is_transfer].copy()

    return edges

def convert_unitid_to_unitname(edgelist, source_col, dest_col, df_unitname, id_col):
    """Replace the unit ids in two edge-list columns with their unit names.

    Parameters
    ----------
    edgelist : pandas.DataFrame
        Edge list to convert. It is modified in place (pass a copy to keep
        the id-based version) and also returned.
    source_col, dest_col : str
        Columns of ``edgelist`` that hold unit ids.
    df_unitname : pandas.DataFrame
        Lookup table with an ``id_col`` column and an ``'nhsnunitname'`` column.
    id_col : str
        Name of the id column in ``df_unitname``.

    Returns
    -------
    pandas.DataFrame
        The same ``edgelist`` object. Ids that are missing from the lookup
        become NaN, as with any ``Series.map`` miss.
    """
    # id -> name lookup, built a single time and shared by both columns.
    id_to_name = df_unitname.set_index(id_col)['nhsnunitname']

    for endpoint_col in (source_col, dest_col):
        edgelist[endpoint_col] = edgelist[endpoint_col].map(id_to_name)

    return edgelist

def save_edgelist(df_edgelist, savepath, file_prefix, year, month):
    """Write an edge list to ``<savepath>/<file_prefix>_<year>_<month>.csv``.

    Parameters
    ----------
    df_edgelist : pandas.DataFrame
        Edge list to persist; written without its index.
    savepath : str
        Target directory, with or without a trailing path separator. It is
        created (including parents) if missing. An empty string means the
        current working directory.
    file_prefix : str
        Leading part of the file name.
    year, month
        Appended to the file name via ``str()`` and echoed in the log line.

    Returns
    -------
    None
    """
    filename = f"{file_prefix}_{year}_{month}.csv"

    # os.makedirs('') raises, so only create a directory when one was named.
    if savepath:
        os.makedirs(savepath, exist_ok=True)

    # Join instead of concatenating: plain `savepath + filename` silently
    # writes next to the directory when the trailing separator is missing.
    df_edgelist.to_csv(os.path.join(savepath, filename), index=False)
    print('Saved edge list for year '+str(year)+' and month '+str(month))

In [13]:
# Load the unit lookup table and keep only the id -> name pair of columns,
# which is what the id-to-name conversion of the edge lists needs.
units = pd.read_csv('nhsnunit_extended_dason.csv')
unitname = units.loc[:, ['nhsnunitid', 'nhsnunitname']]
print(unitname.shape)
unitname

(96, 2)


Unnamed: 0,nhsnunitid,nhsnunitname
0,1,24-Hour Observation Area
1,3,Administrative Areas
2,4,Adult Step Down Unit (post-critical care)
3,8,Allergy Clinic
4,9,Ambulatory Surgery Center
...,...,...
91,300,Neonatal Critical Care (Level IV)
92,998,Acute Home Care
93,999,Unknown
94,1000,Entry


In [31]:
# Build and save one edge list per (year, month) of dismissal, in two
# flavours: raw unit ids and human-readable unit names.

year_list = list(range(2016, 2022))  # 2016 .. 2021 inclusive
month_list = list(range(1, 13))      # 1 .. 12
source_unit, dest_unit = 'nhsnunitid', 'next_unit'          # unit -> following unit
source_entry, dest_entry_unit = 'prev_unit', 'nhsnunitid'   # entry marker -> first unit
source_rename, dest_rename = 'source', 'destination'
id_col = 'nhsnunitid'
self_edge = False

# Output naming depends only on `self_edge`, so it is decided once up front.
if self_edge == True:  # noqa: E712
    folder_name, file_prefix = 'edgelist_hid_2000', "edgelist_hid2000"
else:
    folder_name = 'edgelist_hid_2000_removing_self_edge'
    file_prefix = "edgelist_removing_self_edge_hid2000"

for year in year_list:
    # Both target directories change with the year only.
    year_dir = folder_name+'/'+str(year)+'/'
    savepath_unitid = './edgelist_hid_2000_unitid/'+year_dir
    savepath_unitname = './edgelist_hid_2000_unitname/'+year_dir

    for month in month_list:
        print('Y-M: '+str(year)+'-'+str(month))

        bf_month = get_monthly_bedflow(bf_sorted, year, month)
        # Rows whose prev_unit carries the dummy entry id (1000) are the
        # first stop of an admission.
        is_entry_row = bf_month['prev_unit'] == 1000
        bf_month_entry = bf_month[is_entry_row].copy()

        # unit -> next unit edges (including the final unit -> exit edges)
        edge_list_month = generate_edgelist(
            bf_month, source_unit, dest_unit, source_rename, dest_rename, self_edge
        )
        print('edgelist shape without entry: ', edge_list_month.shape)

        # entry -> first unit edges, stacked on top of the transfer edges
        edge_list_entry = generate_edgelist(
            bf_month_entry, source_entry, dest_entry_unit, source_rename, dest_rename, self_edge
        )
        edge_list_month = pd.concat([edge_list_entry, edge_list_month], axis=0)

        # The converter mutates its argument, so give it a copy and keep the
        # id-based edge list intact.
        edge_list_month_name_converted = convert_unitid_to_unitname(
            edge_list_month.copy(), source_rename, dest_rename, unitname, id_col
        )
        print('edgelist shape with entry: ', edge_list_month.shape)
        print('bedflow shape for this month: ', bf_month.shape)

        save_edgelist(edge_list_month, savepath_unitid, file_prefix, year, month)
        save_edgelist(edge_list_month_name_converted, savepath_unitname, file_prefix, year, month)

Y-M: 2016-1
need to remove self-edge i.e. source and dest are same
edgelist shape without entry:  (259, 3)
need to remove self-edge i.e. source and dest are same
edgelist shape with entry:  (288, 3)
bedflow shape for this month:  (28953, 8)
Saved edge list for year 2016 and month 1
Saved edge list for year 2016 and month 1
Y-M: 2016-2
need to remove self-edge i.e. source and dest are same
edgelist shape without entry:  (263, 3)
need to remove self-edge i.e. source and dest are same
edgelist shape with entry:  (293, 3)
bedflow shape for this month:  (28851, 8)
Saved edge list for year 2016 and month 2
Saved edge list for year 2016 and month 2
Y-M: 2016-3
need to remove self-edge i.e. source and dest are same
edgelist shape without entry:  (275, 3)
need to remove self-edge i.e. source and dest are same
edgelist shape with entry:  (304, 3)
bedflow shape for this month:  (32661, 8)
Saved edge list for year 2016 and month 3
Saved edge list for year 2016 and month 3
Y-M: 2016-4
need to remov

In [39]:
# Turn the name-based edge list left over from the final loop iteration into
# a square adjacency matrix (rows: source unit, columns: destination unit).
# NOTE: this rebinds `units` — previously the unit lookup DataFrame — to the
# sorted array of unit names that occur as a source or a destination.
units = np.unique(edge_list_month_name_converted[['source', 'destination']])
adj_mat = (
    edge_list_month_name_converted
    .pivot(index='source', columns='destination', values='count')
    # make the matrix square: same labels, same order, on both axes
    .reindex(columns=units, index=units, fill_value=0)
    # pairs that never occur are left as NaN by pivot; count them as 0
    .fillna(0)
)

In [38]:
# Display the adjacency matrix (rows: source unit, columns: destination unit).
adj_mat

destination,24-Hour Observation Area,Adult Mixed Acuity Unit,Adult Step Down Unit (post-critical care),Ambulatory Surgery Center,Emergency Department,Endoscopy Suite,Entry,Exit,"Labor, Delivery, Recovery, Postpartum Suite (LDRP)",Medical Cardiac Critical Care,...,Pediatric Medical Ward,Pediatric Medical/Surgical Critical Care,Pediatric Step Down Unit (post-critical care),Pediatric Surgical Ward,Postpartum Ward,Pulmonary Ward,Step down Neonatal Nursery (Level II),Surgical Cardiothoracic Critical Care,Surgical Critical Care,Surgical Ward
source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
24-Hour Observation Area,0.0,0.0,0.0,0.0,0.0,0.0,0.0,357.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0
Adult Mixed Acuity Unit,0.0,0.0,0.0,0.0,0.0,4.0,0.0,96.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0,2.0
Adult Step Down Unit (post-critical care),0.0,0.0,0.0,0.0,0.0,10.0,0.0,103.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3.0,0.0,2.0
Ambulatory Surgery Center,0.0,0.0,0.0,0.0,0.0,0.0,0.0,531.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Emergency Department,0.0,88.0,34.0,0.0,0.0,11.0,0.0,4398.0,1.0,46.0,...,119.0,30.0,14.0,19.0,0.0,66.0,0.0,13.0,37.0,176.0
Endoscopy Suite,0.0,4.0,10.0,0.0,10.0,0.0,0.0,11.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,14.0
Entry,110.0,11.0,40.0,531.0,6180.0,11.0,0.0,0.0,481.0,33.0,...,44.0,24.0,22.0,4.0,160.0,30.0,0.0,11.0,5.0,59.0
Exit,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Labor, Delivery, Recovery, Postpartum Suite (LDRP)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,69.0,0.0,1.0,...,1.0,0.0,0.0,0.0,410.0,0.0,0.0,0.0,0.0,0.0
Medical Cardiac Critical Care,0.0,3.0,3.0,0.0,0.0,0.0,0.0,38.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
