In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yaml # If you don't have this package use 'pip install pyyaml' into commandline or terminal
# Matplotlib Config
%matplotlib inline
plt.style.use('fivethirtyeight')
# Turn 'building.yaml' file into a python dictionary using PyYAML 
with open('building.yaml') as f:
    building_data = f.read()
    building_data = yaml.load(building_data)
    f.close()
building = pd.DataFrame(building_data) #building contains only two rows out of which only one(first one) contains actual info
building = pd.DataFrame(building['building1'][0])
    
### Column names for our dataframe
col_names = ['sec',
             'agent_id',
             'agent_type',
             'has_luggage',
             'is_disabled',
             'X',
             'Y',
             'Z',
             'velocity',
             'queue_id',
             'lookUp_X',
             'lookUp_Y']

# Read Agents/thread_0.txt .. Agents/thread_7.txt with identical settings.
# header=0 together with names= treats each file's first line as a header row
# and replaces it with col_names.
frames = [
    pd.read_csv('Agents/thread_{}.txt'.format(i), header=0, names=col_names)
    for i in range(8)
]
# Keep the per-file names bound in case another cell still refers to them.
df1, df2, df3, df4, df5, df6, df7, df8 = frames

df = pd.concat(frames)
agents = df[df['Z'] >= 0]  # keep only the rows whose Z value is non-negative (zero included)

In [102]:
# Reduce the frame to one row per distinct (sec, agent_id, agent_type) combination.
id_columns = ['sec', 'agent_id', 'agent_type']
agents = agents.loc[:, id_columns].drop_duplicates()
print(agents.shape)
agents.head()

(3830209, 3)


Unnamed: 0,sec,agent_id,agent_type
0,70201,0,4
1,70201,8,4
2,70201,16,1
3,70201,24,1
4,70201,32,1


In [103]:
# Half-open time window (first_sec, last_sec + 1) spanning every row of `agents`.
# The +1 keeps the last second inside a window that is filtered with `sec < end`.
# Vectorised min()/max() replace two unique() passes and a Python-level scan.
tlimit = (agents['sec'].min(), agents['sec'].max() + 1)
tlimit

(70201, 71400)

In [95]:
def split_time(tlimt):
    """Split a (start, end) time window into two adjacent halves.

    The half-width is rounded to one decimal place before it is added to the
    start, so the shared boundary has at most one decimal digit and the two
    halves can differ slightly in length.

    Returns a pair ((start, mid), (mid, end)).
    """
    start, end = tlimt[0], tlimt[1]
    half_width = round((end - start) / 2, 1)
    midpoint = start + half_width
    return ((start, midpoint), (midpoint, end))

# Number of unique agents in the building during a time period, overall and per agent_type
def get_metrics(agents,tlimt):
    """Count the distinct agents observed inside a half-open time window.

    Parameters
    ----------
    agents : DataFrame
        Must provide the columns 'sec', 'agent_id' and 'agent_type'.
    tlimt : (start, end)
        Rows with start <= sec < end are taken into account.

    Returns
    -------
    (no_of_agents, agents_by_type)
        no_of_agents is the number of distinct agent_id values in the window.
        agents_by_type maps each agent_type to its number of distinct
        agent_id values, as plain ints; types 0-4 are always present and
        default to 0. An agent_id that occurs under several types inside the
        window is counted once for each of them, so the per-type counts can
        add up to more than no_of_agents.
    """
    in_window = agents[(agents['sec'] >= tlimt[0]) & (agents['sec'] < tlimt[1])]
    # No drop_duplicates() on the slice: distinct counts are unaffected by
    # repeated rows, and hashing every row on each call is wasted work.
    no_of_agents = len(in_window['agent_id'].unique())
    agents_by_type = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
    per_type = in_window.groupby('agent_type')['agent_id'].nunique()
    for agent_type, count in per_type.items():
        agents_by_type[agent_type] = int(count)
    return (no_of_agents, agents_by_type)

In [96]:
# Metrics over the whole tlimit window: (unique agents, unique agents per agent_type)
get_metrics(agents,tlimit)

(8498, {0: 6874, 1: 463, 2: 1155, 3: 62, 4: 206})

In [97]:
# n is the number of binary splits: n=2 gives 4 time periods, n=4 gives 16, and so on
def temporal_split(n,agents,tlimit):
    """Halve the tlimit window n times and collect metrics for every piece.

    Each pass replaces every window with the two halves returned by
    split_time, so n passes leave 2**n consecutive windows. get_metrics is
    then evaluated once per window.

    Returns a DataFrame with one row per window and the columns
    'time_period', 'no_of_agents' and 'agents_by_type'.
    """
    periods = [tlimit]
    for _ in range(n):
        # Every existing window contributes its lower and upper half, in order.
        periods = [half for window in periods for half in split_time(window)]

    rows = []
    for window in periods:
        total, by_type = get_metrics(agents, window)
        rows.append(dict(time_period=window,
                         no_of_agents=total,
                         agents_by_type=by_type))

    return pd.DataFrame(rows)

In [98]:
# Produces 2^n time periods, with one get_metrics call per period
# NOTE(review): the recorded output has 4 rows, which is what n=2 yields; n=8 would
# give 256 periods. Confirm the intended n and re-run this cell.
k = temporal_split(8,agents,tlimit)
k.shape

(4, 3)

In [99]:
# Show the per-period metrics table returned by temporal_split
k

Unnamed: 0,agents_by_type,no_of_agents,time_period
0,"{0: 4018, 1: 446, 2: 705, 3: 57, 4: 196}",5422,"(70201, 70500.8)"
1,"{0: 5216, 1: 294, 2: 854, 3: 45, 4: 196}",6605,"(70500.8, 70800.5)"
2,"{0: 5889, 1: 226, 2: 957, 3: 54, 4: 199}",7325,"(70800.5, 71100.3)"
3,"{0: 5686, 1: 163, 2: 1015, 3: 51, 4: 203}",7118,"(71100.3, 71400)"
