# Data Analysis

## Imports
Import all the libraries needed.

In [42]:
from __future__ import annotations

from typing import Any, Optional

import numpy as np
import pandas as pd

import pickle
import os

from tqdm import tqdm

SIMULATION_TIME = 3600

pd.set_option('display.max_rows', 20)

## Get files
Get at what time the last run was, then automatically open the last run.

In [43]:
## Get last run
runs = tuple(os.walk("results"))[0][1]
runs.sort()
last_run = runs[-1]
last_run

'1651754413'

In [44]:
# Get last run files
files = tuple(os.walk(f"results/{last_run}"))[0][2]
files

['ground_floor_rest.pickle', 'top_floor_rest.pickle', 'base.pickle']

In [45]:
# Open files in DATA
RAW_DATA: dict[str, Any] = {}

for file_name in files:
    with open(f"results/{last_run}/{file_name}", "rb") as f:
        RAW_DATA[file_name.split(".")[0]] = pickle.load(f)

## Convert to analyzable data
All the data is in different numpy matrices, or pandas DataFrames, we combine it into a dataframe s.t. we can get our key indicators.

### Occupancy data

In [46]:
DATA: dict[str, dict[str, Optional[pd.DataFrame]]] = {}

for r_data in RAW_DATA.items():
    strategy_name, d = r_data

    waiting_df: Optional[pd.DataFrame] = None
    elevator_df: Optional[pd.DataFrame] = None
    waiting_df_list_to_add = []
    elevator_df_list_to_add = []
    for iteration, curr_state_history in tqdm(enumerate(d["state_history"])):
        curr_scheduled_events = d["scheduled_events"][iteration]
        waiting_index = pd.MultiIndex.from_product([[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]]).set_names(["arrival", "target"])
        columns = pd.MultiIndex.from_product([[iteration], curr_scheduled_events.index]).set_names(["iteration", "time"])
        add_waiting_df = pd.DataFrame({}, index=waiting_index, columns=columns)
        add_elevator_df = pd.DataFrame({}, index=pd.Index([0, 1, 2, 3, 4, 5], name="target"), columns=columns)

        # print(waiting_df)
        for i, values in enumerate(curr_state_history):
            t = curr_scheduled_events.index[i]
            # display(waiting_df.loc[:, (iteration, t)])
            add_waiting_df.loc[:, (iteration, t)] = pd.DataFrame(values["waiting"]).stack()
            add_elevator_df.loc[:, (iteration, t)] = values["elevator"]
        
        if waiting_df is None:
            waiting_df = add_waiting_df
        else:
            waiting_df_list_to_add.append(add_waiting_df)

        if elevator_df is None:
            elevator_df = add_elevator_df
        else:
            elevator_df_list_to_add.append(add_elevator_df)
    
    waiting_df.join(waiting_df_list_to_add) #type:ignore
    elevator_df.join(elevator_df_list_to_add) # type:ignore
    DATA[str(strategy_name)] = {"waiting": waiting_df, "elevator": elevator_df}

100it [01:22,  1.21it/s]
100it [01:22,  1.22it/s]
100it [01:19,  1.26it/s]


### Event data

In [47]:
event_data_columns = pd.MultiIndex.from_arrays([[], []]).set_names(["strategy", "iteration"])
event_data_index = pd.Index([]).set_names("event_number")
EVENT_DATA: pd.DataFrame = pd.DataFrame({}, columns=event_data_columns, index=event_data_index)

for strategy_name, d in RAW_DATA.items():
    max_length = 0
    max_event_happened = 0
    for iteration, curr_scheduled_events in enumerate(d["scheduled_events"]):
        max_length = max(max_length, curr_scheduled_events.shape[0])
        EVENT_DATA = EVENT_DATA.reindex(range(max_length))
        EVENT_DATA.loc[:, (strategy_name, iteration)] = pd.DataFrame(list(curr_scheduled_events.idxmin(axis=1)))
     
EVENT_DATA

strategy,ground_floor_rest,ground_floor_rest,ground_floor_rest,ground_floor_rest,ground_floor_rest,ground_floor_rest,ground_floor_rest,ground_floor_rest,ground_floor_rest,ground_floor_rest,...,base,base,base,base,base,base,base,base,base,base
iteration,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
event_number,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0,ElevatorArrival4,ElevatorArrival4,"Arrival2,0","Arrival4,0","Arrival0,5",ElevatorArrival4,ElevatorArrival4,ElevatorArrival4,ElevatorArrival4,"Arrival2,0",...,"Arrival2,0","Arrival5,0","Arrival0,4","Arrival0,4","Arrival0,2","Arrival3,0","Arrival4,0","Arrival0,2","Arrival1,0","Arrival4,0"
1,"Arrival0,3",ElevatorArrival3,ElevatorArrival4,"Arrival0,4",ElevatorArrival4,ElevatorArrival3,ElevatorArrival3,ElevatorArrival3,ElevatorArrival3,ElevatorArrival4,...,ElevatorArrival4,"Arrival3,0",ElevatorArrival4,ElevatorArrival4,ElevatorArrival4,"Arrival0,1",ElevatorArrival4,ElevatorArrival4,"Arrival2,0",ElevatorArrival4
2,ElevatorArrival3,"Arrival0,1",ElevatorArrival3,ElevatorArrival4,ElevatorArrival3,ElevatorArrival2,ElevatorArrival2,ElevatorArrival2,"Arrival0,4",ElevatorArrival3,...,"Arrival4,0",DoorClose,ElevatorArrival3,ElevatorArrival3,ElevatorArrival3,ElevatorArrival4,DoorClose,"Arrival5,3",ElevatorArrival4,"Arrival4,0"
3,ElevatorArrival2,ElevatorArrival2,"Arrival0,5",DoorClose,ElevatorArrival2,ElevatorArrival1,"Arrival0,4",ElevatorArrival1,ElevatorArrival2,ElevatorArrival2,...,ElevatorArrival3,ElevatorArrival4,ElevatorArrival2,"Arrival1,0",ElevatorArrival2,ElevatorArrival3,"Arrival3,5",ElevatorArrival3,ElevatorArrival3,DoorClose
4,ElevatorArrival1,ElevatorArrival1,ElevatorArrival2,ElevatorArrival3,ElevatorArrival1,"Arrival5,2",ElevatorArrival1,ElevatorArrival0,ElevatorArrival1,DoorClose,...,ElevatorArrival2,ElevatorArrival3,"Arrival2,0",ElevatorArrival2,ElevatorArrival1,"Arrival3,0",ElevatorArrival3,ElevatorArrival2,ElevatorArrival2,ElevatorArrival3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
584,,,,,,,,,,,...,,,,,,,,,,
585,,,,,,,,,,,...,,,,,,,,,,
586,,,,,,,,,,,...,,,,,,,,,,
587,,,,,,,,,,,...,,,,,,,,,,


# Key Indicators
We have the following key indicators:
- Average waiting clients (per floor, and in total)
- Average people in the system
- Average travel time (total, and in elevator)
- Average elevator occupancy
- Average elevator movements

## Average elevator movements

**We moeten er nog wel voor zorgen dat de data analyse, de statistische methoden gebruikt die we hebben geleerd.**

In [48]:
elevator_movements = EVENT_DATA.stack().stack().str.contains("Elevator").unstack().unstack().sum().unstack()
mean = elevator_movements.mean(axis=1)
std = elevator_movements.std(axis=1)/np.sqrt(len(elevator_movements.columns))
conf = pd.DataFrame([(mean - std * 1.96).rename("lower"), mean.rename("mean"),(mean + std * 1.96).rename("upper")]).transpose()
conf

Unnamed: 0_level_0,lower,mean,upper
strategy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ground_floor_rest,256.292933,258.16,260.027067
top_floor_rest,255.059452,256.94,258.820548
base,259.435569,261.04,262.644431


## Average waiting clients

Oke de strategie om dit te berekenen gaat als volgt:
1. Verander de index in het verschil tot het getal ervoor
2. Vermenigvuldig dit met de huidige status
3. Sum dat bij elkaar
4. Deel dat door de totale tijd

Dit kan gedaan worden voor de lift en de wachtenden, dat kan je daarna ook nog bij elkaar optellen door het aantal mensen in het systeem te krijgen.

In [49]:
for strategy_name, d in DATA.items():
    print(f"Strategy: {strategy_name}")
    people_waiting_inline_per_floor = d["waiting"].groupby(["arrival"]).sum()
    people_waiting_inline_per_floor.index.set_names("floor", inplace=True)
    t = np.array(people_waiting_inline_per_floor.columns.get_level_values(1))
    t_1 = np.append(people_waiting_inline_per_floor.columns.get_level_values(1)[1:], 0)
    time_active = t_1 - t
    time_active[time_active < 0] = (np.maximum(SIMULATION_TIME - t, 0))[time_active < 0]
    time_active = np.minimum(time_active, SIMULATION_TIME)
    waiting_times_time = people_waiting_inline_per_floor * time_active
    average_people_in_line_per_floor_per_iteration = waiting_times_time.groupby(["iteration"], axis=1).sum()/SIMULATION_TIME
    average_people_in_line_per_floor = average_people_in_line_per_floor_per_iteration.mean(axis=1) # Hier moet dan ook nog een statistische test ofzo
    display(average_people_in_line_per_floor)
    display(f"Total average waiting: {average_people_in_line_per_floor.sum()}")

Strategy: ground_floor_rest


floor
0    1.295561
1    0.300607
2    0.256044
3    0.264464
4    0.167133
5    0.295031
dtype: float64

'Total average waiting: 2.5788413742776366'

Strategy: top_floor_rest


floor
0    1.003260
1    0.198107
2    0.217622
3    0.141782
4    0.215612
5    0.254293
dtype: float64

'Total average waiting: 2.0306757236818687'

Strategy: base


floor
0    0.869303
1    0.233358
2    0.137827
3    0.221153
4    0.209065
5    0.445122
dtype: float64

'Total average waiting: 2.115828451026509'

## Average elevator occupancy

In [50]:
for strategy_name, d in DATA.items():
    print(f"Strategy: {strategy_name}")
    people_in_elevator = d["elevator"].sum().to_frame().transpose()
    t = np.array(people_in_elevator.columns.get_level_values(1))
    t_1 = np.append(people_in_elevator.columns.get_level_values(1)[1:], 0)
    time_active = t_1 - t
    time_active[time_active < 0] = (np.maximum(SIMULATION_TIME - t, 0))[time_active < 0]
    time_active = np.minimum(time_active, SIMULATION_TIME)
    elevator_times_time = people_in_elevator * time_active
    average_people_in_elevator_per_iteration = elevator_times_time.groupby(["iteration"], axis=1).sum()/SIMULATION_TIME
    average_people_in_line_per_floor = float(average_people_in_elevator_per_iteration.mean(axis=1)) # Hier moet dan ook nog een statistische test ofzo
    display(f"Average elevator occupancy: {average_people_in_line_per_floor}")

Strategy: ground_floor_rest


'Average elevator occupancy: 2.989347227488517'

Strategy: top_floor_rest


'Average elevator occupancy: 2.808088465680353'

Strategy: base


'Average elevator occupancy: 2.33761773599632'

## Average travel time
M'n idee om dit te berekenen gaat als volgt:
1. Omdat de gemiddelde tijd dat je ergens moet wachten niet afhankelijk is van hoe laat je er komt, maar hoe lang hij is in de situatie.
2. Dus als ik een manier kan bedenken om te meten hoe lang hij in elke situatie zit, dan kan je daar het gemiddelde van nemen (afhankelijk van hoe lang hij in elke situatie zit).
3. Dit kan je doen voor de wachtenden en de lift, dan kan je daarna dat bij elkaar optellen om de gemiddelde wachttijd te berekenen.

In [51]:
for strategy_name, d in DATA.items():
    print(f"Strategy: {strategy_name}")
    total_people_waiting_at_time = d["waiting"].sum()
    waiting_in_line_change = total_people_waiting_at_time.diff()
    only_waiting_in_line_change = waiting_in_line_change[waiting_in_line_change != 0].dropna()


    determine_time = only_waiting_in_line_change.copy()
    time_diff_list = []
    time_diff_dict = {}
    for (iter, time), v in only_waiting_in_line_change[only_waiting_in_line_change > 0].iteritems():
        for _ in range(int(v)):
            determine_time.sort_index(inplace=True)

            try: # fails for last arrivals, which do not complete their journey thus we ignore them
                _curr_index = determine_time[determine_time < 0].loc[iter].index

                if len(_curr_index) > 0: # Only run if still left
                    time_diff = _curr_index[0] - time
                    time_diff_dict[iter] = time_diff_dict.get(iter, []) + [time_diff]
                    time_diff_list.append(time_diff)
                    determine_time.loc[iter, _curr_index[0]] += 1
            
            except Exception:
                pass
            
            # determine_time[determine_time < 0].loc[iter].iloc[0] += 1
        
    waiting_approx = pd.DataFrame(time_diff_dict)
    waiting_per_iteration = waiting_approx.mean()
    average_waiting = waiting_per_iteration.mean()
    display(f"Average waiting time (s): {average_waiting}")
    

Strategy: ground_floor_rest


'Average waiting time (s): 48.18800383433719'

Strategy: top_floor_rest


'Average waiting time (s): 41.52141269174124'

Strategy: base


'Average waiting time (s): 44.284781533113005'