# Data Collection

### Automate API Requests for Collecting Data

In [None]:
import json
import requests
from dotenv import load_dotenv
from os import getenv, makedirs

# Download the per-date breakdown for prime mover 7007 and store every API
# response as ./avg-7007/<date>.json.
PRIMEMOVER_ID = '7007'
OUTPUT_DIR = './avg-' + PRIMEMOVER_ID
REQUEST_TIMEOUT = 30  # seconds; requests never times out unless told to

load_dotenv()
api = getenv('BREAKDOWN_API')
if not api:
    # Without this check the failure is an opaque TypeError (None + str) below.
    raise RuntimeError('BREAKDOWN_API is not set; define it in the environment or in a .env file')

with open('dates-' + PRIMEMOVER_ID + '.json', 'r') as f:
    dates = json.load(f)

# open(..., 'w') does not create missing parent directories.
makedirs(OUTPUT_DIR, exist_ok=True)

for date in dates['date']:
    url = api + 'primemover_id=' + PRIMEMOVER_ID + '&date=' + date

    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Stop on HTTP errors instead of saving an error payload as if it were data.
    response.raise_for_status()
    # Decode before opening the file so a bad body does not leave an empty file behind.
    payload = response.json()

    file_name = OUTPUT_DIR + '/' + date + '.json'

    with open(file_name, 'w') as f:
        json.dump(payload, f, indent=4)

In [None]:
import json
import requests
from dotenv import load_dotenv
from os import getenv, makedirs

# Download the per-date breakdown for prime mover 7008 and store every API
# response as ./avg-7008/<date>.json.
PRIMEMOVER_ID = '7008'
OUTPUT_DIR = './avg-' + PRIMEMOVER_ID
REQUEST_TIMEOUT = 30  # seconds; requests never times out unless told to

load_dotenv()
api = getenv('BREAKDOWN_API')
if not api:
    # Without this check the failure is an opaque TypeError (None + str) below.
    raise RuntimeError('BREAKDOWN_API is not set; define it in the environment or in a .env file')

with open('dates-' + PRIMEMOVER_ID + '.json', 'r') as f:
    dates = json.load(f)

# open(..., 'w') does not create missing parent directories.
makedirs(OUTPUT_DIR, exist_ok=True)

for date in dates['date']:
    url = api + 'primemover_id=' + PRIMEMOVER_ID + '&date=' + date

    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Stop on HTTP errors instead of saving an error payload as if it were data.
    response.raise_for_status()
    # Decode before opening the file so a bad body does not leave an empty file behind.
    payload = response.json()

    file_name = OUTPUT_DIR + '/' + date + '.json'

    with open(file_name, 'w') as f:
        json.dump(payload, f, indent=4)

In [None]:
import json
import requests
from dotenv import load_dotenv
from os import getenv, makedirs

# Download the cycle times of every sub-activity for prime mover 7007 and merge
# them into one JSON file per date: ./cycle-7007/<date>.json.
PRIMEMOVER_ID = '7007'
OUTPUT_DIR = './cycle-' + PRIMEMOVER_ID
REQUEST_TIMEOUT = 30  # seconds; requests never times out unless told to

# Defined once: the list is the same for every date.
subact = ["Travel Empty", "Stopped Empty", "Loading", "Travel Loaded", "Stopped Loaded", "Dumping", "Nett Cycle Time"]

load_dotenv()
api = getenv('SUBACT_API')
if not api:
    # Without this check the failure is an opaque TypeError (None + str) below.
    raise RuntimeError('SUBACT_API is not set; define it in the environment or in a .env file')

with open('dates-' + PRIMEMOVER_ID + '.json', 'r') as f:
    dates = json.load(f)

# open(..., 'w') does not create missing parent directories.
makedirs(OUTPUT_DIR, exist_ok=True)

for date in dates['date']:
    # Merged record for this date; seeded by the first sub-activity response.
    data = None

    for act in subact:
        url = api + 'primemover_id=' + PRIMEMOVER_ID + '&date=' + date + '&sub_activity=' + act
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        # Stop on HTTP errors instead of merging an error payload into the record.
        response.raise_for_status()
        payload = response.json()

        # e.g. "Travel Empty" -> "travel_empty"
        new_key = act.lower().replace(' ', '_')
        if data is None:
            # Keep every field of the first response and rename its
            # 'cycle_times' entry after the sub-activity it belongs to.
            data = payload
            data[new_key] = data.pop('cycle_times')
        else:
            # Later responses only contribute their 'cycle_times' entry.
            data[new_key] = payload['cycle_times']

    file_name = OUTPUT_DIR + '/' + date + '.json'

    with open(file_name, 'w') as f:
        json.dump(data, f, indent=4)

In [None]:
import json
import requests
from dotenv import load_dotenv
from os import getenv, makedirs

# Download the cycle times of every sub-activity for prime mover 7008 and merge
# them into one JSON file per date: ./cycle-7008/<date>.json.
PRIMEMOVER_ID = '7008'
OUTPUT_DIR = './cycle-' + PRIMEMOVER_ID
REQUEST_TIMEOUT = 30  # seconds; requests never times out unless told to

# Defined once: the list is the same for every date.
subact = ["Travel Empty", "Stopped Empty", "Loading", "Travel Loaded", "Stopped Loaded", "Dumping", "Nett Cycle Time"]

load_dotenv()
api = getenv('SUBACT_API')
if not api:
    # Without this check the failure is an opaque TypeError (None + str) below.
    raise RuntimeError('SUBACT_API is not set; define it in the environment or in a .env file')

with open('dates-' + PRIMEMOVER_ID + '.json', 'r') as f:
    dates = json.load(f)

# open(..., 'w') does not create missing parent directories.
makedirs(OUTPUT_DIR, exist_ok=True)

for date in dates['date']:
    # Merged record for this date; seeded by the first sub-activity response.
    data = None

    for act in subact:
        url = api + 'primemover_id=' + PRIMEMOVER_ID + '&date=' + date + '&sub_activity=' + act
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        # Stop on HTTP errors instead of merging an error payload into the record.
        response.raise_for_status()
        payload = response.json()

        # e.g. "Travel Empty" -> "travel_empty"
        new_key = act.lower().replace(' ', '_')
        if data is None:
            # Keep every field of the first response and rename its
            # 'cycle_times' entry after the sub-activity it belongs to.
            data = payload
            data[new_key] = data.pop('cycle_times')
        else:
            # Later responses only contribute their 'cycle_times' entry.
            data[new_key] = payload['cycle_times']

    file_name = OUTPUT_DIR + '/' + date + '.json'

    with open(file_name, 'w') as f:
        json.dump(data, f, indent=4)

### JSON to Data Frames to CSV

In [None]:
import json
import pandas as pd
import os

# Gather every saved breakdown file into one table (one row per file) and
# write it to avg.csv, ordered by date.
avg = {}  # column name -> list of values, one entry per file read

directories = ['./avg-7007', './avg-7008']
for directory in directories:
    # os.listdir returns entries in arbitrary order; sorting makes runs reproducible.
    for filename in sorted(os.listdir(directory)):
        # Ignore anything that is not a saved response (hidden OS files,
        # sub-directories, ...); opening those would crash the whole cell.
        if not filename.endswith('.json'):
            continue

        filepath = os.path.join(directory, filename)

        with open(filepath, 'r') as f:
            data = json.load(f)

        for key, value in data.items():
            avg.setdefault(key, []).append(value)

df_avg = pd.DataFrame.from_dict(avg)
df_avg.sort_values('date', ignore_index=True, inplace=True)

df_avg.to_csv('avg.csv', index=False)

In [None]:
import json
import pandas as pd
import os

# Flatten every saved cycle file into one table (one row per cycle) and write
# it to cycle.csv, ordered by date and cycle start time.
cycle = {}  # column name -> list of values, one entry per cycle

# Fields stored once per file; every other field is a list with one item per cycle.
per_file_keys = ('date', 'primemover_id')

directories = ['./cycle-7007', './cycle-7008']
for directory in directories:
    # os.listdir returns entries in arbitrary order; sorting makes runs reproducible.
    for filename in sorted(os.listdir(directory)):
        # Ignore anything that is not a saved response (hidden OS files,
        # sub-directories, ...); opening those would crash the whole cell.
        if not filename.endswith('.json'):
            continue

        filepath = os.path.join(directory, filename)

        with open(filepath, 'r') as f:
            data = json.load(f)

        # Not wanted as a column; tolerate files that do not carry the key.
        data.pop('sub_activity', None)

        # The 'time' list determines how many cycles (rows) this file contributes.
        for index in range(len(data['time'])):
            for key, value in data.items():
                # Repeat per-file fields on every row; index into per-cycle lists.
                cell = value if key in per_file_keys else value[index]
                cycle.setdefault(key, []).append(cell)

df_cycle = pd.DataFrame.from_dict(cycle)
df_cycle.sort_values(['date', 'time'], ignore_index=True, inplace=True)

df_cycle.to_csv('cycle.csv', index=False)

# Interactive Plots using Plotly

In [10]:
import plotly.graph_objects as go
from ipywidgets import widgets
import pandas as pd
import numpy as np

# Load the per-day averages. Dates are converted to datetime.date objects so
# they can be compared directly with the DatePicker value.
df_avg = pd.read_csv('avg.csv')
df_avg['date'] = pd.to_datetime(df_avg['date'], format="%Y-%m-%d").dt.date

################ starting values ################
# Open on the latest date in the data and the smallest prime mover id.
starting_date = max(df_avg['date'])
starting_id = min(df_avg['primemover_id'].unique())
mask = (df_avg['date'] == starting_date) & (df_avg['primemover_id'] == starting_id)
cycle_count = str(df_avg.cycle_count.loc[mask].values[0])

x = ["Travel Empty", "Stopped Empty", "Loading", "Travel Loaded", "Stopped Loaded", "Dumping", "Nett Cycle Time"]
# Each label maps to its column: "Travel Empty" -> 'travel_empty_avg'.
y = [
    df_avg[label.lower().replace(' ', '_') + '_avg'].loc[mask].values[0]
    for label in x
]

################ widgets ################
datepicker_widget = widgets.DatePicker(
    value=starting_date,
    description='Date:',
    disabled=False,
)

dropdown_widget = widgets.Dropdown(
    description='ID:',
    options=df_avg['primemover_id'].unique(),
    value=starting_id,
)

################ plot graph ################
# Six relative steps followed by the total bar.
breakdown_trace = go.Waterfall(
    x=x,
    y=y,
    measure=["relative"] * 6 + ["total"],
    base=0,
    decreasing=dict(marker=dict(color="Maroon", line=dict(color="red", width=2))),
    increasing=dict(marker=dict(color="Teal")),
    totals=dict(marker=dict(color="deep sky blue", line=dict(color="blue", width=3))),
    text=y,
    textposition='auto',
)
fig = go.FigureWidget(breakdown_trace)

fig.update_layout(
    title="<b>Average Nett Cycle Time Breakdown</b><br>Total No. of Cycles: " + cycle_count,
    yaxis_title="Minutes",
    waterfallgap=0.3,
    height=600,
)

################ update graph ################
def _bind_breakdown_updater(fig, datepicker_widget, dropdown_widget, df_avg):
    """Build the widget observer for the waterfall chart.

    The figure, the widgets and the data frame are captured as closure
    variables instead of being looked up as globals on every call. The
    bar-chart code further down rebinds the global names ``fig``,
    ``datepicker_widget`` and ``update_dataset``; a callback that read those
    globals would read the other date picker and write into the other figure
    as soon as that code has run.
    """
    value_columns = (
        'travel_empty_avg', 'stopped_empty_avg', 'loading_avg', 'travel_loaded_avg',
        'stopped_loaded_avg', 'dumping_avg', 'nett_cycle_time_avg',
    )
    title_prefix = "<b>Average Nett Cycle Time Breakdown</b>" + "<br>" + "Total No. of Cycles: "

    def update_dataset(change):
        """Redraw the waterfall for the currently selected date and prime mover.

        ``change`` is the notification passed by the widget; it is not used,
        the current values are read from the widgets directly. A selection
        with no matching row is drawn as seven zero bars and a cycle count of
        "None".
        """
        new_date = datepicker_widget.value
        new_id = dropdown_widget.value
        new_mask = (df_avg['date'] == new_date) & (df_avg['primemover_id'] == new_id)

        if new_mask.any():
            # First matching row supplies the seven bar heights and the cycle count.
            y = [df_avg[column].loc[new_mask].values[0] for column in value_columns]
            cycle_count = str(df_avg.cycle_count.loc[new_mask].values[0])
        else:
            y = np.zeros(7)
            cycle_count = "None"

        # Apply all property changes as a single update of the figure.
        with fig.batch_update():
            fig.data[0].y = y
            fig.data[0].text = y
            fig.layout.title = title_prefix + cycle_count

    return update_dataset


update_dataset = _bind_breakdown_updater(fig, datepicker_widget, dropdown_widget, df_avg)

# A change of either control's value triggers a redraw.
for control in (datepicker_widget, dropdown_widget):
    control.observe(update_dataset, names="value")

# Last expression of the cell: the controls side by side, the figure below them.
controls_row = widgets.HBox([datepicker_widget, dropdown_widget])
widgets.VBox([controls_row, fig])

VBox(children=(HBox(children=(DatePicker(value=datetime.date(2023, 3, 31), description='Date:', step=1), Dropd…

In [11]:
import plotly.graph_objects as go
from ipywidgets import widgets
import pandas as pd
import numpy as np

# Load the per-cycle durations. Dates are converted to datetime.date objects so
# they can be compared directly with the DatePicker value.
df_cycle = pd.read_csv('cycle.csv')
df_cycle['date'] = pd.to_datetime(df_cycle['date'], format="%Y-%m-%d").dt.date

################ starting values ################
# Open on the latest date in the data and the smallest prime mover id.
starting_date = max(df_cycle['date'])
starting_id = min(df_cycle['primemover_id'].unique())
mask_cycle = (df_cycle['date'] == starting_date) & (df_cycle['primemover_id'] == starting_id)

y = df_cycle['nett_cycle_time'].loc[mask_cycle]
x = df_cycle['time'].loc[mask_cycle]
threshold = 60  # minutes, as shown in the threshold annotation below

# One colour per bar: green below the threshold, red otherwise. Built as a
# plain list in one pass rather than by growing a pandas Series label by label.
colors = ['green' if nett_cycle_time < threshold else 'red' for nett_cycle_time in y]

################ widgets ################
datepicker_widget = widgets.DatePicker(
    description='Date:',
    disabled=False,
    value=starting_date
)

dropdown_id_widget = widgets.Dropdown(
    description='ID:',
    value=starting_id,
    options=df_cycle['primemover_id'].unique()
)

dropdown_subact_widget = widgets.Dropdown(
    description='Sub-activity:',
    value="Nett Cycle Time",
    options=["Nett Cycle Time","Travel Empty", "Stopped Empty", "Loading", "Travel Loaded", "Stopped Loaded", "Dumping"]
)

################ plot graph ################
fig = go.FigureWidget(go.Bar(
    x=x, 
    y=y,  
    marker_color=colors, 
    text=[str(value) for value in y],
    textposition='auto'
))

fig.update_layout(
    xaxis = dict(
        tickmode = 'linear',
        tick0 = 1,
        dtick = 1,
    ),
    margin=dict(r=150),
    title = "<b>Nett Cycle Time</b>",
    xaxis_title="Cycle Start Time", 
    yaxis_title="Minutes",
)

# Dashed threshold line with its label. The update callback later moves this
# line and rewrites the label through fig.layout.shapes[0] / annotations[0].
annotation = "Threshold = " + str(threshold) + " mins"
fig.add_hline(
    y=threshold, 
    line_dash="dash", 
    line_color="black", 
    annotation_text=annotation, 
    annotation_position='top right', 
    xref='x', 
    yref="y"
)

################ update graph ################
def _bind_cycle_updater(fig, datepicker_widget, dropdown_id_widget, dropdown_subact_widget, df_cycle):
    """Build the widget observer for the per-cycle bar chart.

    The figure, the widgets and the data frame are captured as closure
    variables instead of being looked up as globals on every call. The
    waterfall code earlier in the file uses the same global names (``fig``,
    ``datepicker_widget``, ``update_dataset``), so re-running it would
    otherwise redirect this callback to the wrong widgets and figure.
    """
    # Sub-activity label -> (df_cycle column, threshold in minutes).
    # With a threshold of 0 every non-negative duration is coloured red.
    settings = {
        'Nett Cycle Time': ('nett_cycle_time', 60),
        'Travel Empty': ('travel_empty', 20),
        'Stopped Empty': ('stopped_empty', 0),
        'Loading': ('loading', 10),
        'Travel Loaded': ('travel_loaded', 25),
        'Stopped Loaded': ('stopped_loaded', 0),
        'Dumping': ('dumping', 5),
    }

    def update_dataset(change):
        """Redraw the bar chart for the selected date, prime mover and sub-activity.

        ``change`` is the notification passed by the widget; it is not used,
        the current values are read from the widgets directly. A selection
        with no matching rows is drawn as a single black zero bar with the
        threshold reset to 0.

        Raises:
            KeyError: if the selected sub-activity has no entry in the
                settings table.
        """
        new_date = datepicker_widget.value
        new_id = dropdown_id_widget.value
        mask_cycle = (df_cycle['date'] == new_date) & (df_cycle['primemover_id'] == new_id)

        sub_activity = dropdown_subact_widget.value
        plot_title = "<b>" + sub_activity + "</b>"
        col_name, threshold = settings[sub_activity]

        if mask_cycle.any():
            y = df_cycle[col_name].loc[mask_cycle]
            x = df_cycle['time'].loc[mask_cycle]
            # One colour per bar: green below the threshold, red otherwise.
            colors = ['green' if duration < threshold else 'red' for duration in y]
        else:
            y = np.zeros(1)
            x = np.zeros(1)
            threshold = 0
            colors = 'black'

        # Apply all property changes as a single update of the figure.
        with fig.batch_update():
            fig.data[0].y = y
            fig.data[0].x = x
            fig.data[0].marker.color = colors
            fig.data[0].text = [str(value) for value in y]
            fig.layout.title = plot_title
            # shapes[0] / annotations[0] are the threshold line and its label.
            fig.layout.shapes[0].y0 = threshold
            fig.layout.shapes[0].y1 = threshold
            fig.layout.annotations[0].y = threshold
            fig.layout.annotations[0].text = "Threshold = " + str(threshold) + " mins"

    return update_dataset


update_dataset = _bind_cycle_updater(
    fig, datepicker_widget, dropdown_id_widget, dropdown_subact_widget, df_cycle
)

# A change of any control's value triggers a redraw.
for control in (datepicker_widget, dropdown_id_widget, dropdown_subact_widget):
    control.observe(update_dataset, names="value")

# Last expression of the cell: the controls side by side, the figure below them.
controls_row = widgets.HBox([datepicker_widget, dropdown_id_widget, dropdown_subact_widget])
widgets.VBox([controls_row, fig])

VBox(children=(HBox(children=(DatePicker(value=datetime.date(2023, 3, 31), description='Date:', step=1), Dropd…