In [2]:
import json
from os import listdir
from os.path import isfile, join

In [3]:
import pandas as pd

#### Load in Data
* Open all json files that contain traces and append them to a list with all of the traces.
    - This makes the data ready to use with pandas dataframes and easy to create visualizations with Altair


In [4]:
baseDirectory = 'data/synthetic/20210302-hipster-shop'
directories = listdir(baseDirectory)

In [5]:
traces = []
for directory in directories:
    thisDirectory = baseDirectory + '/' + directory
    try:
        with open(thisDirectory) as f:
          data = json.load(f)
    except:
        continue
    traces.append(data)

In [14]:
traces[0]

[{'traceId': '550997223f8c4b30',
  'parentId': '550997223f8c4b30',
  'id': '1234d6244fad26ab',
  'name': '/getcart',
  'timestamp': 1614738754173000,
  'duration': 190000,
  'localEndpoint': {'serviceName': 'cartservice', 'ipv4': '192.168.1.180'},
  'tags': {'http.method': 'GET',
   'http.url': 'http://cartservice/GetCart',
   'load_generator.seq_num': '14978',
   'region': 'us-east-1',
   'version': 'v5'}},
 {'traceId': '550997223f8c4b30',
  'parentId': '550997223f8c4b30',
  'id': 'fd6c6a8dad1dac47',
  'name': '/getrecommendations',
  'timestamp': 1614738754163000,
  'duration': 226000,
  'localEndpoint': {'serviceName': 'recommendationservice',
   'ipv4': '192.168.1.180'},
  'tags': {'http.method': 'GET',
   'http.url': 'http://recommendationservice/GetRecommendations',
   'load_generator.seq_num': '14979',
   'region': 'us-east-1',
   'version': 'v234'}},
 {'traceId': '550997223f8c4b30',
  'parentId': 'fd6c6a8dad1dac47',
  'id': '3deb074f82f2e50c',
  'name': '/getproducts',
  'times

In [15]:
names = []
durations = []
traceIDs = []
parentIDs = []
ids = []
for trace in traces:
    
    for element in trace:
        traceIDs.append(element['traceId'])
        ids.append(element['id'])
        try:
            parentIDs.append(element['parentId'])
        except:
            parentIDs.append('Na')
        names.append(element['name'])
        durations.append(element['duration'])

In [17]:
traceDf = pd.DataFrame({'Resource Name' : names, 'Duration':durations, 'Trace_ID': traceIDs, 'ID': ids, 'Parent_ID': parentIDs})
traceDf

Unnamed: 0,Resource Name,Duration,Trace_ID,ID,Parent_ID
0,/getcart,190000,550997223f8c4b30,1234d6244fad26ab,550997223f8c4b30
1,/getrecommendations,226000,550997223f8c4b30,fd6c6a8dad1dac47,550997223f8c4b30
2,/getproducts,5000,550997223f8c4b30,3deb074f82f2e50c,fd6c6a8dad1dac47
3,/cart,334000,550997223f8c4b30,550997223f8c4b30,Na
4,/product,501000,592363a229596c88,592363a229596c88,Na
...,...,...,...,...,...
19558,/cart,397000,529885a8ac3c2592,529885a8ac3c2592,Na
19559,/cart,485000,b9ae10ad77e3ecee,b9ae10ad77e3ecee,Na
19560,/getproducts,95000,b9ae10ad77e3ecee,a2f5e2941ecb468e,449069260f276449
19561,/getcart,94000,b9ae10ad77e3ecee,5bb0e7a04dd86297,b9ae10ad77e3ecee


In [18]:
traceDf.head(10)

Unnamed: 0,Resource Name,Duration,Trace_ID,ID,Parent_ID
0,/getcart,190000,550997223f8c4b30,1234d6244fad26ab,550997223f8c4b30
1,/getrecommendations,226000,550997223f8c4b30,fd6c6a8dad1dac47,550997223f8c4b30
2,/getproducts,5000,550997223f8c4b30,3deb074f82f2e50c,fd6c6a8dad1dac47
3,/cart,334000,550997223f8c4b30,550997223f8c4b30,Na
4,/product,501000,592363a229596c88,592363a229596c88,Na
5,/adrequest,232000,592363a229596c88,10c4856b83dbbc71,592363a229596c88
6,/getrecommendations,424000,592363a229596c88,b290c3515fad879f,592363a229596c88
7,/getproducts,96000,592363a229596c88,d8eef27c349a9e67,592363a229596c88
8,/getproducts,169000,592363a229596c88,439e9e64c290a7ae,b290c3515fad879f
9,/cart,426000,110ebb758dcef8a4,110ebb758dcef8a4,Na


# Three hypotheses

    1. The provided data will have traces that use some resources more than others. These resources may be taxing on the system if they take too long to complete and are commonly used.
    
    2. Traces within this system architecture will have different run times based on how many resources they use to complete the task at hand.
    
    3. Specific traces may have errors that lead to longer runtimes that we must identify to trouble shoot and improve system performance.

## Hypothesis #1
    The provided data will have traces that use some resources more than others. These resources may be taxing on the system if they take too long to complete and are commonly used.

In [57]:
averagetraceDf = traceDf.groupby('Resource Name')['Duration'].agg(['mean', 'count']).reset_index().rename(columns = {'mean':'Mean Duration'})
averagetraceDf

Unnamed: 0,Resource Name,Mean Duration,count
0,/address,50682.7,208
1,/adrequest,246023.4,728
2,/cart,360512.1,3644
3,/checkout,1625361.0,119
4,/creditcardinfo,25609.21,87
5,/currency,182078.4,51
6,/getcart,100176.4,3731
7,/getconversion,148326.1,138
8,/getproducts,55422.75,5100
9,/getquote,307272.7,121


In [58]:
import altair as alt

source = averagetraceDf

bars = alt.Chart(source, title='Resource Name vs Mean Duration with Count Labels').mark_bar().encode(
    x='Resource Name',
    y='Mean Duration'
)

text = bars.mark_text(
    baseline='middle',
    dy=-10  # Nudges text to right so it doesn't appear on top of the bar
).encode(
    text='count:Q'
)
(bars + text).properties(height=600)

The bar plot above shows how each resource have very different durations. /Checkout on average takes the longest time to complete but is only used 119 times in our trace dataset. For the purpose of minimizing durations throughout our system, I believe it is important to focus on processes such as /cart and /getrecommendations that are used commonly and have a relatively long duration. 

### Refine 
With a better understanding of average durations for each proess and how often they occur in our trace data, it would be helpful to see the duration distributions for concerning process that take a long time and occur frequently.

In [59]:
def histogram(resourceName):
    source = traceDf[traceDf['Resource Name'] == resourceName]
    return alt.Chart(source, title = "Duration Histogram for " + resourceName).mark_bar().encode(
        alt.X("Duration:Q", bin=True),
        y='count()',
    )
    

In [60]:
histogram("/getrecommendations")

In [61]:
histogram("/getcart")

In [62]:
histogram("/product")

Generally, it appears that the distribution of resources are relatively normal. However, /getcart has a normal distribution. For these concerning resources, I am curious to what is causing very low and very high durations. If a system engineer can pin point whatever is significantly improving durations, they could figure out how to improve the systems performance as a whole.

## Hypothesis #2
    Traces within this system architecture will have different run times based on how many resources they use to complete the task at hand.

In [63]:
traceNetDurationResourceCount = traceDf.groupby('Trace_ID')['Duration'].agg(['sum', 'count']).reset_index().rename(columns = {'sum':'Trace Duration', 'count':'Resources Used'})
traceNetDurationResourceCount

Unnamed: 0,Trace_ID,Trace Duration,Resources Used
0,00068a67bc793add,858000,4
1,000ba11f524d250f,809000,4
2,00200ac17dbf541d,593000,4
3,0021a1abb3546ead,759000,5
4,0044a79dfe680331,572000,4
...,...,...,...
4658,ffdb50d2a2d18c5b,795000,4
4659,ffe88d8088ee8d5e,997000,4
4660,ffea94949d425fe9,4380000,9
4661,fff712da02528d14,959000,4


In [64]:
source = traceNetDurationResourceCount
alt.Chart(source, title = 'Resources Used vs Trace Duration').mark_circle(size=60).encode(
    x='Resources Used',
    y='Trace Duration',
    tooltip = ['Trace_ID','Resources Used', 'Trace Duration']
)

The plot above suggests that generally more resources used does lead to a greater duration. Additionally, it is interesting to see the range of duration for each number of resources used. When 2 resources are used they take a surprisingly long time compared to the other groups. I assume that when 2 resources are used they are long resources that take a long time to complete.

## Hypothesis #3
    Specific traces may have errors that lead to longer runtimes that we must identify to trouble shoot and improve system performance.

In [75]:
traceDf = traceDf.sort_values('Trace_ID')
traceDf

Unnamed: 0,Resource Name,Duration,Trace_ID
3359,/getcart,162000,00068a67bc793add
3360,/getrecommendations,282000,00068a67bc793add
3361,/getproducts,42000,00068a67bc793add
3362,/cart,372000,00068a67bc793add
13309,/getproducts,86000,000ba11f524d250f
...,...,...,...
16014,/getrecommendations,353000,fff712da02528d14
1431,/cart,265000,fffe889dcec9d1b5
1430,/getcart,97000,fffe889dcec9d1b5
1433,/getrecommendations,165000,fffe889dcec9d1b5


In [91]:
def plotTrace(traceID):
    source = traceDf[traceDf['Trace_ID'] == traceID]
    return alt.Chart(source, title='Trace: ' + traceID).mark_bar().encode(
        x='Resource Name',
        y='Duration:Q',
        color = 'Resource Name:N'
    )

In [85]:
traceSum = 0 
currTraceID = ''
duration_starts = []
duration_ends = []
duration_start = 0
duration_end = 0
for i in range(len(traceDf)):
    val = traceDf.iloc[i]
    if val['Trace_ID'] != currTraceID:
        duration_start = 0
        currTraceID = val['Trace_ID']
    else:
        duration_start = duration_end
    duration_end = duration_start +  val['Duration']
    duration_starts.append(duration_start)
    duration_ends.append(duration_end)

In [90]:
traceDf['duration_start'] = duration_starts
traceDf['duration_end'] = duration_ends
traceDf.head(20)

Unnamed: 0,Resource Name,Duration,Trace_ID,duration_start,duration_end
3359,/getcart,162000,00068a67bc793add,0,162000
3360,/getrecommendations,282000,00068a67bc793add,162000,444000
3361,/getproducts,42000,00068a67bc793add,444000,486000
3362,/cart,372000,00068a67bc793add,486000,858000
13309,/getproducts,86000,000ba11f524d250f,0,86000
13310,/getrecommendations,298000,000ba11f524d250f,86000,384000
13311,/getcart,78000,000ba11f524d250f,384000,462000
13308,/cart,347000,000ba11f524d250f,462000,809000
9393,/getproducts,54000,00200ac17dbf541d,0,54000
9394,/getrecommendations,204000,00200ac17dbf541d,54000,258000


In [125]:
import numpy as np

In [130]:
np.random.uniform(4,10000)

4333.657380534576

In [135]:
traceDf['Data Transfered'] = [round(np.random.uniform(4,10000),1) for i in range(len(traceDf))]
traceDf

Unnamed: 0,Resource Name,Duration,Trace_ID,duration_start,duration_end,Data Transfered:Q,Data Transfered
3359,/getcart,162000,00068a67bc793add,0,162000,5488.3,2941.5
3360,/getrecommendations,282000,00068a67bc793add,162000,444000,1746.0,8988.7
3361,/getproducts,42000,00068a67bc793add,444000,486000,5835.2,7437.6
3362,/cart,372000,00068a67bc793add,486000,858000,547.3,1035.7
13309,/getproducts,86000,000ba11f524d250f,0,86000,2009.1,6709.0
...,...,...,...,...,...,...,...
16014,/getrecommendations,353000,fff712da02528d14,606000,959000,9425.3,4427.1
1431,/cart,265000,fffe889dcec9d1b5,0,265000,8303.4,8008.0
1430,/getcart,97000,fffe889dcec9d1b5,265000,362000,5094.0,601.5
1433,/getrecommendations,165000,fffe889dcec9d1b5,362000,527000,4531.1,5485.5


In [146]:
traceDf[traceDf['Error'] == "ERROR"]

Unnamed: 0,Resource Name,Duration,Trace_ID,duration_start,duration_end,Data Transfered:Q,Data Transfered,Error
17288,/getproducts,19000,03284b2e14c2fb01,151000,170000,7824.0,8260.6,ERROR
17053,/getproducts,79000,106b33d6227a4e20,893000,972000,5397.9,2645.3,ERROR
15998,/cart,410000,12ffa95a964081ea,30000,440000,8416.7,7290.4,ERROR
4328,/getproducts,8000,3c5c4eacce233605,99000,107000,4828.0,3667.8,ERROR
8860,/cart,284000,45bf756d0e2a9e01,308000,592000,6462.6,4550.2,ERROR
15300,/getcart,115000,4742d3a005613c73,662000,777000,2001.6,5553.7,ERROR
4859,/getproducts,84000,5fe54c6f41d0265d,649000,733000,9749.5,500.7,ERROR
5687,/cart,447000,657ba3cf6a9f5568,462000,909000,9898.5,5077.0,ERROR
4584,/getproducts,44000,80d2c685470521b9,554000,598000,7228.7,2126.5,ERROR
9025,/getproducts,89000,8c99a4b8f12a0f9e,0,89000,8066.0,5743.1,ERROR


In [138]:
traceDf['Error'] = ["No Error" if round(np.random.uniform(1,100),1) != 4 else "ERROR" for i in range(len(traceDf))]

In [19]:
def plotTrace2(traceID):
    source = traceDf[traceDf['Trace_ID'] == traceID]
    return alt.Chart(source, title='Trace: ' + traceID).mark_bar().encode(
        y=alt.Y('Resource Name', type='nominal', sort=None),
        x = alt.X("duration_start:Q", title= "Duration"),
        x2 = "duration_end:Q",
        color = 'Resource Name',
        tooltip = ['Duration:Q', 'Data Transfered:Q', 'Error']
    )

In [20]:
plotTrace2('106b33d6227a4e20')

NameError: name 'alt' is not defined

In [151]:
plotTrace2('b9ae10ad77e3ecee')

In [152]:
plotTrace2('ffea94949d425fe9')

In [155]:
traceDf.append(traceDf[traceDf['Trace_ID'] == 'ffea94949d425fe9'])

Unnamed: 0,Resource Name,Duration,Trace_ID,duration_start,duration_end,Data Transfered:Q,Data Transfered,Error
3359,/getcart,162000,00068a67bc793add,0,162000,5488.3,2941.5,No Error
3360,/getrecommendations,282000,00068a67bc793add,162000,444000,1746.0,8988.7,No Error
3361,/getproducts,42000,00068a67bc793add,444000,486000,5835.2,7437.6,No Error
3362,/cart,372000,00068a67bc793add,486000,858000,547.3,1035.7,No Error
13309,/getproducts,86000,000ba11f524d250f,0,86000,2009.1,6709.0,No Error
...,...,...,...,...,...,...,...,...
19538,/orderresult,94000,ffea94949d425fe9,3500000,3594000,5103.2,4491.2,No Error
19539,/sendorderconfirmation,580000,ffea94949d425fe9,3594000,4174000,2201.2,9104.0,No Error
19541,/money,8000,ffea94949d425fe9,4174000,4182000,2694.8,698.4,No Error
19534,/address,85000,ffea94949d425fe9,4182000,4267000,4128.8,5324.1,No Error


In [177]:
traceDf = traceDf.append(traceDf[traceDf['Trace_ID'] == 'b9ae10ad77e3ecee'])

traceDf

Unnamed: 0,Resource Name,Duration,Trace_ID,duration_start,duration_end,Data Transfered:Q,Data Transfered,Error
3359,/getcart,162000,00068a67bc793add,0,162000,5488.3,2941.5,No Error
3360,/getrecommendations,282000,00068a67bc793add,162000,444000,1746.0,8988.7,No Error
3361,/getproducts,42000,00068a67bc793add,444000,486000,5835.2,7437.6,No Error
3362,/cart,372000,00068a67bc793add,486000,858000,547.3,1035.7,No Error
13309,/getproducts,86000,000ba11f524d250f,0,86000,2009.1,6709.0,No Error
...,...,...,...,...,...,...,...,...
19542,/getconversion,113000,Place Order,4267000,4380000,7199.2,9847.0,No Error
19559,/cart,485000,b9ae10ad77e3ecee,0,485000,6185.4,9058.2,No Error
19560,/getproducts,95000,b9ae10ad77e3ecee,485000,580000,9653.7,3160.5,No Error
19562,/getrecommendations,433000,b9ae10ad77e3ecee,580000,1013000,2398.8,1224.1,No Error


In [181]:
traceDf.iloc[-4:]['Trace_ID'] = 'Cart Recommendations'
traceDf

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Resource Name,Duration,Trace_ID,duration_start,duration_end,Data Transfered:Q,Data Transfered,Error
3359,/getcart,162000,00068a67bc793add,0,162000,5488.3,2941.5,No Error
3360,/getrecommendations,282000,00068a67bc793add,162000,444000,1746.0,8988.7,No Error
3361,/getproducts,42000,00068a67bc793add,444000,486000,5835.2,7437.6,No Error
3362,/cart,372000,00068a67bc793add,486000,858000,547.3,1035.7,No Error
13309,/getproducts,86000,000ba11f524d250f,0,86000,2009.1,6709.0,No Error
...,...,...,...,...,...,...,...,...
19542,/getconversion,113000,Place Order,4267000,4380000,7199.2,9847.0,No Error
19559,/cart,485000,Cart Recommendations,0,485000,6185.4,9058.2,No Error
19560,/getproducts,95000,Cart Recommendations,485000,580000,9653.7,3160.5,No Error
19562,/getrecommendations,433000,Cart Recommendations,580000,1013000,2398.8,1224.1,No Error


In [182]:
aggTraceDf = traceDf.iloc[-9:]
aggTraceDf = aggTraceDf.rename(columns = {'Duration':'Avg Duration', 'Data Transfered':'Median Data Transfered', 'Error':'Error Rate'})

In [183]:
aggTraceDf.iloc[-9:]['Error Rate'] = .64

In [184]:
def plotTraceAgg(traceID):
    source = aggTraceDf[aggTraceDf['Trace_ID'] == traceID]
    return alt.Chart(source, title='Trace: ' + traceID).mark_bar().encode(
        y=alt.Y('Resource Name', type='nominal', sort=None),
        x = alt.X("duration_start:Q", title= "Duration"),
        x2 = "duration_end:Q",
        color = 'Resource Name',
        tooltip = ['Avg Duration:Q', 'Median Data Transfered:Q', 'Error Rate']
    )

In [185]:
plotTraceAgg('Cart Recommendations')

These detailed looks into specific traces are helpful at understanding how our system processes specific requests. Additionally, these trace visualizations show how individual traces can be improved and which aspects take up the majority of the time. Systems engineer would likely use these visualizations to test new implementation and identify issues within the system.

In [71]:
source = traceDf[traceDf['Trace_ID'] == traceID]
    return alt.Chart(source, title='Trace: ' + traceID).mark_bar().encode(
        x='Resource Name',
        y='Duration:Q',
        color = 'Resource Name:N'
    )

IndentationError: unexpected indent (<ipython-input-71-9edcef70c625>, line 2)

In [70]:
color_scale <-
  alt$Scale(
    domain = list(
      "Strongly disagree",
      "Disagree",
      "Neither agree nor disagree",
      "Agree",
      "Strongly agree"
    ),
    range = list("#c30d24", "#f3a583", "#cccccc", "#94c6da", "#1770ab")
  )

y_axis <-
  alt$Axis(
    title = "Question",
    offset = 5,
    ticks = FALSE,
    minExtent = 60,
    domain = FALSE
  )

chart <- 
  alt$Chart(data)$
  mark_bar()$
  encode(
    x = "percentage_start:Q",
    x2 = "percentage_end:Q",
    y = alt$Y("question:N", axis = y_axis),
    color = alt$Color(
      "type:N",
      legend = alt$Legend(title = "Response"),
      scale = color_scale
    )
  )

chart

SyntaxError: invalid syntax (<ipython-input-70-160b20d0f6af>, line 1)