# PyMongo Query

db = "test"
collection = "benchmarklogs"

In [1]:
from pymongo import MongoClient
# Names of the MongoDB database and collection that are read below.
DB_NAME = 'test'
COLLECTION_NAME = 'benchmarklogs'

# MongoClient() gets no arguments, so PyMongo's default connection
# settings are used.
client = MongoClient()
db = client[DB_NAME]
collection = db[COLLECTION_NAME]

# PyMongo Query -> Pandas DataFrame

In [None]:
import numpy as np
import pandas as pd

# Filter for the benchmark runs to analyse. Top-level fields of a MongoDB
# filter document are ANDed implicitly, and `$in` matches a field against
# any value in the list, so no explicit `$and` / `$or` nesting is needed.
query = {
    "instanceType": {
        "$in": ["c5.large", "t2.xlarge", "c5.xlarge", "c5.2xlarge", "c5.4xlarge"],
    },
    "usedGenesisJson": "genesis_pow_max_performance.json",
    "maxTransactions": 1000,
    "scenario": "account",
    "nodes": 5,
}

# Uncomment to fetch every document in the collection instead.
# query = {}

# Run the filter against the selected database and collection.
cursor = collection.find(query)

# Exhaust the cursor and build one DataFrame row per returned document.
df = pd.DataFrame(list(cursor))

# Uncomment to inspect the first rows of the result.
#display(df.head(200))

# Pandas DataFrame -> Plot
## Throughput

In [None]:
import matplotlib.pyplot as plt

# Two-column views of the query result: the metric first, then the
# instance type it was measured on.
data_tps_instanceType = df[['txPerSecond', 'instanceType']]
data_hashRate_instanceType = df[['hashRate', 'instanceType']]

# Labels used by the throughput figures.
title = "Throughput against Node Configuration (PoW)"
xlabel = "Node Configuration"
ylabel = "Transactions Per Second (TPS)"
ylabelAverage = "Average Transactions Per Second (TPS)"

# Instance types in the order of the numbered variables below
# (1 = c5.large, 2 = c5.xlarge, 3 = t2.xlarge, 4 = c5.2xlarge, 5 = c5.4xlarge).
_throughput_instances = ("c5.large", "c5.xlarge", "t2.xlarge", "c5.2xlarge", "c5.4xlarge")

# Mean transactions per second for each instance type.
avg1_tps, avg2_tps, avg3_tps, avg4_tps, avg5_tps = (
    data_tps_instanceType.loc[df['instanceType'] == name, 'txPerSecond'].mean()
    for name in _throughput_instances
)

# Mean hash rate for each instance type.
hashRate1, hashRate2, hashRate3, hashRate4, hashRate5 = (
    data_hashRate_instanceType.loc[df['instanceType'] == name, 'hashRate'].mean()
    for name in _throughput_instances
)

In [None]:
# Start from a brand-new figure instead of addressing a fixed figure number,
# so nothing left over from an earlier plot can end up in this one.
fig, ax = plt.subplots(figsize=(9, 6))

ax.set_title(title)
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)

# One box per instance type, in this left-to-right order.
box_labels = ["c5.large", "c5.xlarge", "t2.xlarge", "c5.2xlarge", "c5.4xlarge"]

# For each instance type, take the first column (txPerSecond) of its rows.
data_to_plot = [
    data_tps_instanceType.loc[df['instanceType'] == label].iloc[:, 0]
    for label in box_labels
]

# NOTE: matplotlib 3.9 renamed `labels` to `tick_labels`; `labels` is kept
# so the cell still runs on older matplotlib releases.
bp = ax.boxplot(data_to_plot, labels=box_labels)
fig.savefig("nodes_configuration_pow_throughput_boxplot.svg", format="svg")



In [None]:

# Bar chart of the mean throughput per instance type.
objects = ('c5.large', 'c5.xlarge', 't2.xlarge', 'c5.2xlarge', 'c5.4xlarge')
y_pos = np.arange(len(objects))  # one bar position per instance type
performance = [avg1_tps, avg2_tps, avg3_tps, avg4_tps, avg5_tps]

# The pyplot calls below act on the current figure. Open a new one first so
# the bars are never added to a figure that is still open from an earlier cell
# (which happens whenever the backend does not close figures between cells).
plt.figure()
plt.bar(y_pos, performance, align='center', alpha=0.5)
plt.xticks(y_pos, objects)
plt.ylabel(ylabelAverage)
plt.title(title)
plt.savefig("nodes_configuration_pow_throughput_barchart.svg", format="svg")
plt.show()

### HashRate

In [None]:
# Mean throughput plotted against mean hash rate, one point per instance type.
# A new figure is opened first so the line is not drawn onto a figure that is
# still current from an earlier cell.
plt.figure()
plt.plot(
    [hashRate1, hashRate2, hashRate3, hashRate4, hashRate5],
    [avg1_tps, avg2_tps, avg3_tps, avg4_tps, avg5_tps],
    marker='o',
)
plt.title("Throughput against HashRate (PoW)")
plt.ylabel("Average Transactions Per Second (TPS)")
plt.xlabel("Average HashRate per Node Configuration")
plt.savefig("node_configuration_hashrate_throughput_lines_average.svg", format="svg")
plt.show()

## Latency

In [None]:
# Two-column views of the query result: the metric first, then the
# instance type it was measured on.
data_latency_instanceType = df[['averageDelay', 'instanceType']]
data_hashRate_instanceType = df[['hashRate', 'instanceType']]

# Labels used by the latency figures.
title = "Latency against Node Configuration (PoW)"
xlabel = "Node Configuration"
ylabel = "Transaction Latency [seconds]"
ylabelAverage = "Average Transaction Latency [seconds]"

# Instance types in the order of the numbered variables below
# (1 = c5.large, 2 = c5.xlarge, 3 = t2.xlarge, 4 = c5.2xlarge, 5 = c5.4xlarge).
_latency_instances = ("c5.large", "c5.xlarge", "t2.xlarge", "c5.2xlarge", "c5.4xlarge")

# Mean of the averageDelay column for each instance type.
avg1_latency, avg2_latency, avg3_latency, avg4_latency, avg5_latency = (
    data_latency_instanceType.loc[df['instanceType'] == name, 'averageDelay'].mean()
    for name in _latency_instances
)

# Mean hash rate for each instance type.
hashRate1, hashRate2, hashRate3, hashRate4, hashRate5 = (
    data_hashRate_instanceType.loc[df['instanceType'] == name, 'hashRate'].mean()
    for name in _latency_instances
)

In [None]:
# Start from a brand-new figure instead of addressing a fixed figure number,
# so nothing left over from an earlier plot can end up in this one.
fig, ax = plt.subplots(figsize=(9, 6))

ax.set_title(title)
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)

# One box per instance type, in this left-to-right order.
box_labels = ["c5.large", "c5.xlarge", "t2.xlarge", "c5.2xlarge", "c5.4xlarge"]

# For each instance type, take the first column (averageDelay) of its rows.
data_to_plot = [
    data_latency_instanceType.loc[df['instanceType'] == label].iloc[:, 0]
    for label in box_labels
]

# NOTE: matplotlib 3.9 renamed `labels` to `tick_labels`; `labels` is kept
# so the cell still runs on older matplotlib releases.
bp = ax.boxplot(data_to_plot, labels=box_labels)
fig.savefig("nodes_configuration_pow_latency_boxplot.svg", format="svg")

In [None]:

# Bar chart of the mean latency per instance type.
objects = ('c5.large', 'c5.xlarge', 't2.xlarge', 'c5.2xlarge', 'c5.4xlarge')
y_pos = np.arange(len(objects))  # one bar position per instance type
performance = [avg1_latency, avg2_latency, avg3_latency, avg4_latency, avg5_latency]

# The pyplot calls below act on the current figure. Open a new one first so
# the bars are never added to a figure that is still open from an earlier cell
# (which happens whenever the backend does not close figures between cells).
plt.figure()
plt.bar(y_pos, performance, align='center', alpha=0.5)
plt.xticks(y_pos, objects)
plt.ylabel(ylabelAverage)
plt.title(title)
plt.savefig("nodes_configuration_pow_latency_barchart.svg", format="svg")
plt.show()

In [None]:
### HashRate

In [None]:
# Mean latency plotted against mean hash rate, one point per instance type.
# A new figure is opened first so the line is not drawn onto a figure that is
# still current from an earlier cell.
plt.figure()
plt.plot(
    [hashRate1, hashRate2, hashRate3, hashRate4, hashRate5],
    [avg1_latency, avg2_latency, avg3_latency, avg4_latency, avg5_latency],
    marker='o',
)
plt.title("Latency against HashRate (PoW)")
plt.ylabel(ylabelAverage)
plt.xlabel("Average HashRate per Node Configuration")
plt.savefig("node_configuration_hashrate_latency_lines_average.svg", format="svg")
plt.show()

## Throughput and Latency Combined

In [None]:
# Per-instance-type means, in the same order as `objects`.
means_tps = [avg1_tps, avg2_tps, avg3_tps, avg4_tps, avg5_tps]
means_latency = [avg1_latency, avg2_latency, avg3_latency, avg4_latency, avg5_latency]

# The summary table gets its own name so that `df`, the raw query result the
# other cells select from, is not overwritten and those cells stay re-runnable.
combined_df = pd.DataFrame({'Average Throughput [TPS]':means_tps, 'Average Latency [seconds]':means_latency})
print(combined_df)

# DataFrame.plot creates its own figure when no `ax` is passed, so no figure
# is opened beforehand (doing so would only leave an empty extra figure).
# Latency is drawn against a secondary y-axis on the right.
ax = combined_df.plot( kind= 'bar' , secondary_y= ['Average Latency [seconds]'] ,mark_right=True,ylim=(0, 200), use_index=False)
ax.set_xticklabels(objects)
ax.set_title('Throughput and Latency against Node Configuration (PoW)')

# Keep `fig` bound to the figure that actually holds the chart.
fig = ax.figure
fig.savefig("nodes_configuration_pow_combined_barchart.svg", format="svg",bbox_inches='tight')
plt.show()