In [1]:
from pathlib import Path

import pandas as pd
import plotly.express as px

In [2]:
# Load the recorded speed measurements from the CSV file into a DataFrame.
data = pd.read_csv(filepath_or_buffer='speed_tests.csv')

In [3]:
# Preview the first five rows of the loaded measurements.
data.head(n=5)

Unnamed: 0,method,blocks,cpu_speed,gpu_speed
0,mse,1,6.788332,4.346976
1,mse,1,6.891164,6.30121
2,mse,1,5.352927,4.223653
3,mse,1,5.323718,4.211751
4,mse,1,5.414093,4.170353


In [4]:
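# Optional filter (disabled): uncomment the next line to drop the rows whose
# method is 'd0rj/e5-small-en-ru' before any plot or table below is built.
# data = data[data['method'] != 'd0rj/e5-small-en-ru']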

In [5]:
# Box plot of the CPU measurements for every block count, one box per method.
fig = px.box(data, x="blocks", y="cpu_speed", color="method", title="cpu_speed")
fig.show()

# plotly's write_image does not create missing directories, so make sure the
# export folder exists before the first image is written; otherwise a fresh
# checkout fails here with FileNotFoundError. The later export cells write
# into the same folder.
Path('metrics').mkdir(parents=True, exist_ok=True)
fig.write_image('metrics/cpu_speed_boxplot.png')

In [6]:
# Box plot of the GPU measurements for every block count, one box per method.
fig = px.box(
    data,
    x="blocks",
    y="gpu_speed",
    color="method",
    title='gpu speed',
)
fig.show()
# Export the same figure as a static PNG.
fig.write_image('metrics/gpu_speed_boxplot.png')

In [7]:
# Summary statistics of `cpu_speed` for every block count, rounded to two
# decimals, with the row-count column removed.
# NOTE(review): the rounding happens before the percentage computation in the
# following loop, so those percentages are derived from already-rounded
# statistics — confirm that this loss of precision is acceptable.
cpu_data = (
    data
    .groupby(by=['blocks'], as_index=False)['cpu_speed']
    .describe()
    .round(2)
    .drop(columns=['count'])
)

In [8]:
# Statistics that the loops below convert into a percentage relative to the
# 12-block row. 'std' is not listed, so those loops leave it unchanged.
colums = [
    'mean',
    'min',
    '25%',
    '50%',
    '75%',
    'max',
]

In [9]:
# Replace every listed statistic with its percentage difference from the
# 12-block row: (baseline - value) / baseline * 100, rounded to two decimals.
# The baseline is read once, before any column is overwritten.
cpu_baseline = cpu_data.loc[cpu_data['blocks'] == 12, colums].iloc[0]
cpu_data[colums] = ((cpu_baseline - cpu_data[colums]) / cpu_baseline * 100).round(2)

In [10]:
# Drop the 12-block reference row and relabel the columns in Russian, as one
# chained expression instead of an inplace rename.
# NOTE(review): 'max' has no entry in the mapping and keeps its English name —
# confirm whether that is intended.
cpu_column_labels = {
    'blocks': 'Количество блоков',
    'mean': 'среднее',
    'std': 'среднеквадратическое отклонение',
    'min': 'минимум',
    '25%': '25% перцентиль',
    '50%': '50% перцентиль',
    '75%': '75% перцентиль',
}
cpu_data = cpu_data[cpu_data['blocks'] != 12].rename(columns=cpu_column_labels)
# Show the table transposed: one column per remaining block count.
cpu_data.T

Unnamed: 0,0,1,2,3,4,5
Количество блоков,1.0,2.0,4.0,6.0,8.0,10.0
среднее,86.26,79.49,64.3,47.73,32.43,16.58
среднеквадратическое отклонение,1.0,1.17,1.74,2.08,2.56,0.9
минимум,87.65,80.98,66.94,52.2,37.13,18.78
25% перцентиль,87.46,81.07,66.41,50.17,35.52,16.06
50% перцентиль,87.32,80.84,65.3,48.91,32.65,16.62
75% перцентиль,85.42,78.31,62.59,43.7,29.7,16.23
max,81.26,75.58,53.35,43.77,21.11,17.4


In [11]:
# Summary statistics of `gpu_speed` for every block count, rounded to two
# decimals, with the row-count column removed.
# NOTE(review): the rounding happens before the percentage computation in the
# following loop, so those percentages are derived from already-rounded
# statistics — confirm that this loss of precision is acceptable.
gpu_data = (
    data
    .groupby(by=['blocks'], as_index=False)['gpu_speed']
    .describe()
    .round(2)
    .drop(columns=['count'])
)

In [12]:
# Replace every listed statistic with its percentage difference from the
# 12-block row: (baseline - value) / baseline * 100, rounded to two decimals.
# The baseline is read once, before any column is overwritten.
gpu_baseline = gpu_data.loc[gpu_data['blocks'] == 12, colums].iloc[0]
gpu_data[colums] = ((gpu_baseline - gpu_data[colums]) / gpu_baseline * 100).round(2)

In [13]:
# Drop the 12-block reference row and relabel the columns in Russian, as one
# chained expression instead of an inplace rename.
# NOTE(review): 'max' has no entry in the mapping and keeps its English name —
# confirm whether that is intended.
gpu_column_labels = {
    'blocks': 'Количество блоков',
    'mean': 'среднее',
    'std': 'среднеквадратическое отклонение',
    'min': 'минимум',
    '25%': '25% перцентиль',
    '50%': '50% перцентиль',
    '75%': '75% перцентиль',
}
gpu_data = gpu_data[gpu_data['blocks'] != 12].rename(columns=gpu_column_labels)
# Show the table transposed: one column per remaining block count.
gpu_data.T

Unnamed: 0,0,1,2,3,4,5
Количество блоков,1.0,2.0,4.0,6.0,8.0,10.0
среднее,65.7,62.1,44.6,35.29,24.56,8.89
среднеквадратическое отклонение,0.89,0.92,1.45,1.56,1.67,1.64
минимум,67.01,62.87,49.72,37.05,28.37,12.67
25% перцентиль,66.72,62.73,48.91,36.17,26.48,6.33
50% перцентиль,66.69,63.11,46.19,36.81,25.08,4.04
75% перцентиль,65.21,63.63,40.99,38.77,23.15,14.8
max,62.01,60.64,42.46,31.06,18.6,4.68


In [14]:
# Bar chart of the mean relative gain on GPU for each block count.
task = 'среднее'

fig = px.bar(
    gpu_data,
    x="Количество блоков",
    y=task,
    text=task,
    labels={task: "Прирост скорости (%)"},
    width=900,
    height=500,
)
# Print each bar's value above the bar.
fig.update_traces(textposition="outside")
fig.update_layout(
    bargroupgap=0.15,
    # One axis tick per block count present in the table.
    xaxis={"tickvals": gpu_data["Количество блоков"]},
    uniformtext_minsize=8,
    uniformtext_mode='show',
)
fig.show()
# Export the same figure as a static PNG.
fig.write_image('metrics/gpu_speed_mean_growth.png')

In [15]:
# Bar chart of the mean relative gain on CPU for each block count.
task = 'среднее'

fig = px.bar(
    cpu_data,
    x="Количество блоков",
    y=task,
    text=task,
    labels={task: "Прирост скорости (%)"},
    width=900,
    height=500,
)
# Print each bar's value above the bar.
fig.update_traces(textposition="outside")
fig.update_layout(
    bargroupgap=0.15,
    # One axis tick per block count present in the table.
    xaxis={"tickvals": cpu_data["Количество блоков"]},
    uniformtext_minsize=8,
    uniformtext_mode='show',
)
fig.show()
# Export the same figure as a static PNG.
fig.write_image('metrics/cpu_speed_mean_growth.png')

In [16]:
# Tag each table with its device name, then stack the CPU rows on top of the
# GPU rows with a fresh 0..n-1 index.
cpu_data = cpu_data.assign(**{'Устройство': 'CPU'})
gpu_data = gpu_data.assign(**{'Устройство': 'GPU'})
d = pd.concat([cpu_data, gpu_data], ignore_index=True)
d

Unnamed: 0,Количество блоков,среднее,среднеквадратическое отклонение,минимум,25% перцентиль,50% перцентиль,75% перцентиль,max,Устройство
0,1,86.26,1.0,87.65,87.46,87.32,85.42,81.26,CPU
1,2,79.49,1.17,80.98,81.07,80.84,78.31,75.58,CPU
2,4,64.3,1.74,66.94,66.41,65.3,62.59,53.35,CPU
3,6,47.73,2.08,52.2,50.17,48.91,43.7,43.77,CPU
4,8,32.43,2.56,37.13,35.52,32.65,29.7,21.11,CPU
5,10,16.58,0.9,18.78,16.06,16.62,16.23,17.4,CPU
6,1,65.7,0.89,67.01,66.72,66.69,65.21,62.01,GPU
7,2,62.1,0.92,62.87,62.73,63.11,63.63,60.64,GPU
8,4,44.6,1.45,49.72,48.91,46.19,40.99,42.46,GPU
9,6,35.29,1.56,37.05,36.17,36.81,38.77,31.06,GPU


In [17]:
# Line chart comparing the mean relative gain of both devices per block count,
# one line per device, with each point labelled by its value.
fig = px.line(
    d,
    x="Количество блоков",
    y="среднее",
    color="Устройство",
    text="среднее",
    labels={"среднее": "Прирост скорости (%)"},
    height=500,
    width=900,
)
fig.update_traces(textposition="top center")
fig.update_layout(
    # Ticks are taken from the CPU table's block counts.
    xaxis={"tickvals": cpu_data["Количество блоков"]},
    uniformtext_minsize=8,
)
fig.show()
# Export the same figure as a static PNG.
fig.write_image('metrics/cpu_gpu_speed_growth.png')