# Analyzer for task 1

In [None]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import statistics as stats
import re
import numpy as np

In [None]:
def read_and_parse_data_files(folder: Path) -> pd.DataFrame:
    """Load every ``*.csv`` file in *folder* into one combined DataFrame.

    Each file is read on its own and modified as follows before it is added
    to the result:

    * ``send time`` and ``answer time`` are replaced by floats parsed from
      the text after the last ``:`` of each value; anything before that
      colon is discarded.
    * ``send time (mean)`` and ``answer time (mean)`` hold the mean of the
      converted column for that file, repeated on every row of the file.
    * ``RTT`` is the row-wise sum of ``send time`` and ``answer time``.

    Files are processed in sorted path order so the row order of the result
    is reproducible (``Path.glob`` yields entries in arbitrary order).

    Args:
        folder: Directory searched (non-recursively) for ``*.csv`` files.

    Returns:
        The per-file frames concatenated row-wise with a fresh integer index.

    Raises:
        ValueError: If *folder* contains no ``*.csv`` files.
    """
    csv_files = sorted(folder.glob('*.csv'))
    if not csv_files:
        # pd.concat would fail with a bare "No objects to concatenate";
        # keep the exception type but report which folder was empty.
        raise ValueError(f'No CSV files found in {folder}')

    frames = []
    for csv_file in csv_files:
        dataset = pd.read_csv(csv_file, delimiter=',')

        # Convert both time columns to floats and attach their per-file mean.
        # The converted columns keep their position; the mean columns are
        # appended in the order "send" then "answer".
        for column in ('send time', 'answer time'):
            dataset[column] = dataset[column].apply(
                lambda value: float(value.split(':')[-1])
            )
            dataset[f'{column} (mean)'] = dataset[column].mean()

        # Insert RTT
        dataset['RTT'] = dataset['send time'] + dataset['answer time']

        frames.append(dataset)

    return pd.concat(frames, axis=0, ignore_index=True)

In [None]:
# Load and combine every CSV file found in the current working directory.
data = read_and_parse_data_files(Path('.')) 

In [None]:
# Shape of the combined data, followed by the number of distinct values
# in the 'test group' column.
print(f'Total data collected: {data.shape}')

group_labels = data["test group"].unique()
print(f'Different test approaches: {len(group_labels)}')

In [None]:
# Show the combined DataFrame as the cell output.
data

## Details about traceroute

In [None]:
# Largest 'trace hops' value per test name, sorted ascending for plotting.
hops_group = (
    data.groupby('test name')['trace hops']
    .max()
    .reset_index()
    .sort_values(by='trace hops')
)

In [None]:
# Horizontal bar chart: one bar per test name, bar length = maximum hop count.
fig, ax = plt.subplots()
ax.barh(y=hops_group['test name'], width=hops_group['trace hops'])
ax.set_xlabel('Overall hops (max 30)')
plt.show()

Example traceroute with over 30 hops

In [None]:
# Print the full contents of one saved .trace file.
trace_path = 'andreas-ubuntu-hotspot-20_23_45_888841.csv.trace'
with open(trace_path, 'r') as trace_file:
    print(trace_file.read())

## Mean times over different test setups

In [None]:
# Largest 'answer time (mean)' value per test name, sorted ascending for plotting.
avg_group = (
    data.groupby('test name')['answer time (mean)']
    .max()
    .reset_index()
    .sort_values(by='answer time (mean)')
)

In [None]:
# Scale the mean answer times by 1000 (the axis labels below are in ms).
# NOTE: this overwrites the column of avg_group, so running this cell a
# second time without rebuilding avg_group scales the values again.
avg_group['answer time (mean)'] = avg_group['answer time (mean)'] * 1000

# Split the test names at 100 into two frames, one chart each.
scaled_means = avg_group['answer time (mean)']
avg_group_small = avg_group.loc[scaled_means <= 100]
avg_group_big = avg_group.loc[scaled_means > 100]

# Chart 1: test names at or below the threshold.
fig, ax = plt.subplots()
ax.barh(y=avg_group_small['test name'], width=avg_group_small['answer time (mean)'])
ax.set_xlabel('Avg time without timeouts <= 100 ms ')
plt.show()

# Chart 2: test names above the threshold.
fig, bx = plt.subplots()
bx.barh(y=avg_group_big['test name'], width=avg_group_big['answer time (mean)'])
bx.set_xlabel('Avg time without timeouts > 100 ms ')
plt.show()


## Statistics about timed-out packets

In [None]:
# Rows whose 'message' column reads exactly 'timed out'.
timed_out = data.loc[data['message'] == 'timed out']

# All rows (timed out or not) of every test name that has at least one timeout.
affected_test_names = timed_out['test name'].unique()
tests_with_time_out = data.loc[data['test name'].isin(affected_test_names)]

print(f'Over {len(data)} messages timed {len(timed_out)} out!')

In [None]:
# Distinct test names with at least one timeout, in order of first appearance.
listed_groups = tests_with_time_out['test name'].unique().tolist()

total_test_runs = len(data["test name"].unique())
print(f'Timeouts in {len(listed_groups)} of {total_test_runs} test runs!')

In [None]:
# Show the list of test names that had at least one timeout.
listed_groups