In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import requests
import json
import pandas as pd

# Base URL of the cache-simulation service this notebook talks to.
service = "http://192.170.227.234:80"
# service = "http://localhost:80"

# Sites whose access logs are loaded, and the dataset tag that appears in
# the per-site file names.
sites = ['AGLT2', 'MWT2']
dataset = 'AUG'

# Binary byte units used to scale printed and plotted data volumes.
GB = 1024 ** 3
TB = 1024 ** 4
PB = 1024 ** 5

### Import data

In [2]:
# Load one access log per site, tag every row with the name of the xcache
# that serves the site, and print a short per-site summary.
all_accesses = []
for site in sites:
    print('Loading:', site)
    site_accesses = pd.read_hdf('{}_{}.h5'.format(site, dataset), key=site, mode='r')
    site_accesses['site'] = 'xc_' + site
    all_accesses.append(site_accesses)

    file_sizes = site_accesses.filesize
    print(file_sizes.count(), "files")
    print(len(site_accesses.index.unique()), " unique files")
    print(file_sizes.sum() / PB, "PB")
    print(file_sizes.mean() / GB, "GB avg. file size")
    print('----------------------------')

# Merge all sites into a single frame ordered by transfer start time.
all_data = pd.concat(all_accesses).sort_values('transfer_start')

total_files = len(all_data)
unique_files = len(all_data.index.unique())

print('---------- merged data -----------')
print(total_files, 'files\t\t', unique_files, 'unique files' )
print(all_data.filesize.sum() / PB, "PB")
print(all_data.filesize.mean() / GB, "GB avg. file size")

Loading: AGLT2
2426114 files
1079025  unique files
3.40435105065868 PB
1.4713738955776505 GB avg. file size
----------------------------
Loading: MWT2
7909781 files
3092063  unique files
9.52159464423143 PB
1.262249312044117 GB avg. file size
----------------------------
---------- merged data -----------
10335895 files		 4128678 unique files
12.92594569489011 PB
1.3113365057370547 GB avg. file size


### Running requests

In [None]:
# Replay the merged access log against the simulation service in batches and
# keep running totals of hits and delivered bytes for each of the four slots
# the service reports (labelled level 1/2/3 and origin when plotted).

MAX_REQUESTS = 300000   # stop after this many accesses have been replayed
BATCH_SIZE = 100        # accesses sent per POST to /simulate
REPORT_EVERY = 5000     # snapshot cadence; keep it a multiple of BATCH_SIZE


def simulate_batch(session, batch):
    """POST one batch of accesses to the service.

    Returns the decoded JSON reply, or None (after printing the problem) when
    the request fails, the status is not 200, or the body is not valid JSON.
    """
    try:
        r = session.post(service + '/simulate', json=batch)
    except requests.exceptions.RequestException as e:
        print(e)
        return None
    if r.status_code != 200:
        # An error reply carries no usable counts; do not try to parse it.
        print(r)
        return None
    try:
        return r.json()
    except ValueError as e:
        # Every JSON decode error raised by requests is a ValueError subclass.
        print(e)
        return None


def tally(reply, hit_totals, size_totals):
    """Add the per-slot 'counts' and 'sizes' of one reply to the running totals."""
    for i, hits in enumerate(reply['counts']):
        hit_totals[i] += int(hits)
        size_totals[i] += reply['sizes'][i]


def shares(totals):
    """Return each total as a fraction of the sum (all zeros if the sum is 0)."""
    whole = sum(totals)
    return [t / whole if whole else 0.0 for t in totals]


print('---------- start requests ----------')
acs = []                    # snapshots of the hit totals
dac = []                    # snapshots of the delivered-bytes totals
accesses = [0, 0, 0, 0]
dataaccc = [0, 0, 0, 0]
payload = []
with requests.Session() as session:
    for count, (filename, row) in enumerate(all_data.iterrows(), start=1):
        if count > MAX_REQUESTS:
            break
        payload.append({
            'filename': filename,
            'site': row['site'],
            'filesize': row['filesize'],
            'time': row['transfer_start'],
        })

        # Send once per full batch. The original test `count % 100` was
        # inverted and posted on nearly every row.
        if count % BATCH_SIZE == 0:
            reply = simulate_batch(session, payload)
            if reply is not None:
                tally(reply, accesses, dataaccc)
                payload = []
            # On failure the batch is kept and resent with the next one.

        if count % REPORT_EVERY == 0:
            acs.append(accesses.copy())
            dac.append(dataaccc.copy())
            print(count, shares(accesses), shares(dataaccc))

    # Flush whatever is left (short final batch or a batch that failed).
    if payload:
        reply = simulate_batch(session, payload)
        if reply is not None:
            tally(reply, accesses, dataaccc)
            payload = []

print('final: ', accesses, dataaccc)

accdf = pd.DataFrame(acs)
# Delivered volume is reported in bytes; convert to TB for plotting.
dacdf = pd.DataFrame(dac) / TB

---------- start requests ----------
5000 [1.0, 0.0, 0.0, 0.0] [1.0, 0.0, 0.0, 0.0]
10000 [1.0, 0.0, 0.0, 0.0] [1.0, 0.0, 0.0, 0.0]
15000 [1.0, 0.0, 0.0, 0.0] [1.0, 0.0, 0.0, 0.0]
20000 [1.0, 0.0, 0.0, 0.0] [1.0, 0.0, 0.0, 0.0]
25000 [1.0, 0.0, 0.0, 0.0] [1.0, 0.0, 0.0, 0.0]
30000 [1.0, 0.0, 0.0, 0.0] [1.0, 0.0, 0.0, 0.0]
35000 [1.0, 0.0, 0.0, 0.0] [1.0, 0.0, 0.0, 0.0]
40000 [0.948375, 0.0, 0.0, 0.051625] [0.9892622530174727, 0.0, 0.0, 0.010737746982527307]
45000 [0.8715111111111111, 0.0, 0.0, 0.1284888888888889] [0.9012021299159659, 0.0, 0.0, 0.0987978700840341]
50000 [0.8079, 0.0033, 0.0, 0.1888] [0.8196479825665484, 0.008781623724205764, 0.0, 0.17157039370924582]
55000 [0.7616545454545455, 0.005436363636363637, 0.0, 0.2329090909090909] [0.7445848813113738, 0.021478526373855084, 0.0, 0.23393659231477115]
60000 [0.7311, 0.007683333333333334, 0.0, 0.26121666666666665] [0.7099474742945413, 0.03255611793668364, 0.0, 0.257496407768775]
65000 [0.7043538461538461, 0.007184615384615385, 0.0,

### Plotting results

In [None]:
# Plot how hits and delivered data accumulate per slot as requests are replayed.

# Each row of accdf/dacdf is one snapshot of the running totals. The request
# loop takes a snapshot every 5000 requests, so row i corresponds to
# (i + 1) * 5000 requests. Keep this value in sync with that loop.
SNAPSHOT_EVERY = 5000

tier_names = ['level 1', 'level 2', 'level 3', 'origin']
accdf.columns = tier_names
dacdf.columns = tier_names

# x axis in thousands of requests, so the "[x1000]" label is actually true.
requests_k = (np.arange(len(accdf)) + 1) * SNAPSHOT_EVERY / 1000

fig, axs = plt.subplots(nrows=2, ncols=1, constrained_layout=True, figsize=(8, 10))

accdf.set_index(requests_k).plot(ax=axs[0])
axs[0].set_ylabel('hits')
axs[0].set_xlabel('requests [x1000]')
axs[0].legend()

dacdf.set_index(requests_k).plot(ax=axs[1])
axs[1].set_ylabel('data delivered [TB]')
axs[1].set_xlabel('requests [x1000]')
axs[1].legend()

# Save before showing so the file is written regardless of how the active
# backend treats the figure once it has been displayed.
fig.savefig('filling_up.png')

plt.show()

### Network states

In [None]:
# Fetch per-xcache counters from the service and tabulate the caches that
# received at least one request.
res = requests.get(service + '/status')
res.raise_for_status()  # fail here rather than on a confusing decode error

# The response body decodes to a string that is itself JSON, hence two decodes.
status = json.loads(res.json())

# Each status entry is indexed as (name, counters); delivered data is
# converted from bytes to TB.
status_rows = [
    [
        entry[0],
        entry[1]['requests_received'],
        entry[1]['files_delivered'],
        entry[1]['data_delivered'] / TB,
    ]
    for entry in status
]

# NOTE(review): this rebinds `sites`, which the config cell defines as the list
# of site names used by the loading cell. Re-running the loading cell after this
# one will misbehave. The name is kept because the plotting cell below reads it.
sites = pd.DataFrame(status_rows, columns=['xcache', 'requests', 'hits', 'data delivered'])
sites = sites[sites.requests != 0]
sites.head(20)

In [None]:
# Bar chart of the per-xcache counters; delivered data gets its own
# (secondary) y axis because it is on a different scale from the counts.
fig, ax = plt.subplots(figsize=(8, 8), constrained_layout=True)

bar_columns = ["requests", "hits", "data delivered"]
sites.plot.bar(x="xcache", y=bar_columns, ax=ax, secondary_y='data delivered')

fig.savefig('xcache_sites.png')
