In [1]:

import itertools
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from multiprocessing import Manager
import datetime as dt


import pandas as pd
from IPython.display import display
import numpy as np

from data_objects.NodesMetaData import NodesMetaData
import utils.helpers as hp
from utils.helpers import timer
from alarms import alarms

# the query
def queryPSTrace(dt):
    # include=["timestamp","src","dest","asns","src_host","dest_host","src_site","dest_site","destination_reached",'path_complete','looping','hops']
    include=["timestamp","src","dest","asns","src_host","dest_host","src_site","dest_site",'hops','destination_reached']
    query = {
      "query" : {
        "bool" : {
          "must" : [
            {
              "range" : {
                "timestamp" : {
                  "gt" : dt[0],
                  "lte": dt[1]
                }
              }
            }
          ]
        }
      }
    }
#     print(str(query).replace("\'", "\""))
    try:
        return scan_gen(scan(hp.es,index="ps_trace",query=query, filter_path=['_scroll_id', '_shards', 'hits.hits._source']))
    except Exception as e:
        print(e)
        
def scan_gen(scan):
    while True:
        try:
            yield next(scan)['_source']
        except:
            break

# get the data from ES
def ps_trace(dt):
    scan_gen = queryPSTrace(dt)
    items = []

    for meta in scan_gen:
        items.append(meta)

    return items


# create a shared variable to store the data
manager = Manager()
data = manager.list()

# query in chunks based on time ranges
def getTraceData(dtRange):
    traceData = ps_trace(dtRange)
    if len(traceData)>0:
        data.extend(traceData)

# laod the data in parallel
def run(dateFrom, dateTo):
    # query the past 24 hours and split the period into 8 time ranges
    dtList = hp.GetTimeRanges(dateFrom, dateTo, 8)
    with ProcessPoolExecutor(max_workers=8) as pool:
        result = pool.map(getTraceData, [[dtList[i], dtList[i+1]] for i in range(len(dtList)-1)])


dateFrom, dateTo = hp.defaultTimeRange(6)
print(dateFrom, dateTo)
run(dateFrom, dateTo)
df = pd.DataFrame(list(data))

df['pair'] = df['src']+'-'+df['dest']

Connected
2022-04-04 04:28 2022-04-04 10:28


In [2]:
len(df)

696816

In [None]:
pip install ipwhois

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [None]:
pip install plotly

#### Isolate only the relevant fields

In [3]:
relDf = df[['src', 'dest', 'asns', 'hops', 'pair', 'destination_reached']].copy()

In [4]:
asn2Hop,hop2ASN = {},{}
strange = []

pch, ipl = [],[]
relDf = df[['src', 'dest', 'asns', 'hops', 'pair', 'destination_reached']].copy()
chunk = relDf[['asns','hops', 'pair']].values.tolist()

for asns, hops, pair in chunk:
    if len(asns)==len(hops):
        for i in range(len(asns)):
#             print(asns[i], hops[i])
            if asns[i] not in asn2Hop.keys():
                asn2Hop[asns[i]] = [hops[i]]
            else:
                temp = asn2Hop[asns[i]]
                if hops[i] not in temp:
                    temp.append(hops[i])
                    asn2Hop[asns[i]] = temp

                # print(pair, hops[i])
            if hops[i] not in hop2ASN.keys():
                hop2ASN[hops[i]] = [asns[i]]
            else:
                temp = hop2ASN[hops[i]]
                if asns[i] not in temp:
                    temp.append(asns[i])
                    hop2ASN[hops[i]] = temp
    else: 
        # print(pair, len(asns), len(hops))
        print('this shouldn not happen')
        strange.append([pair,asns,hops])

#### Fix zero ASNs by picking the IP at the same position and checking for it's AS number for previous measures

In [10]:
fix_asns = []


relDf = df[['src', 'dest', 'asns', 'hops', 'pair', 'destination_reached']].copy()

# add a flag showing if all measures for a given pair have reached the destination
all_reachedDf = relDf.groupby('pair')[['destination_reached']].apply(lambda x: all(x.values)).to_frame().rename(columns={0:'all_dests_reached'}).reset_index()

relDf = pd.merge(relDf, all_reachedDf, on='pair', how='left')

relDf['asns_updated'] = None
relDf['hops_updated'] = None
relDf['first'] = None
relDf['last'] = None

zfix = []


try:
    for idx, asns, hops, s, d, destination_reached in relDf[['asns','hops','src','dest','all_dests_reached']].itertuples():
        asns_updated  = asns.copy()
        hops_updated = hops.copy()
        # print(idx, asns, hops, s, d, destination_reached)

        for pos, n in enumerate(asns):
            # when AS number is 0 (unknown) get the IP at this position and find all ASNs for it
            if n == 0:
                # print(n)
                ip = hops[pos]
                asns4IP = hop2ASN[ip]
                # print(ip,asns4IP)

                if 0 in asns4IP: asns4IP.remove(0)
                
                asns_updated[pos] = n
                if len(asns4IP) == 1:
                    asns_updated[pos] = asns4IP[0]
                    if idx not in zfix: zfix.append(idx)
                elif len(asns4IP) > 1:
                    for ip in asns4IP:
                        if len(asns4IP)==2:
                            alt = [el for el in asns4IP if el != n]
                            # alternative_asns_dict[asn] = alt[0]
                            asns_updated[pos] = alt[0]
                            if idx not in zfix: zfix.append(idx)
                            # print(n,'.............', alt[0])
                        elif len(hop2ASN[ip])>2:
                            print('There are more possibilities ........................', idx, asns, pos, alt)

        relDf.at[idx, 'asns_updated'] = asns_updated
        relDf.at[idx, 'hops_updated'] = hops_updated
        
        if len(asns_updated)>2:
            relDf.at[idx, 'first'] = asns_updated[0]
            relDf.at[idx, 'last'] = asns_updated[-1]
except Exception as e:
    print('error', e)
    print(idx, asns, hops, s, d, destination_reached)
    
print(f'Number of repaired measures {len(zfix)}')

Number of repaired measures 9577


#### Find the alternativ AS numbers based on the IP addresses at the same position

In [9]:
def getAltASNs():
    alt_dict = {}
    for asn, l in asn2Hop.items():
        if asn != 0:
            for ip in l:

                others = hop2ASN[ip]
                if 0 in others:
                    others.remove(0)

                if len(others)==2:
                    # if 0 not in hop2ASN[ip]:
                    alt = [el for el in hop2ASN[ip] if el != asn]
                    if len(alt)>1:
                        print(alt)

                    altASN = [alt[0]]
                    if asn in alt_dict.keys():
                        temp = alt_dict[asn]
                        if alt[0] not in temp:
                            temp.append(alt[0])
                        altASN = temp

                    alt_dict[asn] = altASN
                elif len(others)>2:
                    print(asn, others)
                    print('There are more possibilities ........................')
    return alt_dict
            
alternative_asns_dict = getAltASNs()
sorted(alternative_asns_dict.items())

[(38, [40387]),
 (62, [32361, 20080]),
 (231, [237]),
 (237, [231, 36375]),
 (680, [58069]),
 (2907, [7660]),
 (7660, [2907]),
 (10680, [19782]),
 (16905, [22645]),
 (19782, [10680]),
 (20080, [62]),
 (22645, [16905]),
 (24489, [24490]),
 (24490, [24489]),
 (32361, [62]),
 (34878, [58069]),
 (36375, [237]),
 (40387, [38]),
 (58069, [34878, 680])]

#### Find the alternative AS numbers of the alternatives. Add some manually 

In [8]:
alt_dict = alternative_asns_dict.copy()
altsOfAlts = {}
allVals = []
for key, vals in alt_dict.items():
    allVals = vals
    # print(key, vals)
    for aslist in alt_dict.values():
        temp=[]
        if key in aslist and len(aslist)>1:
            temp = aslist.copy()
            allVals.extend(list(set(temp)))
            
    allVals = list(set(allVals))
    if key in allVals:
        allVals.remove(key)
    altsOfAlts[key] = allVals

def addASNsManualy(asn1, asn2):
    if asn1 not in altsOfAlts.keys():
        altsOfAlts[asn1] = [asn2]
    else:
        temp = altsOfAlts[asn1]
        temp.extend(asn2)
        altsOfAlts[asn1]

addASNsManualy(513, 20969)
addASNsManualy(20969, 513)

sorted(altsOfAlts.items())

[(38, [40387]),
 (62, [20080, 32361]),
 (231, [237, 36375]),
 (237, [36375, 231]),
 (513, [20969]),
 (680, [58069, 34878]),
 (2907, [7660]),
 (7660, [2907]),
 (10680, [19782]),
 (16905, [22645]),
 (19782, [10680]),
 (20080, [32361, 62]),
 (20969, [513]),
 (22645, [16905]),
 (24489, [24490]),
 (24490, [24489]),
 (32361, [20080, 62]),
 (34878, [680, 58069]),
 (36375, [231, 237]),
 (40387, [38]),
 (58069, [680, 34878])]

In [13]:
def getStats(val_list):
    dicti = {}
    for idx, val_list, destination_reached, pair in relDf[[val_list,'destination_reached','pair']].itertuples():
        aslist = list(set(val_list))

        if 0 in aslist:
            aslist.remove(0)
        # print(idx, val_list, destination_reached)

        for n in aslist:
            
            # flag where the ASN appeared - first, last of middle
            for i in val_list:
                if n == val_list[0]:
                    pos = [1,0,0]
                elif n == val_list[-1]:
                    pos = [0,0,1]
                else:
                    pos = [0,1,0]

            if n not in dicti.keys():
                dicti[n] = {'reached': 0,
                            'not_reached':0,
                            'total':0,
                            'first':0,
                            'middle':0,
                            'last':0,
                            'pair':[]}

            if destination_reached:
                dicti[n]['reached'] = dicti[n]['reached']+1
            else: dicti[n]['not_reached'] = dicti[n]['not_reached']+1

            dicti[n]['total'] = dicti[n]['total']+1
            dicti[n]['first'] = dicti[n]['first']+pos[0]
            dicti[n]['middle'] = dicti[n]['middle']+pos[1]
            dicti[n]['last'] = dicti[n]['last']+pos[-1]
            
            temp = dicti[n]['pair']
            if pair not in temp:
                temp.append(pair)
                dicti[n]['pair'] = temp
                        
    return dicti

drasns = getStats('asns_updated')
drasnsDf = pd.DataFrame(drasns).T
display(drasnsDf[(drasnsDf['reached']==0)&(drasnsDf['total']>100)])

# drhops = getStats('hops')
# drhopsDf = pd.DataFrame(drhops).T
# display(drhopsDf[(drhopsDf['reached']==0)&(drhopsDf['total']>100)])


Unnamed: 0,reached,not_reached,total,first,middle,last,pair
2611,0,4637,4637,0,4,4633,"[2607:f388:101c:1000::442-2001:6a8:1080::16, 2..."
378,0,1790,1790,0,0,1790,"[192.108.47.6-192.114.101.125, 193.48.99.76-19..."
376,0,3536,3536,0,142,3394,"[134.75.125.241-132.206.245.253, 193.48.83.97-..."
2259,0,547,547,0,544,3,"[134.158.84.140-134.158.150.53, 193.171.188.21..."
3257,0,351,351,0,351,0,"[129.107.255.30-132.206.245.253, 129.107.255.2..."


In [14]:
asn = 3257
vals = drasns[asn]['pair']
print(len(vals))
for p in vals:
    print(asn, p)
    print(relDf[relDf['pair']==p]['asns_updated'].values[2:5])
    print(df[df['pair']==p][['src_site', 'dest_host']].values[0])
    print()

10
3257 129.107.255.30-132.206.245.253
[list([18515, 22645, 3257, 3257, 3257, 376, 0])
 list([18515, 22645, 3257, 3257, 3257, 376, 0])
 list([18515, 22645, 3257, 3257, 3257, 376, 0])]
['UTA_SWT2' 'ps-bandwidth.clumeq.mcgill.ca']

3257 129.107.255.29-132.206.245.252
[list([18515, 22645, 3257, 3257, 3257, 376, 0])
 list([18515, 22645, 3257, 3257, 3257, 376, 0])
 list([18515, 22645, 3257, 3257, 3257, 376, 0])]
['UTA_SWT2' 'ps-latency.clumeq.mcgill.ca']

3257 129.107.255.29-132.206.245.253
[list([18515, 22645, 3257, 3257, 3257, 376, 0])
 list([18515, 22645, 3257, 3257, 3257, 376, 0])
 list([18515, 22645, 3257, 3257, 3257, 376, 0])]
['UTA_SWT2' 'ps-bandwidth.clumeq.mcgill.ca']

3257 144.206.237.142-132.206.245.253
[list([59624, 59624, 9002, 9002, 3257, 3257, 3257, 376, 376])
 list([59624, 59624, 9002, 9002, 3257, 3257, 3257, 376, 376])
 list([59624, 59624, 9002, 9002, 3257, 3257, 3257, 376, 376])]
['RRC-KI' 'ps-bandwidth.clumeq.mcgill.ca']

3257 129.107.255.30-132.206.245.252
[list([18515, 

In [10]:
df_backup = df.copy()

In [4]:
df = df_backup.copy()

In [16]:
import hashlib
allListsRp,pairListRp = [],[]


def hashASNs(group):
    try:
        hex_list = []
        if len(group.asns_updated.values)>1:
#             print(group.asns.values)
            for g in group.asns_updated.values:
                # print('---',g)
                if g is not None and g==g:
                    asnList = list(set(g.copy()))

                if 0 in asnList: asnList.remove(0)
                
                hash_object = hashlib.sha1(str(asnList).encode())
                hex_dig = hash_object.hexdigest()
                
                if hex_dig not in hex_list:
                    # print(asnList,hex_dig)
                    hex_list.append(hex_dig)

                # pathDict[hex_dig] = {'src':group.name[0], 'dest':group.name[1], 
                #                      'asn':asnList, 'cnt_asn':len(asnList), 'cnt_total_measures':len(group.values)}
                
                    # print(group.name[0], group.name[1], asnList, len(asnList), len(group.values), hex_dig, g[0], g[-1])
                    
                    firsts = list(set(group['first'].values))
                    lasts = list(set(group['last'].values))
                    
                    if len(g)>0:
                        pairListRp.append([group.name[0], group.name[1], asnList, len(asnList), len(group.values), hex_dig, group.all_dests_reached.values[0], firsts, lasts])
                    # print(asnList)
                # print([group.name[0], group.name[1], asnList, len(asnList), len(group.values), hex_dig])
                
                if len(g)>0:
                    allListsRp.append([group.name[0], group.name[1], asnList, len(asnList), len(group.values), hex_dig])
                
    except Exception as e:
        print('Issue wtih:',group.name, asnList)
        print(e)
#         print()


# p = '130.246.176.109-148.6.8.251'
# relDf[relDf['pair']==p][['src','dest','asns_updated','hops_updated']].groupby(['src','dest']).apply(lambda x: hashASNs(x))
relDf[['src','dest','asns_updated','hops', 'all_dests_reached', 'first', 'last']].groupby(['src','dest']).apply(lambda x: hashASNs(x))



cleanRp = pd.DataFrame(pairListRp).rename(columns={0:'src', 1:'dest', 2:'asns_updated', 3:'cnt_asn', 4:'cnt_total_measures', 5:'hash', 6:'all_dests_reached', 7:'first', 8:'last'})


# cleanRp = pd.DataFrame(pathDict).T.reset_index().rename(columns={'index':'hash'})
cleanRp['pair'] = cleanRp['src']+'-'+cleanRp['dest']

cleanPathsRp = pd.DataFrame(allListsRp).rename(columns={0:'src', 1:'dest', 2:'asns_updated', 3:'cnt_asn', 4:'cnt_total_measures', 5:'hash'})
print(len(cleanPathsRp), len(cleanRp))


pathFreq = cleanPathsRp.groupby(['src','dest'])['hash'].apply(lambda x: x.value_counts(normalize=True))
pathFreq = pd.DataFrame(pathFreq).reset_index().rename(columns={'hash':'hash_freq','level_2':'hash'})

clean = pd.merge(cleanRp, pathFreq, how="inner", on=['src','dest','hash'])

sub = df[['dest', 'src_site', 'src', 'dest_site', 'src_host', 'dest_host', 'pair']].drop_duplicates()
clean = pd.merge(clean, sub, on=['pair','src', 'dest'], how='left')


707409 25699


In [9]:
freqAsBaseList = []

def getBaseline(group):
    try:
        cnt_max = max(group.cnt_asn.values)
        freq_max = max(group.hash_freq.values)

        cnt_max_position = [pos for pos, i in enumerate(group.cnt_asn) if i==cnt_max]
        freq_max_position = [pos for pos, i in enumerate(group.hash_freq) if i==freq_max]

        # print(group.cnt_asn.values)
        # print(cnt_max,freq_max)
        
        # print(cnt_max_position,freq_max_position)

        if freq_max>=0.75:
            # print('***',cnt_max, freq_max, cnt_max_position)
            diversity = 0
            position = freq_max_position[0]
            for pos in freq_max_position:
                # print(group.hash_freq.values[pos])
                if diversity < group.cnt_asn.values[pos]:
                    position = pos
                    diversity = group.cnt_asn.values[pos]
                # value, position = group.hash_freq.values[pos], pos if value < group.hash_freq.values[pos] else value,pos
            max_position = [position]
            freq_max = group.hash_freq.values[position]

        else: 
            # print('=======',cnt_max, freq_max, cnt_max_position)
            if len(cnt_max_position)>1:
                path_freq = 0
                for pos in cnt_max_position:
                    # print(group.hash_freq.values[pos])
                    if path_freq < group.hash_freq.values[pos]:
                        position = pos
                        path_freq = group.hash_freq.values[pos]
                    # value, position = group.hash_freq.values[pos], pos if value < group.hash_freq.values[pos] else value,pos
                # print(2)
                max_position = [position]
            else: 
                # print(3)
                max_position = cnt_max_position
                # cnt_max = group.cnt_asn.values[position]

        # print(freq_max_position, group.index.values[freq_max_position[0]])
        freqAsBaseList.append(group.index.values[max_position[0]])
        # print()
    except Exception as e:
        print('Issue wtih:',group,e)

    
clean[clean['hash_freq']<1].groupby('pair').apply(lambda x: getBaseline(x))
# clean[clean['pair']==list(diffFr)[62]].groupby('pair').apply(lambda x: getBaseline(x))

# clean[clean['hash_freq']<1].head(50).groupby('pair').apply(lambda x: getMax(x))
len(freqAsBaseList)

3492

In [17]:
baseLine = clean[clean.index.isin(freqAsBaseList)].copy()
compare2 = clean[~clean.index.isin(baseLine.index)].copy()

compare2

Unnamed: 0,src,dest,asns_updated,cnt_asn,cnt_total_measures,hash,all_dests_reached,first,last,pair,hash_freq,src_site,dest_site,src_host,dest_host
1,,,"[291, 20965, 293, 789, 2200, 59]",6,4,031fa51279113d0e60de6a10bb4b8181245a1bbc,False,"[7896, 59, 229]","[786, 43115, 789]",-,0.250000,,,hcc-ps01.unl.edu,gridpp-ps-band.ecdf.ed.ac.uk
2,,,"[291, 20965, 293, 789, 2200, 59]",6,4,031fa51279113d0e60de6a10bb4b8181245a1bbc,False,"[7896, 59, 229]","[786, 43115, 789]",-,0.250000,,,hcc-ps02.unl.edu,picperfsonar-latency.pic.es
3,,,"[291, 20965, 293, 789, 2200, 59]",6,4,031fa51279113d0e60de6a10bb4b8181245a1bbc,False,"[7896, 59, 229]","[786, 43115, 789]",-,0.250000,,,psmsu02.aglt2.org,lpnhe-psl.in2p3.fr
4,,,"[7896, 786, 20965]",3,4,f070524c1d64bdd9e153f06b68e2b05c87fc9cd8,False,"[7896, 59, 229]","[786, 43115, 789]",-,0.250000,,,perfsonar01.hep.wisc.edu,llrpsonar2.in2p3.fr
5,,,"[7896, 786, 20965]",3,4,f070524c1d64bdd9e153f06b68e2b05c87fc9cd8,False,"[7896, 59, 229]","[786, 43115, 789]",-,0.250000,,,hcc-ps01.unl.edu,gridpp-ps-band.ecdf.ed.ac.uk
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26103,85.122.31.119,81.180.86.62,[2614],1,72,fa3ba04018aa660e475494097c242d7ecf66ff56,True,"[12675, 2614]",[2614],85.122.31.119-81.180.86.62,0.055556,RO-16-UAIC,RO-07-NIPNE,perfsonar-grid.uaic.ro,perfsonar1.nipne.ro
26105,85.122.31.119,81.180.86.64,[2614],1,35,fa3ba04018aa660e475494097c242d7ecf66ff56,True,"[12675, 2614]",[2614],85.122.31.119-81.180.86.64,0.057143,RO-16-UAIC,RO-07-NIPNE,perfsonar-grid.uaic.ro,perfsonar2.nipne.ro
26106,85.122.31.119,85.122.31.120,[12675],1,36,8e027d2915563b8ffa10c4358834b43342eb8c2b,True,[None],[None],85.122.31.119-85.122.31.120,1.000000,RO-16-UAIC,RO-16-UAIC,perfsonar-grid.uaic.ro,perfsonar2-grid.uaic.ro
26107,85.122.31.119,90.147.67.252,"[137, 12675, 20965, 2614]",4,36,d4dfb5c8c20e8a8cd89c786013687bc0e94e079e,False,[12675],[137],85.122.31.119-90.147.67.252,1.000000,RO-16-UAIC,INFN-NAPOLI-ATLAS,perfsonar-grid.uaic.ro,perfsonar2.na.infn.it


In [224]:
p = list(diffFr)[84]

display(baseLine[baseLine['pair']==p])
display(compare2[compare2['pair']==p])

Unnamed: 0,src,dest,asns_updated,cnt_asn,cnt_total_measures,hash,all_dests_reached,first,last,pair,hash_freq,src_site,dest_site,src_host,dest_host
16810,2001:610:108:203a::31,2001:760:4205:254::12,"[513, 137, 1103]",3,67,328821b1bcb35a8deb7741425b864da4367088dd,False,[1103],[137],2001:610:108:203a::31-2001:760:4205:254::12,0.507463,SARA-MATRIX,INFN-T1,perfsonar-latency.grid.surfsara.nl,perfsonar-ow.cnaf.infn.it


Unnamed: 0,src,dest,asns_updated,cnt_asn,cnt_total_measures,hash,all_dests_reached,first,last,pair,hash_freq,src_site,dest_site,src_host,dest_host
16811,2001:610:108:203a::31,2001:760:4205:254::12,"[137, 20965, 1103]",3,67,d58424671825fffb19a8d0d6c7cc2f6ca13f6273,False,[1103],[137],2001:610:108:203a::31-2001:760:4205:254::12,0.492537,SARA-MATRIX,INFN-T1,perfsonar-latency.grid.surfsara.nl,perfsonar-ow.cnaf.infn.it


In [225]:
p = list(diffFr)[86]

display(baseLine[baseLine['pair']==p])
display(compare2[compare2['pair']==p])

Unnamed: 0,src,dest,asns_updated,cnt_asn,cnt_total_measures,hash,all_dests_reached,first,last,pair,hash_freq,src_site,dest_site,src_host,dest_host
16915,2001:610:108:203a::32,2001:67c:1148:204::250,"[43115, 20965, 766, 1103]",4,67,d189b5f69fa1c4c41e9ad51ff06bbc36c58213b8,False,[1103],"[43115, 766]",2001:610:108:203a::32-2001:67c:1148:204::250,0.492537,SARA-MATRIX,pic,perfsonar-bandwidth.grid.surfsara.nl,picperfsonar-bandwidth.pic.es


Unnamed: 0,src,dest,asns_updated,cnt_asn,cnt_total_measures,hash,all_dests_reached,first,last,pair,hash_freq,src_site,dest_site,src_host,dest_host
16916,2001:610:108:203a::32,2001:67c:1148:204::250,"[513, 20965, 43115, 1103]",4,67,8be6fd9e4c75b0cddbfc139196602acc09dbba4e,False,[1103],"[43115, 766]",2001:610:108:203a::32-2001:67c:1148:204::250,0.492537,SARA-MATRIX,pic,perfsonar-bandwidth.grid.surfsara.nl,picperfsonar-bandwidth.pic.es
16917,2001:610:108:203a::32,2001:67c:1148:204::250,"[20965, 766, 1103]",3,67,2906cedbd8966dab46eb04670a6f283e9bbff8b7,False,[1103],"[43115, 766]",2001:610:108:203a::32-2001:67c:1148:204::250,0.014925,SARA-MATRIX,pic,perfsonar-bandwidth.grid.surfsara.nl,picperfsonar-bandwidth.pic.es


20080

In [115]:
p = list(diffFr)[174]
print('diff:',diffFr[p])


display(baseLine[baseLine['pair']==p])
display(compare2[compare2['pair']==p])


print(p)
print()


# print(df[df['pair']==p][['hops']].values[2])
print()

print(relDf[relDf['pair']==p]['asns_updated'].values[0])
print(relDf[relDf['pair']==p]['hops'].values[0])

print()
print(relDf[relDf['pair']==p]['asns_updated'].values[1])
print(relDf[relDf['pair']==p]['hops'].values[1])


# print(relDf[relDf['pair']==p]['asns'])

diff: [789]


Unnamed: 0,src,dest,asns_updated,cnt_asn,cnt_total_measures,hash,all_dests_reached,first,last,pair,hash_freq,src_site,dest_site,src_host,dest_host
19405,2001:660:5009:9:193:48:99:76,2001:67c:1148:203::242,"[2200, 513, 43115, 20965]",4,67,1ecfaf1d6aad85102708cdac02717007af7c310a,False,"[2200, 789]",[43115],2001:660:5009:9:193:48:99:76-2001:67c:1148:203...,0.343284,IN2P3-CC,pic,ccperfsonar2.in2p3.fr,picperfsonar-latency.pic.es


Unnamed: 0,src,dest,asns_updated,cnt_asn,cnt_total_measures,hash,all_dests_reached,first,last,pair,hash_freq,src_site,dest_site,src_host,dest_host
19404,2001:660:5009:9:193:48:99:76,2001:67c:1148:203::242,"[513, 43115, 789]",3,67,c47d1d97a0489116ed77001e6d6455f4ce8bc53f,False,"[2200, 789]",[43115],2001:660:5009:9:193:48:99:76-2001:67c:1148:203...,0.164179,IN2P3-CC,pic,ccperfsonar2.in2p3.fr,picperfsonar-latency.pic.es
19406,2001:660:5009:9:193:48:99:76,2001:67c:1148:203::242,"[513, 20965, 43115, 789]",4,67,5a702fdd6c197a8173790c854c922dc75e2a499f,False,"[2200, 789]",[43115],2001:660:5009:9:193:48:99:76-2001:67c:1148:203...,0.328358,IN2P3-CC,pic,ccperfsonar2.in2p3.fr,picperfsonar-latency.pic.es
19407,2001:660:5009:9:193:48:99:76,2001:67c:1148:203::242,"[2200, 513, 43115]",3,67,d84a22d14a15a76ad749c3673e3de20d7e5ba923,False,"[2200, 789]",[43115],2001:660:5009:9:193:48:99:76-2001:67c:1148:203...,0.164179,IN2P3-CC,pic,ccperfsonar2.in2p3.fr,picperfsonar-latency.pic.es


2001:660:5009:9:193:48:99:76-2001:67c:1148:203::242


[789, 513, 513, 513, 43115]
['193.48.99.100', '192.16.166.193', '192.16.166.213', '192.16.166.58', '193.109.172.242']

[2200, 513, 513, 20965, 43115]
['2001:660:5009:9:193:48:99:100', '2001:1458:302:31::1', '2001:1458:302:35::1', '2001:798:111:1::11', '2001:67c:1148:203::242']


In [148]:
# df[(df['dest']=='')|(df['src']=='')][['src','src_host','dest','dest_host']].drop_duplicates()


s = df[(df['src']=='')][['src_host']].drop_duplicates()

s.values

array([['perfsonar2.ultralight.org'],
       ['hcc-ps01.unl.edu'],
       ['hcc-ps02.unl.edu'],
       ['perfsonar01.hep.wisc.edu'],
       ['perfsonar-cms2.itns.purdue.edu'],
       ['perfsonar-cms1.itns.purdue.edu'],
       ['psmsu02.aglt2.org'],
       ['lhcmon3.bnl.gov'],
       ['perfsonar.ultralight.org'],
       ['perfsonar02.hep.wisc.edu']], dtype=object)

In [153]:
d = df[(df['dest']=='')][['dest_host']].drop_duplicates()
d.values.tolist()

[['perfson1.ppgrid1.rhul.ac.uk'],
 ['ps-02.lnl.infn.it'],
 ['ps02.iihe.ac.be'],
 ['ps-bandwidth.sfu.westgrid.ca'],
 ['psonar2.lal.in2p3.fr'],
 ['psfrascati02.lnf.infn.it'],
 ['ps-gsdc02.sdfarm.kr'],
 ['perfsonar-de-kit.gridka.de'],
 ['lcgperf.shef.ac.uk'],
 ['marperf01.in2p3.fr'],
 ['llrpsonar2.in2p3.fr'],
 ['gridpp-ps-band.ecdf.ed.ac.uk'],
 ['picperfsonar-latency.pic.es'],
 ['perfsonar02.ft.uam.es'],
 ['psific01.ific.uv.es'],
 ['lpnhe-psl.in2p3.fr'],
 ['perfsonar-bw.hepgrid.uerj.br'],
 ['ps-gsdc01.sdfarm.kr'],
 ['perfsonar2-grid.uaic.ro'],
 ['lhcb-perf.nipne.ro'],
 ['atrogr007.nipne.ro'],
 ['perfsonar03-iep-grid.saske.sk'],
 ['perfsonar2.nipne.ro'],
 ['t1-pfsn2.jinr-t1.ru'],
 ['psific02.ific.uv.es'],
 ['perfsonar1.nipne.ro'],
 ['lapp-ps01.in2p3.fr'],
 ['perfsonar-lt.hepgrid.uerj.br'],
 ['ccperfsonar2.in2p3.fr'],
 ['perfsonar01.ft.uam.es']]

In [27]:
import collections

toCheckFr, diffsFr = [],[]
diffFr = {}


def getChanged(group):
    try:
        # print(group.name)
        base = baseLine[baseLine['pair']==group.name]['asns_updated'].values.tolist()[0]
        
        casns = compare2[compare2['pair']==group.name]['asns_updated'].tolist()
        asns_expanded = list([j for i in casns for j in i])
        counter=collections.Counter(asns_expanded)

        flag = False
        diff_temp = []
        
        

        for i, asns in enumerate(group.asns_updated):
            first = group['first'].values[i]
            last = group['last'].values[i]
            freq = group['hash_freq'].values[i]

            diff = list(set(asns)-set(base))
    

            # for d in diff:
            #     if d in first:
            #         diff.remove(d)
                    
                # if len(diff)>0 and group.all_dests_reached.values[0]==True:
                #      if d in last:
                #         diff.remove(d)
            
                
            if len(diff)>0:
                for d in diff:                
                    if d in altsOfAlts.keys():
                        flag = all([False if alt in base else True for alt in altsOfAlts[d]])
                    else: flag = True


            if len(diff)>0:
                diff_temp.extend(diff)

        # if flag and len(base)>2 and (freq<0.48 or freq>0.51):
        if flag and len(base)>=2:
        # if flag:
            toCheckFr.append(group.name)
            diffsFr.append(list(set(diff_temp)))
            diffFr[group.name] = list(set(diff_temp))
    except Exception as e:
        print('Issue wtih:',group.name,e)
        print(base)


sites2remove = ['ATLAS-CBPF', 'NCP-LCG2', 'UTA_SWT2', 'RRC_KI']
cut = compare2[(~compare2['src_site'].isin(sites2remove))&(~compare2['dest_site'].isin(['NCP-LCG2', 'UTA_SWT2', 'RRC_KI']))]

cut[(cut['hash_freq']<1)&~(cut['dest']=='')&~(cut['src']=='')].groupby('pair').apply(lambda x: getChanged(x))

# cut[(cut['pair']==p)].groupby('pair').apply(lambda x: getChanged(x))

len(toCheckFr)

383

In [22]:
import collections

toCheckFr, diffsFr = [],[]
diffFr = {}


def getChanged(group):
    try:
        # print(group.name)
        base = baseLine[baseLine['pair']==group.name]['asns_updated'].values.tolist()[0]
        
        casns = compare2[compare2['pair']==group.name]['asns_updated'].tolist()
        asns_expanded = list([j for i in casns for j in i])
        counter=collections.Counter(asns_expanded)

        flag = False
        diff_temp = []
        
        

        for i, asns in enumerate(group.asns_updated):
            first = group['first'].values[i]
            last = group['last'].values[i]
            freq = group['hash_freq'].values[i]

            diff = list(set(asns)-set(base))
    

            for d in diff:
                if d in first:
                    diff.remove(d)
                    
                if len(diff)>0 and group.all_dests_reached.values[0]==True:
                     if d in last:
                        diff.remove(d)
            
            if len(diff)>0:
                for d in diff:                
                    if d in altsOfAlts.keys():
                        if not altsOfAlts[d] in base:
                            flag = True
                    else: flag = True


            if len(diff)>0:
                diff_temp.extend(diff)

        # if flag and len(base)>2 and (freq<0.48 or freq>0.51):
        if flag and len(base)>2:
            toCheckFr.append(group.name)
            diffsFr.append(list(set(diff_temp)))
            diffFr[group.name] = list(set(diff_temp))
    except Exception as e:
        print('Issue wtih:',group.name,e)
        print(base)
        print(group.first[i])

p = '10.10.0.2-131.169.168.30'

cut = compare2[(~compare2['src_site'].isin(['NCP-LCG2', 'UTA_SWT2', 'RRC_KI']))&(~compare2['dest_site'].isin(['NCP-LCG2', 'UTA_SWT2', 'RRC_KI']))]
cut[(cut['hash_freq']<1)&~(cut['dest']=='')&~(cut['src']=='')].groupby('pair').apply(lambda x: getChanged(x))
# cut[(cut['pair']==p)].groupby('pair').apply(lambda x: getChanged(x))



# cut[cut['pair']==list(diffFr)[3]].groupby('pair').apply(lambda x: getChanged(x))
len(toCheckFr)

616

In [237]:
# 482 - remove first
# down to
# 363 - remove last if all reached
# 312 - remove all baseline < 3
###### 304 - freq<0.48 or freq>0.51
# 302 - ~(cut['dest']=='')&~(cut['src']=='')

In [31]:
frl = []
for pair, diff in diffFr.items():
    for d in diff:
        frl.append([pair,d])
        
        
freqdf = pd.DataFrame(frl, columns=['pair', 'diff'])

sub = df[['dest', 'src_site', 'src', 'dest_site', 'src_host', 'dest_host', 'pair']].drop_duplicates()
freqdf = pd.merge(freqdf, sub, on='pair', how='left')
freqdf.groupby(['src_site','dest_site']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,pair,diff,dest,src,src_host,dest_host
src_site,dest_site,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AGLT2,UAM-LCG2,10,10,10,10,10,10
BEIJING-LCG2,JINR-LCG2,3,3,3,3,3,3
BEIJING-LCG2,JINR-T1,3,3,3,3,3,3
BNL-ATLAS,CBPF,4,4,4,4,4,4
BNL-ATLAS,IN2P3-CC,2,2,2,2,2,2
...,...,...,...,...,...,...,...
UKI-SCOTGRID-ECDF,JINR-LCG2,1,1,1,1,1,1
UKI-SOUTHGRID-RALPP,CBPF,4,4,4,4,4,4
USCMS-FNAL-WC1,UCSDT2,1,1,1,1,1,1
WT2,UAM-LCG2,2,2,2,2,2,2


In [30]:

import plotly.figure_factory as ff
import plotly.graph_objects as go
import plotly.express as px
import plotly as py
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import collections



from plotly.subplots import make_subplots
import plotly.graph_objects as go




d3 = {k:v for k,v in diffFr.items()}


pairs = d3
start,stop = 0,len(d3)
# start,stop = 150,len(d3)




def plotVals(pair, diff):
    vectors = relDf[(relDf['pair']==pair)]['asns_updated'].tolist()

    asnsExpanded = list([j for i in vectors for j in i])
    asnDisplayVals = {i:pos+1 for pos, i in enumerate(set(asnsExpanded))}
    numMeasures = len(vectors)
    positions = max([len(l) for l in vectors])

    # asns_expanded = list([j for i in clean[(clean['pair']==pair)]['asns_updated'].tolist() for j in i])
    # counter=collections.Counter(asns_expanded)
    
    asnDisplaycolors = {i:10 if i in diff else 1 for i in list(set(asnsExpanded))}
    
    def giveColorEach(l):
        return [asnDisplaycolors[i] for i in l]

    colored = [giveColorEach(j) for j in vectors]
    
    # print(colored)
    
    return [vectors, colored]


def getData(diffs):
    lv, tl = [],[]
    # for i in range(start, stop):
    i = 0
    for pair, diff in diffs.items():
        # print(pair, diff)
        lv.append(plotVals(pair, diff))

        sites = clean[(clean["pair"]==pair)][["src_site", "dest_site"]].drop_duplicates().values[0].tolist()
        # print(pair)
        # print(f'{i} Baseline: {baseLineDfFr[baseLineDfFr["pair"]==pair]["asn"].values[0]}  Diffs: {diff}')
        tl.append(f'{i} {pair} {sites[0]}-{sites[1]} Baseline: {baseLine[baseLine["pair"]==pair]["asns_updated"].values[0]}  Diffs: {diff}')
        # tl.append(f'{i}  {pair}  Diffs: {diff}')
        i+=1


    return [lv, tl]


num_rows = round((stop-start)/2)
# num_rows = 1
dataFr = getData(d3)
# height = 

fig = make_subplots(rows=num_rows, cols=2, 
                    subplot_titles=dataFr[1],
                   horizontal_spacing = 0.03, vertical_spacing = 0.001  
                   )

i=0
for row in range(1,num_rows+1):
    for col in [1,2]:
        if i<len(pairs)-start:
            fig.add_trace(
                go.Heatmap(
                    z=dataFr[0][i][1],
                    text=dataFr[0][i][0],
                    texttemplate="%{text}",
                ),
                row=row, col=col
            )
            i+=1
        

fig.update_layout(template='plotly_white',
                  height=90000
                 )
fig.update_annotations(font_size=12)
fig.update_traces(showscale=False)
py.offline.plot(fig)

'temp-plot.html'

In [35]:
### Compare!!
import plotly.figure_factory as ff
import plotly.graph_objects as go
import plotly.express as px
import plotly as py
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import collections



from plotly.subplots import make_subplots
import plotly.graph_objects as go


# onesideonly = list(set(diffFr.keys())-set(diffUp.keys()))


# len(onesideonly)

# d1 = {k:v for k,v in diffUp.items() if k in intersection[:200]}
# d3 = {k:v for k,v in diffFr.items() if k in onesideonly[:200]}
d3 = {k:v for k,v in diffFr.items()}


pairs = d3
start,stop = 0,200



def plotVals(pair, diff):
    vectors = df[(df['pair']==pair)]['asns'].tolist()

    asnsExpanded = list([j for i in vectors for j in i])
    asnDisplayVals = {i:pos+1 for pos, i in enumerate(set(asnsExpanded))}
    numMeasures = len(vectors)
    positions = max([len(l) for l in vectors])

    asns_expanded = list([j for i in vectors for j in i])
    counter=collections.Counter(asns_expanded)
    
    asnDisplaycolors = {i:1 if i not in diff else 10 for i in list(set(asnsExpanded))}
    
    def giveColorEach(l):
        return [asnDisplaycolors[i] for i in l]

    colored = [giveColorEach(j) for j in vectors]
    
    # print(colored)
    
    return [vectors, colored]


def getData(start,stop,diffs):
    lv, tl = [],[]
    # for i in range(start, stop):
    i = 0
    for pair, diff in diffs.items():
        
        lv.append(plotVals(pair, diff))

        sites = df[(df["pair"]==pair)][["src_site", "dest_site"]].drop_duplicates().values[0].tolist()
        # print(pair)
        # print(f'{i} Baseline: {baseLineDfFr[baseLineDfFr["pair"]==pair]["asn"].values[0]}  Diffs: {diff}')
        tl.append(f'{i} {sites[0]}-{sites[1]} Baseline: {baseLineDfFr[baseLineDfFr["pair"]==pair]["asn"].values[0]}  Diffs: {diff}')
        # tl.append(f'{i}  {pair}  Diffs: {diff}')
        i+=1


    return [lv, tl]


num_rows = round((stop-start)/2)
dataFr = getData(start,stop, d3)

fig = make_subplots(rows=num_rows, cols=2, 
                    subplot_titles=dataFr[1],
                   horizontal_spacing = 0.03, vertical_spacing = 0.002)

i=0
for row in range(1,num_rows+1):
    for col in [1,2]:
        if i<len(pairs)-start:
            fig.add_trace(
                go.Heatmap(
                    z=dataFr[0][i][1],
                    text=dataFr[0][i][0],
                    texttemplate="%{text}",
                ),
                row=row, col=col
            )
            i+=1
        

fig.update_layout(template='plotly_white',
                  height=40000
                 )
fig.update_annotations(font_size=12)
fig.update_traces(showscale=False)
py.offline.plot(fig)

'temp-plot.html'