### Description:

The purpose of that lab is to compare 2 versions of the "Path changed" alarms, both of which look for unusual ASNs in the traceroute tests.

The first version is a step-by-step algorithm following rules derived by observations over a long period of time. The second verion uses Autoencoder which tries to reconstrunct the ASN sequnces and then looks for anomalies based on the recostruction errors.

The idea is analyse the results of the 2 methods and use the best for generation alarms in the future.

### The first block of code is common. Run all cells.


In [1]:
from elasticsearch.helpers import scan, parallel_bulk
from concurrent.futures import ProcessPoolExecutor
import pandas as pd
import json
import requests
import collections
import hashlib
import traceback

import helpers as hp
from helpers import timer


# Builds the trceroute query
def queryPSTrace(dt):
    query = {
        "query": {
            "bool": {
                "must": [
                    {
                        "range": {
                            "timestamp": {
                                "gt": dt[0],
                                "lte": dt[1],
                                "format": "epoch_millis"
                            }
                        }
                    }
                ]
            }
        }
    }
    # print(str(query).replace("\'", "\""))
    try:
        return scan_gen(scan(hp.es, index="ps_trace", query=query,
                             filter_path=['_scroll_id', '_shards', 'hits.hits._source'],
                             _source=['timestamp', 'src_netsite', 'dest_netsite', 'src', 'dest',  'src_host', 'dest_host', \
                                      'destination_reached', 'asns', 'hops', 'pair', 'ttls']))
    except Exception as e:
        print(e)


def scan_gen(scan):
    while True:
        try:
            yield next(scan)['_source']
        except Exception:
            break


# gets the data from ES
def ps_trace(dt):
    scan_gen = queryPSTrace(dt)
    items = []

    for meta in scan_gen:
        items.append(meta)

    return items


# queries in chunks based on time ranges
def getTraceData(dtRange):
    traceData = ps_trace(dtRange)
    if len(traceData) > 0:
        print(f'For {dtRange} fetched: {len(traceData)}')
    return traceData


# laods the data in parallel
@timer
def runInParallel(dateFrom, dateTo):
    # query the past 12 hours and split the period into 8 time ranges
    # dateFrom, dateTo = hp.defaultTimeRange(12)
    # dateFrom, dateTo = ['2022-05-17 20:15', '2022-05-18 08:15']
    print(f' Run for period: {dateFrom}  -   {dateTo}')
    dtList = hp.GetTimeRanges(dateFrom, dateTo, 24)
    result = []
    with ProcessPoolExecutor(max_workers=len(dtList)) as pool:
        result.extend(pool.map(getTraceData, [[dtList[i], dtList[i+1]] for i in range(len(dtList)-1)]))

    data = []
    for d in result:
        data.extend(d)
    return data

# The traceroute measures provide a list of IP adresses
# as well as a list of correcponding AS numbers
# Below we map IP->ASN and ASN->IP
@timer
def mapHopsAndASNs(df):

    asn2ip, ip2asn = {}, {}
    strange = []

    subset = df[['asns', 'hops', 'pair', 'ttls']].values.tolist()
    # max_ttl is needed when we later build a dataframe where each column is a ttl number
    max_ttl = 0
    try:
        for asns, hops, pair, ttls in subset:
            if ttls:
                if max(ttls) > max_ttl:
                    max_ttl = max(ttls)

            if len(asns) == len(hops):
                for i in range(len(asns)):
                    if asns[i] not in asn2ip.keys():
                        asn2ip[asns[i]] = [hops[i]]
                    else:
                        temp = asn2ip[asns[i]]
                        if hops[i] not in temp:
                            temp.append(hops[i])
                            asn2ip[asns[i]] = temp

                    if hops[i] not in ip2asn.keys():
                        ip2asn[hops[i]] = []

                    if asns[i] not in ip2asn[hops[i]]:
                        ip2asn[hops[i]].append(asns[i])

            else:
                print('Size of hops and ASNs differ. This should not happen')
                strange.append([pair, asns, hops])

    except Exception as e:
        print(e)
        print(asns, hops, pair, ttls)

    return asn2ip, ip2asn, max_ttl


# Sometimes the AS number is not available and a 0 is stored instead
# However, we can repair part of the information by looking up the IP address at that position

# @timer
def fix0ASNs(df):

    zfix = []
    relDf = df[['src', 'dest', 'asns', 'hops', 'pair',
                'destination_reached', 'timestamp', 'ttls']].copy()

    relDf['asns_updated'] = relDf['asns']

    # print('Attempt to fix unknown ASNs based on mapped IP addresses...')
    c = 0

    try:
        for idx, asns, hops, s, d in relDf[['asns', 'hops', 'src', 'dest']].itertuples():

            if len(asns)>0 and 0 in asns:
                asns_updated = asns.copy()

                positions = [pos for pos, n in enumerate(asns) if n==0]

                for pos in positions:
                    # when AS number is 0 (unknown) get the IP at this position and
                    # find all ASNs for it, usually it is just 1
                    ip = hops[pos]
                    asns4IP = ip2asn[ip]

                    if 0 in asns4IP:
                        asns4IP.remove(0)

                    if asns4IP:
                        if len(asns4IP) < 3:
                            # replace 0 with the known ASN for that IP
                            asns_updated[pos] = asns4IP[0]
                            # if len(asns4IP) > 1:
                            #     # when there are 2 we add both
                            #     asns_updated.append(asns4IP[1])
                            if idx not in zfix:
                                zfix.append(idx)

                        else:
                            print('Too many possibilities ...', idx, asns, pos)

                relDf.at[idx, 'asns_updated'] = asns_updated

                if c>0 and c%50000 == 0:
                    print(f'Processed {c}', flush=True)
                c+=1

        print(f'{len(zfix)} zeros successfully replaced with AS numbers.', flush=True)
        return relDf
    except Exception as e:
        print(idx, asns, hops, s, d)
        print(e, traceback.format_exc())


# Gets all ASNs for each site name from CRIC
# They will later be used as alternatives to each other
@timer
def getCricASNInfo():

    cricDict = {}

    response = requests.get(
        "https://wlcg-cric.cern.ch/api/core/rcsite/query/list/?json", verify=False)
    rcsites = json.loads(response.text)

    cricsites = {}

    for rcsite in rcsites:
        temp = []
        for netsite in rcsites[rcsite]['netsites']:
            for netroute in rcsites[rcsite]['netroutes']:
                if rcsites[rcsite]['netroutes'][netroute]["netsite"] == netsite or \
                   rcsites[rcsite]['netroutes'][netroute]["netsite_spare"] == netsite:

                    for iptype in rcsites[rcsite]['netroutes'][netroute]["networks"]:
                        for subnet in rcsites[rcsite]['netroutes'][netroute]["networks"][iptype]:
                            asn = rcsites[rcsite]['netroutes'][netroute]["asn"]
                            if asn not in temp:
                                temp.append(asn)
            if temp:
                cricsites[rcsite] = temp

    cricDict = {}
    for key, vals in cricsites.items():
        # print(vals)
        if len(vals) > 1:
            for v in vals:
                if v not in cricDict.keys():
                    cricDict[v] = vals.copy()
                    cricDict[v].remove(v)
                    # print(cricDict)
                else:
                    print('                currvals', cricDict[v])

    return cricDict


# Builds a dictionalry of ASNs, where the key is a single AS number and
# the values are the alaternative ASNs dicovered through the hops mapping
@timer
def getAltASNs(asn2ip, ip2asn):
    alt_dict = {}
    for asn, ip_list in asn2ip.items():

        if asn != 0:
            for ip in ip_list:

                others = ip2asn[ip]
                if 0 in others:
                    others.remove(0)

                if len(others) == 2:
                    alt = [el for el in ip2asn[ip] if el != asn]
                    if len(alt) > 1:
                        print(f'There are >1 alternatives to {asn}: {alt}')

                    if asn not in alt_dict.keys():
                        alt_dict[asn] = [alt[0]]
                    else:
                        if alt[0] not in alt_dict[asn]:
                            alt_dict[asn].append(alt[0])

                elif len(others) > 2:
                    print(asn, others)
                    print('There are more possibilities ........................')
    return alt_dict


# Grabs also the alternative ASNs of the altenrnatives
@timer
def getAltsOfAlts(altASNsDict):
    alt_dict = altASNsDict.copy()
    altsOfAlts = {}
    allVals = []
    for key, vals in alt_dict.items():
        allVals = vals
        # print(key, vals)
        for asn_list in alt_dict.values():
            temp = []
            if key in asn_list and len(asn_list) > 1:
                temp = asn_list.copy()
                allVals.extend(list(set(temp)))

        allVals = list(set(allVals))
        if key in allVals:
            allVals.remove(key)
        altsOfAlts[key] = allVals

    return altsOfAlts


# Adds known ASNs manuaaly
def mapASNsManualy(asn1, asn2, altsOfAlts):
    if asn1 not in altsOfAlts.keys():
        altsOfAlts[asn1] = [asn2]
    else:
        temp = altsOfAlts[asn1]
        temp.extend(asn2)
        altsOfAlts[asn1]


# Builds a dataframe that strips all repeated values and
# hashes each clean path
# Also calculates what % a path gets used for the given period
@timer
def getStats4Paths(relDf, df):
    allPathsList, uniquePathsList = [], []

    def hashASNs(group):
        try:
            hashList = []
            if len(group.asns_updated.values) > 1:
                # print(group.asns_updated.values)
                for i, g in enumerate(group.asns_updated.values):
                    if g is not None and g == g:
                        # take only the unique AS numbers on the list
                        asnList = list(dict.fromkeys(g.copy()))

                        # remove remaining zeros (unknowns)
                        if 0 in asnList:
                            asnList.remove(0)

                        # hash the path and keep the order
                        hashid = hash(frozenset(asnList))

                        if hashid not in hashList:
                            hashList.append(hashid)

                            if len(g) > 0:
                                # store just the unique sequences since
                                # Pandas has limitted functions on dataframes with lists
                                uniquePathsList.append([group.name[0], group.name[1], asnList,
                                                        len(asnList), len(group.values), hashid, group.timestamp.values[i]])
                        if len(g) > 0:
                            # store all values + the cleaned paths and hashes,
                            # so that we can get the probabilities later
                            allPathsList.append([group.name[0], group.name[1], asnList, len(asnList),
                                                len(group.values), hashid, group.destination_reached.values[i]])
        except Exception as e:
            print('Issue wtih:', group.name, asnList)
            print(e)

    relDf[['src', 'dest', 'asns_updated', 'hops', 'destination_reached', 'timestamp']].\
        groupby(['src', 'dest']).apply(lambda x: hashASNs(x))

    uniquePaths = pd.DataFrame(uniquePathsList).rename(columns={
        0: 'src', 1: 'dest', 2: 'asns_updated',
        3: 'cnt_asn', 4: 'cnt_total_measures', 5: 'hash',  6: 'dt'
    })
    uniquePaths['pair'] = uniquePaths['src']+'-'+uniquePaths['dest']

    cleanPathsAllTests = pd.DataFrame(allPathsList).rename(columns={
        0: 'src', 1: 'dest', 2: 'asns_updated', 3: 'cnt_asn',
        4: 'cnt_total_measures', 5: 'hash', 6: 'dest_reached'
    })

    # for each hashed path check if all tests reported destination_reached=True
    pathReachedDestDf = cleanPathsAllTests.groupby('hash').\
        apply(lambda x: True if all(x.dest_reached) else False).\
        to_frame().rename(columns={0: 'path_always_reaches_dest'})

    # get the probability for each path in a column (hash_freq)
    pathFreq = cleanPathsAllTests.groupby(['src', 'dest'])['hash'].\
        apply(lambda x: x.value_counts(normalize=True)).to_frame()
    pathFreq = pathFreq.reset_index().rename(columns={'hash': 'hash_freq', 'level_2': 'hash'})

    # finally merge with the rest of the dataframes in order to add all available fields
    pathDf = pd.merge(uniquePaths, pathFreq, how="inner", on=['src', 'dest', 'hash'])
    sub = df[['dest', 'src_site', 'src', 'dest_site', 'src_host', 'dest_host', 'pair']].drop_duplicates()
    pathDf = pd.merge(pathDf, sub, on=['pair', 'src', 'dest'], how='inner').drop_duplicates(subset=['hash', 'pair'])
    pathDf = pd.merge(pathDf, pathReachedDestDf, how="left", on=['hash'])

    return pathDf, cleanPathsAllTests


# Takes one path as a baseline depending on the % usage and the number of unique ASNs
# Separates those from the rest of the paths.
# Returns the 2 dataframes
@timer
def getBaseline(dd):
    baselineList = []

    for name, group in dd.groupby('pair'):
        try:
            cnt_max = max(group.cnt_asn.values)
            freq_max = max(group.hash_freq.values)

            cnt_max_position = [pos for pos, i in enumerate(group.cnt_asn) if i == cnt_max]
            freq_max_position = [pos for pos, i in enumerate(group.hash_freq) if i == freq_max]

            max_position = -9999

            # if path was used 65% of the time, take it for a baseline
            if freq_max >= 0.65:
                position = freq_max_position[0]
                max_position = position
                freq_max = group.hash_freq.values[position]
            # if not, get the path with the highest count of unique ASNs
            else:
                # in case there are >1 paths with the same # of ASNs, take the one most frequently taken
                if len(cnt_max_position) > 1:
                    path_freq = 0
                    for pos in cnt_max_position:
                        if path_freq < group.hash_freq.values[pos]:
                            position = pos
                            path_freq = group.hash_freq.values[pos]
                    max_position = position
                else:
                    max_position = cnt_max_position[0]

            baselineList.append(group.index.values[max_position])

        except Exception as e:
            print('EXCEPTION:', e, name)
            print()
            print(group.cnt_asn.values)
            print(group.hash_freq.values)
            print(group.index.values[max_position])
            print(max_position, group.index.values[max_position], group.index.values[max_position])

    # the dataframe containing one path as a baseline per pair
    baseLine = dd[dd.index.isin(baselineList)].copy()
    # the dataframe containing all the remaining paths
    compare2 = dd[~dd.index.isin(baseLine.index)].copy()
    print(f' Baseline: {len(baseLine)} \n left to compare to: {len(compare2)}')

    return [baseLine, compare2]


# Compares each path to the baseline. Does that for each pair and flags the ASN that are not on the baseline path
# Returns a dictionary of pairs and a list of flagged AS numbers
@timer
def getChanged(baseDf, compare2, updatedbaseLine, altsOfAlts, cricDict, cut):

    diffs = {}

    # look at all traceroute tests for each pair
    for name, group in cut.groupby('pair'):
        try:
            base = baseDf[baseDf['pair'] == name]['asns_updated'].values.tolist()[0]

            upbase = []
            if name in updatedbaseLine['pair'].values:
                upbase = updatedbaseLine[updatedbaseLine['pair'] == name]['asns_updated'].values.tolist()[
                    0]

            casns = compare2[compare2['pair'] == name]['asns_updated'].tolist()
            asns_expanded = list([j for i in casns for j in i])
            counter = collections.Counter(asns_expanded)

            flag = False
            diff_temp = []

            alarms = []


            # NEW FILTER: if there's just 1 alternative path and its usage is in less than 1% of the time, ignore the anomaly
            if not (len(group.asns_updated.values)==1 and group.hash_freq.values[0]<=0.01):
                for i, asns in enumerate(group.asns_updated):
                    diff = list(set(asns)-set(base))
    
                    if len(diff) > 0:
    
                        for d in diff:
    
                            if d not in upbase:
                                if d in altsOfAlts.keys():
                                    # if none of the alternative ASNs is in the baseline path or the updated baseline list,
                                    # then flag it to True (meaning raise an alarm)
                                    flag = not any(False if alt not in base or alt in upbase else True
                                                   for alt in altsOfAlts[d])
                                    # print(flag)
    
                                elif d in cricDict.keys():
                                    # some ASN alternatives are found in CRIC, if that's the case,
                                    # there is no need for an alarm
                                    flag = not any(False if alt not in base or alt in upbase else True
                                                   for alt in cricDict[d])
    
                                else:
                                    flag = True
    
                            # store the flags
                            alarms.append(flag)
    
                        # store the ASNs not on the baseline list or on the alternative ASN lists
                        diff_temp.extend(diff)

            # exclude paths having <3 hops, and check if any flags were raised
            if any(alarms) > 0 and len(base) >= 2:
                # store the pair and the list of diffs
                diffs[name] = list(set(diff_temp))
        except Exception as e:
            print('Issue wtih:', name, e)

    print(f'Number of pairs flagged {len(diffs)}')

    return diffs


# Builds a dataframe that takes into account the missing TTLs
def positionASNsUsingTTLs(subset):
    pdf = pd.DataFrame()
    for idx, asns, hops, pair, ttls in subset.itertuples():
        if len(asns) == len(hops) and ttls:
            pdf.loc[idx, ttls] = asns
            missing = {x: -1 for x in range(ttls[0], ttls[-1]+1) if x not in ttls}
            pdf.loc[idx, list(missing.keys())] = list(missing.values())
            pdf.loc[idx, 'pair'] = pair
    return pdf

# Gets the probability of an ASN to apear at each position on the path
@timer
def getProbabilities(posDf, max_ttl):
    columns = ['pair']
    columns.extend(sorted([c for c in posDf.columns if c != 'pair']))
    posDf = posDf[columns].sort_values('pair')
    
    plist = []

    def calcP(g):
        try:
            pair = g.pair.values[0]
            for col in range(1, len(g.columns)):

                asns = g[col].value_counts('probability').index.values
                p = g[col].value_counts('probability').values

                for i, asn in enumerate(asns):
                    plist.append([pair, asn, col, p[i]])
        except Exception as e:
            print(pair)
            print(e)

    posDf.groupby('pair').apply(lambda g: calcP(g))

    return pd.DataFrame(plist, columns=['pair', 'asn', 'pos', 'P'])


# In some cases a devixe on the path could be mostly down and only sometimes reponds.
# The code bellow finds those and adds the "blinking" ASN to the baseline
@timer
def addOnAndOffNodes(diffs, probDf, baseLine):

    for p in diffs.keys():
        sub = probDf[(probDf['pair'] == p)]
        onoff, currASNs = [], []

        for d in diffs[p]:
            pos = sub[(sub['asn'] == d)]['pos'].values[0]
            asns = sub[sub['pos'] == pos]['asn'].values

            # if there are only 2 values at that position
            # one is the flagged ASN and the other one is -1,
            # meaning the ttl was missing,
            # then the node was On and Off and should not be considered as path change
            if len(asns) == 2 and -1 in asns:
                onoff.append(sorted(asns)[1])

        onoff = list(set(onoff))
        if onoff:
            currASNs = baseLine[baseLine['pair'] == p]['asns_updated'].values.tolist()[0].copy()
            currASNs.extend(onoff)
            baseLine.at[baseLine[baseLine['pair'] == p].index.values[0], 'asns_updated'] = currASNs

    return baseLine


# Builds a dataframe based on the flagged ASNs when tests take another route (change path),
# the number of pairs between which the ASN appears, affected sites, AS owner and the queried period
@timer
def aggResultsBasedOnSites(diffs, asnInfo, dateFrom, dateTo):

    diffData = []
    for pair, diff in diffs.items():
        for d in diff:
            diffData.append([pair, d])

    diffDf = pd.DataFrame(diffData, columns=['pair', 'diff'])

    sub = df[['dest', 'src_site', 'src', 'dest_site',
              'src_host', 'dest_host', 'pair']].drop_duplicates()
    diffDf = pd.merge(diffDf, sub, on='pair', how='left')
    cntPairs = diffDf.groupby(['diff'])[['pair']].count().sort_values('pair', ascending=False)

    top = cntPairs[cntPairs['pair'] >= 10]

    # n = 80 if len(allPairs)>80 else len(allPairs)
    # pairs2Plot = allPairs.sample(n=n, random_state=1).values

    alarmsList = []

    for asn, g in diffDf[diffDf['diff'].isin(top.index)][['diff', 'src_site', 'dest_site']].\
            drop_duplicates().groupby('diff'):

        affectedSites = list(set(g['src_site'].values.tolist() + g['dest_site'].values.tolist()))
        toHash = ','.join([str(asn), dateFrom, dateTo])
        alarm_id = hashlib.sha224(toHash.encode('utf-8')).hexdigest()

        alarmsList.append({
            'asn': asn,
            'owner': asnInfo[str(asn)],
            'num_pairs': str(top[top.index == asn]['pair'].values[0]),
            'sites': affectedSites,
            'from': dateFrom,
            'to': dateTo,
            'alarm_id': alarm_id
        })

    return alarmsList

# Grabs the R&E ASNs and their owners from ES
@timer
def getASNInfo(ids):

    query = {
        "query": {
            "terms": {
                "_id": ids
            }
        }
    }

    # print(str(query).replace("\'", "\""))
    asnDict = {}
    data = scan(hp.es, index='ps_asns', query=query)
    for item in data:
        asnDict[str(item['_id'])] = item['_source']['owner']

    return asnDict

@timer
def saveStats(diffs, ddf, probDf, baseLine, updatedbaseLine, compare2):
    def getPaths(fld, ddf):
        temp = {}
        ddf['hash_freq'] = ddf['hash_freq'].round(2)
        if len(ddf)>0:
            temp[fld] = ddf[['asns_updated', 'cnt_total_measures', 'path_always_reaches_dest', 'hash_freq']].\
                to_dict('records')
        return temp

    probDf['P'] = probDf['P'].round(2)
    probDf['asn'] = probDf['asn'].astype('int')

    alarmsData = []
    for pair, diff in diffs.items():
        temp = {}
        # prepare the data for ES - adding _id and _index to send in bulk
        temp['from_date'] = dateFrom
        temp['to_date'] = dateTo
        temp['_index'] = 'ps_traces_changes'
        temp['diff'] = diff
        temp.update(baseLine[baseLine['pair']==pair]
                            [['src', 'dest', 'src_host', 'dest_host', 'src_site', 'dest_site']].to_dict('records')[0])

        temp.update(getPaths('baseline', baseLine[baseLine['pair']==pair]))
        temp.update(getPaths('second_baseline', updatedbaseLine[updatedbaseLine['pair']==pair]))
        temp.update(getPaths('alt_paths', compare2[compare2['pair']==pair]))
        temp['positions'] = probDf[probDf['pair']==pair][['asn', 'pos', 'P']].to_dict('records')

        alarmsData.append(temp)


    print(f'Number of docs: {len(alarmsData)}')

    def genData(data):
        for d in data:
            yield d

    for success, info in parallel_bulk(hp.es, genData(alarmsData)):
        if not success:
            print('A document failed:', info)

# Sends the alarms
@timer
def sendAlarms(data):
    ALARM = alarms('Networking', 'RENs', 'path changed')

    for issue in data:
        ALARM.addAlarm(
            body="Path changed",
            tags=issue['sites'],
            source=issue
        )



# query the past 72 hours
dateFrom, dateTo = hp.defaultTimeRange(72)
data = runInParallel(dateFrom, dateTo)
df = pd.DataFrame(data)

print('Total number of documnets:', len(df))
df.loc[:, 'src_site'] = df['src_netsite'].str.upper()
df.loc[:, 'dest_site'] = df['dest_netsite'].str.upper()
df.loc[:, 'pair'] = df['src']+'-'+df['dest']
df = df[~(df['src_site'].isnull()) & ~(df['dest_site'].isnull()) & ~(df['asns'].isnull())]

asn2ip, ip2asn, max_ttl = mapHopsAndASNs(df)

cricDict = getCricASNInfo()
altASNsDict = getAltASNs(asn2ip, ip2asn)
altsOfAlts = getAltsOfAlts(altASNsDict)

mapASNsManualy(291, 293, altsOfAlts)
mapASNsManualy(293, 291, altsOfAlts)

relDf = hp.parallelPandas(fix0ASNs)(df)

pathDf, allPathsList = getStats4Paths(relDf, df)

# remove rows where site is None and ignore those with 100% stable paths
valid = pathDf[~(pathDf['src_site'].isnull()) & ~(pathDf['dest_site'].isnull()) & (pathDf['hash_freq'] < 1) & (pathDf['cnt_asn']>1)].copy()
if len(valid) == 0:
    raise NameError('No valid paths. Check pathDf.')

Success
 Run for period: 2024-04-13T13:28:48.000Z  -   2024-04-16T13:28:48.000Z
For [1713263328000, 1713274128000] fetched: 180025
For [1713122928000, 1713133728000] fetched: 179471
For [1713198528000, 1713209328000] fetched: 173259
For [1713101328000, 1713112128000] fetched: 169962
For [1713252528000, 1713263328000] fetched: 177862
For [1713112128000, 1713122928000] fetched: 175838
For [1713176928000, 1713187728000] fetched: 175715
For [1713187728000, 1713198528000] fetched: 167995
For [1713230928000, 1713241728000] fetched: 178087
For [1713090528000, 1713101328000] fetched: 179964
For [1713133728000, 1713144528000] fetched: 181408
For [1713068928000, 1713079728000] fetched: 180969
For [1713241728000, 1713252528000] fetched: 176498
For [1713220128000, 1713230928000] fetched: 178162
For [1713166128000, 1713176928000] fetched: 179268
For [1713144528000, 1713155328000] fetched: 180310
For [1713014928000, 1713025728000] fetched: 176520
For [1713058128000, 1713068928000] fetched: 184543
Fo



Finished getCricASNInfo in 1.8040 secs
Finished getAltASNs in 0.0012 secs
Finished getAltsOfAlts in 0.0000 secs
2089 zeros successfully replaced with AS numbers.
1980 zeros successfully replaced with AS numbers.
1962 zeros successfully replaced with AS numbers.
1979 zeros successfully replaced with AS numbers.
1796 zeros successfully replaced with AS numbers.
2728 zeros successfully replaced with AS numbers.
2898 zeros successfully replaced with AS numbers.
2226 zeros successfully replaced with AS numbers.
2998 zeros successfully replaced with AS numbers.
3347 zeros successfully replaced with AS numbers.
3419 zeros successfully replaced with AS numbers.
4194 zeros successfully replaced with AS numbers.
4208 zeros successfully replaced with AS numbers.
3928 zeros successfully replaced with AS numbers.
4087 zeros successfully replaced with AS numbers.
3920 zeros successfully replaced with AS numbers.
4075 zeros successfully replaced with AS numbers.
4058 zeros successfully replaced with 

### Run is the current version:

In [2]:
%%time
baseLine, compare2 = getBaseline(valid)

# get a second stable path (baseline) for the T1 sites
# T1 = ['BNL-ATLAS', 'FZK-LCG2', 'IN2P3-CC', 'INFN-T1', 'JINR-T1', 'KR-KISTI-GSDC-01', 'NDGF-T1', 'NIKHEF-ELPROD',
#       'pic', 'RAL-LCG2', 'RRC-KI-T1', 'SARA-MATRIX', 'Taiwan-LCG2', 'TRIUMF-LCG2', 'USCMS-FNAL-WC1']
# limit temporarily to the known site having load balancing
T1 = ['PIC', 'TAIWAN-LCG2']
t1s = compare2[(compare2['src_site'].isin(T1)) & (compare2['dest_site'].isin(T1))]
updatedbaseLine, updatedcompare2 = getBaseline(t1s)


# Ignore sites for which we know there's an issue
ignore_list = [  'ATLAS-CBPF',
                 'BEIJING-LCG2',
                 'CBPF',
                 'EELA-UTFSM',
                 'IHEP',
                 'IN2P3-CC',
                 'ITEP',
                 'ITEP-LHCONE',
                 'JINR-LCG2',
                 'JINR-LCG2-LHCONE',
                 'JINR-T1',
                 'JINR-T1-LHCOPNE',
                 'KHARKOV-KIPT-LCG2-LHCONE',
                 'NCP-LCG2',
                 'RRC-KI',
                 'RRC-KI-T1',
                 'RRC_KI',
                 'RU-PROTVINO-IHEP-LHCONE',
                 'RU-Protvino-IHEP',
                 'UAM-LCG2-LHCONE',
                 'UTA_SWT2']
cut = compare2[(~compare2['src_site'].isin(ignore_list)) & (~compare2['dest_site'].isin(ignore_list))]

# Get the pairs which took different form the usual paths
diffs = getChanged(baseLine, compare2, updatedbaseLine, altsOfAlts, cricDict, cut)

# Build a position matrix, where each TTL helps put ASNs at their places
subset = relDf[relDf['pair'].isin(diffs.keys())][['asns_updated', 'hops', 'pair', 'ttls']]
posDf = hp.parallelPandas(positionASNsUsingTTLs)(subset)

# Get the probability for each position, based on src-dest pair
probDf = getProbabilities(posDf, max_ttl)


# Find the nodes that work sporadically and add those the the baseline list
baseLine = addOnAndOffNodes(diffs, probDf, baseLine)
# Again get the pairs which took different frоm the usual paths
diffs = getChanged(baseLine, compare2, updatedbaseLine, altsOfAlts, cricDict, cut)

# saveStats(diffs, df, probDf, baseLine, updatedbaseLine, compare2)

# Extract all seen ASNs
asns = list(set([str(item) for diffList in diffs.values() for item in diffList]))
# Get the oweners of the ASNs
asnInfo = getASNInfo(asns)
# Build the dictinary of alarms where for each ASN, there is an owner, number of pairs and a list of affected sites
old_alarms = aggResultsBasedOnSites(diffs, asnInfo, dateFrom, dateTo)

# sendAlarms(old_alarms)

 Baseline: 3609 
 left to compare to: 5532
Finished getBaseline in 0.3872 secs
 Baseline: 0 
 left to compare to: 0
Finished getBaseline in 0.0005 secs
Number of pairs flagged 385
Finished getChanged in 2.9783 secs
Finished getProbabilities in 5.8354 secs
Finished addOnAndOffNodes in 0.7733 secs
Number of pairs flagged 339
Finished getChanged in 3.0813 secs
Finished getASNInfo in 0.1694 secs
Finished aggResultsBasedOnSites in 9.4014 secs
CPU times: user 20.7 s, sys: 14 s, total: 34.6 s
Wall time: 48.3 s


### Check a single pair

In [17]:
# take an flagged pair and see the sequnces for it
pair, asns = list(diffs.items())[0]
print(f'Pair {pair}, flagged ASNs: {asns}')
valid[(valid['pair']==pair)][['src_site','dest_site','asns_updated', 'hash_freq']]

Pair 128.227.221.131-144.16.111.26, flagged ASNs: [2907, 7660, 101, 23855]


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
493,UFLORIDA-HPC-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[6356, 11096, 55824, 2697]",0.960648
494,UFLORIDA-HPC-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[6356, 11096, 101, 7660, 23855, 55824]",0.013889
495,UFLORIDA-HPC-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[6356, 11096, 101, 7660, 23855, 55824, 2697]",0.002315
496,UFLORIDA-HPC-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[6356, 11096, 101, 2907, 7660, 23855, 55824, 2...",0.011574
497,UFLORIDA-HPC-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[6356, 11096, 101, 2907, 7660, 23855, 55824]",0.011574


### Prepare the data for the new version

In [4]:
# from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
import numpy as np


df = valid.copy()

df.sort_values(['pair', 'dt'],inplace=True)
df['asns_updated'] = df['asns_updated'].apply(lambda x: [str(item) for item in x])

# Flatten and encode all IPs and ASNs to strings
# all_items = pd.concat([df['src'], df['dest'], df['asns_updated'].apply(pd.Series).stack().astype(str)]).unique()
all_items = df['asns_updated'].apply(pd.Series).stack().astype(str).unique()


label_encoder = LabelEncoder()
encoded_items = label_encoder.fit_transform(all_items)

# Mapping from original item to encoded value
item_to_encoded = dict(zip(all_items, encoded_items))

df['encoded_asns_updated'] = df['asns_updated'].apply(lambda x: [item_to_encoded[item] for item in x])

# Create sequences
# df['sequence'] = df.apply(lambda row: [row['encoded_src']] + row['encoded_asns_updated'] + [row['encoded_dest']], axis=1)
df['sequence'] = df.apply(lambda row: row['encoded_asns_updated'], axis=1)

# Pad sequences
sequences = list(df['sequence'])
hash_freqs = np.array(list(df['hash_freq']))
padded_sequences = pad_sequences(sequences, padding='post', value=0)

2024-04-16 13:42:35.290774: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### The model

In [6]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, RepeatVector, TimeDistributed, Dense, Embedding
from tensorflow.keras.optimizers import Adam
import numpy as np


num_sequences, sequence_length = padded_sequences.shape
vocab_size = np.max(padded_sequences) + 1  # encoding starts from 0
embedding_dim = 64 
inputs = Input(shape=(sequence_length,))

embedded = Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=sequence_length, mask_zero=True)(inputs)

encoded = LSTM(128)(embedded)

decoded = RepeatVector(sequence_length)(encoded)
decoded = LSTM(embedding_dim, return_sequences=True)(decoded)
decoded = TimeDistributed(Dense(vocab_size, activation='softmax'))(decoded)

autoencoder = Model(inputs, decoded)
autoencoder.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
# autoencoder.compile(optimizer='adam', loss=custom_loss)

# Preparing target data for categorical crossentropy
target_data = np.expand_dims(padded_sequences, -1)

# Train the autoencoder
autoencoder.fit(padded_sequences, target_data, epochs=100, batch_size=128, shuffle=False, validation_split=0.2)

# Predict and calculate reconstruction error
predictions = autoencoder.predict(padded_sequences)
reconstruction_error = np.mean(np.abs(predictions - target_data), axis=(1, 2))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [7]:
np.mean(reconstruction_error),2*np.std(reconstruction_error)

(30.639218929353184, 24.44177149519671)

In [8]:
threshold = np.mean(reconstruction_error) + 2*np.std(reconstruction_error)
# threshold = np.percentile(element_wise_errors, 95)

# Flag sequences with errors above the threshold as anomalies
anomalies = np.where(reconstruction_error > threshold)[0]

anomalous_sequences = df.iloc[anomalies]


print(f"Total individual anomalies: {len(anomalous_sequences)}, where hash_freq<0.5 -> {len(anomalous_sequences[anomalous_sequences['hash_freq']<0.5])}")
print(f"Total number of pairs with annomalies: {len(anomalous_sequences[anomalous_sequences['hash_freq']<0.5].groupby('pair')['dt'].count())}")



Total individual anomalies: 349, where hash_freq<0.5 -> 188
Total number of pairs with annomalies: 142


In [9]:
threshold, reconstruction_error

(55.080990424549896,
 array([25.33566434, 20.78166278, 28.89121989, ..., 27.22455322,
        19.33721834, 32.22455322]))

### Check the anomalies before applying the filters

In [10]:
print("Old version, number of src-dest pairs: ", len(valid[(valid['pair'].isin(list(diffs.keys())))].groupby(['src_site', 'dest_site'])['dt'].count()))
anomalous_sequences_drop_ignored = anomalous_sequences[(~anomalous_sequences['src_site'].isin(ignore_list)) & (~anomalous_sequences['dest_site'].isin(ignore_list))]
print("New version, number of src-dest pairs: ", len(anomalous_sequences_drop_ignored.groupby(['src_site', 'dest_site'])['dt'].count()))

Old version, number of src-dest pairs:  113
New version, number of src-dest pairs:  48


In [11]:
anomalous_sequences_drop_ignored.groupby(['src_site', 'dest_site'])['dt'].count()

src_site                    dest_site              
AGLT2_MSU                   IFIC-LCG2                   4
AGLT2_UM                    FZK-LCG2-LHCOPNE            4
                            PIC-LHCOPNE                 2
BEIJING-LCG2-LHCONE         INDIACMS-TIFR-LHCONE-V4     2
CA-UVIC-CLOUD-LHCONE        RRC-KI-T1-LHCOPNE           3
FR-GRIF_LPNHE               NEBRASKA-LHCONE             1
FZK-LCG2-LHCOPNE            CA-UVIC-CLOUD-LHCONE        1
                            CA-WATERLOO-T2-LHCONE       4
                            CIT_CMS_T2-LHCONE           8
                            FR-GRIF_LLR                 4
                            GRIF_IRFU                   4
                            IFIC-LCG2                  14
                            INDIACMS-TIFR-LHCONE-V4     3
                            UB-LCG2                     1
                            UKI-LT2-QMUL                4
                            UKI-LT2-RHUL                1
                    

### Apply filters

In [18]:
_diff, remaining_anomalies_indices = [], []

alarms_data, asn_data = [],{}

# remove the sites on the ignore list, and the anomalies which frequencies are <50%
for name, group in anomalous_sequences_drop_ignored[(anomalous_sequences_drop_ignored['hash_freq']<0.5)].groupby(['pair']):
    
    anom_sequences = set([asn for seq in group.asns_updated.values.tolist() for asn in seq])
    remaining = valid[(valid.pair.isin(name)) & (~valid.index.isin(group.index.values))]
    remaining_records = remaining['asns_updated'].values.tolist()
    
    freq = remaining['hash_freq'].sum()

    # drop anomalies if only 2 distinct paths were seen and one the anomalous' freq<0.01
    if not (len(remaining_records)==1 and freq>=0.99):
        rem_sequnces = set([str(asn) for seq in remaining_records for asn in seq])
        diff1 = anom_sequences - rem_sequnces
        diff2 = rem_sequnces - anom_sequences

        if len(diff1)>0:

            idx = group.index.values.tolist()
            remaining_anomalies_indices.extend(idx)
            print('Pair:',name[0])
            print("Annomaly index: :",  idx)
            print("Annomalous path: :", anom_sequences)
            print("Other paths:", rem_sequnces)
            print("\nIn other paths, but not in anomalous:", diff2)
            print("\nIn anomalous, but not in other paths:", diff1)
    
            display(valid[valid['pair'].isin(name)][['src_site','dest_site','asns_updated', 'hash_freq']])
    
            print('\n\n\n')
            src,dest = valid[(valid.pair.isin(name))][['src_site','dest_site']].values[0].tolist()
    
            record = {'pair':name,
                      'period':[dateFrom, dateTo],
                      'src_site':src,
                      'dest_site':dest,
                      'annomalous_path':anom_sequences,
                      'other_asns':rem_sequnces,
                      'difference':list(diff1)}
    
            # print(record)
    
            for d in diff1:
                if d in asn_data.keys():
                    asn_data[d]['sites'] = list(set(asn_data[d]['sites']))
                else:
                    asn_data[d] = {'sites': [src,dest], 'period':[dateFrom, dateTo]}
    
            alarms_data.append(record)
    
            _diff.extend(diff1)

_diff = set(_diff)

Pair: 144.92.180.75-144.16.111.26
Annomaly index: : [2832]
Annomalous path: : {'2907', '59', '23855', '101', '2697', '55824', '7660'}
Other paths: {'59', '23855', '101', '2697', '55824', '7660'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'2907'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
2831,GLOW-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[59, 55824, 2697]",0.968504
2832,GLOW-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[59, 101, 7660, 2907, 23855, 55824, 2697]",0.023622
2833,GLOW-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[59, 101, 7660, 23855, 55824, 2697]",0.007874






Pair: 169.228.130.41-144.16.111.26
Annomaly index: : [6232, 6233, 6235]
Annomalous path: : {'2907', '2153', '23855', '101', '2697', '55824', '7660', '7377', '26397'}
Other paths: {'2153', '20965', '24489', '2697', '55824', '7377', '26397'}

In other paths, but not in anomalous: {'20965', '24489'}

In anomalous, but not in other paths: {'23855', '2907', '101', '7660'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
6231,UCSDT2-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[7377, 26397, 2153, 55824, 2697]",0.958333
6232,UCSDT2-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[7377, 26397, 2153, 101, 2907, 23855, 55824, 2...",0.006944
6233,UCSDT2-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[7377, 26397, 2153, 101, 7660, 2907, 23855, 55...",0.023148
6234,UCSDT2-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[7377, 26397, 2153, 20965, 24489]",0.002315
6235,UCSDT2-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[7377, 26397, 2153, 101, 7660, 23855, 55824, 2...",0.006944
6236,UCSDT2-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[7377, 26397, 2153, 55824]",0.002315






Pair: 18.12.1.171-144.16.111.26
Annomaly index: : [6360]
Annomalous path: : {'2907', '23855', '3', '101', '2697', '55824', '7660'}
Other paths: {'2907', '23855', '3', '101', '2697', '55824'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'7660'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
6359,MIT-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[3, 55824, 2697]",0.972973
6360,MIT-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[3, 101, 7660, 2907, 23855, 55824, 2697]",0.019305
6361,MIT-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[3, 101, 2907, 23855, 55824, 2697]",0.007722






Pair: 192.108.47.12-134.158.132.200
Annomaly index: : [6688]
Annomalous path: : {'680', '789', '58069', '2200', '34878', '20965'}
Other paths: {'680', '789', '58069', '2200', '20965'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'34878'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
6687,FZK-LCG2-LHCOPNE,FR-GRIF_LLR,"[58069, 680, 20965, 2200, 789]",0.921986
6688,FZK-LCG2-LHCOPNE,FR-GRIF_LLR,"[58069, 34878, 680, 20965, 2200, 789]",0.078014






Pair: 192.108.47.12-198.32.44.3
Annomaly index: : [6796]
Annomalous path: : {'680', '293', '291', '62', '58069', '20965'}
Other paths: {'32361', '680', '293', '291', '58069', '20965'}

In other paths, but not in anomalous: {'32361'}

In anomalous, but not in other paths: {'62'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
6795,FZK-LCG2-LHCOPNE,CIT_CMS_T2-LHCONE,"[58069, 680, 20965, 293, 291, 32361]",0.512397
6796,FZK-LCG2-LHCOPNE,CIT_CMS_T2-LHCONE,"[58069, 680, 20965, 293, 291, 62]",0.487603






Pair: 192.108.47.12-198.32.44.4
Annomaly index: : [6798]
Annomalous path: : {'680', '293', '291', '62', '58069', '20965'}
Other paths: {'32361', '680', '293', '291', '58069', '20965'}

In other paths, but not in anomalous: {'32361'}

In anomalous, but not in other paths: {'62'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
6797,FZK-LCG2-LHCOPNE,CIT_CMS_T2-LHCONE,"[58069, 680, 20965, 293, 291, 32361]",0.512195
6798,FZK-LCG2-LHCOPNE,CIT_CMS_T2-LHCONE,"[58069, 680, 20965, 293, 291, 62]",0.487805






Pair: 192.108.47.6-134.158.132.201
Annomaly index: : [6869]
Annomalous path: : {'680', '789', '58069', '2200', '34878', '20965'}
Other paths: {'680', '789', '58069', '2200', '20965'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'34878'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
6868,FZK-LCG2-LHCOPNE,FR-GRIF_LLR,"[58069, 680, 20965, 2200, 789]",0.954128
6869,FZK-LCG2-LHCOPNE,FR-GRIF_LLR,"[58069, 34878, 680, 20965, 2200, 789]",0.045872






Pair: 192.108.47.6-198.32.44.4
Annomaly index: : [7028]
Annomalous path: : {'32361', '680', '293', '291', '58069', '20965'}
Other paths: {'680', '293', '291', '62', '58069', '20965'}

In other paths, but not in anomalous: {'62'}

In anomalous, but not in other paths: {'32361'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
7027,FZK-LCG2-LHCOPNE,CIT_CMS_T2-LHCONE,"[58069, 680, 20965, 293, 291, 62]",0.530973
7028,FZK-LCG2-LHCOPNE,CIT_CMS_T2-LHCONE,"[58069, 680, 20965, 293, 291, 32361]",0.469027






Pair: 2001:12d8:80:0:0:0:0:19-2406:f00:9:0:0:0:0:1b
Annomaly index: : [9882]
Annomalous path: : {'58758', '20080', '1251', '23855', '101', '4758', '55824', '7660'}
Other paths: {'58758', '20080', '1251', '23855', '101', '55824', '7660'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'4758'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
9880,SPRACE-REDNESP,INDIACMS-TIFR-LHCONE-V4,"[1251, 20080, 55824, 58758]",0.958333
9881,SPRACE-REDNESP,INDIACMS-TIFR-LHCONE-V4,"[1251, 20080, 101, 7660, 23855, 55824, 58758]",0.016204
9882,SPRACE-REDNESP,INDIACMS-TIFR-LHCONE-V4,"[1251, 20080, 101, 7660, 23855, 55824, 4758, 5...",0.023148
9883,SPRACE-REDNESP,INDIACMS-TIFR-LHCONE-V4,"[1251, 20080, 55824]",0.002315






Pair: 2001:630:58:1c20:0:0:82f6:2ff1-2406:f00:9:0:0:0:0:1b
Annomaly index: : [12749]
Annomalous path: : {'58758', '24489', '4758', '55824', '20965', '786'}
Other paths: {'58758', '24489', '55824', '20965', '786'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'4758'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
12748,UKI-SOUTHGRID-RALPP-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[786, 20965, 55824, 58758]",0.974359
12749,UKI-SOUTHGRID-RALPP-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[786, 20965, 24489, 55824, 4758, 58758]",0.008547
12750,UKI-SOUTHGRID-RALPP-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[786, 20965, 24489, 55824, 58758]",0.017094






Pair: 2001:630:58:1c20:0:0:82f6:2ff2-2406:f00:9:0:0:0:0:1b
Annomaly index: : [12867]
Annomalous path: : {'58758', '24489', '4758', '55824', '20965', '786'}
Other paths: {'58758', '24489', '55824', '20965', '786'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'4758'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
12865,UKI-SOUTHGRID-RALPP-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[786, 20965, 55824, 58758]",0.966942
12866,UKI-SOUTHGRID-RALPP-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[786, 20965, 24489, 55824, 58758]",0.024793
12867,UKI-SOUTHGRID-RALPP-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[786, 20965, 24489, 55824, 4758, 58758]",0.008264






Pair: 2401:de00:0:0:0:0:0:301-2406:f00:9:0:0:0:0:1b
Annomaly index: : [16061]
Annomalous path: : {'7497', '58758', '23855', '4758', '55824'}
Other paths: {'7497', '23855', '58758', '55824'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'4758'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
16060,BEIJING-LCG2-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[7497, 55824, 58758]",0.933333
16061,BEIJING-LCG2-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[7497, 23855, 55824, 4758, 58758]",0.033333
16062,BEIJING-LCG2-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[7497, 23855, 55824, 58758]",0.033333






Pair: 2401:de00:0:0:0:0:0:302-2406:f00:9:0:0:0:0:1b
Annomaly index: : [16250]
Annomalous path: : {'7497', '58758', '23855', '4758', '55824'}
Other paths: {'7497', '23855', '58758', '55824'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'4758'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
16249,BEIJING-LCG2-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[7497, 55824, 58758]",0.948276
16250,BEIJING-LCG2-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[7497, 23855, 55824, 4758, 58758]",0.017241
16251,BEIJING-LCG2-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[7497, 23855, 55824, 58758]",0.034483






Pair: 2603:4000:c00:0:100:0:120c:1ab-2406:f00:9:0:0:0:0:1b
Annomaly index: : [16653]
Annomalous path: : {'58758', '10578', '23855', '3', '101', '4758', '55824', '7660'}
Other paths: {'58758', '10578', '23855', '3', '101', '55824', '7660'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'4758'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
16652,MIT-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[3, 10578, 55824, 58758]",0.882845
16653,MIT-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[3, 10578, 101, 7660, 23855, 55824, 4758, 58758]",0.016736
16654,MIT-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[3, 10578, 101, 7660, 23855, 55824, 58758]",0.020921
16655,MIT-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[3, 55824, 58758]",0.079498






Pair: 2603:4000:c00:0:100:0:120c:1ac-2406:f00:9:0:0:0:0:1b
Annomaly index: : [16846]
Annomalous path: : {'58758', '10578', '23855', '3', '101', '4758', '55824', '7660'}
Other paths: {'58758', '10578', '23855', '3', '101', '55824', '7660'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'4758'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
16844,MIT-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[3, 10578, 55824, 58758]",0.900232
16845,MIT-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[3, 10578, 101, 7660, 23855, 55824, 58758]",0.020882
16846,MIT-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[3, 10578, 101, 7660, 23855, 55824, 4758, 58758]",0.016241
16847,MIT-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[3, 55824, 58758]",0.060325
16848,MIT-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[3, 10578, 55824]",0.00232






Pair: 2607:8a00:17:1496:0:0:0:82-2406:f00:9:0:0:0:0:1b
Annomaly index: : [17386, 17387]
Annomalous path: : {'58758', '10490', '23855', '101', '4758', '55824', '7660', '7212'}
Other paths: {'55824', '7212', '58758', '10490'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'23855', '7660', '101', '4758'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
17385,VANDERBILT,INDIACMS-TIFR-LHCONE-V4,"[7212, 10490, 55824, 58758]",0.939675
17386,VANDERBILT,INDIACMS-TIFR-LHCONE-V4,"[7212, 10490, 101, 7660, 23855, 55824, 4758, 5...",0.023202
17387,VANDERBILT,INDIACMS-TIFR-LHCONE-V4,"[7212, 10490, 101, 7660, 23855, 55824, 58758]",0.013921
17388,VANDERBILT,INDIACMS-TIFR-LHCONE-V4,"[7212, 55824, 58758]",0.023202






Pair: 2607:8a00:17:1496:0:0:0:83-2406:f00:9:0:0:0:0:1b
Annomaly index: : [17564, 17565]
Annomalous path: : {'58758', '10490', '23855', '101', '4758', '55824', '7660', '7212'}
Other paths: {'55824', '7212', '58758', '10490'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'23855', '7660', '101', '4758'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
17563,VANDERBILT,INDIACMS-TIFR-LHCONE-V4,"[7212, 10490, 55824, 58758]",0.920732
17564,VANDERBILT,INDIACMS-TIFR-LHCONE-V4,"[7212, 10490, 101, 7660, 23855, 55824, 4758, 5...",0.021341
17565,VANDERBILT,INDIACMS-TIFR-LHCONE-V4,"[7212, 10490, 101, 7660, 23855, 55824, 58758]",0.02439
17566,VANDERBILT,INDIACMS-TIFR-LHCONE-V4,"[7212, 55824, 58758]",0.033537






Pair: 2607:f388:101c:1000:0:0:0:441-2406:f00:9:0:0:0:0:1b
Annomaly index: : [17697, 17698]
Annomalous path: : {'58758', '59', '23855', '101', '4758', '55824', '7660'}
Other paths: {'55824', '58758', '59'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'23855', '7660', '101', '4758'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
17696,GLOW-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[59, 55824, 58758]",0.963768
17697,GLOW-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[59, 101, 7660, 23855, 55824, 4758, 58758]",0.014493
17698,GLOW-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[59, 101, 7660, 23855, 55824, 58758]",0.021739






Pair: 2607:f388:101c:1000:0:0:0:442-2406:f00:9:0:0:0:0:1b
Annomaly index: : [17824, 17825]
Annomalous path: : {'58758', '59', '23855', '101', '4758', '55824', '7660'}
Other paths: {'55824', '58758', '59'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'23855', '7660', '101', '4758'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
17823,GLOW-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[59, 55824, 58758]",0.958333
17824,GLOW-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[59, 101, 7660, 23855, 55824, 4758, 58758]",0.030093
17825,GLOW-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[59, 101, 7660, 23855, 55824, 58758]",0.009259
17826,GLOW-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[59, 55824]",0.002315






Pair: 2607:f720:1700:1b30:0:0:0:29-2406:f00:9:0:0:0:0:1b
Annomaly index: : [18046, 18048]
Annomalous path: : {'2152', '58758', '23855', '101', '4758', '55824', '7660', '7377'}
Other paths: {'2152', '58758', '23855', '101', '55824', '7660', '7377'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'4758'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
18045,UCSDT2-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[7377, 2152, 55824, 58758]",0.959233
18046,UCSDT2-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[7377, 2152, 101, 7660, 23855, 55824, 4758, 58...",0.021583
18047,UCSDT2-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[7377, 2152, 101, 7660, 23855, 55824]",0.002398
18048,UCSDT2-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[7377, 2152, 101, 7660, 23855, 55824, 58758]",0.014388
18049,UCSDT2-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[7377, 2152]",0.002398






Pair: 2620:104:1f:1000:21b:21ff:fe97:4dc8-2406:f00:9:0:0:0:0:1b
Annomaly index: : [18656, 18657]
Annomalous path: : {'58758', '7660', '11096', '23855', '101', '4758', '55824', '6356'}
Other paths: {'55824', '6356', '58758', '11096'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'23855', '7660', '101', '4758'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
18655,UFLORIDA-HPC-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[6356, 11096, 55824, 58758]",0.963504
18656,UFLORIDA-HPC-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[6356, 11096, 101, 7660, 23855, 55824, 4758, 5...",0.018248
18657,UFLORIDA-HPC-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[6356, 11096, 101, 7660, 23855, 55824, 58758]",0.014599
18658,UFLORIDA-HPC-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[6356, 11096, 55824]",0.00365






Pair: 2a00:139c:5:585:0:41:2:12-2001:630:21:8d80:0:0:8afd:3c52
Annomaly index: : [18765]
Annomalous path: : {'680', '58069', '34878', '20965', '786'}
Other paths: {'34878', '786', '20965', '58069'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'680'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
18762,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-LIV-HEP,"[58069, 34878, 20965, 786]",0.79661
18763,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-LIV-HEP,"[58069, 20965, 786]",0.067797
18764,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-LIV-HEP,"[34878, 20965, 786]",0.127119
18765,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-LIV-HEP,"[34878, 58069, 680, 20965, 786]",0.008475






Pair: 2a00:139c:5:585:0:41:2:12-2001:630:22:d000:800:ff:fe00:27
Annomaly index: : [18769]
Annomalous path: : {'680', '58069', '34878', '20965', '786'}
Other paths: {'34878', '786', '20965', '58069'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'680'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
18766,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-MAN-HEP,"[34878, 58069, 20965, 786]",0.852713
18767,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-MAN-HEP,"[58069, 20965, 786]",0.062016
18768,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-MAN-HEP,"[34878, 20965, 786]",0.077519
18769,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-MAN-HEP,"[58069, 34878, 680, 20965, 786]",0.007752






Pair: 2a00:139c:5:585:0:41:2:12-2001:630:3c1:638:0:0:0:d545
Annomaly index: : [18773]
Annomalous path: : {'680', '58069', '34878', '20965', '786'}
Other paths: {'34878', '786', '20965', '58069'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'680'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
18770,FZK-LCG2-LHCOPNE,UKI-SCOTGRID-ECDF,"[58069, 34878, 20965, 786]",0.80315
18771,FZK-LCG2-LHCOPNE,UKI-SCOTGRID-ECDF,"[58069, 20965, 786]",0.094488
18772,FZK-LCG2-LHCOPNE,UKI-SCOTGRID-ECDF,"[34878, 20965, 786]",0.094488
18773,FZK-LCG2-LHCOPNE,UKI-SCOTGRID-ECDF,"[58069, 34878, 680, 20965, 786]",0.007874






Pair: 2a00:139c:5:585:0:41:2:12-2001:630:40:ef:0:0:0:7b
Annomaly index: : [18782]
Annomalous path: : {'680', '58069', '34878', '20965', '786'}
Other paths: {'786', '20965', '34878', '58069'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'680'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
18779,FZK-LCG2-LHCOPNE,UKI-SCOTGRID-GLASGOW,"[58069, 20965, 786]",0.0625
18780,FZK-LCG2-LHCOPNE,UKI-SCOTGRID-GLASGOW,"[58069, 34878, 20965, 786]",0.875
18781,FZK-LCG2-LHCOPNE,UKI-SCOTGRID-GLASGOW,"[34878, 20965, 786]",0.044643
18782,FZK-LCG2-LHCOPNE,UKI-SCOTGRID-GLASGOW,"[58069, 34878, 680, 20965, 786]",0.017857






Pair: 2a00:139c:5:585:0:41:2:12-2001:630:40:ef:0:0:0:7c
Annomaly index: : [18786]
Annomalous path: : {'680', '58069', '34878', '20965', '786'}
Other paths: {'34878', '786', '20965', '58069'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'680'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
18783,FZK-LCG2-LHCOPNE,UKI-SCOTGRID-GLASGOW,"[34878, 58069, 20965, 786]",0.84375
18784,FZK-LCG2-LHCOPNE,UKI-SCOTGRID-GLASGOW,"[34878, 20965, 786]",0.085938
18785,FZK-LCG2-LHCOPNE,UKI-SCOTGRID-GLASGOW,"[58069, 20965, 786]",0.0625
18786,FZK-LCG2-LHCOPNE,UKI-SCOTGRID-GLASGOW,"[58069, 34878, 680, 20965, 786]",0.007812






Pair: 2a00:139c:5:585:0:41:2:12-2001:630:441:905:0:0:0:c
Annomaly index: : [18793]
Annomalous path: : {'680', '58069', '34878', '20965', '786'}
Other paths: {'34878', '786', '20965', '58069'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'680'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
18790,FZK-LCG2-LHCOPNE,UKI-SOUTHGRID-OX-HEP,"[58069, 34878, 20965, 786]",0.889706
18791,FZK-LCG2-LHCOPNE,UKI-SOUTHGRID-OX-HEP,"[34878, 20965, 786]",0.036765
18792,FZK-LCG2-LHCOPNE,UKI-SOUTHGRID-OX-HEP,"[58069, 20965, 786]",0.058824
18793,FZK-LCG2-LHCOPNE,UKI-SOUTHGRID-OX-HEP,"[34878, 58069, 680, 20965, 786]",0.014706






Pair: 2a00:139c:5:585:0:41:2:6-2001:630:113:0:0:0:0:4
Annomaly index: : [19120]
Annomalous path: : {'680', '58069', '34878', '20965', '786'}
Other paths: {'34878', '786', '20965', '58069'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'680'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
19117,FZK-LCG2-LHCOPNE,UKI-LT2-RHUL,"[34878, 58069, 20965, 786]",0.803279
19118,FZK-LCG2-LHCOPNE,UKI-LT2-RHUL,"[58069, 20965, 786]",0.090164
19119,FZK-LCG2-LHCOPNE,UKI-LT2-RHUL,"[34878, 20965, 786]",0.090164
19120,FZK-LCG2-LHCOPNE,UKI-LT2-RHUL,"[34878, 58069, 680, 20965, 786]",0.016393






Pair: 2a00:139c:5:585:0:41:2:6-2001:630:22:d000:800:ff:fe00:27
Annomaly index: : [19133]
Annomalous path: : {'680', '58069', '34878', '20965', '786'}
Other paths: {'34878', '786', '20965', '58069'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'680'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
19130,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-MAN-HEP,"[58069, 34878, 20965, 786]",0.80531
19131,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-MAN-HEP,"[58069, 20965, 786]",0.088496
19132,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-MAN-HEP,"[34878, 20965, 786]",0.079646
19133,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-MAN-HEP,"[58069, 34878, 680, 20965, 786]",0.026549






Pair: 2a00:139c:5:585:0:41:2:6-2001:630:22:d000:800:ff:fe00:28
Annomaly index: : [19137]
Annomalous path: : {'680', '58069', '34878', '20965', '786'}
Other paths: {'34878', '786', '20965', '58069'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'680'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
19134,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-MAN-HEP,"[34878, 58069, 20965, 786]",0.8
19135,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-MAN-HEP,"[34878, 20965, 786]",0.104348
19136,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-MAN-HEP,"[58069, 20965, 786]",0.078261
19137,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-MAN-HEP,"[34878, 58069, 680, 20965, 786]",0.017391






Pair: 2a00:139c:5:585:0:41:2:6-2001:630:441:905:0:0:0:b
Annomaly index: : [19155]
Annomalous path: : {'680', '58069', '34878', '20965', '786'}
Other paths: {'786', '20965', '34878', '58069'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'680'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
19152,FZK-LCG2-LHCOPNE,UKI-SOUTHGRID-OX-HEP,"[58069, 20965, 786]",0.098214
19153,FZK-LCG2-LHCOPNE,UKI-SOUTHGRID-OX-HEP,"[58069, 34878, 20965, 786]",0.776786
19154,FZK-LCG2-LHCOPNE,UKI-SOUTHGRID-OX-HEP,"[34878, 20965, 786]",0.116071
19155,FZK-LCG2-LHCOPNE,UKI-SOUTHGRID-OX-HEP,"[34878, 58069, 680, 20965, 786]",0.008929






Pair: 2a00:139c:5:585:0:41:2:6-2001:630:441:905:0:0:0:c
Annomaly index: : [19159]
Annomalous path: : {'680', '58069', '34878', '20965', '786'}
Other paths: {'34878', '58069', '20965', '786'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'680'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
19156,FZK-LCG2-LHCOPNE,UKI-SOUTHGRID-OX-HEP,"[34878, 20965, 786]",0.085714
19157,FZK-LCG2-LHCOPNE,UKI-SOUTHGRID-OX-HEP,"[34878, 58069, 20965, 786]",0.771429
19158,FZK-LCG2-LHCOPNE,UKI-SOUTHGRID-OX-HEP,"[58069, 20965, 786]",0.121429
19159,FZK-LCG2-LHCOPNE,UKI-SOUTHGRID-OX-HEP,"[58069, 34878, 680, 20965, 786]",0.021429






Pair: 2a00:139c:5:585:0:41:2:6-2001:630:80:2fd:0:0:aa83:2196
Annomaly index: : [19186]
Annomalous path: : {'680', '58069', '34878', '20965', '786'}
Other paths: {'34878', '786', '20965', '58069'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'680'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
19183,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-LANCS-HEP,"[34878, 58069, 20965, 786]",0.817308
19184,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-LANCS-HEP,"[58069, 20965, 786]",0.096154
19185,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-LANCS-HEP,"[34878, 20965, 786]",0.076923
19186,FZK-LCG2-LHCOPNE,UKI-NORTHGRID-LANCS-HEP,"[58069, 34878, 680, 20965, 786]",0.009615






Pair: 2a00:139c:5:585:0:41:2:6-2406:f00:9:0:0:0:0:1b
Annomaly index: : [19399, 19400, 19402]
Annomalous path: : {'58758', '680', '55836', '58069', '553', '34878', '20965', '55824'}
Other paths: {'58758', '680', '58069', '55824', '20965', '34878'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'55836', '553'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
19397,FZK-LCG2-LHCOPNE,INDIACMS-TIFR-LHCONE-V4,"[58069, 20965, 55824, 58758]",0.54918
19398,FZK-LCG2-LHCOPNE,INDIACMS-TIFR-LHCONE-V4,"[34878, 20965, 55824, 58758]",0.385246
19399,FZK-LCG2-LHCOPNE,INDIACMS-TIFR-LHCONE-V4,"[34878, 58069, 553, 55836, 55824, 58758]",0.040984
19400,FZK-LCG2-LHCOPNE,INDIACMS-TIFR-LHCONE-V4,"[34878, 553, 55836, 55824, 58758]",0.008197
19401,FZK-LCG2-LHCOPNE,INDIACMS-TIFR-LHCONE-V4,"[34878, 680, 20965, 55824, 58758]",0.008197
19402,FZK-LCG2-LHCOPNE,INDIACMS-TIFR-LHCONE-V4,"[58069, 680, 20965, 55824, 58758]",0.008197






Pair: 2a00:139c:5:585:0:41:2:6-2607:f8f0:c11:700b:0:0:0:59
Annomaly index: : [19468]
Annomalous path: : {'6509', '680', '58069', '271', '34878', '16462', '20965'}
Other paths: {'6509', '58069', '271', '34878', '16462', '20965'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'680'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
19465,FZK-LCG2-LHCOPNE,CA-UVIC-CLOUD-LHCONE,"[58069, 20965, 6509, 271, 16462]",0.089286
19466,FZK-LCG2-LHCOPNE,CA-UVIC-CLOUD-LHCONE,"[34878, 20965, 6509, 271, 16462]",0.107143
19467,FZK-LCG2-LHCOPNE,CA-UVIC-CLOUD-LHCONE,"[58069, 34878, 20965, 6509, 271, 16462]",0.794643
19468,FZK-LCG2-LHCOPNE,CA-UVIC-CLOUD-LHCONE,"[34878, 58069, 680, 20965, 6509, 271, 16462]",0.008929






Pair: 2a00:139c:5:585:0:41:2:6-2a01:56c1:10:1000:0:0:c224:b25
Annomaly index: : [19525]
Annomalous path: : {'680', '58069', '34878', '20965', '198864', '786'}
Other paths: {'58069', '34878', '20965', '198864', '786'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'680'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
19522,FZK-LCG2-LHCOPNE,UKI-LT2-QMUL,"[58069, 34878, 20965, 786, 198864]",0.786885
19523,FZK-LCG2-LHCOPNE,UKI-LT2-QMUL,"[34878, 20965, 786, 198864]",0.081967
19524,FZK-LCG2-LHCOPNE,UKI-LT2-QMUL,"[58069, 20965, 786, 198864]",0.122951
19525,FZK-LCG2-LHCOPNE,UKI-LT2-QMUL,"[34878, 58069, 680, 20965, 786, 198864]",0.008197






Pair: 2a0c:5bc0:c8:2:1e34:daff:fe49:1b04-2406:f00:9:0:0:0:0:1b
Annomaly index: : [20324]
Annomalous path: : {'58758', '24489', '4758', '55824', '20965', '786'}
Other paths: {'58758', '24489', '55824', '20965', '786'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'4758'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
20322,UKI-LT2-IC-HEP-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[786, 20965, 55824, 58758]",0.979079
20323,UKI-LT2-IC-HEP-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[786, 20965, 24489, 55824, 58758]",0.008368
20324,UKI-LT2-IC-HEP-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[786, 20965, 24489, 55824, 4758, 58758]",0.012552






Pair: 2a0c:5bc0:c8:2:1e34:daff:fe75:3496-2406:f00:9:0:0:0:0:1b
Annomaly index: : [20449]
Annomalous path: : {'58758', '24489', '4758', '55824', '20965', '786'}
Other paths: {'58758', '24489', '55824', '20965', '786'}

In other paths, but not in anomalous: set()

In anomalous, but not in other paths: {'4758'}


Unnamed: 0,src_site,dest_site,asns_updated,hash_freq
20448,UKI-LT2-IC-HEP-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[786, 20965, 55824, 58758]",0.96
20449,UKI-LT2-IC-HEP-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[786, 20965, 24489, 55824, 4758, 58758]",0.017778
20450,UKI-LT2-IC-HEP-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[786, 20965, 24489, 55824]",0.004444
20451,UKI-LT2-IC-HEP-LHCONE,INDIACMS-TIFR-LHCONE-V4,"[786, 20965, 24489, 55824, 58758]",0.017778








In [19]:
_diff

{'101',
 '23855',
 '2907',
 '32361',
 '34878',
 '4758',
 '553',
 '55836',
 '62',
 '680',
 '7660'}

In [20]:
len(_diff)

11

In [15]:
print(f"Number of ASNs flagged (old version): {len(old_alarms)}")
print(f"Number of ASNs flagged (new version): {len(set(_diff))}")

oldv = set(sorted([str(m['asn']) for m in old_alarms]))
newv = set(sorted(_diff))

print(f'In OLD  version only:',oldv - newv)
print(f'In NEW  version only:',newv - oldv)
print('Common:',[v for v in newv if v in oldv])

print('\n\n\n')

print(f"Total number of pairs with annomalies after applying the filters: {len(anomalous_sequences[anomalous_sequences.index.isin(remaining_anomalies_indices)].groupby(['src_site', 'dest_site'])['dt'].count())}")
anomalous_sequences[anomalous_sequences.index.isin(remaining_anomalies_indices)].groupby(['src_site', 'dest_site'])['dt'].count()

Number of ASNs flagged (old version): 14
Number of ASNs flagged (new version): 11
In OLD  version only: {'293', '24489', '2875', '271', '23911', '17934', '2200', '7212', '20965'}
In NEW  version only: {'32361', '680', '62', '55836', '553', '34878'}
Common: ['2907', '23855', '101', '4758', '7660']




Total number of pairs with annomalies after applying the filters: 21


src_site                    dest_site              
BEIJING-LCG2-LHCONE         INDIACMS-TIFR-LHCONE-V4    2
FZK-LCG2-LHCOPNE            CA-UVIC-CLOUD-LHCONE       1
                            CIT_CMS_T2-LHCONE          3
                            FR-GRIF_LLR                2
                            INDIACMS-TIFR-LHCONE-V4    3
                            UKI-LT2-QMUL               1
                            UKI-LT2-RHUL               1
                            UKI-NORTHGRID-LANCS-HEP    1
                            UKI-NORTHGRID-LIV-HEP      1
                            UKI-NORTHGRID-MAN-HEP      3
                            UKI-SCOTGRID-ECDF          1
                            UKI-SCOTGRID-GLASGOW       2
                            UKI-SOUTHGRID-OX-HEP       3
GLOW-LHCONE                 INDIACMS-TIFR-LHCONE-V4    5
MIT-LHCONE                  INDIACMS-TIFR-LHCONE-V4    3
SPRACE-REDNESP              INDIACMS-TIFR-LHCONE-V4    1
UCSDT2-LHCONE               INDIACMS

In [21]:
print(f'Old set of alarms based on pairs: {len(diffs)}')
print(f'New set of alarms based on pairs: {len(alarms_data)}')

Old set of alarms based on pairs: 339
New set of alarms based on pairs: 38
