In [None]:
import copy
import os
import sys

import descartes
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from scipy import stats
from shapely.geometry import Point, Polygon

%matplotlib inline 

In [None]:
# Base name of the result files to analyse; the functions below read
# 'results/<filename>-rtts.csv' and 'results/<filename>-icmp.csv'.
filename = 'singapur'

In [None]:
def thompson_tau(n, alpha=0.05):
    """Return the modified Thompson tau rejection threshold for a sample of size n.

    The value is computed as

        tau = t * (n - 1) / (sqrt(n) * sqrt(n - 2 + t**2))

    where t is the two-sided Student's t critical value for significance
    level ``alpha`` with ``n - 2`` degrees of freedom.

    Parameters
    ----------
    n : int
        Number of samples currently under consideration.
    alpha : float, optional
        Two-sided significance level. Defaults to 0.05, the value that was
        previously hard-coded.

    Returns
    -------
    float
        The tau threshold. For ``n <= 2`` the degrees of freedom are not
        positive and the result is ``nan``.
    """
    # Evaluate the critical value once; it appears twice in the formula.
    t_crit = stats.t.ppf(1 - (alpha / 2), n - 2)
    return (t_crit * (n - 1)) / (np.sqrt(n) * np.sqrt(n - 2 + t_crit ** 2))

In [None]:
def plotOutliers(result_file):
    """Plot the standardized deviation (xᵢ - x̄)/S of every inter-hop RTT.

    Reads 'results/<result_file>-rtts.csv' and draws, for each row of the
    'Latencia' column, its distance to the mean expressed in standard
    deviations, together with a dashed reference line at 0.

    Parameters
    ----------
    result_file : str
        Base name of the RTT results file (without the '-rtts.csv' suffix).
    """
    dataRTT = pd.read_csv(f'results/{result_file}-rtts.csv', index_col=0)

    latencies = dataRTT['Latencia'].to_numpy(dtype=float)
    n = len(latencies)

    # S is the *sample* standard deviation (ddof=1), as used by the modified
    # Thompson tau method; np.std defaults to the population form (ddof=0).
    sd = np.std(latencies, ddof=1)
    points = (latencies - np.mean(latencies)) / sd

    plt.hlines(0, xmin=0, xmax=n - 1, color='black', linestyle='--')
    plt.xlabel('Numero de salto')
    plt.ylabel('(xᵢ - x̄)/S')
    plt.plot(range(n), points)

In [None]:
# Plot the standardized RTT deviations for the capture named by `filename`.
plotOutliers(filename)

In [None]:
def plotRTTs(result_file):
    """Draw a horizontal bar chart of the RTT between consecutive hops.

    Reads 'results/<result_file>-rtts.csv'. Each bar is labelled
    '<first column> → <second column>' (presumably the source and
    destination IP of the hop pair; confirm against the CSV writer) and its
    length is the value of the 'Latencia' column.

    Parameters
    ----------
    result_file : str
        Base name of the RTT results file (without the '-rtts.csv' suffix).
    """
    data = pd.read_csv(f'results/{result_file}-rtts.csv', index_col=0)

    # Use .iloc for positional access: row[0] on a label-indexed Series relies
    # on the integer-position fallback that pandas has deprecated.
    hop_labels = [
        f'{row.iloc[0]} → {row.iloc[1]}'
        for _, row in data.iterrows()
    ]

    plt.barh(hop_labels, data['Latencia'])
    plt.title('RTT entre saltos')
    plt.xlabel('Latencia (ms)')
    plt.ylabel('Saltos IP')
    # First hop at the top of the chart.
    plt.gca().invert_yaxis()

In [None]:
# Bar chart of the inter-hop RTTs for the capture named by `filename`.
plotRTTs(filename)

In [None]:
def findOutliers(result_file):
    """Detect outlier inter-hop RTTs with the modified Thompson tau method (Cimbala).

    Reads 'results/<result_file>-rtts.csv' and repeatedly tests the value of
    the 'Latencia' column that lies farthest from the mean. If its deviation
    divided by the sample standard deviation exceeds ``thompson_tau(n)``, the
    value is flagged and removed, and the mean, deviation and threshold are
    recomputed on the remaining values. The process stops as soon as the most
    extreme remaining value is not rejected.

    Parameters
    ----------
    result_file : str
        Base name of the RTT results file (without the '-rtts.csv' suffix).

    Returns
    -------
    pandas.DataFrame
        The rows of the results file flagged as outliers, in detection order.

    Side effects
    ------------
    Prints the number of outliers and their row positions.
    """
    dataRTT = pd.read_csv(f'results/{result_file}-rtts.csv', index_col=0)

    # Inter-hop latencies, addressed below by their row position in dataRTT.
    latencies = dataRTT['Latencia'].tolist()

    # Row positions still under consideration. Tracking positions (instead of
    # looking values up with list.index) keeps equal latencies distinct.
    remaining = list(range(len(latencies)))
    outliers = []

    # The threshold needs n - 2 > 0 degrees of freedom, so stop below three
    # samples. This also terminates immediately on an empty file.
    while len(remaining) >= 3:
        values = np.array([latencies[pos] for pos in remaining], dtype=float)
        mean = np.mean(values)
        # Sample standard deviation (ddof=1), as the method prescribes.
        sd = np.std(values, ddof=1)

        # All values equal (sd == 0) or undefined (nan): nothing can be rejected.
        if not sd > 0:
            break

        # Only the most extreme value is a suspect in each round.
        deviations = np.abs(values - mean)
        suspect = int(np.argmax(deviations))

        # Cut-off: keep the value unless it strictly exceeds the threshold.
        if not deviations[suspect] / sd > thompson_tau(len(values)):
            break

        # Flag it and loop again to recompute mean/deviation/tau without it.
        outliers.append(remaining.pop(suspect))

    print(f'Found {len(outliers)} outlier(s), in indices: {outliers}\n')
    # `outliers` holds row positions, so select positionally.
    return dataRTT.iloc[outliers]

In [None]:
# Use the shared `filename` setting so this cell analyses the same capture as
# the other cells instead of a hard-coded name.
findOutliers(filename)

In [None]:
def infoOfIp(ip):
    """Look up the geographic location of an IP address through ipinfo.io.

    NOTE: no location is found for private IP addresses.

    The API token is taken from the ``IPINFO_TOKEN`` environment variable so
    that no credential is stored in the notebook; when the variable is not set
    the request is sent without a token.

    Parameters
    ----------
    ip : str
        IP address to look up.

    Returns
    -------
    list
        ``[ip, latitude, longitude]`` as reported by the service, or
        ``[0, -34.6033, -58.3817]`` (the coordinates of Buenos Aires) when the
        response carries no 'loc' field.

    Raises
    ------
    requests.RequestException
        On network failure or when the service does not answer in time.
    """
    token = os.environ.get('IPINFO_TOKEN')
    response = requests.get(
        'https://ipinfo.io/{}/json'.format(ip),
        params={'token': token} if token else None,
        timeout=10,  # never block the notebook indefinitely on a stalled request
    ).json()

    if 'loc' in response:
        # 'loc' is a "latitude,longitude" string.
        coordenadas = response['loc'].split(",")
        latitud = float(coordenadas[0])
        longitud = float(coordenadas[1])
        return [response['ip'], latitud, longitud]

    # Location unknown: fall back to the coordinates of Buenos Aires.
    return [0, -34.6033, -58.3817]

def similarIP(ip, ipsList):
    """Tell whether any entry of ipsList is located close to ip.

    Entries are sequences shaped like [ip, latitude, longitude]. Two entries
    are considered close when both their latitudes and their longitudes differ
    by at most 0.2 degrees (roughly 20 km).

    Returns True if such an entry exists, False otherwise (including when
    ipsList is empty).
    """
    tolerance = 0.2

    def is_close(candidate):
        # Both coordinates must fall within the tolerance.
        lat_gap = abs(candidate[1] - ip[1])
        lon_gap = abs(candidate[2] - ip[2])
        return lat_gap <= tolerance and lon_gap <= tolerance

    return any(is_close(candidate) for candidate in ipsList)

In [None]:
def plotMap(result_file, minx=None, maxx=None, miny=None, maxy=None):
    """Draw on a world map the locations of the IPs found in an ICMP path file.

    Reads the 'IPsrc' column of 'results/<result_file>-icmp.csv', geolocates
    every address with infoOfIp, drops each location that similarIP reports as
    close to an earlier one in the path, and plots the remaining points over
    the land shapefile, numbering them in path order.

    Parameters
    ----------
    result_file : str
        Base name of the ICMP path file (without the '-icmp.csv' suffix).
    minx, maxx, miny, maxy : float, optional
        Axis limits (longitude / latitude) passed to set_xlim / set_ylim.
        None leaves the corresponding limit automatic.
    """
    hop_ips = pd.read_csv(f'results/{result_file}-icmp.csv', index_col=0)['IPsrc'].tolist()

    # Geolocate every hop, then keep only those not close to any earlier hop.
    located = [infoOfIp(address) for address in hop_ips]
    unique_hops = []
    for position, hop in enumerate(located):
        if not similarIP(hop, located[:position]):
            unique_hops.append(hop)

    # Base map.
    land = gpd.read_file('mapas/ne_110m_land/ne_110m_land.shp')

    # Drop the first field of each entry (not needed for plotting).
    coords = pd.DataFrame(
        data=[hop[1:] for hop in unique_hops],
        columns=['latitude', 'longitude'],
    )
    # Points are (x, y) = (longitude, latitude).
    hop_points = [
        Point(lon_lat)
        for lon_lat in zip(coords['longitude'], coords['latitude'])
    ]
    hops_geo = gpd.GeoDataFrame(coords, crs='epsg:4326', geometry=hop_points)

    fig, ax = plt.subplots(figsize=(15, 15))
    land.plot(ax=ax, alpha=0.4, color='grey')
    hops_geo.plot(ax=ax, markersize=20, color='red', marker='o', label='ip')

    ax.set_xlim(minx, maxx)
    ax.set_ylim(miny, maxy)

    # Number the path, offsetting each label one degree from its point.
    for order, hop in enumerate(unique_hops):
        lat, lon = hop[1], hop[2]
        ax.annotate(text=order, xy=(lon, lat), xytext=(lon + 1, lat + 1), color='red')

In [None]:
# Map the geolocated hops of the ICMP path for the capture named by `filename`.
plotMap(filename)