In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time
import itertools
import glob
from scipy.spatial.distance import squareform, pdist
from math import radians, cos, sin, asin, sqrt

In [2]:
# Load the complete link-intents log into a single DataFrame.
intents_csv_path = 'link_intents.csv'
df = pd.read_csv(intents_csv_path)

In [3]:
# Summarize the loaded frame: row count, column names, dtypes and memory footprint.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44942562 entries, 0 to 44942561
Data columns (total 7 columns):
 #   Column              Dtype 
---  ------              ----- 
 0   timestamp           object
 1    identifier         object
 2    node_a_identifier  object
 3    node_a_interface   object
 4    node_b_identifier  object
 5    node_b_interface   object
 6    state              object
dtypes: object(7)
memory usage: 2.3+ GB


## Check timestamp column

The link intents file is quite large; Excel cannot load the entire dataset. At first glance, it appears that the timestamps have no dates associated with them. We want to check whether this is true. First, convert the timestamp column to datetime objects. Then, check whether any of the entries include a year. If so, we can filter to the same date range as the GPS data and see which links the TS-SDN selected. 

In [4]:
# Parse the raw timestamp strings into pandas datetimes so they can be
# inspected and compared as real points in time.
parsed_timestamps = pd.to_datetime(df['timestamp'])
df['time_dt_intents'] = parsed_timestamps

In [5]:
# Display the parsed column to check whether the timestamps carry a calendar date.
df['time_dt_intents']

0          2018-11-26 09:56:07.789025
1          2018-11-26 09:56:07.928088
2          2018-11-26 10:01:30.578018
3          2018-11-26 10:01:30.834429
4          2018-11-26 10:01:30.954537
                      ...            
44942557   2021-03-01 17:42:04.206962
44942558   2021-03-01 17:42:04.363790
44942559   2021-03-01 17:42:04.382646
44942560   2021-03-01 17:42:05.161701
44942561   2021-03-01 17:42:05.189539
Name: time_dt_intents, Length: 44942562, dtype: datetime64[ns]

In [6]:
# The timestamps carry sub-second precision, but the coarse filter below only
# needs the calendar date. Derive it from the already-parsed datetime column
# rather than re-parsing every raw timestamp string a second time; the
# resulting values (plain datetime.date objects) are the same.
df['time_dt_intents_date'] = df['time_dt_intents'].dt.date

In [7]:
# Display the date-only column; its values are date objects (dtype: object).
df['time_dt_intents_date']

0           2018-11-26
1           2018-11-26
2           2018-11-26
3           2018-11-26
4           2018-11-26
               ...    
44942557    2021-03-01
44942558    2021-03-01
44942559    2021-03-01
44942560    2021-03-01
44942561    2021-03-01
Name: time_dt_intents_date, Length: 44942562, dtype: object

In [8]:
# Filter to the date range of the GPS data (both bounds inclusive).
start_date = '2021-01-01 '
end_date =  '2021-01-18'

# 'time_dt_intents_date' holds plain datetime.date objects, so the bounds must
# be datetime.date objects too. Comparing dates against (tz-aware) Timestamps
# is deprecated in pandas 1.x and raises TypeError for inequalities in
# pandas 2.x.
first_day = pd.to_datetime(start_date).date()
last_day = pd.to_datetime(end_date).date()

intent_dates = df['time_dt_intents_date']
df = df[(intent_dates >= first_day) & (intent_dates <= last_day)]

  result = libops.scalar_compare(x.ravel(), y, op)


In [9]:
# Narrow the selection further to the exact start and stop instants
# (both endpoints are kept).
start_time = '2021-01-01 00:01:00.000000'
stop_time = '2021-01-17 05:44:00.000000'

window_start = pd.to_datetime(start_time, utc=False)
window_stop = pd.to_datetime(stop_time, utc=False)

inside_window = df['time_dt_intents'].between(window_start, window_stop)
df = df[inside_window]

## Filter Link Selections 

We want to get the balloon-to-balloon pairings that the TS-SDN selected. We can then use these link selections as a baseline against which to compare our algorithm. 

In [10]:
# Preview the first rows that remain after the date and time-window filters.
df.head(10)

Unnamed: 0,timestamp,identifier,node_a_identifier,node_a_interface,node_b_identifier,node_b_interface,state,time_dt_intents,time_dt_intents_date
42764422,2021-01-01 00:01:05.102763,Temporospatial Topology & Routing:Intent:-922...,lwg01.nrr01,wifi0,LN-316,wifi0,INSTALLING,2021-01-01 00:01:05.102763,2021-01-01
42764423,2021-01-01 00:01:05.272345,Temporospatial Topology & Routing:Intent:-922...,lwg01.nrr01,wifi0,LN-316,wifi0,FAILED,2021-01-01 00:01:05.272345,2021-01-01
42764424,2021-01-01 00:01:05.401223,Temporospatial Topology & Routing:Intent:-922...,lwg01.nrr01,wifi0,LN-316,wifi0,INSTALL_REQ,2021-01-01 00:01:05.401223,2021-01-01
42764425,2021-01-01 00:01:07.760114,Temporospatial Topology & Routing:Intent:-922...,LN-065,wifi0,lwg01.wmc03,wifi0,INSTALLING,2021-01-01 00:01:07.760114,2021-01-01
42764426,2021-01-01 00:01:07.909056,Temporospatial Topology & Routing:Intent:-922...,LN-065,wifi0,lwg01.wmc03,wifi0,FAILED,2021-01-01 00:01:07.909056,2021-01-01
42764427,2021-01-01 00:01:09.364512,Temporospatial Topology & Routing:Intent:-922...,LN-065,wifi0,lwg01.wmc03,wifi0,INSTALL_REQ,2021-01-01 00:01:09.364512,2021-01-01
42764428,2021-01-01 00:01:12.044226,Temporospatial Topology & Routing:Intent:-922...,lwg03.wmc03,wifi0,LN-183,wifi0,INSTALLING,2021-01-01 00:01:12.044226,2021-01-01
42764429,2021-01-01 00:01:12.211053,Temporospatial Topology & Routing:Intent:-922...,lwg03.wmc03,wifi0,LN-183,wifi0,FAILED,2021-01-01 00:01:12.211053,2021-01-01
42764430,2021-01-01 00:01:13.458894,Temporospatial Topology & Routing:Intent:-922...,lwg03.wmc03,wifi0,LN-183,wifi0,INSTALL_REQ,2021-01-01 00:01:13.458894,2021-01-01
42764431,2021-01-01 00:01:13.567116,Temporospatial Topology & Routing:Intent:-922...,LN-274,wifi0,lwg02.wmc03,wifi0,INSTALLING,2021-01-01 00:01:13.567116,2021-01-01


Now we want to filter out the rows whose node identifiers belong to ground stations (e.g., lwg03.wmc03).

In [11]:
# Flag rows whose node A identifier contains 'lwg' (a ground station), then
# invert the flag so that True marks the rows to keep.
node_a_is_ground = df[' node_a_identifier'].str.contains('lwg')
vals = np.logical_not(node_a_is_ground)

In [12]:
# Keep only the rows where the node A keep-mask `vals` is True.
df = df.loc[vals]

In [13]:
# Flag rows whose node B identifier contains 'lwg' (a ground station), then
# invert the flag so that True marks the rows to keep.
node_b_is_ground = df[' node_b_identifier'].str.contains('lwg')
vals = np.logical_not(node_b_is_ground)

In [14]:
# Keep only the rows where the node B keep-mask `vals` is True.
df = df.loc[vals]

In [15]:
# Preview the rows left once links with a ground-station endpoint are removed.
df.head(20)

Unnamed: 0,timestamp,identifier,node_a_identifier,node_a_interface,node_b_identifier,node_b_interface,state,time_dt_intents,time_dt_intents_date
42764434,2021-01-01 00:01:18.176876,mmwprober: LN-318 LN-318/mmwave1 1609458805: ...,LN-318,mmwave1,LN-320,mmwave2,FAILED,2021-01-01 00:01:18.176876,2021-01-01
42764435,2021-01-01 00:01:18.367363,mmwprober: LN-318 LN-318/mmwave1 1609458805: ...,LN-318,mmwave1,LN-320,mmwave2,INSTALL_REQ,2021-01-01 00:01:18.367363,2021-01-01
42764436,2021-01-01 00:01:18.549323,mmwprober: LN-318 LN-318/mmwave1 1609458805: ...,LN-318,mmwave1,LN-320,mmwave2,INSTALLING,2021-01-01 00:01:18.549323,2021-01-01
42764473,2021-01-01 00:02:19.928041,Temporospatial Topology & Routing:Intent:-922...,,mmwave0,LN-232,mmwave2,FAILED,2021-01-01 00:02:19.928041,2021-01-01
42764474,2021-01-01 00:02:20.983943,Temporospatial Topology & Routing:Intent:-922...,,mmwave0,LN-232,mmwave2,INSTALL_REQ,2021-01-01 00:02:20.983943,2021-01-01
42764475,2021-01-01 00:02:21.151371,Temporospatial Topology & Routing:Intent:-922...,,mmwave0,LN-232,mmwave2,INSTALLING,2021-01-01 00:02:21.151371,2021-01-01
42764512,2021-01-01 00:03:17.260649,mmwprober: LN-263 LN-263/mmwave2 1609459244: ...,LN-263,mmwave2,LN-223,mmwave2,FAILED,2021-01-01 00:03:17.260649,2021-01-01
42764513,2021-01-01 00:03:17.424535,mmwprober: LN-263 LN-263/mmwave2 1609459244: ...,LN-263,mmwave2,LN-223,mmwave2,INSTALL_REQ,2021-01-01 00:03:17.424535,2021-01-01
42764514,2021-01-01 00:03:17.586377,mmwprober: LN-263 LN-263/mmwave2 1609459244: ...,LN-263,mmwave2,LN-223,mmwave2,INSTALLING,2021-01-01 00:03:17.586377,2021-01-01
42764550,2021-01-01 00:04:14.975996,mmwprober: LN-318 LN-318/mmwave1 1609458805: ...,LN-318,mmwave1,LN-320,mmwave2,INSTALLED,2021-01-01 00:04:14.975996,2021-01-01


There's no node id at idx = 42764473 (position 3 in the filtered dataframe). Is this whitespace or a null value? 

In [16]:
# Look at the raw node A value at position 3 to see what the blank cell actually contains.
df[' node_a_identifier'].iloc[3]

' '

The above returns whitespace. Repeat the above filtering process to remove any rows with whitespace as a node identifier. 

In [22]:
# Build a keep-mask that is False wherever the node B identifier consists only
# of whitespace characters.
# NOTE(review): the blank value inspected earlier was in ' node_a_identifier';
# confirm that node A gets the same whitespace filter and that this mask is
# actually applied to df.
vals = np.logical_not(df[' node_b_identifier'].str.isspace()) ##pick up here @ Lakshya