In [1]:
from BML.data import Dataset
from BML import utils

#################
#https://github.com/KevinHoarau/BGNN/blob/main/collect_transform.py
# Data collection

# Root folder handed to BML; the job queue and its log are written beneath it.
folder = "dataset/"
dataset = Dataset(folder)

# Collection settings applied to the whole dataset.
dataset.setParams(dict(
    PrimingPeriod=10 * 60,  # 10 hours of priming data (value presumably in minutes -- confirm)
    IpVersion=[4],  # only IPv4 routes
    Collectors=["rrc04", "rrc05"],  # route collectors to pull data from
    UseRibsPriming=True,
))

'''
Dataset collected: 
{
    "name": "TTNet",
    "start_time": utils.getTimestamp(2004, 12, 24, 9, 20, 0) - 60*30, 
    "end_time": utils.getTimestamp(2004, 12, 24, 9, 20, 0) + 60*30, 
}
{
    "name": "IndoSat",
    "start_time": utils.getTimestamp(2014, 4, 2, 18, 25, 0) - 60*30, 
    "end_time": utils.getTimestamp(2014, 4, 2, 18, 25, 0) + 60*30, 
}
{
    "name": "AWS",
    "start_time": utils.getTimestamp(2016, 4, 22, 17, 10, 0) - 60*30, 
    "end_time": utils.getTimestamp(2016, 4, 22, 17, 10, 0) + 60*30
}
{
    "name": "TM",
    "start_time": utils.getTimestamp(2015, 6, 12, 8, 43, 0) - 60*30, 
    "end_time": utils.getTimestamp(2015, 6, 12, 8, 43, 0) + 60*30
}
{
    "name": "Youtube", => https://www.ripe.net/about-us/news/youtube-hijacking-a-ripe-ncc-ris-case-study/
    "start_time": utils.getTimestamp(2006, 2, 24, 18, 47, 0) - 60*30 - 24*3600, 
    "end_time": utils.getTimestamp(2006, 2, 24, 18, 47, 0) + 60*30 - 24*3600, 
}
{
    "name": "ChinaTelecom", => https://web.archive.org/web/20190415002259/https://bgpmon.net/chinese-isp-hijacked-10-of-the-internet/
    "start_time": utils.getTimestamp(2010, 4, 8, 17, 54, 31) - 60*30 - 24*3600, 
    "end_time": utils.getTimestamp(2010, 4, 8, 17, 54, 31) + 60*30 - 24*3600, 
}
{
    "name": "ConEdison", => https://web.archive.org/web/20130308072127/http://www.renesys.com/blog/2006/01/coned-steals-the-net.shtml
    "start_time": utils.getTimestamp(2006, 1, 22, 5, 5, 33) - 60*30 - 24*3600, 
    "end_time": utils.getTimestamp(2006, 1, 22, 5, 5, 33) + 60*30 - 24*3600, 
}
'''


'''
Feature not available
{
    "name": "ChinaTelecom",
    "start_time": utils.getTimestamp(2019, 6, 6, 9, 44, 0) - 60*30, 
    "end_time": utils.getTimestamp(2019, 6, 6, 9, 44, 0) + 60*30, 
}
{
    "name": "Google",
    "start_time": utils.getTimestamp(2017, 8, 25, 3, 22, 0) - 60*30, 
    "end_time": utils.getTimestamp(2017, 8, 25, 3, 22, 0) + 60*30, 
}
{
    "name": "India",
    "start_time": utils.getTimestamp(2021, 4, 16, 13, 48, 0) - 60*30 - 24*3600, 
    "end_time": utils.getTimestamp(2021, 4, 16, 13, 48, 0) + 60*30 - 24*3600, 
}
'''

# Two windows around the ConEdison event time (2006-01-22 05:05:33), each
# spanning 60*30 before to 60*30 after the reference instant:
#   - "no_anomaly": the same window shifted back by 24*3600 (baseline),
#   - "anomaly":    the window centred on the event itself.
# NOTE(review): offsets assume getTimestamp returns seconds -- confirm.
dataset.setPeriodsOfInterests([
    {
        "name": "ConEdison",
        "label": label,
        "start_time": utils.getTimestamp(2006, 1, 22, 5, 5, 33) - 60*30 - shift,
        "end_time": utils.getTimestamp(2006, 1, 22, 5, 5, 33) + 60*30 - shift,
    }
    for label, shift in (
        ("no_anomaly", 24*3600),
        ("anomaly", 0),
    )
])

In [2]:
# Launch the collection jobs derived from the dataset definition.
# The queue state and log live under <folder>collect_jobs.
utils.runJobs(
    dataset.getJobs(),
    folder + "collect_jobs",
    nbProcess=4,  # number of jobs processed in parallel
)

################
# Run jobs list 
################
Number of processes to execute: 2
Number of processes in parallel: 4
Processing queue: started
To monitor the execution run: watch -n 1 cat dataset/collect_jobs/queue.log


  0%|          | 0/2 [00:00<?, ?it/s]

Processing queue: finish
Computation time: 0h 5m 31s


## Features

In [4]:
from BML.transform import DatasetTransformation
from BML import utils
# Same dataset folder as the collection step.
folder = "dataset/"

# Feature-extraction transformation applied to the collected data.
datTran = DatasetTransformation(folder, "BML.transform", "Features")

datTran.setParams({
    "global": dict(
        # NOTE(review): an earlier comment here said "every minute", but the
        # period is 3 -- presumably one extraction every 3 minutes; confirm units.
        Period=3,
        # presumably forces recomputation even if results already exist -- confirm
        SkipIfExist=False,
    ),
})

# Execute the transformation jobs; no nbProcess is given, so the library
# default is used (the recorded run reports 1 process in parallel).
utils.runJobs(
    datTran.getJobs(),
    folder + "transform_jobs",
)

################
# Run jobs list 
################
Number of processes to execute: 2
Number of processes in parallel: 1
Processing queue: started
To monitor the execution run: watch -n 1 cat dataset/transform_jobs/queue.log


  0%|          | 0/2 [00:00<?, ?it/s]

Processing queue: finish
Computation time: 0h 0m 5s


## Shutil

In [5]:
import shutil
# Zip the contents of the 'dataset' directory into ConEdison.zip in the
# current working directory; the returned archive path is the cell output.
shutil.make_archive(base_name='ConEdison', format='zip', root_dir='dataset')

'/home/jovyan/ConEdison.zip'

In [6]:
import shutil
# Destructive: removes the whole 'dataset' directory tree. Raises if the
# directory is missing, so this cell is not safe to run twice in a row.
shutil.rmtree(path='dataset')

In [8]:
import shutil
# Extract IndoSat.zip into 'dataset'.
# NOTE(review): presumably an archive saved by an earlier run of this
# notebook for the IndoSat event -- confirm where it comes from.
shutil.unpack_archive(filename='IndoSat.zip', extract_dir='dataset')