In [1]:
from IPython.display import display, HTML

# Stretch the notebook container to the full browser width.
full_width_css = "<style>.container { width:100% !important; }</style>"
display(HTML(full_width_css))

In [2]:
import os
import pandas as pd
from pathlib import Path
from pyngrok import ngrok
import re
import subprocess
from uuid import uuid4

from netunicorn.base import Pipeline
from netunicorn.library.tasks.capture.tcpdump import StartCapture, StopNamedCapture
from netunicorn.library.tasks.measurements.ookla_speedtest import SpeedTest

from execute_pipeline import execute_pipeline, healthcheck
from get_proxies import get_proxies
from background_traffic import StartBackgroundTraffic, StopNamedBackgroundTraffic
from youtube_watcher import WatchYouTubeVideo
from vimeo_watcher import WatchVimeoVideo
from twitch_watcher import WatchTwitchStream
from webdav import UploadToWebDav

In [3]:
# Run the project's health check (imported from execute_pipeline) before any
# pipeline is built; the recorded output shows the health flag and the node
# pools it reported.
# NOTE(review): what exactly is verified is not visible here — confirm in execute_pipeline.py.
healthcheck()

Health Check: True
[[snl-server-5], <Uncountable node pool with next node template: [aws-fargate-A-cs190n4-, aws-fargate-B-cs190n4-, aws-fargate-ARM64-cs190n4-]>]


Manually `exec` each helper module's source in the notebook's namespace so that its definitions get pickled by value.

In [4]:
# Re-run each helper module's source in the notebook's global namespace so its
# definitions get pickled by value. `__name__` is temporarily rebound to the
# module's name while that module's source executes.
__name_bak__ = __name__
try:
    for __name__ in [
        "background_traffic",
        "youtube_watcher",
        "vimeo_watcher",
        "twitch_watcher",
        "webdav"
    ]:
        with open(f"{__name__}.py", "r") as file:
            exec(file.read())
finally:
    # Restore the real name even when a module is missing or fails to execute;
    # otherwise every later cell would run under the last helper module's name.
    __name__ = __name_bak__

This is the list of videos that we are testing.

In [5]:
# YouTube: IDs as they appear in the `v=` query parameter.
youtube_ids = ["dQw4w9WgXcQ", "r5JYHXtt_rw", "pxEV1A5mTYM", "Ct6BUPvE2sM", "KjtYZpqvt50"]
youtube_videos = [
    f"https://www.youtube.com/watch?v={video_id}"
    for video_id in youtube_ids
]

# Vimeo: numeric IDs; the URL carries `autoplay=1`.
vimeo_ids = [375468729, 347119375, 297124334, 476306167, 515893651]
vimeo_videos = [
    f"https://vimeo.com/{video_id}?autoplay=1"
    for video_id in vimeo_ids
]

# Twitch: numeric video IDs.
twitch_ids = [2322690366, 2298318732, 2316652767, 1867242354, 2320975412]
twitch_videos = [
    f"https://twitch.tv/video/{video_id}"
    for video_id in twitch_ids
]

Load a list of proxies from https://free-proxy-list.net/ and create a Chrome argument for the proxy configuration.

In [6]:
# Keep only the proxies whose `https` attribute is truthy.
proxies = [candidate for candidate in get_proxies() if candidate.https]

# One URL per proxy, followed by a final `direct://` entry.
proxy_urls = [f'https://{entry.ip}:{entry.port}' for entry in proxies]
proxy_urls.append('direct://')

# Browser flag holding the comma-separated proxy list.
proxy_arg = f"--proxy-server={','.join(proxy_urls)}"

Use `ngrok` to expose port `8080`. We'll use this as an HTTPS proxy.

In [7]:
# Open a TCP tunnel to local port 8080 (the port the local proxy is started on
# in a later cell).
tcp_tunnel = ngrok.connect("8080", "tcp")

# The public URL has the form "tcp://host:port" (see the recorded output);
# drop the scheme explicitly instead of slicing off a fixed number of characters.
tunnel_address = tcp_tunnel.public_url.removeprefix("tcp://")
self_proxy = f"https://{tunnel_address}"

# Browser flag: try the self-hosted proxy first, then a `direct://` entry.
self_proxy_arg = f"--proxy-server={self_proxy},direct://"
tcp_tunnel

<NgrokTunnel: "tcp://0.tcp.us-cal-1.ngrok.io:16376" -> "localhost:8080">

For each of YouTube, Vimeo, and Twitch, create 10 of each of the following types of pipelines:
- a pipeline where we are just watching five videos
- a pipeline using proxies from https://free-proxy-list.net/
- a pipeline using a proxy on the host machine
- a pipeline where we run a speedtest in the background

In [8]:
from collections.abc import Callable
from typing import Any, Type

from netunicorn.base import Task

# Random folder name for this run; it is handed to the upload task and reused
# later when locating the uploaded captures on disk.
subdirectory = str(uuid4())

# Site key -> (task class used to watch that site, URLs passed to the task).
watch_tasks: dict[
    str,
    tuple[Type[WatchYouTubeVideo | WatchVimeoVideo | WatchTwitchStream], list[str]]
] = dict(
    youtube=(WatchYouTubeVideo, youtube_videos),
    vimeo=(WatchVimeoVideo, vimeo_videos),
    twitch=(WatchTwitchStream, twitch_videos),
)

# Setup commands shared by every variant (all of them record with tcpdump).
# Each entry below copies these into its own fresh list, so no two entries
# share a mutable `requirements` object.
TCPDUMP_REQUIREMENTS = (
    "sudo apt-get update",
    "sudo apt-get install -y tcpdump",
)

# Variant tag (embedded in the task name) -> extra keyword arguments that are
# splatted into the watch-task constructor:
#   'processes'           callable taking the task name and returning the list
#                         of background `Popen` handles it started
#   'webdriver_arguments' extra browser flags (proxy variants only)
#   'requirements'        shell commands (presumably run on the node before the
#                         task starts — confirm against the task base class)
# NOTE(review): every lambda is kept self-contained on purpose; they are
# presumably serialized together with the task, so confirm how that works
# before factoring the tcpdump call into a shared helper.
watch_types: dict[str, dict[str, Any]] = {
    # Baseline: capture only.
    '_': {
        'processes': lambda name: [
            subprocess.Popen(
                ["tcpdump", "-i", "any", "-U", "-w", f"{name}.pcap"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
        ],
        'requirements': [*TCPDUMP_REQUIREMENTS]
    },
    # Browser routed through the public proxy list.
    '_proxy_': {
        'processes': lambda name: [
            subprocess.Popen(
                ["tcpdump", "-i", "any", "-U", "-w", f"{name}.pcap"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
        ],
        'webdriver_arguments': [proxy_arg],
        'requirements': [*TCPDUMP_REQUIREMENTS]
    },
    # Browser routed through the self-hosted proxy behind the ngrok tunnel.
    '_proxy1_': {
        'processes': lambda name: [
            subprocess.Popen(
                ["tcpdump", "-i", "any", "-U", "-w", f"{name}.pcap"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
        ],
        'webdriver_arguments': [self_proxy_arg],
        'requirements': [*TCPDUMP_REQUIREMENTS]
    },
    # Capture while a speed test loops in the background.
    '_speedtest_': {
        'processes': lambda name: [
            subprocess.Popen(
                ["tcpdump", "-i", "any", "-U", "-w", f"{name}.pcap"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            ),
            subprocess.Popen(
                ["sh", "-c", "while true; do speedtest-cli --simple --secure; done"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
        ],
        'requirements': [*TCPDUMP_REQUIREMENTS, "pip install speedtest-cli"]
    }
}

def generate_pipeline(i: int) -> Pipeline:
    """Build the pipeline for repetition ``i``.

    One watch task is appended for every (variant, site) pair, with variants in
    the outer loop and sites in the inner loop. A single upload task is appended
    last; it receives one ``<task name>.pcap`` path per watch task.

    Args:
        i: Repetition index; it is the suffix of every task name.

    Returns:
        Whatever ``Pipeline.then`` returns for the final upload task.
    """
    # NOTE(review): the WebDAV endpoint and credentials below are hardcoded in
    # the notebook; consider loading them from the environment.
    pipeline = Pipeline(early_stopping=False)
    capture_files: set[str] = set()

    for variant, task_kwargs in watch_types.items():
        for site, (task_cls, urls) in watch_tasks.items():
            task_name = f"{site}{variant}{i}"
            capture_files.add(f"{task_name}.pcap")

            watch_task = task_cls(
                video_url=urls,
                duration=30,
                name=task_name,
                **task_kwargs
            )
            pipeline.then(watch_task)

    upload_task = UploadToWebDav(
        filepaths=capture_files,
        endpoint="http://snl-server-5.cs.ucsb.edu/cs190n/cs190n4/capture",
        username="uploader",
        password="uploader",
        subdirectory=subdirectory,
        name=f"upload_{i}"
    )
    return pipeline.then(upload_task)

# Ten repetitions, indexed 0 through 9.
pipelines = list(map(generate_pipeline, range(10)))

In [9]:
# Label and node selector passed to `execute_pipeline` in the next cell.
experiment_label = "team-4-experiment-1"
working_node = "aws"

In [None]:
# Start the local Node proxy on port 8080 (the port the ngrok tunnel forwards
# to), run the experiment, and always shut the proxy down afterwards.
with open("./proxy_out.log", "w") as stdout, open("./proxy_err.log", "w") as stderr:
    proxy_env = os.environ.copy()
    proxy_env["PORT"] = "8080"
    proxy_process = subprocess.Popen(["node", "proxy.js"], env=proxy_env, stdout=stdout, stderr=stderr)
    try:
        result = execute_pipeline(pipelines, working_node, experiment_label)
    finally:
        # Stop the proxy even if the experiment raises or the cell is
        # interrupted, then reap it so no orphan/zombie process is left behind.
        proxy_process.terminate()
        try:
            proxy_process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            # The proxy ignored SIGTERM; force it down.
            proxy_process.kill()
            proxy_process.wait()

result

ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.PREPARING
ExperimentStatus.READY
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
ExperimentStatus.RUNNING
Experim

In [11]:
directory = f"/mnt/md0/cs190n/cs190n4/capture/{subdirectory}"

# Run the `mielverkerken/cicflowmeter` image once per `.pcap` in this run's
# capture folder. The capture folder is mounted at /tmp/input and
# ./data/unprocessed at /tmp/output inside the container.
for file in os.listdir(directory):
    if not file.endswith(".pcap"):
        continue

    print(os.path.join(directory, file))

    # The original shell line ended in `> /dev/null 2&>1`, which passes a stray
    # `2` argument to the container and writes the output to a file named `1`.
    # Discard both streams explicitly and pass arguments as a list so no shell
    # parsing is involved.
    completed = subprocess.run(
        [
            "docker", "run",
            "-v", f"{directory}:/tmp/input",
            "-v", "./data/unprocessed:/tmp/output",
            "mielverkerken/cicflowmeter",
            f"/tmp/input/{file}",
            "/tmp/output",
        ],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    if completed.returncode != 0:
        # Output is discarded, so at least surface that the conversion failed.
        print(f"  docker exited with status {completed.returncode}")

/mnt/md0/cs190n/cs190n4/capture/726b33c4-d8b0-4916-bb44-b53782dfabdd/twitch_capture_0.pcap
/mnt/md0/cs190n/cs190n4/capture/726b33c4-d8b0-4916-bb44-b53782dfabdd/twitch_capture_1.pcap
/mnt/md0/cs190n/cs190n4/capture/726b33c4-d8b0-4916-bb44-b53782dfabdd/twitch_capture_2.pcap
/mnt/md0/cs190n/cs190n4/capture/726b33c4-d8b0-4916-bb44-b53782dfabdd/twitch_capture_3.pcap
/mnt/md0/cs190n/cs190n4/capture/726b33c4-d8b0-4916-bb44-b53782dfabdd/twitch_capture_4.pcap
/mnt/md0/cs190n/cs190n4/capture/726b33c4-d8b0-4916-bb44-b53782dfabdd/twitch_capture_5.pcap
/mnt/md0/cs190n/cs190n4/capture/726b33c4-d8b0-4916-bb44-b53782dfabdd/twitch_capture_6.pcap
/mnt/md0/cs190n/cs190n4/capture/726b33c4-d8b0-4916-bb44-b53782dfabdd/twitch_capture_7.pcap
/mnt/md0/cs190n/cs190n4/capture/726b33c4-d8b0-4916-bb44-b53782dfabdd/twitch_capture_8.pcap
/mnt/md0/cs190n/cs190n4/capture/726b33c4-d8b0-4916-bb44-b53782dfabdd/twitch_capture_9.pcap
/mnt/md0/cs190n/cs190n4/capture/726b33c4-d8b0-4916-bb44-b53782dfabdd/twitch_capture_proxy1