# In [None]:
import requests
import pandas as pd
import time
import numpy as np
import os
import csv
import urllib3
# test_stream() calls requests.get(..., verify=False); silence the
# InsecureRequestWarning that unverified HTTPS requests would otherwise emit.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# =========================
# CONFIG
# =========================
# Default number of seconds test_stream() keeps downloading from one URL.
TEST_DURATION = 20
# chunk_size (in bytes) passed to response.iter_content().
CHUNK_SIZE = 8192
# Timeout, in seconds, handed to requests.get().
TIMEOUT = 10
# Input CSV names; process_files() looks for each one under "../".
INPUT_FILES = ["football.csv", "tennis.csv", "other.csv"]
# Results CSV written by process_files(); opened with mode "w", so it is
# overwritten on every run.
OUTPUT_FILE = "stream_test_results.csv"
# Headers sent with every request; the User-Agent is a VLC/LibVLC 3.0.20 string.
VLC_HEADER = {"User-Agent": "VLC/3.0.20 LibVLC/3.0.20"}


# =========================
# STREAM TEST FUNCTION
# =========================
def test_stream(url, duration=TEST_DURATION):
    """Probe a stream URL and measure how well it delivers data.

    Sends a GET request with the VLC-style headers (TLS verification is
    disabled) and, on a 200 response, reads the body in ``CHUNK_SIZE``
    pieces for roughly ``duration`` seconds.

    Args:
        url: Address of the stream to fetch.
        duration: Approximate number of seconds to keep downloading. The
            limit is only checked after a chunk has arrived, so the read
            can overrun it by the time it takes to receive one chunk.

    Returns:
        A dict with these keys:

        * ``status_code`` -- HTTP status of the response, or ``None`` when
          the request itself failed.
        * ``ttfb_sec`` -- ``response.elapsed`` in seconds (time until the
          response headers were parsed).
        * ``avg_speed_kbps`` -- mean throughput computed as
          ``bytes / seconds / 1024``, i.e. KiB per second despite the name.
          ``None`` unless the download loop finished without an error.
        * ``speed_stddev`` -- standard deviation of the gaps, in seconds,
          between consecutive chunk arrivals (a jitter measure, not a
          speed). ``None`` when fewer than two chunks were received.
        * ``bytes_downloaded`` -- payload bytes received, including those
          read before a mid-stream failure.
        * ``error`` -- ``str()`` of the exception that stopped the probe,
          or ``None``.

    Any exception raised while requesting or reading is captured in
    ``error`` instead of propagating, so one bad URL cannot abort a batch.
    """
    result = {
        "status_code": None,
        "ttfb_sec": None,
        "avg_speed_kbps": None,
        "speed_stddev": None,
        "bytes_downloaded": 0,
        "error": None,
    }

    try:
        # The context manager closes the streamed response on every exit
        # path (non-200 return, duration break, exception). Without it the
        # connection stays open because the body is never fully consumed.
        with requests.get(
            url,
            headers=VLC_HEADER,
            stream=True,
            timeout=TIMEOUT,
            allow_redirects=True,
            verify=False,
        ) as response:
            result["status_code"] = response.status_code
            result["ttfb_sec"] = response.elapsed.total_seconds()

            if response.status_code != 200:
                return result

            # perf_counter() is monotonic, so system clock adjustments
            # cannot distort the measured intervals.
            start_time = time.perf_counter()
            chunk_times = []

            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                if not chunk:
                    continue

                now = time.perf_counter()
                chunk_times.append(now)
                # Accumulate straight into the result so the count survives
                # an exception raised later in the stream.
                result["bytes_downloaded"] += len(chunk)

                if now - start_time > duration:
                    break

            total_time = time.perf_counter() - start_time

            if total_time > 0:
                avg_speed = (result["bytes_downloaded"] / total_time) / 1024
                result["avg_speed_kbps"] = round(avg_speed, 2)

            if len(chunk_times) > 1:
                intervals = np.diff(chunk_times)
                result["speed_stddev"] = round(float(np.std(intervals)), 4)

    except Exception as exc:
        # Deliberate best-effort boundary: record the failure and carry on.
        result["error"] = str(exc)

    return result


# =========================
# PROCESS CSV FILES
# =========================
def process_files(files):
    """Test every stream URL listed in the given CSV files and log results.

    Each name in ``files`` is read from one directory above the current
    working directory (``"../" + name``). In every CSV the first column is
    taken as the channel name; each remaining column is treated as a
    provider whose cell holds a stream URL. Blank or missing cells are
    skipped; every other cell is passed to ``test_stream``.

    Results are written to ``OUTPUT_FILE`` (overwritten on each call), one
    row per tested URL, and flushed after every row so finished rows reach
    the file even if the run is interrupted.

    Args:
        files: Iterable of CSV file names to process.

    A file that is missing, empty, malformed or otherwise unreadable is
    reported on stdout and skipped; the remaining files are still processed.
    """
    # Single source of truth for the metric columns, so the header and the
    # data rows cannot drift apart.
    metric_keys = (
        "status_code",
        "ttfb_sec",
        "avg_speed_kbps",
        "speed_stddev",
        "bytes_downloaded",
        "error",
    )

    # Open output file once
    with open(OUTPUT_FILE, mode="w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)

        # Write header
        writer.writerow(
            ["source_file", "channel_name", "provider_column", "url", *metric_keys]
        )
        f.flush()

        for file in files:
            path = "../" + file

            # EAFP: attempt the read and handle failure, rather than checking
            # existence first (which is race-prone and misses other errors).
            try:
                df = pd.read_csv(path)
            except FileNotFoundError:
                print(f"File not found: {file}")
                continue
            except (
                pd.errors.EmptyDataError,
                pd.errors.ParserError,
                UnicodeDecodeError,
                OSError,
            ) as exc:
                # One bad input must not abort the whole batch.
                print(f"Skipping unreadable file: {file} ({exc})")
                continue

            for _, row in df.iterrows():
                channel_name = row.iloc[0]

                for col in df.columns[1:]:
                    url = row[col]

                    if pd.isna(url) or str(url).strip() == "":
                        continue

                    url = str(url).strip()
                    print(f"Testing: {channel_name} | {col}")

                    metrics = test_stream(url)

                    writer.writerow(
                        [file, channel_name, col, url]
                        + [metrics[key] for key in metric_keys]
                    )

                    # Flush immediately so each finished row is written out.
                    f.flush()


# =========================
# MAIN
# =========================
if __name__ == "__main__":
    # Run the whole batch, then report where the results were written.
    process_files(INPUT_FILES)
    print(
        "\nTesting complete.",
        f"Results saved to: {OUTPUT_FILE}",
        sep="\n",
    )