# In [None]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Define the date range and sampling interval.
# NOTE: end_date is midnight at the start of 2024-07-30 and the range is
# inclusive, so the final day contributes only its 00:00 sample.
start_date = datetime(2024, 1, 1)
end_date = datetime(2024, 7, 30)
# Use the "min" offset alias: the legacy "T" alias for minutes is deprecated
# in recent pandas releases, while "min" is accepted by old and new versions.
date_range = pd.date_range(start=start_date, end=end_date, freq='15min')  # 15-minute intervals

# Catalogue of service groups; each key maps to the service names that can be
# drawn for that group when a usage row is generated.
service_groups = {
    "Gaming": [
        "Fortnite",
        "League of Legends",
        "Valorant",
    ],
    "Social Media": [
        "Instagram",
        "Facebook",
        "LinkedIn",
        "Twitter",
    ],
    "Streaming": [
        "YouTube",
        "Netflix",
        "Hulu",
    ],
    "Shopping": [
        "Amazon",
        "eBay",
        "Etsy",
    ],
    "Software": [
        "Gmail",
        "Outlook",
        "BlueJeans",
        "Slack",
        "WebEx",
        "GMeet",
        "Microsoft Teams",
    ],
}

# Users in the dataset, the device types each service group may be used on,
# and one randomly generated MAC address per user.
users = ["user1", "user2"]
device_types = {
    "Gaming": ["TV", "Mobile"],
    "Social Media": ["Mobile", "Tablet"],
    "Streaming": ["TV", "Mobile"],
    "Software": ["PC"],
    "Shopping": ["Mobile", "Tablet", "PC"],
}


def _random_mac():
    """Return a MAC string with the fixed prefix 02:00:00 and three random octets."""
    tail = ":".join(f"{random.randint(0x00, 0xFF):02x}" for _octet in range(3))
    return f"02:00:00:{tail}"


# Same position in `mac_addresses` as the user's position in `users`.
mac_addresses = [_random_mac() for _ in users]

# Data generation
def _pick_service(user, hour, day_of_week):
    """Choose the (service_group, service_name) pair for one user and time slot.

    Args:
        user: User label; "user1" and "user2" have dedicated weekend patterns.
        hour: Hour of day of the sample (0-23).
        day_of_week: Value of ``timestamp.weekday()`` (Monday is 0, Sunday is 6).

    Returns:
        A ``(service_group, service_name)`` tuple where ``service_name`` is a
        member of ``service_groups[service_group]``.
    """
    is_weekend = day_of_week >= 5

    # User-specific weekend habits take priority over the general patterns.
    if user == "user1" and is_weekend and 19 <= hour < 22:
        # User1 uses Netflix during weekends between 7 PM and 10 PM
        return "Streaming", "Netflix"
    if user == "user2" and is_weekend and 16 <= hour < 21:
        # User2 does more gaming during weekends between 4 PM and 9 PM
        return "Gaming", random.choice(service_groups["Gaming"])

    if not is_weekend and 9 <= hour < 19:
        # Weekday 9 AM - 7 PM: mostly "Software" (about 80% of draws) for both users
        group = "Software" if random.random() > 0.2 else random.choice(["Social Media", "Streaming"])
    elif day_of_week < 4 and hour >= 19:
        # Monday-Thursday evenings: mostly gaming, otherwise streaming
        group = "Gaming" if random.random() > 0.3 else "Streaming"
    elif day_of_week == 4 and hour >= 20:
        # Friday from 8 PM: mostly shopping, otherwise one of three leisure groups
        group = random.choice(["Social Media", "Streaming", "Gaming"]) if random.random() > 0.7 else "Shopping"
    elif is_weekend:
        # Weekend slots outside the user-specific windows above
        group = random.choice(["Shopping", "Streaming", "Gaming"])
    else:
        # Remaining weekday slots (before 9 AM, and Friday 7 PM - 8 PM)
        group = "Streaming" if random.random() > 0.5 else "Social Media"

    # Select a service name based on service group
    return group, random.choice(service_groups[group])


data_adjusted = []

# One row per (timestamp, user) pair; each user keeps the same MAC address throughout.
for timestamp in date_range:
    for user, mac_address in zip(users, mac_addresses):
        hour = timestamp.hour
        day_of_week = timestamp.weekday()

        service_group, service_name = _pick_service(user, hour, day_of_week)

        # Select an appropriate device type based on service group
        device_type = random.choice(device_types[service_group])

        # Generate random values for other columns
        # NOTE(review): usage_minutes can exceed the 15-minute sampling interval;
        # kept as in the original generator - confirm this is intended.
        usage_minutes = random.randint(1, 200)
        usage_percentage = round(random.uniform(10, 100), 2)
        signal_strength = round(random.uniform(-60, -40), 2)
        packet_loss_rate = round(random.uniform(0, 1), 4)
        latency = round(random.uniform(10, 50), 2)
        jitter_ms = round(random.uniform(0, 10), 2)
        traffic_spike = random.choice([0, 1])
        bandwidth_speed_per_sec_mbps = round(random.uniform(5, 15), 2)
        buffer_occupancy = round(random.uniform(0, 1), 4)

        # Append row to data (order must match the DataFrame column list)
        data_adjusted.append([user, timestamp, service_group, service_name, usage_minutes, usage_percentage, mac_address,
                              device_type, signal_strength, packet_loss_rate, latency, jitter_ms, traffic_spike,
                              bandwidth_speed_per_sec_mbps, buffer_occupancy])

# Column names, in the same order as the values appended to each generated row
column_names = [
    "user",
    "timestamp",
    "service_group",
    "service_name",
    "usage_minutes",
    "usage_percentage",
    "mac_address",
    "device_type",
    "signal_strength",
    "packet_loss_rate",
    "latency",
    "jitter_ms",
    "traffic_spike",
    "bandwidth_speed_per_sec_mbps",
    "buffer_occupancy",
]

# Assemble the generated rows into a DataFrame
df_balanced_adjusted = pd.DataFrame(data_adjusted, columns=column_names)

# Write the dataset to CSV without the index column, then report completion
output_path = "netflix_pattern.csv"
df_balanced_adjusted.to_csv(output_path, index=False)
print("Dataset created and saved as 'netflix_pattern.csv'")
