In [None]:
# Mount Google Drive into the Colab runtime at /content/drive; the rest of the
# notebook reads and writes files under '/content/drive/My Drive/...'.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os

# Make sure the Drive folders used for log output and input data exist.
# exist_ok=True turns the call into a no-op when the folder is already there,
# which replaces the separate os.path.exists() check and its check-then-create
# race, and keeps the cell safe to re-run.
log_dir = '/content/drive/My Drive/historical_logs'
os.makedirs(log_dir, exist_ok=True)

data_dir = '/content/drive/My Drive/historical_data'
os.makedirs(data_dir, exist_ok=True)


In [None]:
# Mount Google Drive at /content/drive. The same mount call also appears in the
# first cell; it is repeated here, presumably so this cell can be run on its own.
from google.colab import drive
drive.mount('/content/drive')

import os
import json
import pandas as pd
import random
from datetime import datetime, timedelta
import numpy as np

# Create directories if they don't exist.
# exist_ok=True makes os.makedirs a no-op for an existing folder, so no
# separate os.path.exists() check (and its check-then-create race) is needed.
log_dir = '/content/drive/My Drive/historical_logs'
os.makedirs(log_dir, exist_ok=True)

data_dir = '/content/drive/My Drive/historical_data'
os.makedirs(data_dir, exist_ok=True)

# Load data from JSON files
def _load_json(filename):
    """Parse one JSON file located in `data_dir` and return the decoded content.

    Args:
        filename: File name relative to `data_dir`, e.g. 'employees.json'.

    Returns:
        Whatever `json.load` produces for the file.

    Raises:
        FileNotFoundError: If the file is missing from `data_dir`.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Open explicitly as UTF-8 instead of relying on the platform default encoding.
    with open(os.path.join(data_dir, filename), encoding='utf-8') as f:
        return json.load(f)


employees = _load_json('employees.json')
credentials = _load_json('credentials.json')
internal_emails = _load_json('internal_emails.json')
external_emails = _load_json('external_emails.json')
applications = _load_json('applications.json')

# The phishing dataset is a CSV file and is read as a pandas DataFrame.
phish_data = pd.read_csv(os.path.join(data_dir, 'malicious_phish.csv'))

# Pool of 10 machine identifiers: "machine_1" through "machine_10"
machines = ["machine_" + str(number) for number in range(1, 11)]

# Define activity lists
# Activity names treated as ordinary user/administrator actions. When one of
# these is drawn, simulate_user_activities logs it with the employee's own
# `malicious` flag. The order matters: get_activity_weights returns the
# weights for this list first, followed by those for malicious_activities.
normal_activities = [
    "login", "logout", "file_create", "file_delete", "file_modify", "file_copy", "file_move",
    "send_email", "receive_email", "open_email_attachment", "web_browsing", "download_file",
    "stream_video", "open_application", "close_application", "install_application", "uninstall_application",
    "connect_vpn", "disconnect_vpn", "upload_to_cloud", "download_from_cloud", "run_sql_query",
    "node_js_interaction", "meeting", "idle", "check_disk_space", "list_processes", "network_ping",
    "check_memory_usage", "install_package", "uninstall_package", "run_benchmark", "backup_files",
    "restore_files", "create_user", "delete_user", "add_to_group", "remove_from_group", "change_password",
    "check_system_logs", "update_system", "upgrade_system", "reboot_system", "shutdown_system",
    "restart_service", "stop_service", "start_service", "check_service_status", "list_open_ports",
    "network_trace", "check_network_connections", "view_system_info", "list_installed_packages",
    "version_check", "install_docker", "uninstall_docker", "run_docker_container", "stop_docker_container",
    "remove_docker_container", "pull_docker_image", "push_docker_image", "create_database",
    "delete_database", "backup_database", "restore_database", "create_table", "delete_table",
    "insert_data", "update_data", "delete_data", "query_data", "create_index", "delete_index",
    "monitor_database", "check_database_status", "optimize_database", "repair_database", "start_vpn",
    "stop_vpn", "check_vpn_status", "install_vpn", "uninstall_vpn", "connect_to_wifi", "disconnect_from_wifi",
    "list_wifi_networks", "check_wifi_status", "configure_firewall", "enable_firewall", "disable_firewall",
    "check_firewall_status", "add_firewall_rule", "remove_firewall_rule", "list_firewall_rules",
    "test_firewall_rules", "view_firewall_logs", "monitor_network_traffic", "analyze_network_traffic",
    "generate_network_report", "generate_system_report", "generate_security_report", "test_security_policy"
]

# Activity names treated as attacks. When one of these is drawn,
# simulate_user_activities always logs it with malicious=1, regardless of the
# employee's own flag.
malicious_activities = [
    "unauthorized_access", "data_exfiltration", "privilege_escalation",
    "credential_stealing", "malware_installation", "brute_force_attack", "network_scanning",
    "ddos_attack", "sql_injection", "cross_site_scripting", "phishing_attack",
    "install_backdoor", "run_malicious_script", "disable_security_tools", "modify_system_logs",
    "clear_system_logs", "hide_malware", "stealth_network_scanning", "network_sniffing",
    "spoof_network_packets", "tamper_data", "exploit_vulnerability", "download_sensitive_files",
    "upload_malicious_files", "modify_file_permissions", "delete_system_files", "overwrite_system_files",
    "disable_network_security", "bypass_authentication", "create_fake_users", "create_fake_logs"
]

# Statistical approach to determine activity weights
def get_activity_weights(normal_activities, malicious_activities, malicious):
    """Draw one noisy sampling weight per activity name.

    Each weight starts as a sample from a normal distribution with mean 1 and
    standard deviation 0.1 and is then scaled according to `malicious`:

    * truthy: normal weights x 0.3, malicious weights x 3
    * falsy:  normal weights x 0.99, malicious weights x 0.01

    Only the lengths of the two lists are used.

    Args:
        normal_activities: Sequence of normal activity names.
        malicious_activities: Sequence of malicious activity names.
        malicious: Whether the simulated user is malicious.

    Returns:
        1-D NumPy array holding the normal-activity weights followed by the
        malicious-activity weights.
    """
    if malicious:
        normal_scale, malicious_scale = 0.3, 3
    else:
        normal_scale, malicious_scale = 0.99, 0.01

    # Sample the normal block first, then the malicious block, so results under
    # a fixed NumPy seed stay the same.
    normal_part = np.random.normal(1, 0.1, len(normal_activities)) * normal_scale
    malicious_part = np.random.normal(1, 0.1, len(malicious_activities)) * malicious_scale

    return np.concatenate((normal_part, malicious_part))

# Function to log activities
def log_activity(activity, detail, malicious, machine, current_time,
                 log_path='/content/drive/My Drive/historical_logs/historical_logs.log',
                 verbose=False):
    """Append one activity record to the log file as a single JSON line.

    Args:
        activity: Activity name stored under "activity".
        detail: Free-text description stored under "detail".
        malicious: Truthy to store 1 under "malicious", falsy to store 0.
        machine: Machine identifier stored under "machine".
        current_time: Object with an `isoformat()` method (e.g. a datetime);
            its ISO string is stored under "timestamp".
        log_path: File to append to. Defaults to the historical log on Drive.
        verbose: When True, also print the record. Off by default because the
            simulation emits thousands of records and printing each one floods
            the cell output.
    """
    log_entry = {
        "timestamp": current_time.isoformat(),
        "activity": activity,
        "detail": detail,
        "malicious": 1 if malicious else 0,
        "machine": machine,
    }
    if verbose:
        print(log_entry)
    # Append mode: earlier records in the file are kept.
    with open(log_path, 'a', encoding='utf-8') as log_file:
        log_file.write(json.dumps(log_entry) + '\n')

# Function to simulate user activities
def simulate_user_activities(username, employee_id, malicious, start_date, end_date=None):
    """Generate and log a random activity timeline for one user.

    Starting at `start_date`, repeatedly draws an activity (weighted by
    `get_activity_weights`), picks a random machine, logs the event through
    `log_activity`, and advances the clock by a random 5-60 minutes until
    `end_date` is reached.

    Args:
        username: Accepted for interface compatibility; currently unused.
        employee_id: Accepted for interface compatibility; currently unused.
        malicious: Whether the user is malicious. This skews the activity
            weights and is the flag logged for normal activities.
        start_date: datetime of the first simulated event.
        end_date: datetime at which the simulation stops (exclusive).
            Defaults to `datetime.now()` at call time.

    NOTE(review): because `username` and `employee_id` are never passed on,
    the written records do not identify which employee produced them.
    """
    if end_date is None:
        end_date = datetime.now()

    # Loop-invariant: build the combined choice list and the lookup set once
    # instead of on every iteration.
    activities = normal_activities + malicious_activities
    normal_lookup = set(normal_activities)

    current_time = start_date
    while current_time < end_date:
        # Weights are re-drawn for every event so each draw gets fresh noise.
        weights = get_activity_weights(normal_activities, malicious_activities, malicious)
        activity = random.choices(activities, weights=weights, k=1)[0]
        machine = random.choice(machines)

        # Normal activities carry the user's own flag; anything else comes from
        # malicious_activities and is always flagged.
        flagged = malicious if activity in normal_lookup else True
        log_activity(activity, f"Details of {activity}", flagged, machine, current_time)

        # Increment the current time by a random interval
        current_time += timedelta(minutes=random.randint(5, 60))

# Replay the last 90 days (about 3 months) of activity for every employee record
start_date = datetime.now() - timedelta(days=90)
for employee in employees:
    simulate_user_activities(
        username=employee['username'],
        employee_id=employee['employee_id'],
        malicious=employee['malicious'],
        start_date=start_date,
    )


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
{'timestamp': '2024-07-22T01:45:28.654832', 'activity': 'ddos_attack', 'detail': 'Details of ddos_attack', 'malicious': 1, 'machine': 'machine_3'}
{'timestamp': '2024-07-22T01:56:28.654832', 'activity': 'malware_installation', 'detail': 'Details of malware_installation', 'malicious': 1, 'machine': 'machine_10'}
{'timestamp': '2024-07-22T02:03:28.654832', 'activity': 'create_fake_logs', 'detail': 'Details of create_fake_logs', 'malicious': 1, 'machine': 'machine_10'}
{'timestamp': '2024-07-22T02:26:28.654832', 'activity': 'delete_system_files', 'detail': 'Details of delete_system_files', 'malicious': 1, 'machine': 'machine_8'}
{'timestamp': '2024-07-22T02:46:28.654832', 'activity': 'disable_network_security', 'detail': 'Details of disable_network_security', 'malicious': 1, 'machine': 'machine_1'}
{'timestamp': '2024-07-22T02:55:28.654832', 'activity': 'exploit_vulnerability', 'detail': 'Details of exploit_vulnerability', '