In [None]:
# users.csv

import csv
from faker import Faker
from datetime import datetime, timedelta

# Initialize Faker
fake = Faker()

# Define the number of users to generate
num_users = 50

# Generate user data
users = []
start_date = datetime(2025, 1, 1, 12, 0, 0)  # Start date for "createdat"
for i in range(1, num_users + 1):
    user_id = f"user{i}"  # Generate user ID
    email = f"user{i}@example.com"  # Generate email
    firstname = fake.first_name()  # Generate first name
    lastname = fake.last_name()  # Generate last name
    created_at = (start_date + timedelta(days=i)).isoformat() + "Z"  # Increment date by days
    users.append([user_id, email, firstname, lastname, created_at])

# Define CSV headers
headers = ['id', 'email', 'firstname', 'lastname', 'createdat']

# Write to a CSV file
output_file = "./data/users.csv"
with open(output_file, mode="w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(headers)  
    writer.writerows(users)   

print(f"CSV file with {num_users} users created: {output_file}")


CSV file with 50 users created: ./data/users.csv


In [13]:
# feedback.csv

import csv
import random

user_ids = [f"user{i}" for i in range(1, 10)]  

# Load news IDs from news_test.csv
news_file = "./data/news_test.csv"
with open(news_file, "r", encoding="utf-8") as file:
    news_data = list(csv.DictReader(file))
    news_ids = [row["News ID"] for row in news_data]  # Extract News IDs

# Define possible actions
actions = ["not_interested", "rated"]

# Generate random feedback data
feedback = []
for _ in range(300):  # Generate 100 rows
    user_id = random.choice(user_ids)
    news_id = random.choice(news_ids)
    action = random.choice(actions)
    rating = round(random.uniform(1, 5), 1) if action == "rated" else None
    feedback.append([user_id, news_id, action, rating])

# Define headers for feedback.csv
headers = ["user_id", "news_id", "action", "rating"]

# Write to feedback.csv
output_file = "./data/feedback.csv"
with open(output_file, mode="w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(headers)  # Write headers
    writer.writerows(feedback)  # Write feedback rows

print(f"Generated feedback.csv with 100 random entries.")


Generated feedback.csv with 100 random entries.


In [14]:
# liked_clubs.csv

import csv
import random
from datetime import datetime, timedelta

# Mock data for users and clubs
user_ids = [f"user{i}" for i in range(1, 10)]  # Example: user1, user2, ..., user50
club_ids = list(range(1, 100))  # Example: 1, 2, ..., 100

# Generate random liked clubs data
liked_clubs = []
click_id = 1  # Initialize click_id
start_date = datetime(2024, 1, 1, 12, 0, 0)  # Start date for timestamps

for _ in range(400):  # Generate 100 rows
    user_id = random.choice(user_ids)
    club_id = random.choice(club_ids)
    action = "liked"
    timestamp = (start_date + timedelta(days=random.randint(0, 365), seconds=random.randint(0, 86400))).strftime("%Y-%m-%d %H:%M:%S")
    liked_clubs.append([click_id, club_id, user_id, action, timestamp])
    click_id += 1

# Define headers for liked_clubs.csv
headers = ["click_id", "club_id", "user_id", "action", "timestamp"]

# Write to liked_clubs.csv
output_file = "./data/liked_clubs.csv"
with open(output_file, mode="w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(headers)  # Write headers
    writer.writerows(liked_clubs)  # Write liked clubs rows

print(f"Generated liked_clubs.csv with 100 random entries.")


Generated liked_clubs.csv with 100 random entries.


In [15]:
matches_csv_file = "./data/matches.csv"

matches = []
with open(matches_csv_file, mode="r", encoding="utf-8") as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        matches.append({
            "match_id": int(row["Match ID"]),  # Convert Match ID to integer
            "home_team": row["Home Team"],
            "away_team": row["Away Team"]
        })
        
user_ids = [f"user{i}" for i in range(1, 10)]  

bet_types = ["win", "lose", "draw"]
statuses = ["won", "lost", "pending"]

# Start date for created_at
start_date = datetime(2025, 1, 1, 12, 0, 0)

# Generate random bets data
bets = []
for bet_id in range(1, 101):  # Generate 100 rows
    user_id = random.choice(user_ids)
    match = random.choice(matches)
    bet_type = random.choice(bet_types)
    selected_team = random.choice([match["home_team"], match["away_team"]])
    amount = round(random.uniform(10.0, 100.0), 2)
    coefficient = round(random.uniform(1.5, 5.0), 2)
    potential_win = round(amount * coefficient, 2)
    status = random.choice(statuses)
    created_at = (start_date + timedelta(days=random.randint(0, 30), seconds=random.randint(0, 86400))).isoformat() + "Z"
    bets.append([bet_id, user_id, match["match_id"], bet_type, selected_team, amount, potential_win, coefficient, status, created_at])

# Define headers for bets.csv
headers = ["bet_id", "user_id", "match_id", "bet_type", "selected_team", "amount", "potential_win", "coefficient", "status", "created_at"]

# Write to bets.csv
output_file = "./data/bets.csv"
with open(output_file, mode="w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(headers)  # Write headers
    writer.writerows(bets)  # Write bets rows

print(f"Generated bets.csv with 100 random entries.")


Generated bets.csv with 100 random entries.
