## Death at the Aurora Theater: Data Creation

In [None]:
# police record database
import pandas as pd
import random
import string
from datetime import datetime, timedelta

# Random fixed-length alphanumeric identifier for a police case
def generate_case_id(length=10):
    """Return a random ID made of `length` characters from A-Z and 0-9.

    Characters are sampled with replacement, and nothing here prevents two
    calls from returning the same ID.
    """
    alphabet = string.ascii_uppercase + string.digits
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)

# Police squads a case record can be assigned to
squads = [
    "homicide",
    "narcotics",
    "cybercrime",
    "fraud",
    "burglary",
    "organized crime",
]

# Places a police case record can be attached to
locations = [
    "Aurora Theater",
    "Downtown Park",
    "Riverside Mall",
    "Central Station",
    "Eastside Alley",
    "West End Club",
]

# Function to generate a random date between January 1 of start_year and now
def generate_random_date(start_year=2018):
    """Return a random calendar date formatted as "YYYY-MM-DD".

    The date is `start_year`-01-01 plus a random whole number of days drawn
    from [0, D), where D is the number of complete days elapsed between that
    start and the current local time. The latest possible result is therefore
    the day before today.

    Args:
        start_year: Year whose January 1 is the earliest possible date.

    Returns:
        The chosen date as a "%Y-%m-%d" string.

    Raises:
        ValueError: If January 1 of `start_year` lies in the future.
    """
    start_date = datetime(start_year, 1, 1)
    end_date = datetime.now()
    if end_date < start_date:
        raise ValueError(
            f"start_year {start_year} is in the future; no dates to choose from"
        )
    delta = end_date - start_date
    # random.randrange(0) raises ValueError, which used to happen when this was
    # called on January 1 of start_year. Fall back to the start date itself.
    random_days = random.randrange(delta.days) if delta.days > 0 else 0
    return (start_date + timedelta(days=random_days)).strftime("%Y-%m-%d")

# Build the police-record table one column at a time: 100 values per column,
# generated in the order case_id -> squad -> date -> location.
data = {
    column: [make_value() for _ in range(100)]
    for column, make_value in (
        ("case_id", generate_case_id),
        ("squad", lambda: random.choice(squads)),
        ("date", generate_random_date),
        ("location", lambda: random.choice(locations)),
    )
}

# Police records as a DataFrame (later cells read df["case_id"])
df = pd.DataFrame(data)

In [None]:
# witness reports

from faker import Faker

# Faker instance used to invent person names in this and later cells
fake = Faker()

# Witness reports must point at existing police records, so collect their IDs
case_ids = df["case_id"].to_list()

# Compose a two-sentence witness statement from canned fragments
def generate_testimony():
    """Return one random observation sentence followed by one random action sentence.

    The two sentences are picked independently (observation first, then
    action) and joined with a single space.
    """
    sightings = (
        "I saw a suspicious person near the scene.",
        "There was a loud noise before people started running.",
        "Someone was arguing loudly moments before it happened.",
        "A dark vehicle was speeding away from the area.",
        "I noticed broken glass and heard shouting.",
        "There was a strange man watching the building all day.",
    )
    behaviours = (
        "They looked nervous and kept checking over their shoulder.",
        "They dropped something and quickly picked it up.",
        "They ran off when the sirens started.",
        "They were wearing a hoodie and avoided eye contact.",
        "They seemed to be waiting for someone.",
        "They spoke briefly with another person and left.",
    )
    what_was_seen = random.choice(sightings)
    what_they_did = random.choice(behaviours)
    return " ".join((what_was_seen, what_they_did))

# 150 witness reports, built column by column. Case IDs are drawn with
# replacement from the police records, so a case may have several reports
# or none at all.
witness_data = {
    field: [produce() for _ in range(150)]
    for field, produce in (
        ("case_id", lambda: random.choice(case_ids)),
        ("witness_name", lambda: fake.name()),
        ("testimony", generate_testimony),
    )
}

# Witness reports as a DataFrame
witness_df = pd.DataFrame(witness_data)

In [None]:
# location access
import pandas as pd
import random
from datetime import datetime, timedelta
import string

# Function to generate a random timestamp between January 1 of start_year and now
def generate_random_date(start_year=2023):
    """Return a random timestamp formatted as "YYYY-MM-DD HH:MM:SS".

    NOTE: this rebinds the name of the date-only `generate_random_date`
    defined in an earlier cell. Any call made after this cell has run gets a
    timestamp string rather than a plain date.

    When at least one full day has elapsed since `start_year`-01-01, the
    result is that start plus a random whole-day offset in [0, D) and a random
    second-of-day in [0, 86400), where D is the number of complete elapsed
    days. When less than a full day has elapsed, the offset is limited to the
    seconds elapsed so far so the result never lies in the future.

    Args:
        start_year: Year whose January 1 00:00:00 is the earliest timestamp.

    Returns:
        The chosen moment as a "%Y-%m-%d %H:%M:%S" string.

    Raises:
        ValueError: If January 1 of `start_year` lies in the future.
    """
    start_date = datetime(start_year, 1, 1)
    end_date = datetime.now()
    if end_date < start_date:
        raise ValueError(
            f"start_year {start_year} is in the future; no timestamps to choose from"
        )
    delta = end_date - start_date
    if delta.days > 0:
        random_days = random.randrange(delta.days)
        random_seconds = random.randrange(86400)  # Seconds in a day
    else:
        # Called on January 1 of start_year: randrange(0) would raise, and a
        # full-day seconds offset could overshoot the current time.
        random_days = 0
        random_seconds = random.randrange(delta.seconds + 1)
    return (start_date + timedelta(days=random_days, seconds=random_seconds)).strftime("%Y-%m-%d %H:%M:%S")

# Random alphanumeric member ID for a stage-access log entry
def generate_member_id(length=8):
    """Return `length` random characters from A-Z and 0-9 as one string.

    Sampling is with replacement; repeated calls may produce the same ID.
    """
    symbols = string.ascii_uppercase + string.digits
    return "".join(random.choices(symbols, k=length))

# Areas inside the theater that appear in the access log.
# This rebinds `locations`, replacing the list of police-record locations
# assigned in the first cell.
locations = [
    "main stage",
    "backstage",
    "lighting grid",
    "prop room",
    "dressing room",
    "sound booth",
    "orchestra pit",
]

# Whether the logged person was going in or coming out
directions = [
    "entering",
    "leaving",
]

# 200 access-log rows, generated column by column in the order
# date -> location -> member_id -> direction.
# NOTE(review): every member_id is an independent 8-character random string,
# while employee IDs in the staff cell are 6 characters long, so the two
# columns cannot be joined as written. Confirm whether that is intended.
stage_access_data = dict(
    date=[generate_random_date() for _ in range(200)],
    location=[random.choice(locations) for _ in range(200)],
    member_id=[generate_member_id() for _ in range(200)],
    direction=[random.choice(directions) for _ in range(200)],
)

# Access log as a DataFrame
stage_access_df = pd.DataFrame(stage_access_data)

In [None]:
# staff database
# Generate a synthetic staff database for a theater

# Job titles a staff member can be given
roles = [
    "Stage Manager",
    "Lighting Technician",
    "Sound Engineer",
    "Set Designer",
    "Costume Designer",
    "Actor",
    "Director",
    "Props Master",
    "Makeup Artist",
    "Usher",
    "Box Office Clerk",
    "Production Assistant",
    "Choreographer",
    "Dramaturg",
    "Front of House Manager",
]

# Random alphanumeric employee ID (uniqueness is enforced by the caller, not here)
def generate_employee_id(length=6):
    """Return a random string of `length` characters taken from A-Z and 0-9.

    Each character is sampled independently, so two calls can return the
    same ID; de-duplicate on the calling side if distinct IDs are required.
    """
    charset = string.ascii_uppercase + string.digits
    chars = random.choices(charset, k=length)
    return ''.join(chars)

# Keep drawing IDs until 50 distinct ones exist; the set absorbs any repeats
unique_employee_ids = set()
while len(unique_employee_ids) != 50:
    unique_employee_ids |= {generate_employee_id()}

# Staff table columns: the distinct IDs (in set iteration order), then
# 50 fake names, then 50 randomly chosen roles
staff_data = dict(
    employee_id=list(unique_employee_ids),
    employee_name=[fake.name() for _ in range(50)],
    employee_role=[random.choice(roles) for _ in range(50)],
)

# Staff database as a DataFrame
staff_df = pd.DataFrame(staff_data)

In [None]:
# ticket database
# not needed for solving the case

# Productions a ticket can be sold for
plays = [
    "Hamlet",
    "Macbeth",
    "A Midsummer Night's Dream",
    "Les Misérables",
    "The Phantom of the Opera",
    "Death of a Salesman",
    "Romeo and Juliet",
    "The Lion King",
    "Wicked",
    "Cats",
]

# Seating grid: row letters A-J crossed with seat numbers 1-10, listed
# row by row ("A1", "A2", ..., "J10")
rows = list("ABCDEFGHIJ")
seats = list(map(str, range(1, 11)))
seat_positions = [row + seat for row in rows for seat in seats]

# 300 ticket records. For each record the reservation flag is drawn first,
# then the remaining fields; a reservation name is generated only for
# reserved seats and is an empty string otherwise.
# `generate_random_date` here is whichever definition was bound last; after
# the location-access cell has run, that is the timestamp-producing version.
ticket_data = [
    {
        "date": generate_random_date(),
        "play": random.choice(plays),
        "seat_position": random.choice(seat_positions),
        "price": round(random.uniform(20, 150), 2),  # Ticket price between $20 and $150
        "seat_reserved": is_reserved,
        "seat_reservation_name": fake.name() if is_reserved else "",
    }
    for is_reserved in (random.choice([True, False]) for _ in range(300))
]

# Ticket records as a DataFrame
ticket_df = pd.DataFrame(ticket_data)

# Write the table out as CSV, without the index column.
# NOTE(review): the destination is a hard-coded absolute path, so the write
# fails on machines where /mnt/data does not exist. Consider a configurable
# output directory.
ticket_file_path = "/mnt/data/synthetic_ticket_database.csv"
ticket_df.to_csv(ticket_file_path, index=False)

# Display the output location as the cell result
ticket_file_path