In [4]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Constants
SEED = 42  # fixed seed so a fresh-kernel run regenerates the same synthetic dataset
num_records = 1000
start_date = datetime(2024, 8, 20)
end_date = datetime(2024, 12, 31)

# Seed NumPy's global generator before the first draw. Every np.random call
# below uses that generator, so the whole dataset is reproducible.
np.random.seed(SEED)

# Generate random dates within the season (sampled with replacement from the
# daily range start_date..end_date)
date_range = pd.date_range(start_date, end_date)
dates = np.random.choice(date_range, num_records)

# np.random.choice hands back numpy datetime64 values; convert each one to a
# plain datetime.date
dates_corrected = [pd.to_datetime(date).date() for date in dates]

# Generate a random start time for each event: midnight of the event date plus
# a whole number of hours. randint's upper bound is exclusive, so the hour is
# one of 18, 19, 20, 21, 22.
times = [
    datetime.combine(date, datetime.min.time()) + timedelta(hours=np.random.randint(18, 23))
    for date in dates_corrected
]

# Event Names ("Event 1" .. "Event <num_records>")
event_names = [f"Event {i}" for i in range(1, num_records + 1)]

# Seat Location (uniform choice among the three sections)
seat_locations = ['Upper Deck', 'Lower Bowl', 'VIP']
seat_location = np.random.choice(seat_locations, num_records)

# Ticket Prices: base price for the seat location plus Gaussian noise
# (mean 0, standard deviation 5)
price_map = {'Upper Deck': 30, 'Lower Bowl': 70, 'VIP': 150}
ticket_prices = [price_map[loc] + np.random.normal(0, 5) for loc in seat_location]

# Number of Tickets sold (upper bound exclusive, so values are 1..9)
num_tickets = np.random.randint(1, 10, num_records)

# Team Performance (simple metric; upper bound exclusive, so values are 50..99)
team_performance = np.random.randint(50, 100, num_records)

# Promotion indicator (binary): 1 with probability 0.2, 0 with probability 0.8
promotions = np.random.choice([0, 1], num_records, p=[0.8, 0.2])

# Compile the dataset; prices are rounded to 2 decimal places
data = {
    'Date': dates_corrected,
    'Time': times,
    'Event Name': event_names,
    'Seat Location': seat_location,
    'Ticket Price': np.round(ticket_prices, 2),
    'Number of Tickets': num_tickets,
    'Team Performance': team_performance,
    'Promotion': promotions
}

# Create DataFrame
ticket_sales_df = pd.DataFrame(data)
ticket_sales_df.head()


Unnamed: 0,Date,Time,Event Name,Seat Location,Ticket Price,Number of Tickets,Team Performance,Promotion
0,2024-09-06,2024-09-06 22:00:00,Event 1,Lower Bowl,73.54,2,76,0
1,2024-11-11,2024-11-11 19:00:00,Event 2,Upper Deck,35.22,8,54,0
2,2024-09-13,2024-09-13 18:00:00,Event 3,Lower Bowl,74.13,4,78,0
3,2024-11-11,2024-11-11 21:00:00,Event 4,Lower Bowl,72.36,3,95,0
4,2024-12-23,2024-12-23 20:00:00,Event 5,Upper Deck,29.81,4,67,0


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the generated
# ticket-sales data; the quartiles listed are describe()'s defaults, spelled out.
ticket_sales_df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Ticket Price,Number of Tickets,Team Performance,Promotion
count,1000.0,1000.0,1000.0,1000.0
mean,85.49155,4.917,74.288,0.186
std,50.265042,2.613607,13.99167,0.389301
min,14.53,1.0,50.0,0.0
25%,34.5825,3.0,62.0,0.0
50%,70.615,5.0,74.5,0.0
75%,146.515,7.0,86.0,0.0
max,164.13,9.0,99.0,1.0
