In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

In [17]:
# Constants
num_records = 1000
dates = [
    datetime.date(2021, 8, 29),
    datetime.date(2024, 9, 7),
    datetime.date(2024, 9, 14),
    datetime.date(2024, 9, 21),
    datetime.date(2024, 9, 28),
    datetime.date(2024, 10, 5),
    datetime.date(2024, 10, 12),
    datetime.date(2024, 10, 19),
    datetime.date(2024, 11, 2),
    datetime.date(2024, 11, 9),
    datetime.date(2024, 11, 21),
    datetime.date(2024, 11, 30),
    datetime.date(2024, 12, 7),
]
times = [
    datetime.time(19, 0),  # 7:00 PM
    datetime.time(19, 30), # 7:30 PM
    datetime.time(12, 0), # 12:00 PM
    datetime.time(12, 0), # 12:00 PM
    datetime.time(12, 0), # 12:00 PM
    datetime.time(12, 0), # 12:00 PM
    datetime.time(20, 0), # 8:00 PM
    datetime.time(15, 30), # 3:30 PM
    datetime.time(12, 0), # 12:00 PM
    datetime.time(15, 30), # 3:30 PM
    datetime.time(19, 30), # 7:30 PM
    datetime.time(19, 30), # 7:30 PM
    datetime.time(12, 0), # 12:00 PM
]

In [28]:
# Create 1000 observations from the list of dates
randomize_dates = random.choices(dates, k=1000)

# Fixing the date type issue by converting numpy datetime64 to datetime.date
dates_corrected = [pd.to_datetime(date).date() for date in dates]

# Generate random times for each event
times = [datetime.combine(date, datetime.min.time()) + timedelta(hours=np.random.randint(18, 23)) for date in dates_corrected]


In [5]:
# Event Names
event_names = [f"Event {i}" for i in range(1, num_records + 1)]

# Seat Location
seat_locations = ['Upper Deck', 'Lower Bowl', 'VIP']
seat_location = np.random.choice(seat_locations, num_records)

# Ticket Prices based on seat location
price_map = {'Upper Deck': 30, 'Lower Bowl': 70, 'VIP': 150}
ticket_prices = [price_map[loc] + np.random.normal(0, 5) for loc in seat_location]

# Number of Tickets sold
num_tickets = np.random.randint(1, 10, num_records)

# Team Performance (simple metric, e.g., a score out of 100)
team_performance = np.random.randint(50, 100, num_records)

# Promotion indicator (binary)
promotions = np.random.choice([0, 1], num_records, p=[0.8, 0.2])


In [6]:
# Compile the dataset
data = {
    'Date': dates_corrected,
    'Time': times,
    'Event Name': event_names,
    'Seat Location': seat_location,
    'Ticket Price': np.round(ticket_prices, 2),
    'Number of Tickets': num_tickets,
    'Team Performance': team_performance,
    'Promotion': promotions
}


In [7]:
# Create DataFrame
ticket_sales_df = pd.DataFrame(data)
ticket_sales_df.head()


Unnamed: 0,Date,Time,Event Name,Seat Location,Ticket Price,Number of Tickets,Team Performance,Promotion
0,2024-12-11,2024-12-11 20:00:00,Event 1,Upper Deck,25.31,5,89,0
1,2024-10-02,2024-10-02 21:00:00,Event 2,Lower Bowl,67.69,4,55,0
2,2024-11-19,2024-11-19 22:00:00,Event 3,Upper Deck,26.02,2,88,0
3,2024-10-07,2024-10-07 21:00:00,Event 4,VIP,152.67,7,60,0
4,2024-09-03,2024-09-03 21:00:00,Event 5,Upper Deck,17.12,3,71,0


In [8]:
ticket_sales_df.describe()

Unnamed: 0,Ticket Price,Number of Tickets,Team Performance,Promotion
count,1000.0,1000.0,1000.0,1000.0
mean,81.15933,5.057,74.168,0.202
std,50.495189,2.593159,14.269457,0.401693
min,15.38,1.0,50.0,0.0
25%,32.5625,3.0,62.0,0.0
50%,69.09,5.0,73.0,0.0
75%,146.03,7.0,86.0,0.0
max,164.99,9.0,99.0,1.0


In [10]:
ticket_sales_df['Date'].value_counts()

2024-08-29    14
2024-10-09    13
2024-09-30    13
2024-12-06    13
2024-10-08    13
              ..
2024-11-14     4
2024-11-20     4
2024-11-24     3
2024-09-24     2
2024-11-30     2
Name: Date, Length: 134, dtype: int64