In [53]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

In [65]:
# Constants
num_records = 20


In [None]:
dates = [
    datetime.date(2021, 8, 29),
    datetime.date(2024, 9, 7),
    datetime.date(2024, 9, 14),
    datetime.date(2024, 9, 21),
    datetime.date(2024, 9, 28),
    datetime.date(2024, 10, 5),
    datetime.date(2024, 10, 12),
    datetime.date(2024, 10, 19),
    datetime.date(2024, 11, 2),
    datetime.date(2024, 11, 9),
    datetime.date(2024, 11, 21),
    datetime.date(2024, 11, 30),
    datetime.date(2024, 12, 7),
]
times = [
    datetime.time(19, 0),  # 7:00 PM
    datetime.time(19, 30), # 7:30 PM
    datetime.time(12, 0), # 12:00 PM
    datetime.time(12, 0), # 12:00 PM
    datetime.time(12, 0), # 12:00 PM
    datetime.time(12, 0), # 12:00 PM
    datetime.time(20, 0), # 8:00 PM
    datetime.time(15, 30), # 3:30 PM
    datetime.time(12, 0), # 12:00 PM
    datetime.time(15, 30), # 3:30 PM
    datetime.time(19, 30), # 7:30 PM
    datetime.time(19, 30), # 7:30 PM
    datetime.time(12, 0), # 12:00 PM
]

# Fixing the date type issue by converting numpy datetime64 to datetime.date
dates_corrected = [pd.to_datetime(date).date() for date in dates]

# Generate random times for each event
times = [datetime.combine(date, datetime.min.time()) + timedelta(hours=np.random.randint(18, 23)) for date in dates_corrected]


In [73]:
# Event Names
event_names = [f"Game {i}" for i in range(1, 14)]

# Seat Location
seat_locations = ['Upper Deck', 'Lower Bowl', 'VIP']
seat_location = np.random.choice(seat_locations, num_records)

# Ticket Prices based on seat location
price_map = {'Upper Deck': 30, 'Lower Bowl': 70, 'VIP': 150}
ticket_prices = [price_map[loc] + np.random.normal(0, 5) for loc in seat_location]

# Number of Tickets sold
num_tickets = np.random.randint(1, 8, num_records)

# Team Performance (simple metric, e.g., a score out of 100)
#team_performance = np.random.randint(50, 100, num_records)

# Promotion indicator (binary)
promotions = np.random.choice([0, 1], num_records, p=[0.8, 0.2])


In [61]:
print(dates)
print(times)
print(event_names)

[datetime.date(2021, 8, 29), datetime.date(2024, 9, 7), datetime.date(2024, 9, 14), datetime.date(2024, 9, 21), datetime.date(2024, 9, 28), datetime.date(2024, 10, 5), datetime.date(2024, 10, 12), datetime.date(2024, 10, 19), datetime.date(2024, 11, 2), datetime.date(2024, 11, 9), datetime.date(2024, 11, 21), datetime.date(2024, 11, 30), datetime.date(2024, 12, 7)]
[datetime.time(19, 0), datetime.time(19, 30), datetime.time(12, 0), datetime.time(12, 0), datetime.time(12, 0), datetime.time(12, 0), datetime.time(20, 0), datetime.time(15, 30), datetime.time(12, 0), datetime.time(15, 30), datetime.time(19, 30), datetime.time(19, 30), datetime.time(12, 0)]
['Game 1', 'Game 2', 'Game 3', 'Game 4', 'Game 5', 'Game 6', 'Game 7', 'Game 8', 'Game 9', 'Game 10', 'Game 11', 'Game 12', 'Game 13']


In [62]:
# Compile the games dataset
games = {
    'Date': dates,
    'Time': times,
    'Event Name': event_names
}


In [63]:
# Create DataFrame
games_df = pd.DataFrame(games)
games_df.head()


Unnamed: 0,Date,Time,Event Name
0,2021-08-29,19:00:00,Game 1
1,2024-09-07,19:30:00,Game 2
2,2024-09-14,12:00:00,Game 3
3,2024-09-21,12:00:00,Game 4
4,2024-09-28,12:00:00,Game 5


In [89]:
# Create 1000 observations from the list of dates
#randomize = random.choices(data, k=1000)
full_games=games_df.sample(n=num_records, replace=True)
full_games.reset_index(drop=True, inplace=True)


In [90]:
full_games.describe()

Unnamed: 0,Date,Time,Event Name
count,20,20,20
unique,10,5,10
top,2024-10-05,12:00:00,Game 6
freq,4,11,4


In [91]:
full_games['Date'].value_counts()

2024-10-05    4
2024-11-09    3
2024-12-07    2
2024-09-21    2
2024-10-19    2
2024-10-12    2
2024-11-02    2
2021-08-29    1
2024-09-28    1
2024-11-21    1
Name: Date, dtype: int64

In [92]:
# Compile the dataset
randomized = {
    'Seat Location': seat_location,
    'Ticket Price': np.round(ticket_prices, 2),
    'Number of Tickets': num_tickets,
    'Promotion': promotions
}

In [93]:
# Create DataFrame
sales_df = pd.DataFrame(randomized)
sales_df.reset_index(drop=True, inplace=True)


In [94]:
print(sales_df)
print(full_games)

   Seat Location  Ticket Price  Number of Tickets  Promotion
0            VIP        151.09                  6          0
1     Lower Bowl         65.15                  2          0
2     Lower Bowl         70.25                  7          0
3     Lower Bowl         67.69                  2          1
4            VIP        150.72                  4          0
5     Upper Deck         33.27                  7          0
6     Lower Bowl         65.14                  7          0
7            VIP        148.75                  6          0
8     Lower Bowl         59.31                  4          0
9     Upper Deck         36.00                  6          0
10    Lower Bowl         76.82                  2          0
11    Upper Deck         32.99                  6          0
12    Lower Bowl         71.44                  2          0
13    Lower Bowl         62.98                  4          0
14           VIP        142.94                  3          1
15    Upper Deck        

In [95]:
ticket_sales_df = pd.concat([full_games, sales_df], axis=1, ignore_index=True)  # Concatenate horizontally
print(ticket_sales_df)

             0         1        2           3       4  5  6
0   2024-10-05  12:00:00   Game 6         VIP  151.09  6  0
1   2024-12-07  12:00:00  Game 13  Lower Bowl   65.15  2  0
2   2024-11-09  15:30:00  Game 10  Lower Bowl   70.25  7  0
3   2021-08-29  19:00:00   Game 1  Lower Bowl   67.69  2  1
4   2024-09-21  12:00:00   Game 4         VIP  150.72  4  0
5   2024-11-09  15:30:00  Game 10  Upper Deck   33.27  7  0
6   2024-11-09  15:30:00  Game 10  Lower Bowl   65.14  7  0
7   2024-10-19  15:30:00   Game 8         VIP  148.75  6  0
8   2024-09-21  12:00:00   Game 4  Lower Bowl   59.31  4  0
9   2024-10-12  20:00:00   Game 7  Upper Deck   36.00  6  0
10  2024-10-12  20:00:00   Game 7  Lower Bowl   76.82  2  0
11  2024-11-02  12:00:00   Game 9  Upper Deck   32.99  6  0
12  2024-10-05  12:00:00   Game 6  Lower Bowl   71.44  2  0
13  2024-10-05  12:00:00   Game 6  Lower Bowl   62.98  4  0
14  2024-10-05  12:00:00   Game 6         VIP  142.94  3  1
15  2024-11-02  12:00:00   Game 9  Upper