In [3]:
import pandas as pd

## Ticket Sales Data Cleaning

In [4]:
#Import ticket sales data
tickets = pd.read_csv("/Users/jm/working/acl_spring_24_bulls2/data/ticket_sales_data_2223_2324.csv", 
                      names = [
                        'season_name', 'event_date', 'event_weekday', 'opponent_short',
                        'parent_ticket_categories', 'add_date', 'days_before_event',
                        'email', 'total_seats', 'seat_location', 'seat_level',
                        'arrival_time', 'attendee_email', 'attendee_zip_code', 'mobile_scan',
                        'scan_category'
                    ],
                    header = 0
                )



In [5]:
# Fix Date Data Types
tickets["event_date"] = pd.to_datetime(tickets['event_date'], format = "%d-%b-%y")#.astype(str)

tickets["add_date"] = pd.to_datetime(tickets['add_date'], format = "%d-%b-%y")#.astype(str)

tickets["arrival_time"] = pd.to_datetime(tickets['arrival_time'], format = "%m/%d/%y %H:%M")

In [6]:
tickets.to_csv("/Users/jm/working/acl_spring_24_bulls2/data/bulls2224_tickets.csv", index = False)

## Game Data Ingestion

In [7]:
s23 = pd.read_csv("/Users/jm/working/acl_spring_24_bulls2/data/bulls2223_season.csv")
s24 = pd.read_csv("/Users/jm/working/acl_spring_24_bulls2/data/bulls2324_season.csv")

In [8]:
def convert_time(time_str):
    return time_str.replace('p', ' PM').replace('a', ' AM')

def prep_season_games(season_data, season:str):
    season_data['season_name'] = season

    season_data["start_et"] = season_data['date'] + ' ' + season_data['start_et'].apply(convert_time)
    season_data["start_et"] = pd.to_datetime(season_data['start_et'], format = "%a %b %d %Y %I:%M %p")
    season_data['event_date'] = pd.to_datetime(season_data['date'], format = "%a %b %d %Y")#.astype(str)

    # Convert to central time zone
    season_data["start_ct"] =  season_data["start_et"] - pd.Timedelta(hours=1)

    return season_data

s23 = prep_season_games(s23, season = "2022-2023 Chicago Bulls")[
    [
        "season_name", "G", "event_date", "start_ct", "away", 
        "Opponent", "outcome", "overtime", "pts", "pts_all",
        "W", "L", "Streak", "Notes"
        ]
    ]

s24 = prep_season_games(s24, season = "2023-2024 Chicago Bulls")[
    [
        "season_name", "G", "event_date", "start_ct", "away", 
        "Opponent", "outcome", "overtime", "pts", "pts_all",
        "W", "L", "Streak", "Notes"
        ]
    ]

In [9]:
bulls2224_games = pd.concat([s23, s24])
bulls2224_games.to_csv("/Users/jm/working/acl_spring_24_bulls2/data/bulls2224_games.csv", index = False)

# Combine the Tickets and Games Data

In [10]:
df = pd.merge(tickets, bulls2224_games, how = "left", on = ["season_name", "event_date"])

In [11]:
df.to_csv("/Users/jm/working/acl_spring_24_bulls2/data/bulls2224_tickets_games.csv", index = False)

In [119]:
bulls2224_games.dtypes

season_name            object
G                       int64
event_date     datetime64[ns]
start_ct       datetime64[ns]
away                    int64
Opponent               object
outcome                object
overtime               object
pts                     int64
pts_all                 int64
W                       int64
L                       int64
Streak                 object
Notes                  object
dtype: object

In [115]:
len(tickets_games_df)

570265

In [117]:
df.head(200)

Unnamed: 0,season_name,event_date,event_weekday,opponent_short,parent_ticket_categories,add_date,days_before_event,email,total_seats,seat_location,...,away,Opponent,outcome,overtime,pts,pts_all,W,L,Streak,Notes
0,2022-2023 Chicago Bulls,2022-10-04,Tue,Pelicans,Group,2022-09-27,7,frooogpop@outlook.com,1,100 Level End,...,,,,,,,,,,
1,2022-2023 Chicago Bulls,2022-10-04,Tue,Pelicans,Group,2022-09-27,7,frooogpop@outlook.com,2,100 Level Center,...,,,,,,,,,,
2,2022-2023 Chicago Bulls,2022-10-04,Tue,Pelicans,Group,2022-09-27,7,frooogpop@outlook.com,2,100 Level Center,...,,,,,,,,,,
3,2022-2023 Chicago Bulls,2022-10-04,Tue,Pelicans,Group,2022-09-27,7,frooogpop@outlook.com,2,100 Level Center,...,,,,,,,,,,
4,2022-2023 Chicago Bulls,2022-10-04,Tue,Pelicans,Group,2022-09-27,7,frooogpop@outlook.com,2,100 Level Center,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,2022-2023 Chicago Bulls,2022-10-07,Fri,Nuggets,Group,2022-10-03,4,christinakuz94@gmail.com,2,300 Level,...,,,,,,,,,,
196,2022-2023 Chicago Bulls,2022-10-07,Fri,Nuggets,Group,2022-10-03,4,christinakuz94@gmail.com,2,300 Level,...,,,,,,,,,,
197,2022-2023 Chicago Bulls,2022-10-07,Fri,Nuggets,Group,2022-10-03,4,christinakuz94@gmail.com,2,300 Level,...,,,,,,,,,,
198,2022-2023 Chicago Bulls,2022-10-07,Fri,Nuggets,Group,2022-10-03,4,christinakuz94@gmail.com,2,300 Level,...,,,,,,,,,,


## Email Data Analysis

In [29]:
#Import E-Mail Data
kbyg_emails = pd.read_csv("/Users/jm/working/acl_spring_24_bulls2/data/kbyg_clickthrough_data_2223_2324.csv", 
                      names = [
                          'email', 'clickthrough_link', 'email_clickthrough_date_time',
                          'email_name', 'total_clickthroughs', 'email_send_date',
                          'unique_clickthroughs', 'clickthrough_link_count', 'season'
                      ]
       )
postgame_emails = pd.read_csv("/Users/jm/working/acl_spring_24_bulls2/data/Postgame_Email_Clickthrough_2223_and_2324_seasons.csv", 
                      names = [
                          'email', 'clickthrough_link', 'email_clickthrough_date_time',
                          'email_name', 'total_clickthroughs', 'email_send_date',
                          'unique_clickthroughs', 'clickthrough_link_count', 'season'
                      ]
       )

In [26]:
kbyg_emails.columns

Index(['Email Address', 'Clickthrough Link', 'Email Clickthrough Date/Time',
       'Email Name', 'Total Clickthroughs', 'Email Send Date',
       'Unique Clickthroughs', 'Clickthrough Link Count', 'Season'],
      dtype='object')

In [21]:
# How many unique emails do we have here?
len(kbyg_emails.email.unique())

9089

In [22]:
# How many unique emails do we have here?
len(postgame_emails.email.unique())

17638

In [10]:
#Import LivaA Data
liva_2223 = pd.read_csv("/Users/jm/working/acl_spring_24_bulls2/data/liveA_2223_season_ticket_holders.csv")
liva_2324 = pd.read_csv("/Users/jm/working/acl_spring_24_bulls2/data/liveA_2324_season_ticket_holders.csv")

  liva_2324 = pd.read_csv("/Users/jm/working/acl_spring_24_bulls2/data/liveA_2324_season_ticket_holders.csv")


In [11]:
kbyg_emails.head()

Unnamed: 0,Email Address,Clickthrough Link,Email Clickthrough Date/Time,Email Name,Total Clickthroughs,Email Send Date,Unique Clickthroughs,Clickthrough Link Count,Season
0,404345@colum.edu,https://www.nba.com/bulls/healthandsafety,10/26/2023 13:28,2023_1025_CB_Digital_KBYG_OKC,1,10/25/2023 17:00,1,1,2324
1,a.aroonlap@gmail.com,https://www.nba.com/bulls/iog,10/25/2023 18:44,2023_1025_CB_Digital_KBYG_OKC,1,10/25/2023 17:00,1,1,2324
2,aarias@viveseguros.mx,https://www.nba.com/bulls/news/chicago-bulls-m...,10/25/2023 17:48,2023_1025_CB_Digital_KBYG_OKC,1,10/25/2023 17:00,1,1,2324
3,adam.zerella@proton.me,http://response.bulls.com/subscription-center,10/25/2023 17:41,2023_1025_CB_Digital_KBYG_OKC,1,10/25/2023 17:00,1,1,2324
4,agarcia2639@gmail.com,https://chicagobullsraffle.5050central.com/Hom...,10/26/2023 8:54,2023_1025_CB_Digital_KBYG_OKC,1,10/25/2023 17:00,1,1,2324
