# Libraries

In [16]:
# Standard
import pandas as pd
import numpy as np

# Modeling
import statsmodels.api as sm

# Settings

In [17]:
# Data file location
data_location = 'C:/Users/nuke2/Desktop/NW Work/Winter Work/MSiA 410/hw07/ConcessionSalesData_ForClass.xlsx'

# Pandas column settings
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

# Data Processing

In [18]:
# Read data from file
df = pd.read_excel(data_location)
df.head()

Unnamed: 0,food_game,UserID,UseCount,revenue,game_week,special_discount,special_item,FAMILYGROUPNAME,Master_Item,MENUITEMNAME,PRICES,actual_discount,actual_price,Discount Type,Discount Percentage,first_week_discount,Discount_HotDog,Discount_SouvCup,Discount_BtlWater,Discount_Peanuts,Discount_Nachos,Discount_Pretzel,Discount_Popcorn,sth_rev_game,total_product_rev_nonSTH
0,BAG PEANUTS_Game 1,3304107,1,4.726207,Game 1,STH Discount Only,Yes,SNACKS,20500003,BAG PEANUTS,5.25,0.523793,4.726207,GA STM Discount,10,No Discount,Yes,No,Yes,No,No,No,No,16441.58141,15296.65411
1,BAG PEANUTS_Game 1,3405989,1,4.73,Game 1,STH Discount Only,Yes,SNACKS,20500003,BAG PEANUTS,5.25,0.52,4.73,GA STM Discount,10,No Discount,Yes,No,Yes,No,No,No,No,16441.58141,15296.65411
2,BAG PEANUTS_Game 1,3302989,1,4.73,Game 1,STH Discount Only,Yes,SNACKS,20500003,BAG PEANUTS,5.25,0.52,4.73,GA STM Discount,10,No Discount,Yes,No,Yes,No,No,No,No,16441.58141,15296.65411
3,BAG PEANUTS_Game 1,3253641,1,4.5675,Game 1,STH Discount Only,Yes,SNACKS,20500003,BAG PEANUTS,5.25,0.6825,4.5675,GA STM Discount,10,No Discount,Yes,No,Yes,No,No,No,No,16441.58141,15296.65411
4,BAG PEANUTS_Game 1,3315665,1,4.726615,Game 1,STH Discount Only,Yes,SNACKS,20500003,BAG PEANUTS,5.25,0.523385,4.726615,GA STM Discount,10,No Discount,Yes,No,Yes,No,No,No,No,16441.58141,15296.65411


In [19]:
# Init list of promotional items
promotional_items = {
    'NACHOS': 'nachos',
    'SOUV POPCORN': 'popcorn',
    'HOT DOG': 'hotdog',
    'BAG PEANUTS': 'peanuts',
    'BAVARIAN PRETZEL': 'pretzel',
    'BTL DEJA BLUE': 'btlwater',
    'SOUV CUP 32': 'souvcup'
}

# Select columns where item is one of seven
df = df[df.MENUITEMNAME.isin(list(promotional_items.keys()))]

# Rename food in row
df = df.replace({"MENUITEMNAME": promotional_items})

# Get columns needed for calculations
columns_needed = [
    'UserID',
    'game_week',
    'MENUITEMNAME',
    'PRICES',
    'actual_discount',
    'actual_price',
    'Discount Percentage',
    'Discount_HotDog',
    'Discount_SouvCup',
    'Discount_BtlWater',
    'Discount_Peanuts',
    'Discount_Nachos',
    'Discount_Pretzel',
    'Discount_Popcorn'
]

# Update df
df = df.loc[:, columns_needed]

# Make columns lowercase
df.columns = df.columns.str.lower()

### What I was supposed to do in the first place

In [20]:
# Group by week, menu item, and discount percentage and aggregate demand, price, and maximum discount columns
df_modeling = df.groupby(['game_week', 'menuitemname', 'discount percentage']).agg({
    'userid': 'count',  # count of userids as demand
    'actual_price': 'mean',  # mean of actual_price
    'discount_hotdog': 'max',  # maximum of discount_hotdog
    'discount_souvcup': 'max',  # maximum of discount_souvcup
    'discount_btlwater': 'max',  # maximum of discount_btlwater
    'discount_peanuts': 'max',  # maximum of discount_peanuts
    'discount_nachos': 'max',  # maximum of discount_nachos
    'discount_pretzel': 'max',  # maximum of discount_pretzel
    'discount_popcorn': 'max',  # maximum of discount_popcorn
}).reset_index().copy()

# Exclude the 100% discount percentage outlier and rename the 'userid' column as 'demand'
df_modeling = df_modeling[df_modeling['discount percentage'] != 100].rename(columns={'userid': 'demand'})

In [21]:
# Get unique games and menu items from the data
games = df_modeling.game_week.unique()
menu_items = df_modeling.menuitemname.unique()

# Define a list of discounts to consider
discounts = [10, 20]

# Loop through each game and menu item combination
for game in games:
    for item in menu_items:
        # Initialize total demand and price dictionary
        total_demand = 0
        price = {}

        # Loop through each discount and calculate the price and total demand
        for discount in discounts:
            # Filter the data to get the relevant rows and extract the demand and actual price values
            df_temp = df_modeling[(df_modeling['game_week'] == game) & (df_modeling['menuitemname'] == item) & (df_modeling['discount percentage'] == discount)]
            num_purchases = df_temp['demand'].astype(int).sum()

            # If there are no purchases for this discount, skip it
            if num_purchases == 0:
                continue

            # Update the total demand and price dictionary for this discount
            total_demand += num_purchases
            price[discount] = [df_temp['actual_price'].astype(float).values[0], num_purchases]

        # Calculate the percentage of demand for each discount and update the price dictionary
        for key, value in price.items():
            value[1] = value[1] / total_demand

        # Calculate the averaged price across all discounts and add it to the price dictionary
        total_price_temp = sum([value[0] * value[1] for value in price.values()])
        price['averaged_price'] = total_price_temp
        
        # Save the data to dataframe
        df_modeling.loc[(df_modeling['game_week'] == game) & (df_modeling['menuitemname'] == item) & (df_modeling['discount percentage'].isin([10])), [
            'discount percentage',
            'demand',
            'actual_price'
        ]] = ['regular', total_demand, price['averaged_price']]
        
        df_modeling = df_modeling[df_modeling['discount percentage'] != 20]

In [22]:
# list of columns with discount information
discount_columns = [
    'discount_hotdog',
    'discount_souvcup',
    'discount_btlwater',
    'discount_peanuts',
    'discount_nachos',
    'discount_pretzel',
    'discount_popcorn'
]

# replace 'Yes' and 'No' values in discount columns with 1 and 0, respectively
for col in discount_columns:
    df_modeling[col] = df_modeling[col].replace({'Yes': 1, 'No': 0})

# replace 50 with 1 and 'regular' with 0 in 'discount percentage' column
df_modeling['discount percentage'] = df_modeling['discount percentage'].replace({50: 1, 'regular': 0})

# remove rows with demand value of 2 or 52
df_modeling = df_modeling[~df_modeling['demand'].isin([2, 52])]

# take the natural logarithm of 'demand' and 'actual_price' columns
df_modeling.demand = np.log(df_modeling.demand) # Uncomment for question 1 answer
df_modeling.actual_price = np.log(df_modeling.actual_price) # Uncomment for question 1 answer

# print the first 9 rows of the cleaned up dataframe
df_modeling.head(9)

Unnamed: 0,game_week,menuitemname,discount percentage,demand,actual_price,discount_hotdog,discount_souvcup,discount_btlwater,discount_peanuts,discount_nachos,discount_pretzel,discount_popcorn
2,Game 1,btlwater,1,6.609349,0.966442,1,0,1,0,0,0,0
3,Game 1,hotdog,1,6.285998,1.288389,1,0,1,0,0,0,0
4,Game 1,nachos,0,4.248495,1.71684,1,0,1,0,0,0,0
6,Game 1,peanuts,0,4.615121,1.534707,1,0,1,0,0,0,0
8,Game 1,popcorn,0,2.564949,2.309211,1,0,1,0,0,0,0
10,Game 1,pretzel,0,5.236442,1.780445,1,0,1,0,0,0,0
12,Game 1,souvcup,0,5.517453,1.948363,1,0,1,0,0,0,0
14,Game 2,btlwater,0,5.484797,1.52167,0,0,0,1,1,0,0
16,Game 2,hotdog,0,4.770685,1.824631,0,0,0,1,1,0,0


In [27]:
# Define an empty dictionary to hold the special item data
special_item_data = {}

# Define the columns that will be used for the modeling
columns_model = [
    'game_week',
    'actual_price',
    'discount_hotdog',
    'discount_souvcup',
    'discount_btlwater',
    'discount_peanuts',
    'discount_nachos',
    'discount_pretzel',
    'discount_popcorn'
]

# For printing cleanly
count = 0

# Iterate over each promotional item
for item in promotional_items.values():
    
    # Create a dictionary for the item in the special item data dictionary
    special_item_data[item] = {}
    
    # Get the data for the item from the modeling data
    special_item_data[item]['df'] = df_modeling[df_modeling.menuitemname == item]
    
    # Create a list of columns to use for the modeling, excluding the discount for the current item
    temp_columns = []
    for col in columns_model:
        if 'discount_'+item == col:
            continue
        temp_columns.append(col)
    
    # Create a DataFrame with the selected columns for the current item
    X_temp = df_modeling[df_modeling.menuitemname == item][temp_columns]
    
    # One-hot encode the game week column and join it with the X_temp DataFrame
    if 'game_week' in temp_columns:
        one_hot = pd.get_dummies(X_temp['game_week'])
        X_temp = X_temp.drop(columns=['game_week'],axis = 1)
        X_temp = X_temp.join(one_hot.drop(columns=['Game 1']))
    
    # Add a constant column to the X_temp DataFrame and get the demand column as the y_temp DataFrame
    X_temp = sm.add_constant(X_temp)
    y_temp = df_modeling[df_modeling.menuitemname == item]['demand']
    
    # Save the X_temp and y_temp DataFrames for the current item in the special item data dictionary
    special_item_data[item]['X'] = X_temp
    special_item_data[item]['y'] = y_temp
    
    # Fit a linear regression model with X_temp as the predictors and y_temp as the target variable
    model_temp = sm.OLS(y_temp, X_temp).fit()
    
    # Save the model in the special item data dictionary and print the elasticity of the current item
    special_item_data[item]['model'] = model_temp
    
    # Print elasticity for each item
    print(f"{item} elasticity: {round(model_temp.params[1], 2)}")
    print(f"-----")
    
    # Extract the coefficients and their names
    coef_names = model_temp.params.index
    coef_values = model_temp.params.values

    # Print something
    print(f"Effect of other sales on demand for {item}")
    
    # Format the coefficients as a regression formula
    formula_parts = []
    coef_name = ""
    for i, coef in enumerate(coef_values):
        if i == 0:
            formula_parts.append(f"{coef:.4f}")
        else:
            coef_name = coef_names[i]
            formula_parts.append(f" + {coef:.4f} * {coef_name}")
        
        # Print effects on elasticity
        if 'discount' in coef_name:
            print(f"{coef_name}: {coef:.4f}")
            
    print(f"-----")        

    formula = ''.join(formula_parts)
    print(f"Regression formula: demand = {formula}")    
    
    # Print nicely
    count += 1
    if count != len(promotional_items):
        print(f"=============================================================================")

nachos elasticity: 1.48
-----
Effect of other sales on demand for nachos
discount_hotdog: 0.0151
discount_souvcup: -0.4022
discount_btlwater: 0.4635
discount_peanuts: 1.1199
discount_pretzel: -0.0115
discount_popcorn: 0.0576
-----
Regression formula: demand = 1.2348 + 1.4766 * actual_price + 0.0151 * discount_hotdog + -0.4022 * discount_souvcup + 0.4635 * discount_btlwater + 1.1199 * discount_peanuts + -0.0115 * discount_pretzel + 0.0576 * discount_popcorn + 1.1199 * Game 2 + 0.5196 * Game 3 + -0.4014 * Game 4 + 0.1112 * Game 5 + -0.5311 * Game 6 + -0.9794 * Game 7 + -0.0536 * Game 8
popcorn elasticity: 0.53
-----
Effect of other sales on demand for popcorn
discount_hotdog: -0.4191
discount_souvcup: 1.2720
discount_btlwater: 0.7947
discount_peanuts: -0.1899
discount_nachos: -0.1899
discount_pretzel: -0.3887
-----
Regression formula: demand = 0.9619 + 0.5315 * actual_price + -0.4191 * discount_hotdog + 1.2720 * discount_souvcup + 0.7947 * discount_btlwater + -0.1899 * discount_peanuts +