In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from matplotlib import pyplot
import feather
import warnings
from IPython.display import display
from folium.plugins import HeatMap
#import ipyleaflet
#from ipyleaflet import MarkerCluster, basemaps, CircleMarker
import folium
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error

warnings.filterwarnings('ignore')

In [2]:
# hv = pd.read_csv("hv_cleaned.csv", index_col = [0]) # index_col = [0] removes unnmaed index column
# path = 'hv_cleaned.feather'
# feather.write_dataframe(hv, path)
# hv = feather.read_dataframe(path)

In [3]:
# hv["Date"] =  pd.to_datetime(hv["Date"], format="%m/%d/%Y")
# hv['Year'] = hv['Date'].dt.year
# hv['Month'] = hv['Date'].dt.month

In [4]:
# mba_df2 = pd.read_csv("mba_df2.csv", index_col = [0]) # index_col = [0] removes unnmaed index column
# path = 'mba_df2.feather'
# feather.write_dataframe(mba_df2, path)
# mb = feather.read_dataframe(path)

In [5]:
# transactions = []
# for i in range(0, 18110):
#   transactions.extend(mb["Item Description"])

- An association rule implies that if an item A occurs, then item B also occurs with a certain probability.
    - Support is an indication of how frequently the item set appears in the data set.
        - the number of transactions with both X and Y divided by the total number of transactions.
    - For a rule X⇒Y, confidence shows the percentage in which Y is bought with X. It’s an indication of how often the rule has been found to be true.
        - For example, the rule T-shirt⇒Trousers has a confidence of 3/4, which means that for 75% of the transactions containing a t-shirt the rule is correct (75% of the times a customer buys a t-shirt, trousers are bought as well)
    - The lift of a rule is the ratio of the observed support to that expected if X and Y were independent
        - Greater lift values indicate stronger associations

In [6]:
mba_df = pd.read_csv("mba_df.csv", index_col = [0]) # index_col = [0] removes unnmaed index column
path = 'mba_df.feather'
feather.write_dataframe(mba_df, path)
mba_df = feather.read_dataframe(path)

In [7]:
mba_df.head()

Unnamed: 0,Store Number,Date,Month,Year,0,1,2,3,4,5,...,820,821,822,823,824,825,826,827,828,829
0,2500,2016-03-23,3,2016,Hawkeye Vodka,,,,,,...,,,,,,,,,,
1,2500,2016-03-30,3,2016,Popov Vodka 80 Prf,Old Crow,Captain Morgan Spiced Rum,Five Star Pet,Three Olives Vodka,Johnnie Walker Black,...,,,,,,,,,,
2,2500,2016-04-13,4,2016,Old Crow,Paramount Triple Sec,Bushmills Irish Whiskey,,,,...,,,,,,,,,,
3,2500,2016-04-20,4,2016,Burnett's Gin London Dry,Bacardi 151 Prf,Bailey's Vanilla Cinnamon,1800 Silver Tequila,,,...,,,,,,,,,,
4,2500,2016-04-27,4,2016,Southern Comfort,,,,,,...,,,,,,,,,,


In [8]:
mba_df.shape

(18110, 834)

In [9]:
num_rows = 18110
num_cols = 834
transactions = mba_df.iloc[:num_rows, 4:num_cols].astype(str).values.tolist()

In [10]:
from apyori import apriori
rules = apriori(transactions =transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)

In [11]:
results = list(rules)

In [12]:
results[:-1]

[RelationRecord(items=frozenset({'1800 Reposado', '1800 Anejo'}), support=0.003865267807840972, ordered_statistics=[OrderedStatistic(items_base=frozenset({'1800 Anejo'}), items_add=frozenset({'1800 Reposado'}), confidence=0.26022304832713755, lift=4.593215794546258)]),
 RelationRecord(items=frozenset({'1800 Reposado', '1800 Coconut'}), support=0.0036443953616786305, ordered_statistics=[OrderedStatistic(items_base=frozenset({'1800 Coconut'}), items_add=frozenset({'1800 Reposado'}), confidence=0.24719101123595505, lift=4.363186367917296)]),
 RelationRecord(items=frozenset({'1800 Cristalino', 'Fireball Cinnamon Whiskey Party Bucket'}), support=0.0038100496963003865, ordered_statistics=[OrderedStatistic(items_base=frozenset({'1800 Cristalino'}), items_add=frozenset({'Fireball Cinnamon Whiskey Party Bucket'}), confidence=0.4181818181818182, lift=3.239209891904503)]),
 RelationRecord(items=frozenset({'1800 Silver', '1800 Reposado'}), support=0.02225289895085588, ordered_statistics=[OrderedSt

In [13]:
def inspect(results):
    lhs         = [tuple(result[2][0][0])[0] for result in results]
    rhs         = [tuple(result[2][0][1])[0] for result in results]
    supports    = [result[1] for result in results]
    confidences = [result[2][0][2] for result in results]
    lifts       = [result[2][0][3] for result in results]
    return list(zip(lhs, rhs, supports, confidences, lifts))
resultsinDataFrame = pd.DataFrame(inspect(results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [14]:
resultsinDataFrame

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,1800 Anejo,1800 Reposado,0.003865,0.260223,4.593216
1,1800 Coconut,1800 Reposado,0.003644,0.247191,4.363186
2,1800 Cristalino,Fireball Cinnamon Whiskey Party Bucket,0.003810,0.418182,3.239210
3,1800 Reposado,1800 Silver,0.022253,0.392788,3.371271
4,1800 Ultimate Blood Orange Margarita,1800 Ultimate Margarita,0.004638,0.409756,3.651911
...,...,...,...,...,...
3430,Woodford Reserve Rye,Woodford Reserve Double Oaked,0.010602,0.349727,5.191436
3431,Woodford Reserve Straight Bourbon,Woodford Reserve Double Oaked,0.009332,0.269108,3.994714
3432,Woodford Reserve Straight Malt,Woodford Reserve Double Oaked,0.003092,0.267943,3.977410
3433,Zaya Gran Reserva Rum,Woodford Reserve Double Oaked,0.004307,0.203655,3.023113


In [15]:

resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
3380,Truly Pineapple Mango Mini,Truly Wild Berry Mini,0.003313,0.983607,269.895678
3426,White Dog Straight Rye,White Dog Wheat,0.004638,0.988235,201.089227
3425,White Dog Mash #1,White Dog Wheat,0.004749,0.966292,196.624164
3424,White Dog Mash #1,White Dog Straight Rye,0.004528,0.921348,196.301388
3381,Twisted Shotz Buttery Nipple,Twisted Shotz Miami Vice,0.003258,0.59,136.985897


- Look at the different levels of support, confidence, and lift to see if there is anything meaningful in those associations. 
- Do some deeper investigating in order to pull out some of the more meaningful MBA results, but sorting by the confidence and lift will be a good start.

# What happens if we condition on the month, report your findings for different months.

## January

In [16]:
jan_mba = mba_df[mba_df["Month"] == 1]
jan_mba.shape

(1901, 834)

In [17]:
num_rows = 1901
num_cols = 834
jan_transactions = mba_df.iloc[:num_rows, 4:num_cols].astype(str).values.tolist()

In [18]:
len(jan_transactions)

1901

In [19]:
rules = apriori(transactions = jan_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)

In [20]:
jan_results = list(rules)

In [21]:
jan_results[:2]

[RelationRecord(items=frozenset({'Bacardi Superior Mini', '1792 Bottle in Bond Bourbon'}), support=0.003156233561283535, ordered_statistics=[OrderedStatistic(items_base=frozenset({'1792 Bottle in Bond Bourbon'}), items_add=frozenset({'Bacardi Superior Mini'}), confidence=0.5, lift=3.5733082706766917)]),
 RelationRecord(items=frozenset({'1792 Bottle in Bond Bourbon', 'Crown Royal Mini'}), support=0.003156233561283535, ordered_statistics=[OrderedStatistic(items_base=frozenset({'1792 Bottle in Bond Bourbon'}), items_add=frozenset({'Crown Royal Mini'}), confidence=0.5, lift=3.2551369863013697)])]

In [22]:
def inspect(jan_results):
    lhs         = [tuple(result[2][0][0])[0] for result in jan_results]
    rhs         = [tuple(result[2][0][1])[0] for result in jan_results]
    supports    = [result[1] for result in jan_results]
    confidences = [result[2][0][2] for result in jan_results]
    lifts       = [result[2][0][3] for result in jan_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

jan_resultsinDataFrame = pd.DataFrame(inspect(jan_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [23]:
jan_resultsinDataFrame.sort_values(by=["Confidence"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
16447,Grey Goose Essences Watermelon & Basil,Grey Goose Essences White Peach & Rosemary,0.004208,1.0,135.785714
5351,Cody Road Barrel Old Fashioned Mini,Bird Dog Blackberry Mini,0.003156,1.0,22.903614
16443,Laphroaig 10 Yr Single Malt Scotch,Grey Goose,0.004208,1.0,3.00316
20659,Mt. Gay Eclipse Rum,New Amsterdam Gin,0.004734,1.0,4.150655
19372,Laphroaig 10 Yr Single Malt Scotch,Mccormick Vodka Pet,0.004208,1.0,41.326087


In [24]:
jan_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
23737,Truly Strawberry Lemonade,Truly Wild Berry,0.003156,1.0,316.833333
23735,Truly Pineapple Mango,Truly Wild Berry,0.003156,1.0,316.833333
23734,Truly Pineapple Mango,Truly Strawberry Lemonade,0.003156,1.0,316.833333
15056,Flecha Azul Anejo,Flecha Azul Reposado,0.003156,1.0,271.571429
20683,Natural Light Lemonade Vodka Mini,Natural Light Strawberry Lemonade Vodka Mini,0.003156,0.857143,271.571429


In [25]:
jan_resultsinDataFrame[(jan_resultsinDataFrame["Lift"] >= 100) & (jan_resultsinDataFrame["Confidence"] >= 0.5)]

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
5106,Belvedere Bartezek,Belvedere Smogory,0.003156,0.545455,129.613636
6519,Branson VS Phantom,Branson VSOP Grande Champagne,0.004734,0.692308,101.236686
9693,Chivas Regal 12 Yr,Mt. Gay Eclipse Rum,0.003156,0.5,105.611111
11513,Daviess County KSBW Cab Finished,Daviess County Kentucky Straight Bourbon Whiskey,0.003682,0.583333,158.416667
13894,Elijah Craig w/Old Fashioned Cocktail Syrup,Larceny VAP w/Glass and Flask,0.003156,0.666667,181.047619
14897,Fireball Countdown Calendar Minis,Fireball Holiday Ornament Minis,0.003156,0.666667,115.212121
15055,Flecha Azul Anejo,Flecha Azul Blanco,0.003156,1.0,237.625
15056,Flecha Azul Anejo,Flecha Azul Reposado,0.003156,1.0,271.571429
15057,Flecha Azul Blanco,Flecha Azul Reposado,0.003682,0.875,237.625
16447,Grey Goose Essences Watermelon & Basil,Grey Goose Essences White Peach & Rosemary,0.004208,1.0,135.785714


## February

In [26]:
feb_mba = mba_df[mba_df["Month"] == 2]
feb_mba.shape

(1837, 834)

In [27]:
num_rows = 1837
num_cols = 834
feb_transactions = mba_df.iloc[:num_rows, 4:num_cols].astype(str).values.tolist()

In [28]:
rules = apriori(transactions = feb_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2)

In [29]:
feb_results = list(rules)

KeyboardInterrupt: 

In [None]:
def inspect(feb_results):
    lhs         = [tuple(result[2][0][0])[0] for result in feb_results]
    rhs         = [tuple(result[2][0][1])[0] for result in feb_results]
    supports    = [result[1] for result in feb_results]
    confidences = [result[2][0][2] for result in feb_results]
    lifts       = [result[2][0][3] for result in feb_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

feb_resultsinDataFrame = pd.DataFrame(inspect(feb_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [None]:
feb_resultsinDataFrame.head()

## March

In [30]:
mar_mba = mba_df[mba_df["Month"] == 3]
mar_mba.shape

(2078, 834)

In [31]:
num_rows = 2078
num_cols = 834
mar_transactions = mba_df.iloc[:num_rows, 4:num_cols].astype(str).values.tolist()

In [32]:
rules = apriori(transactions = mar_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
mar_results = list(rules)

In [33]:
def inspect(mar_results):
    lhs         = [tuple(result[2][0][0])[0] for result in mar_results]
    rhs         = [tuple(result[2][0][1])[0] for result in mar_results]
    supports    = [result[1] for result in mar_results]
    confidences = [result[2][0][2] for result in mar_results]
    lifts       = [result[2][0][3] for result in mar_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

mar_resultsinDataFrame = pd.DataFrame(inspect(mar_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [34]:
mar_resultsinDataFrame.sort_values(by=["Confidence"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
4955,Laphroaig 10 Yr Single Malt Scotch,Bombay Sapphire Gin,0.00385,1.0,3.95057
18750,Truly Pineapple Mango Mini,Truly Wild Berry Mini,0.004331,1.0,230.888889
11903,Flecha Azul Anejo,Flecha Azul Blanco,0.003369,1.0,230.888889
18807,White Dog Straight Rye,White Dog Wheat,0.006256,1.0,159.846154
10789,Effen Black Cherry Vodka,Effen Cucumber,0.003369,1.0,24.738095


In [35]:
mar_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
11904,Flecha Azul Anejo,Flecha Azul Reposado,0.003369,1.0,259.75
18750,Truly Pineapple Mango Mini,Truly Wild Berry Mini,0.004331,1.0,230.888889
11903,Flecha Azul Anejo,Flecha Azul Blanco,0.003369,1.0,230.888889
11905,Flecha Azul Blanco,Flecha Azul Reposado,0.00385,0.888889,230.888889
17770,Rich & Rare Caramel Mini,Rich & Rare Mini,0.003369,0.7,207.8


In [36]:
mar_resultsinDataFrame[(mar_resultsinDataFrame["Lift"] >= 100) & (mar_resultsinDataFrame["Confidence"] >= 0.5)]

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
5132,Branson VS Phantom,Branson VSOP Grande Champagne,0.004812,0.714286,106.020408
5133,Branson VS Phantom,Branson VSOP Royal,0.003369,0.5,115.444444
9093,Daviess County KSBW Cab Finished,Daviess County Kentucky Straight Bourbon Whiskey,0.003369,0.583333,173.166667
11771,Fireball Countdown Calendar Minis,Fireball Holiday Ornament Minis,0.003369,0.7,121.216667
11903,Flecha Azul Anejo,Flecha Azul Blanco,0.003369,1.0,230.888889
11904,Flecha Azul Anejo,Flecha Azul Reposado,0.003369,1.0,259.75
11905,Flecha Azul Blanco,Flecha Azul Reposado,0.00385,0.888889,230.888889
13133,Grey Goose Essences Watermelon & Basil,Grey Goose Essences White Peach & Rosemary,0.00385,0.888889,123.140741
13134,Grey Goose Essences Watermelon & Basil,Grey Goose Strawberry & Lemongrass,0.004331,1.0,188.909091
13135,Grey Goose Essences White Peach & Rosemary,Grey Goose Strawberry & Lemongrass,0.004331,0.6,113.345455


## April

In [37]:
apr_mba = mba_df[mba_df["Month"] == 4]
apr_mba.shape

(1317, 834)

In [38]:
num_rows = 1317
num_cols = 834
apr_transactions = mba_df.iloc[:num_rows, 4:num_cols].astype(str).values.tolist()

rules = apriori(transactions = apr_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
apr_results = list(rules)

def inspect(apr_results):
    lhs         = [tuple(result[2][0][0])[0] for result in apr_results]
    rhs         = [tuple(result[2][0][1])[0] for result in apr_results]
    supports    = [result[1] for result in apr_results]
    confidences = [result[2][0][2] for result in apr_results]
    lifts       = [result[2][0][3] for result in apr_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

apr_resultsinDataFrame = pd.DataFrame(inspect(apr_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [39]:
apr_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
32676,Truly Strawberry Lemonade,Truly Wild Berry,0.003037,1.0,329.25
32670,Truly Pineapple Mango,Truly Strawberry Lemonade,0.003037,1.0,329.25
28627,Natural Light Black Cherry Lemonade Vodka Mini,Natural Light Lemonade Vodka Mini,0.003037,1.0,329.25
32671,Truly Pineapple Mango,Truly Wild Berry,0.003037,1.0,329.25
24241,Issac Bowman Port Finished Bourbon,John J Bowman Bourbon,0.003037,0.8,263.4


In [40]:
apr_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
32676,Truly Strawberry Lemonade,Truly Wild Berry,0.003037,1.0,329.25
32670,Truly Pineapple Mango,Truly Strawberry Lemonade,0.003037,1.0,329.25
28627,Natural Light Black Cherry Lemonade Vodka Mini,Natural Light Lemonade Vodka Mini,0.003037,1.0,329.25
32671,Truly Pineapple Mango,Truly Wild Berry,0.003037,1.0,329.25
24241,Issac Bowman Port Finished Bourbon,John J Bowman Bourbon,0.003037,0.8,263.4


## May

In [41]:
may_mba = mba_df[mba_df["Month"] == 5]
may_mba.shape

(1263, 834)

In [42]:
may_mba.head()

Unnamed: 0,Store Number,Date,Month,Year,0,1,2,3,4,5,...,820,821,822,823,824,825,826,827,828,829
5,2500,2016-05-04,5,2016,Juarez Tequila Gold,Lady Bligh,,,,,...,,,,,,,,,,
6,2500,2016-05-11,5,2016,Smirnoff Orange,,,,,,...,,,,,,,,,,
7,2500,2016-05-18,5,2016,Kraken Black Spiced Rum,,,,,,...,,,,,,,,,,
8,2500,2016-05-25,5,2016,Godiva Liqueur,,,,,,...,,,,,,,,,,
64,2500,2020-05-06,5,2020,Jameson Mini,Malibu Coconut Rum,Dekuyper Luscious Peachtree,Tomatin Single Malt,RumChata Limon,Jim Beam Honey,...,,,,,,,,,,


In [43]:
num_rows = 1263
num_cols = 834
may_transactions = mba_df.iloc[:num_rows, 4:num_cols].astype(str).values.tolist()

rules = apriori(transactions = may_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
may_results = list(rules)

def inspect(may_results):
    lhs         = [tuple(result[2][0][0])[0] for result in may_results]
    rhs         = [tuple(result[2][0][1])[0] for result in may_results]
    supports    = [result[1] for result in may_results]
    confidences = [result[2][0][2] for result in may_results]
    lifts       = [result[2][0][3] for result in may_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

may_resultsinDataFrame = pd.DataFrame(inspect(may_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [44]:
may_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
33588,Truly Pineapple Mango,Truly Strawberry Lemonade,0.003167,1.0,315.75
29416,Natural Light Black Cherry Lemonade Vodka Mini,Natural Light Lemonade Vodka Mini,0.003167,1.0,315.75
33589,Truly Pineapple Mango,Truly Wild Berry,0.003167,1.0,315.75
33594,Truly Strawberry Lemonade,Truly Wild Berry,0.003167,1.0,315.75
21370,Flecha Azul Anejo,Flecha Azul Reposado,0.003167,1.0,252.6


## June

In [45]:
jun_mba = mba_df[mba_df["Month"] == 6]
jun_mba.shape

(1364, 834)

In [46]:
num_rows = 1364
num_cols = 834
jun_transactions = mba_df.iloc[:num_rows, 4:num_cols].astype(str).values.tolist()

rules = apriori(transactions = jun_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
jun_results = list(rules)

def inspect(jun_results):
    lhs         = [tuple(result[2][0][0])[0] for result in jun_results]
    rhs         = [tuple(result[2][0][1])[0] for result in jun_results]
    supports    = [result[1] for result in jun_results]
    confidences = [result[2][0][2] for result in jun_results]
    lifts       = [result[2][0][3] for result in jun_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

jun_resultsinDataFrame = pd.DataFrame(inspect(jun_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [47]:
jun_resultsinDataFrame.head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,1792 Bottled in Bond Bourbon,St Brendans Irish Cream,0.003666,0.833333,10.825397
1,1792 Bottled in Bond Bourbon,UV Vodka PET,0.003666,0.833333,4.96361
2,1792 Full Proof,Beams 8 Star Bl Whiskey,0.006598,0.6,3.621239
3,1792 Full Proof,Calvert Gin,0.005132,0.466667,3.929218
4,1792 Full Proof,Captain Morgan 100prf Spiced Rum,0.003666,0.333333,3.953623


In [48]:
jun_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
23842,Truly Pineapple Mango Mini,Truly Wild Berry Mini,0.004399,1.0,227.333333
15066,Flecha Azul Blanco,Flecha Azul Reposado,0.003666,0.833333,227.333333
16490,Grey Goose Essences White Peach & Rosemary,Grey Goose Strawberry & Lemongrass,0.003666,0.833333,227.333333
20519,McCormick Vodka Mini,Rumple Minze Peppermint Mini,0.005132,1.0,170.5
12060,Daviess County KSBW Cab Finished,Daviess County Kentucky Straight Bourbon Whiskey,0.003666,0.555556,151.555556


## July

In [49]:
jul_mba = mba_df[mba_df["Month"] == 7]
jul_mba.shape

(1984, 834)

In [50]:
num_rows = 1984
num_cols = 834
jul_transactions = mba_df.iloc[:num_rows, 4:num_cols].astype(str).values.tolist()

rules = apriori(transactions = jul_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
jul_results = list(rules)

def inspect(jul_results):
    lhs         = [tuple(result[2][0][0])[0] for result in jul_results]
    rhs         = [tuple(result[2][0][1])[0] for result in jul_results]
    supports    = [result[1] for result in jul_results]
    confidences = [result[2][0][2] for result in jul_results]
    lifts       = [result[2][0][3] for result in jul_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

jul_resultsinDataFrame = pd.DataFrame(inspect(jul_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [51]:
jan_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
23737,Truly Strawberry Lemonade,Truly Wild Berry,0.003156,1.0,316.833333
23735,Truly Pineapple Mango,Truly Wild Berry,0.003156,1.0,316.833333
23734,Truly Pineapple Mango,Truly Strawberry Lemonade,0.003156,1.0,316.833333
15056,Flecha Azul Anejo,Flecha Azul Reposado,0.003156,1.0,271.571429
20683,Natural Light Lemonade Vodka Mini,Natural Light Strawberry Lemonade Vodka Mini,0.003156,0.857143,271.571429


## August

In [52]:
aug_mba = mba_df[mba_df["Month"] == 8]
aug_mba.shape

(1302, 834)

In [53]:
num_rows = 1302
num_cols = 834
aug_transactions = mba_df.iloc[:num_rows, 4:num_cols].astype(str).values.tolist()

rules = apriori(transactions = aug_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
aug_results = list(rules)

def inspect(aug_results):
    lhs         = [tuple(result[2][0][0])[0] for result in aug_results]
    rhs         = [tuple(result[2][0][1])[0] for result in aug_results]
    supports    = [result[1] for result in aug_results]
    confidences = [result[2][0][2] for result in aug_results]
    lifts       = [result[2][0][3] for result in aug_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

aug_resultsinDataFrame = pd.DataFrame(inspect(aug_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [54]:
aug_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
32904,Truly Strawberry Lemonade,Truly Wild Berry,0.003072,1.0,325.5
28844,Natural Light Black Cherry Lemonade Vodka Mini,Natural Light Lemonade Vodka Mini,0.003072,1.0,325.5
32899,Truly Pineapple Mango,Truly Wild Berry,0.003072,1.0,325.5
32898,Truly Pineapple Mango,Truly Strawberry Lemonade,0.003072,1.0,325.5
29228,North Forest Kreme Maple Bourbon Pecan,North Forest Kreme Walnut,0.003072,0.8,260.4


## September

In [55]:
sept_mba = mba_df[mba_df["Month"] == 9]
sept_mba.shape

(1217, 834)

In [56]:
num_rows = 1217
num_cols = 834
sept_transactions = mba_df.iloc[:num_rows, 4:num_cols].astype(str).values.tolist()

rules = apriori(transactions = sept_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
sept_results = list(rules)

def inspect(sept_results):
    lhs         = [tuple(result[2][0][0])[0] for result in sept_results]
    rhs         = [tuple(result[2][0][1])[0] for result in sept_results]
    supports    = [result[1] for result in sept_results]
    confidences = [result[2][0][2] for result in sept_results]
    lifts       = [result[2][0][3] for result in sept_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

sept_resultsinDataFrame = pd.DataFrame(inspect(sept_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [57]:
sept_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
33922,Truly Pineapple Mango,Truly Strawberry Lemonade,0.003287,1.0,304.25
29713,Natural Light Black Cherry Lemonade Vodka Mini,Natural Light Lemonade Vodka Mini,0.003287,1.0,304.25
33928,Truly Strawberry Lemonade,Truly Wild Berry,0.003287,1.0,304.25
33923,Truly Pineapple Mango,Truly Wild Berry,0.003287,1.0,304.25
30104,North Forest Kreme Maple Bourbon Pecan,North Forest Kreme Walnut,0.003287,0.8,243.4


## October

In [58]:
oct_mba = mba_df[mba_df["Month"] == 10]
oct_mba.shape

(709, 834)

In [59]:
num_rows = 709
num_cols = 834
oct_transactions = mba_df.iloc[:num_rows, 4:num_cols].astype(str).values.tolist()

rules = apriori(transactions = oct_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
oct_results = list(rules)

def inspect(oct_results):
    lhs         = [tuple(result[2][0][0])[0] for result in oct_results]
    rhs         = [tuple(result[2][0][1])[0] for result in oct_results]
    supports    = [result[1] for result in oct_results]
    confidences = [result[2][0][2] for result in oct_results]
    lifts       = [result[2][0][3] for result in oct_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

oct_resultsinDataFrame = pd.DataFrame(inspect(oct_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [60]:
oct_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
52944,Truly Pineapple Mango Mini,Truly Wild Berry Mini,0.004231,1.0,236.333333
52943,Truly Pineapple Mango,Truly Wild Berry,0.004231,1.0,236.333333
52942,Truly Pineapple Mango,Truly Strawberry Lemonade,0.004231,1.0,236.333333
36489,Grey Goose Essences Watermelon & Basil Mini,Grey Goose Essences White Peach & Rosemary Mini,0.004231,1.0,236.333333
36475,Grey Goose Essences Strawberry & Lemongrass Mini,Grey Goose Essences White Peach & Rosemary Mini,0.004231,1.0,236.333333


## November

In [61]:
nov_mba = mba_df[mba_df["Month"] == 11]
nov_mba.shape

(1385, 834)

In [62]:
num_rows = 1385
num_cols = 834
nov_transactions = mba_df.iloc[:num_rows, 4:num_cols].astype(str).values.tolist()

rules = apriori(transactions = nov_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
nov_results = list(rules)

def inspect(nov_results):
    lhs         = [tuple(result[2][0][0])[0] for result in nov_results]
    rhs         = [tuple(result[2][0][1])[0] for result in nov_results]
    supports    = [result[1] for result in nov_results]
    confidences = [result[2][0][2] for result in nov_results]
    lifts       = [result[2][0][3] for result in nov_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

nov_resultsinDataFrame = pd.DataFrame(inspect(nov_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [63]:
nov_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
16599,Grey Goose Essences White Peach & Rosemary,Grey Goose Strawberry & Lemongrass,0.00361,0.833333,230.833333
23999,Truly Pineapple Mango Mini,Truly Wild Berry Mini,0.004332,1.0,230.833333
15174,Flecha Azul Blanco,Flecha Azul Reposado,0.00361,0.833333,230.833333
21299,North Forest Kreme Maple Bourbon Pecan,North Forest Kreme Walnut,0.00361,0.833333,230.833333
20652,McCormick Vodka Mini,Rumple Minze Peppermint Mini,0.005054,1.0,173.125


## December

In [64]:
dec_mba = mba_df[mba_df["Month"] == 12]
dec_mba.shape

(1753, 834)

In [65]:
num_rows = 1753
num_cols = 834
dec_transactions = mba_df.iloc[:num_rows, 4:num_cols].astype(str).values.tolist()

rules = apriori(transactions = dec_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
dec_results = list(rules)

def inspect(dec_results):
    lhs         = [tuple(result[2][0][0])[0] for result in dec_results]
    rhs         = [tuple(result[2][0][1])[0] for result in dec_results]
    supports    = [result[1] for result in dec_results]
    confidences = [result[2][0][2] for result in dec_results]
    lifts       = [result[2][0][3] for result in dec_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

dec_resultsinDataFrame = pd.DataFrame(inspect(dec_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [66]:
dec_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
23925,Truly Strawberry Lemonade,Truly Wild Berry,0.003423,1.0,292.166667
23923,Truly Pineapple Mango,Truly Wild Berry,0.003423,1.0,292.166667
20874,Natural Light Black Cherry Lemonade Vodka Mini,Natural Light Lemonade Vodka Mini,0.003423,1.0,292.166667
23922,Truly Pineapple Mango,Truly Strawberry Lemonade,0.003423,1.0,292.166667
15178,Flecha Azul Blanco,Flecha Azul Reposado,0.003423,0.857143,250.428571


# What happens if we condition on different types of stores (chain-super markets, specialized liquor stores, small convenience stores, news stands)?

In [67]:
mba_df2 = pd.read_csv("mba_df2.csv", index_col = [0]) # index_col = [0] removes unnmaed index column
path = 'mba_df2.feather'
feather.write_dataframe(mba_df2, path)
mba_df2 = feather.read_dataframe(path)

In [68]:
mba_df2.head()

Unnamed: 0,Store Number,Date,Month,Year,County Number,Store Category,0,1,2,3,...,820,821,822,823,824,825,826,827,828,829
0,2500,2016-03-23,3,2016,85,Food Store,Hawkeye Vodka,,,,...,,,,,,,,,,
1,2500,2016-03-30,3,2016,85,Food Store,Popov Vodka 80 Prf,Old Crow,Captain Morgan Spiced Rum,Five Star Pet,...,,,,,,,,,,
2,2500,2016-04-13,4,2016,85,Food Store,Old Crow,Paramount Triple Sec,Bushmills Irish Whiskey,,...,,,,,,,,,,
3,2500,2016-04-20,4,2016,85,Food Store,Burnett's Gin London Dry,Bacardi 151 Prf,Bailey's Vanilla Cinnamon,1800 Silver Tequila,...,,,,,,,,,,
4,2500,2016-04-27,4,2016,85,Food Store,Southern Comfort,,,,...,,,,,,,,,,


In [69]:
mba_df2["Store Category"].unique()

array(['Food Store', 'Supermarket', 'Wine & Spirits', 'Drugstore',
       'Food & Drugstore', 'Gas', 'Fast & Fresh'], dtype=object)

### Food Store

In [70]:
food_mba = mba_df2[mba_df2["Store Category"] == "Food Store"]
food_mba.shape

(7683, 836)

In [71]:
num_rows = 7683
num_cols = 836
food_transactions = mba_df2.iloc[:num_rows, 6:num_cols].astype(str).values.tolist()

rules = apriori(transactions = food_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
food_results = list(rules)

def inspect(food_results):
    lhs         = [tuple(result[2][0][0])[0] for result in food_results]
    rhs         = [tuple(result[2][0][1])[0] for result in food_results]
    supports    = [result[1] for result in food_results]
    confidences = [result[2][0][2] for result in food_results]
    lifts       = [result[2][0][3] for result in food_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

food_resultsinDataFrame = pd.DataFrame(inspect(food_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [72]:
food_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
3688,Truly Strawberry Lemonade,Truly Wild Berry,0.003124,0.923077,295.5
3685,Truly Pineapple Mango,Truly Strawberry Lemonade,0.003384,1.0,295.5
3686,Truly Pineapple Mango,Truly Wild Berry,0.003124,0.923077,295.5
3687,Truly Pineapple Mango Mini,Truly Wild Berry Mini,0.004035,1.0,207.648649
3726,White Dog Mash #1,White Dog Wheat,0.005076,1.0,187.390244


### Supermarket

In [73]:
supermarket_mba = mba_df2[mba_df2["Store Category"] == "Supermarket"]
supermarket_mba.shape

(4633, 836)

In [74]:
num_rows = 4633
num_cols = 836
supermarket_transactions = mba_df2.iloc[:num_rows, 6:num_cols].astype(str).values.tolist()

rules = apriori(transactions = supermarket_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
supermarket_results = list(rules)

def inspect(supermarket_results):
    lhs         = [tuple(result[2][0][0])[0] for result in supermarket_results]
    rhs         = [tuple(result[2][0][1])[0] for result in supermarket_results]
    supports    = [result[1] for result in supermarket_results]
    confidences = [result[2][0][2] for result in supermarket_results]
    lifts       = [result[2][0][3] for result in supermarket_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

supermarket_resultsinDataFrame = pd.DataFrame(inspect(supermarket_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [75]:
supermarket_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
5774,Truly Pineapple Mango Mini,Truly Wild Berry Mini,0.004317,1.0,220.619048
5817,White Dog Mash #1,White Dog Wheat,0.004317,1.0,210.590909
5816,White Dog Mash #1,White Dog Straight Rye,0.004317,1.0,210.590909
5818,White Dog Straight Rye,White Dog Wheat,0.004749,1.0,210.590909
1633,Branson VS Phantom,Branson VSOP Grande Champagne,0.003885,0.692308,133.644231


### Wine & Spirits

In [76]:
wine_spirits_mba = mba_df2[mba_df2["Store Category"] == "Wine & Spirits"]
wine_spirits_mba.shape

(3164, 836)

In [77]:
num_rows = 3164
num_cols = 836
wine_spirit_transactions = mba_df2.iloc[:num_rows, 6:num_cols].astype(str).values.tolist()

rules = apriori(transactions = wine_spirit_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
wine_spirit_results = list(rules)

def inspect(supermarket_results):
    lhs         = [tuple(result[2][0][0])[0] for result in wine_spirit_results]
    rhs         = [tuple(result[2][0][1])[0] for result in wine_spirit_results]
    supports    = [result[1] for result in wine_spirit_results]
    confidences = [result[2][0][2] for result in wine_spirit_results]
    lifts       = [result[2][0][3] for result in wine_spirit_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

wine_spirit_resultsinDataFrame = pd.DataFrame(inspect(wine_spirit_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [78]:
wine_spirit_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
12432,Truly Pineapple Mango Mini,Truly Wild Berry Mini,0.003477,1.0,287.636364
7788,Flecha Azul Blanco,Flecha Azul Reposado,0.003477,0.916667,263.666667
8826,Grey Goose Essences Watermelon & Basil,Grey Goose Strawberry & Lemongrass,0.003477,1.0,243.384615
9311,Issac Bowman Port Finished Bourbon,John J Bowman Bourbon,0.003161,0.833333,239.69697
12488,White Dog Mash #1,White Dog Straight Rye,0.005057,1.0,186.117647


### Drugstore

In [79]:
drugstore_mba = mba_df2[mba_df2["Store Category"] == "Drugstore"]
drugstore_mba.shape

(1341, 836)

In [80]:
num_rows = 1341
num_cols = 836
drugstore_transactions = mba_df2.iloc[:num_rows, 6:num_cols].astype(str).values.tolist()

rules = apriori(transactions = drugstore_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
drugstore_results = list(rules)

def inspect(drugstore_results):
    lhs         = [tuple(result[2][0][0])[0] for result in drugstore_results]
    rhs         = [tuple(result[2][0][1])[0] for result in drugstore_results]
    supports    = [result[1] for result in drugstore_results]
    confidences = [result[2][0][2] for result in drugstore_results]
    lifts       = [result[2][0][3] for result in drugstore_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

drugstore_resultsinDataFrame = pd.DataFrame(inspect(drugstore_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [81]:
drugstore_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
23151,Truly Pineapple Mango Mini,Truly Wild Berry Mini,0.004474,1.0,223.5
14677,Flecha Azul Blanco,Flecha Azul Reposado,0.003729,0.833333,223.5
19936,McCormick Vodka Mini,Rumple Minze Peppermint Mini,0.00522,1.0,167.625
11737,Daviess County KSBW Cab Finished,Daviess County Kentucky Straight Bourbon Whiskey,0.003729,0.555556,149.0
23091,Three Olives Pomegranate,Three Olives Raspberry,0.004474,0.75,143.678571


### Food & Drugstore

In [82]:
food_drug_mba = mba_df2[mba_df2["Store Category"] == "Food & Drugstore"]
food_drug_mba.shape

(381, 836)

In [83]:
num_rows = 381
num_cols = 836
food_drug_transactions = mba_df2.iloc[:num_rows, 6:num_cols].astype(str).values.tolist()

rules = apriori(transactions = food_drug_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
food_drug_results = list(rules)

def inspect(food_drug_results):
    lhs         = [tuple(result[2][0][0])[0] for result in food_drug_results]
    rhs         = [tuple(result[2][0][1])[0] for result in food_drug_results]
    supports    = [result[1] for result in food_drug_results]
    confidences = [result[2][0][2] for result in food_drug_results]
    lifts       = [result[2][0][3] for result in food_drug_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

food_drug_resultsinDataFrame = pd.DataFrame(inspect(food_drug_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [84]:
food_drug_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
59532,Milagro Select Barrel Reposado,Milagro Select Barrel Silver Tequila,0.005249,1.0,190.5
67679,Truly Pineapple Mango,Truly Strawberry Lemonade,0.005249,1.0,190.5
52049,Jameson Orange Mini,Revel Stoke Root of Evil Root Beer,0.005249,1.0,190.5
31193,Codigo 1530 Tequila Reposado,Codigo 1530 Tequila Rosa Blanco,0.005249,1.0,190.5
29935,Cincoro Anejo Tequila,Cincoro Reposado Tequila,0.005249,1.0,190.5


### Gas

In [85]:
gas_mba = mba_df2[mba_df2["Store Category"] == "Gas"]
gas_mba.shape

(245, 836)

In [86]:
num_rows = 245
num_cols = 836
gas_transactions = mba_df2.iloc[:num_rows, 6:num_cols].astype(str).values.tolist()

rules = apriori(transactions = gas_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
gas_results = list(rules)

def inspect(gas_results):
    lhs         = [tuple(result[2][0][0])[0] for result in gas_results]
    rhs         = [tuple(result[2][0][1])[0] for result in gas_results]
    supports    = [result[1] for result in gas_results]
    confidences = [result[2][0][2] for result in gas_results]
    lifts       = [result[2][0][3] for result in gas_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

gas_resultsinDataFrame = pd.DataFrame(inspect(gas_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [87]:
gas_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
15167,Appleton Estate Signature Blend,Early Times 36 Mo Ky Str Bourbon PET,0.004082,1.0,245.0
62807,Combier Pamplemousse Rose Liqueur,Glen Moray Cabernet,0.004082,1.0,245.0
104824,Jagermeister Mini Meisters,McCormick Vodka,0.004082,1.0,245.0
62894,Combier Peche Vigne Liqueur,Evan Williams Vintage,0.004082,1.0,245.0
35846,Blue Chair Bay Banana Rum Cream,Burnetts Peach Mini,0.004082,1.0,245.0


### Fast & Fresh

In [88]:
fast_fresh_mba = mba_df2[mba_df2["Store Category"] == "Fast & Fresh"]
fast_fresh_mba.shape

(663, 836)

In [89]:
num_rows = 663
num_cols = 836
fast_fresh_transactions = mba_df2.iloc[:num_rows, 6:num_cols].astype(str).values.tolist()

rules = apriori(transactions = fast_fresh_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
fast_fresh_results = list(rules)

def inspect(fast_fresh_results):
    lhs         = [tuple(result[2][0][0])[0] for result in fast_fresh_results]
    rhs         = [tuple(result[2][0][1])[0] for result in fast_fresh_results]
    supports    = [result[1] for result in fast_fresh_results]
    confidences = [result[2][0][2] for result in fast_fresh_results]
    lifts       = [result[2][0][3] for result in fast_fresh_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

fast_fresh_resultsinDataFrame = pd.DataFrame(inspect(fast_fresh_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [90]:
fast_fresh_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
50139,Devils Sister Kentucky 5YR,Devils Sister Peach,0.003017,1.0,331.5
51508,Door County Cherry Vodka,El Mayor Extra Anejo Sherry Cask Finish Tequila,0.003017,1.0,331.5
60080,Flecha Azul Anejo,Flecha Azul Blanco,0.003017,1.0,331.5
60081,Flecha Azul Anejo,Flecha Azul Reposado,0.003017,1.0,331.5
60084,Flecha Azul Blanco,Flecha Azul Reposado,0.003017,1.0,331.5


## By season

- Spring: March, April, May (3, 4, 5)
- Summer: June, July, August (6, 7, 8)
- Fall: September, October, November (9, 10, 11)
- Winter: December, January, February (12, 1, 2)

In [91]:
mba_df2.head()

Unnamed: 0,Store Number,Date,Month,Year,County Number,Store Category,0,1,2,3,...,820,821,822,823,824,825,826,827,828,829
0,2500,2016-03-23,3,2016,85,Food Store,Hawkeye Vodka,,,,...,,,,,,,,,,
1,2500,2016-03-30,3,2016,85,Food Store,Popov Vodka 80 Prf,Old Crow,Captain Morgan Spiced Rum,Five Star Pet,...,,,,,,,,,,
2,2500,2016-04-13,4,2016,85,Food Store,Old Crow,Paramount Triple Sec,Bushmills Irish Whiskey,,...,,,,,,,,,,
3,2500,2016-04-20,4,2016,85,Food Store,Burnett's Gin London Dry,Bacardi 151 Prf,Bailey's Vanilla Cinnamon,1800 Silver Tequila,...,,,,,,,,,,
4,2500,2016-04-27,4,2016,85,Food Store,Southern Comfort,,,,...,,,,,,,,,,


### Spring

In [92]:
spring_df = mba_df2[(mba_df2["Month"] == 3) | (mba_df2["Month"] == 4) | (mba_df2["Month"] == 5)]
spring_df.shape

(4658, 836)

In [93]:
num_rows = 4658
num_cols = 836
spring_transactions = mba_df2.iloc[:num_rows, 6:num_cols].astype(str).values.tolist()

rules = apriori(transactions = spring_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
spring_results = list(rules)

def inspect(spring_results):
    lhs         = [tuple(result[2][0][0])[0] for result in spring_results]
    rhs         = [tuple(result[2][0][1])[0] for result in spring_results]
    supports    = [result[1] for result in spring_results]
    confidences = [result[2][0][2] for result in spring_results]
    lifts       = [result[2][0][3] for result in spring_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

spring_resultsinDataFrame = pd.DataFrame(inspect(spring_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [94]:
spring_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
5877,Truly Pineapple Mango Mini,Truly Wild Berry Mini,0.004294,1.0,221.809524
5920,White Dog Mash #1,White Dog Straight Rye,0.004294,1.0,211.727273
5921,White Dog Mash #1,White Dog Wheat,0.004294,1.0,211.727273
5922,White Dog Straight Rye,White Dog Wheat,0.004723,1.0,211.727273
2814,Deep Eddy Cranberry Mini,Deep Eddy Peach Mini,0.003006,0.7,141.765217


In [95]:
spring_resultsinDataFrame.sort_values(by=["Support"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
759,Ancient Age Bourbon,Buffalo Trace Bourbon,0.049377,0.699088,5.218514
1693,Buffalo Trace Bourbon,Eagle Rare,0.04766,0.355769,6.576084
1520,Blantons Bourbon,Buffalo Trace Bourbon,0.045298,0.861224,6.42882
4608,Kinky Blue Mini,Kinky Pink Mini,0.04401,0.457589,3.991481
1704,Bulleit 95 Rye,Bulleit Bourbon,0.036926,0.452632,3.02925


### Summer

In [96]:
summer_df = mba_df2[(mba_df2["Month"] == 6) | (mba_df2["Month"] == 7) | (mba_df2["Month"] == 8)]
summer_df.shape

(4650, 836)

In [97]:
num_rows = 4650
num_cols = 836
summer_transactions = mba_df2.iloc[:num_rows, 6:num_cols].astype(str).values.tolist()

rules = apriori(transactions = summer_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
summer_results = list(rules)

def inspect(summer_results):
    lhs         = [tuple(result[2][0][0])[0] for result in summer_results]
    rhs         = [tuple(result[2][0][1])[0] for result in summer_results]
    supports    = [result[1] for result in summer_results]
    confidences = [result[2][0][2] for result in summer_results]
    lifts       = [result[2][0][3] for result in summer_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

summer_resultsinDataFrame = pd.DataFrame(inspect(summer_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [98]:
summer_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
5854,Truly Pineapple Mango Mini,Truly Wild Berry Mini,0.004301,1.0,221.428571
5899,White Dog Straight Rye,White Dog Wheat,0.004731,1.0,211.363636
5898,White Dog Mash #1,White Dog Wheat,0.004301,1.0,211.363636
5897,White Dog Mash #1,White Dog Straight Rye,0.004301,1.0,211.363636
2811,Deep Eddy Cranberry Mini,Deep Eddy Peach Mini,0.003011,0.7,141.521739


### Fall

In [99]:
fall_df = mba_df2[(mba_df2["Month"] == 9) | (mba_df2["Month"] == 10) | (mba_df2["Month"] == 11)]
fall_df.shape

(3311, 836)

In [100]:
num_rows = 3311
num_cols = 836
fall_transactions = mba_df2.iloc[:num_rows, 6:num_cols].astype(str).values.tolist()

rules = apriori(transactions = fall_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
fall_results = list(rules)

def inspect(fall_results):
    lhs         = [tuple(result[2][0][0])[0] for result in fall_results]
    rhs         = [tuple(result[2][0][1])[0] for result in fall_results]
    supports    = [result[1] for result in fall_results]
    confidences = [result[2][0][2] for result in fall_results]
    lifts       = [result[2][0][3] for result in fall_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

fall_resultsinDataFrame = pd.DataFrame(inspect(fall_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [101]:
fall_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
8554,Grey Goose Essences Watermelon & Basil,Grey Goose Strawberry & Lemongrass,0.003322,1.0,254.692308
7542,Flecha Azul Blanco,Flecha Azul Reposado,0.003322,0.846154,254.692308
9032,Issac Bowman Port Finished Bourbon,John J Bowman Bourbon,0.00302,0.833333,250.833333
12040,Truly Pineapple Mango Mini,Truly Wild Berry Mini,0.003926,1.0,236.5
12097,White Dog Straight Rye,White Dog Wheat,0.005134,1.0,194.764706


### Winter

In [102]:
winter_df = mba_df2[(mba_df2["Month"] == 12) | (mba_df2["Month"] == 1) | (mba_df2["Month"] == 2)]
winter_df.shape

(5491, 836)

In [103]:
num_rows = 5491
num_cols = 836
winter_transactions = mba_df2.iloc[:num_rows, 6:num_cols].astype(str).values.tolist()

rules = apriori(transactions = winter_transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)
winter_results = list(rules)

def inspect(winter_results):
    lhs         = [tuple(result[2][0][0])[0] for result in winter_results]
    rhs         = [tuple(result[2][0][1])[0] for result in winter_results]
    supports    = [result[1] for result in winter_results]
    confidences = [result[2][0][2] for result in winter_results]
    lifts       = [result[2][0][3] for result in winter_results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

winter_resultsinDataFrame = pd.DataFrame(inspect(winter_results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

In [104]:
winter_resultsinDataFrame.sort_values(by=["Lift"], ascending = False).head()

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
4581,White Dog Mash #1,White Dog Wheat,0.003824,1.0,238.73913
4582,White Dog Straight Rye,White Dog Wheat,0.004189,1.0,238.73913
4580,White Dog Mash #1,White Dog Straight Rye,0.003824,1.0,238.73913
4542,Truly Pineapple Mango Mini,Truly Wild Berry Mini,0.004189,1.0,228.791667
4453,Smirnoff Zero Sugar Infusion Cucumber & Lime Mini,Smirnoff Zero Sugar Infusion Watermelon & Mint...,0.003278,0.818182,179.705455
