# D212 Assessment 3

#### Import relevant packages

In [1]:
# Pandas for operation of dataframes
import pandas as pd

import math

# Import permutations from the itertools module
from itertools import permutations

# Import the transaction encoder function from mlxtend
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [2]:
# Read in dataset as dataframe with pandas read_csv function
teleco_df = pd.read_csv('teleco_market_basket.csv')

In [3]:
# Visualize the head of the dataset with pandas head function
teleco_df.head()

Unnamed: 0,Item01,Item02,Item03,Item04,Item05,Item06,Item07,Item08,Item09,Item10,Item11,Item12,Item13,Item14,Item15,Item16,Item17,Item18,Item19,Item20
0,,,,,,,,,,,,,,,,,,,,
1,Logitech M510 Wireless mouse,HP 63 Ink,HP 65 ink,nonda USB C to USB Adapter,10ft iPHone Charger Cable,HP 902XL ink,Creative Pebble 2.0 Speakers,Cleaning Gel Universal Dust Cleaner,Micro Center 32GB Memory card,YUNSONG 3pack 6ft Nylon Lightning Cable,TopMate C5 Laptop Cooler pad,Apple USB-C Charger cable,HyperX Cloud Stinger Headset,TONOR USB Gaming Microphone,Dust-Off Compressed Gas 2 pack,3A USB Type C Cable 3 pack 6FT,HOVAMP iPhone charger,SanDisk Ultra 128GB card,FEEL2NICE 5 pack 10ft Lighning cable,FEIYOLD Blue light Blocking Glasses
2,,,,,,,,,,,,,,,,,,,,
3,Apple Lightning to Digital AV Adapter,TP-Link AC1750 Smart WiFi Router,Apple Pencil,,,,,,,,,,,,,,,,,
4,,,,,,,,,,,,,,,,,,,,


In [4]:
# Remove completely null records
teleco_df = teleco_df.dropna(subset=['Item01'])

In [5]:
teleco_df = teleco_df.reset_index(drop=True)
teleco_df.insert(0, 'Basket_id', range(1, 1 + len(teleco_df)))

In [6]:
basket_id_list = []
item_list = []
cnt_list = []

for col in teleco_df.columns[1:]:
    for i in range(1, len(teleco_df)):
        if teleco_df[col].isnull().iloc[i,]==False:
            basket_id_list.append(teleco_df['Basket_id'].iloc[i,])
            item_list.append(teleco_df[col].iloc[i,])
            cnt_list.append(1)

In [7]:
basket_df = pd.DataFrame()
basket_df['Basket_id'] = basket_id_list
basket_df['Item desc'] = item_list
basket_df['Cnt'] = 1

In [8]:
basket_grp = (basket_df.groupby(['Basket_id', 'Item desc'])['Cnt'].sum().unstack().reset_index().fillna(0).set_index('Basket_id'))

In [9]:
def normalize(x):
    if x <= 0:
        return 0
    if x >= 1:
        return 1

basket_enc = basket_grp.applymap(normalize)

basket_std = basket_enc[(basket_enc > 0).sum(axis=1) >= 2]

In [10]:
print(basket_std.shape)

(5746, 119)


In [11]:
basket_std.head()

Item desc,10ft iPHone Charger Cable,10ft iPHone Charger Cable 2 Pack,3 pack Nylon Braided Lightning Cable,3A USB Type C Cable 3 pack 6FT,5pack Nylon Braided USB C cables,ARRIS SURFboard SB8200 Cable Modem,Anker 2-in-1 USB Card Reader,Anker 4-port USB hub,Anker USB C to HDMI Adapter,Apple Lightning to Digital AV Adapter,...,hP 65 Tri-color ink,iFixit Pro Tech Toolkit,iPhone 11 case,iPhone 12 Charger cable,iPhone 12 Pro case,iPhone 12 case,iPhone Charger Cable Anker 6ft,iPhone SE case,nonda USB C to USB Adapter,seenda Wireless mouse
Basket_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
pd.set_option('display.max_rows', None)
support = basket_std.mean()

In [13]:
print(support.sort_values(ascending=False))

Item desc
Dust-Off Compressed Gas 2 pack                0.294814
Apple Pencil                                  0.218935
VIVO Dual LCD Monitor Desk mount              0.218239
HP 61 ink                                     0.201183
USB 2.0 Printer cable                         0.200139
Screen Mom Screen Cleaner kit                 0.163418
Apple USB-C Charger cable                     0.159763
SanDisk Ultra 64GB card                       0.124608
Nylon Braided Lightning to USB cable          0.121128
Stylus Pen for iPad                           0.117299
Apple Lightning to Digital AV Adapter         0.110338
Syntech USB C to USB Adapter                  0.100766
TopMate C5 Laptop Cooler pad                  0.091890
Logitech M510 Wireless mouse                  0.090672
Anker USB C to HDMI Adapter                   0.085973
HP 63XL Ink                                   0.085973
FEIYOLD Blue light Blocking Glasses           0.081796
Cat8 Ethernet Cable                           0.079360


In [14]:
rules = list(permutations(basket_std.columns, 2))

In [15]:
frequent_itemsets = apriori(basket_std, 
                           min_support = 0.01,
                           max_len = 2,
                           use_colnames = True)

In [16]:
print(frequent_itemsets.sort_values('support', ascending=False))

      support                                           itemsets
21   0.294814                   (Dust-Off Compressed Gas 2 pack)
10   0.218935                                     (Apple Pencil)
75   0.218239                 (VIVO Dual LCD Monitor Desk mount)
27   0.201183                                        (HP 61 ink)
71   0.200139                            (USB 2.0 Printer cable)
66   0.163418                    (Screen Mom Screen Cleaner kit)
11   0.159763                        (Apple USB-C Charger cable)
65   0.124608                          (SanDisk Ultra 64GB card)
49   0.121128             (Nylon Braided Lightning to USB cable)
67   0.117299                              (Stylus Pen for iPad)
8    0.110338            (Apple Lightning to Digital AV Adapter)
68   0.100766                     (Syntech USB C to USB Adapter)
70   0.091890                     (TopMate C5 Laptop Cooler pad)
41   0.090672                     (Logitech M510 Wireless mouse)
7    0.085973            

In [17]:
rules = association_rules(frequent_itemsets,
                           metric = 'support',
                           min_threshold = 0.0015)

In [18]:
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Apple Pencil),(10ft iPHone Charger Cable 2 Pack),0.218935,0.064567,0.011834,0.054054,0.837182,-0.002302,0.988887
1,(10ft iPHone Charger Cable 2 Pack),(Apple Pencil),0.064567,0.218935,0.011834,0.183288,0.837182,-0.002302,0.956354
2,(Dust-Off Compressed Gas 2 pack),(10ft iPHone Charger Cable 2 Pack),0.294814,0.064567,0.030108,0.102125,1.581701,0.011073,1.04183
3,(10ft iPHone Charger Cable 2 Pack),(Dust-Off Compressed Gas 2 pack),0.064567,0.294814,0.030108,0.466307,1.581701,0.011073,1.321334
4,(FEIYOLD Blue light Blocking Glasses),(10ft iPHone Charger Cable 2 Pack),0.081796,0.064567,0.01166,0.142553,2.207845,0.006379,1.090952


In [19]:
BREAK

NameError: name 'BREAK' is not defined

In [None]:
# Convert teleco dataframe to a list of lists
transactions = teleco_df.values.tolist()

In [None]:
flattened = [i for t in transactions for i in t]
items = list(set(flattened))

In [None]:
# items = [ i for t in transactions for i in t if not(pd.isnull(i)) == True]


In [None]:
rules = list(permutations(items, 2))

In [None]:
print('There are {} rules from the observed dataset.'.format(len(rules)))

In [None]:
# new_list = [item for item in transactions if str(item) != 'nan']

new_list = [item for transaction in transactions for item in transaction if str(item) != 'nan']

In [None]:
new_list[0] = 'test'

In [None]:
new_list = []
counter = 0

for transaction in transactions:
    print(transaction)
    for item in transaction:
        print(item)
    print(counter)
    counter += 1

In [None]:
print(transactions)

In [None]:
encoder = TransactionEncoder().fit(transactions)

In [None]:
onehot = encoder.transform(transactions)

In [None]:
onehot = pd.DataFrame(onehot, columns = encoder.columns_)

In [None]:
onehot.head()