# IMPORTACIÓN DE LIBRERÍAS

In [1]:
import pandas as pd
import numpy as np
import warnings
import mlxtend.preprocessing
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

warnings.filterwarnings('ignore')

# OBTENCIÓN DE DATOS

In [2]:
# https://archive.ics.uci.edu/ml/datasets/online+retail

# This is a transnational data set which contains all the transactions occurring between 01/12/2010 and
# 09/12/2011 for a UK-based and registered non-store online retail.The company mainly sells unique 
# all-occasion gifts. Many customers of the company are wholesalers.

df_excel = pd.read_excel('../input/Online Retail.xlsx', encoding='latin1')

In [3]:
# Primeras filas del dataframe
df_excel.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [4]:
data = df_excel[(df_excel.InvoiceDate > '2010-12-01 00:00:00') & (df_excel.InvoiceDate <= '2011-12-31 00:00:00')]
data.sort_values(by='InvoiceDate')
data.head()   

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [5]:
data.shape

(541909, 8)

# ANÁLISIS EXPLORATORIO

In [6]:
# Información básica del dataframe
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 541909 entries, 0 to 541908
Data columns (total 8 columns):
InvoiceNo      541909 non-null object
StockCode      541909 non-null object
Description    540455 non-null object
Quantity       541909 non-null int64
InvoiceDate    541909 non-null datetime64[ns]
UnitPrice      541909 non-null float64
CustomerID     406829 non-null float64
Country        541909 non-null object
dtypes: datetime64[ns](1), float64(2), int64(1), object(4)
memory usage: 37.2+ MB


In [7]:
# Market basket analysis identifica relaciones entre los artículos comprados por todos los clientes => Las 
# variables de interés son 'InvoiceNo' y 'CustomerID'. 

In [8]:
# Valores de 'InvoiceNo'
data.InvoiceNo.value_counts()

573585     1114
581219      749
581492      731
580729      721
558475      705
579777      687
581217      676
537434      675
580730      662
538071      652
580367      650
580115      645
581439      635
580983      629
578344      622
538349      620
578347      606
537638      601
537237      597
536876      593
576617      593
536592      592
537823      591
576837      585
579508      578
577078      572
537240      568
577358      561
576618      552
576840      544
           ... 
C554343       1
C539260       1
545990        1
567824        1
567920        1
C577392       1
567918        1
C562051       1
546002        1
546004        1
567895        1
567869        1
546010        1
546014        1
C564949       1
546016        1
567863        1
546018        1
567862        1
546020        1
546021        1
546023        1
546024        1
546025        1
546026        1
567861        1
546029        1
C553514       1
546033        1
C564210       1
Name: InvoiceNo, Length:

In [9]:
# Valores únicos de 'InvoiceNo'
invoices = list(data.InvoiceNo.unique())
len(invoices)

25900

In [10]:
# Valores de 'CustomerID'
data.CustomerID.value_counts()

17841.0    7983
14911.0    5903
14096.0    5128
12748.0    4642
14606.0    2782
15311.0    2491
14646.0    2085
13089.0    1857
13263.0    1677
14298.0    1640
15039.0    1508
14156.0    1420
18118.0    1284
14159.0    1212
14796.0    1165
15005.0    1160
16033.0    1152
14056.0    1128
14769.0    1094
17511.0    1076
13081.0    1061
14527.0    1011
16549.0     981
14456.0     977
15719.0     938
15555.0     925
16931.0     898
17811.0     872
14505.0     803
12415.0     778
           ... 
15590.0       1
16078.0       1
16093.0       1
13703.0       1
18068.0       1
14705.0       1
16138.0       1
16144.0       1
16148.0       1
15562.0       1
15524.0       1
12503.0       1
12505.0       1
15510.0       1
15316.0       1
17956.0       1
17948.0       1
16323.0       1
14090.0       1
17925.0       1
17923.0       1
16428.0       1
14119.0       1
15389.0       1
15369.0       1
13391.0       1
16579.0       1
13366.0       1
17763.0       1
17846.0       1
Name: CustomerID, Length

In [11]:
# Valores únicos de 'CustomerID'
customers = list(data.CustomerID.unique())
len(customers)

4373

# PREPROCESADO DE DATOS

## Eliminación de abonos (valores negativos)

In [12]:
# Los valores negativos en 'Quantity' o 'UnitPrice' se eliminan. 
data = data.loc[data.Quantity > 0]
data = data.loc[data.UnitPrice > 0]
data.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [13]:
# Selección de las columnas de interés
data = data[['InvoiceNo', 'Description']]
data.head()

Unnamed: 0,InvoiceNo,Description
0,536365,WHITE HANGING HEART T-LIGHT HOLDER
1,536365,WHITE METAL LANTERN
2,536365,CREAM CUPID HEARTS COAT HANGER
3,536365,KNITTED UNION FLAG HOT WATER BOTTLE
4,536365,RED WOOLLY HOTTIE WHITE HEART.


## Missing values

In [14]:
# Missing values, cantidad
data.isnull().sum()

InvoiceNo      0
Description    0
dtype: int64

In [15]:
# Missing values, porcentaje
data.isnull().mean()

InvoiceNo      0.0
Description    0.0
dtype: float64

In [16]:
# Eliminación
data = data.dropna(axis=0)
data.head()

Unnamed: 0,InvoiceNo,Description
0,536365,WHITE HANGING HEART T-LIGHT HOLDER
1,536365,WHITE METAL LANTERN
2,536365,CREAM CUPID HEARTS COAT HANGER
3,536365,KNITTED UNION FLAG HOT WATER BOTTLE
4,536365,RED WOOLLY HOTTIE WHITE HEART.


In [17]:
data.shape

(530104, 2)

## Formateado de datos requerido por el modelo

In [18]:
for inv in data.InvoiceNo.unique(): 
    print(inv)

536365
536366
536367
536368
536369
536370
536371
536372
536373
536374
536375
536376
536377
536378
536380
536381
536382
536384
536385
536386
536387
536388
536389
536390
536392
536393
536394
536395
536396
536397
536398
536399
536400
536401
536402
536403
536404
536405
536406
536407
536408
536409
536412
536415
536416
536420
536423
536425
536437
536446
536460
536463
536464
536466
536477
536488
536500
536502
536508
536514
536520
536521
536522
536523
536524
536525
536526
536527
536528
536529
536530
536531
536532
536533
536534
536535
536536
536537
536538
536539
536540
536541
536542
536544
536551
536555
536556
536557
536558
536559
536560
536561
536562
536563
536564
536565
536566
536567
536568
536569
536570
536571
536572
536573
536574
536575
536576
536577
536578
536579
536580
536581
536582
536583
536584
536585
536586
536587
536588
536590
536591
536592
536593
536594
536595
536596
536597
536598
536599
536600
536601
536602
536603
536604
536605
536607
536608
536609
536610
536611
536612
536613
536614

541236
541238
541239
541240
541241
541242
541243
541244
541246
541247
541248
541249
541250
541251
541252
541253
541255
541256
541258
541259
541260
541262
541263
541264
541265
541266
541267
541268
541269
541270
541271
541272
541273
541274
541275
541276
541277
541278
541279
541280
541281
541282
541283
541285
541286
541287
541288
541289
541290
541291
541292
541293
541294
541302
541316
541357
541362
541370
541371
541391
541400
541405
541406
541407
541408
541409
541410
541411
541412
541413
541414
541417
541420
541421
541422
541423
541424
541425
541426
541427
541428
541429
541430
541431
541432
541434
541435
541436
541439
541473
541474
541479
541480
541481
541483
541484
541485
541486
541488
541489
541490
541491
541493
541494
541495
541496
541497
541500
541504
541505
541506
541507
541508
541509
541510
541511
541513
541516
541517
541518
541520
541521
541522
541523
541524
541525
541530
541552
541564
541565
541566
541567
541568
541569
541570
541571
541574
541575
541576
541584
541585
541587
541588

544308
544309
544311
544312
544313
544314
544315
544316
544317
544319
544320
544322
544323
544324
544325
544326
544328
544329
544330
544331
544332
544333
544334
544335
544336
544337
544338
544339
544340
544341
544349
544350
544351
544352
544353
544354
544355
544356
544368
544390
544391
544392
544393
544394
544395
544398
544399
544400
544404
544405
544406
544407
544408
544409
544411
544412
544418
544421
544423
544426
544430
544431
544434
544438
544439
544440
544441
544443
544444
544445
544446
544447
544448
544449
544450
544452
544453
544454
544455
544456
544457
544459
544460
544461
544462
544463
544464
544465
544466
544467
544468
544469
544470
544471
544472
544473
544474
544475
544476
544477
544478
544479
544480
544481
544482
544483
544484
544485
544495
544556
544565
544568
544569
544571
544572
544573
544574
544578
544582
544585
544586
544591
544592
544595
544596
544597
544598
544599
544600
544601
544602
544603
544604
544605
544606
544607
544610
544611
544612
544633
544634
544636
544637

547412
547413
547414
547415
547416
547417
547418
547419
547420
547428
547444
547485
547486
547488
547489
547490
547491
547492
547493
547494
547496
547497
547498
547500
547501
547503
547504
547507
547508
547509
547510
547511
547512
547513
547514
547515
547516
547517
547518
547519
547520
547521
547535
547537
547538
547539
547541
547546
547547
547548
547549
547550
547551
547552
547553
547554
547555
547556
547558
547561
547562
547563
547564
547565
547566
547567
547568
547569
547570
547571
547572
547573
547576
547577
547578
547579
547580
547583
547623
547639
547644
547645
547647
547648
547649
547650
547651
547652
547653
547654
547655
547656
547657
547658
547659
547662
547663
547664
547665
547666
547667
547668
547669
547670
547671
547672
547674
547684
547685
547686
547687
547688
547689
547690
547691
547694
547695
547697
547698
547702
547703
547705
547706
547707
547708
547709
547712
547713
547714
547715
547716
547717
547718
547719
547720
547721
547722
547723
547724
547726
547727
547728
547729

553013
553014
553015
553016
553017
553018
553021
553022
553034
553035
553036
553037
553038
553039
553040
553042
553043
553044
553045
553046
553047
553048
553049
553050
553051
553052
553053
553054
553055
553056
553057
553058
553059
553060
553061
553062
553063
553064
553067
553074
553088
553093
553095
553099
553100
553102
553120
553125
553129
553130
553131
553134
553140
553141
553142
553143
553144
553145
553146
553148
553149
553151
553152
553153
553154
553155
553156
553157
553158
553159
553160
553161
553162
553164
553165
553166
553167
553168
553169
553170
553171
553172
553173
553174
553175
553176
553177
553178
553179
553180
553181
553182
553183
553184
553185
553186
553187
553188
553189
553190
553191
553192
553193
553194
553195
553196
553197
553198
553199
553200
553201
553202
553203
553204
553205
553206
553207
553208
553209
553210
553211
553212
553213
553214
553215
553216
553217
553218
553219
553220
553221
553223
553224
553225
553226
553227
553228
553229
553235
553239
553315
553316
553317

556102
556103
556104
556105
556106
556107
556108
556109
556110
556111
556112
556113
556114
556115
556116
556118
556119
556120
556121
556125
556126
556127
556128
556129
556130
556139
556179
556181
556182
556183
556184
556185
556186
556187
556188
556189
556190
556191
556192
556193
556194
556195
556196
556197
556198
556199
556201
556202
556203
556204
556205
556206
556207
556208
556214
556216
556218
556219
556222
556228
556229
556230
556234
556235
556236
556237
556238
556239
556240
556241
556243
556244
556245
556246
556247
556248
556249
556252
556253
556254
556255
556256
556257
556258
556259
556263
556264
556266
556267
556271
556279
556280
556282
556283
556284
556285
556288
556290
556296
556298
556302
556303
556304
556305
556318
556323
556325
556329
556330
556341
556365
556386
556397
556408
556413
556415
556416
556417
556419
556425
556426
556427
556428
556429
556431
556432
556433
556434
556435
556437
556439
556440
556441
556442
556443
556444
556446
556447
556454
556455
556456
556459
556460

561887
561888
561889
561890
561891
561892
561893
561894
561895
561896
561897
561898
561899
561900
561901
561902
561903
561904
561905
561906
561907
561908
561909
561910
561911
561912
561913
561914
561915
561916
561917
561922
561923
561925
561926
561928
561933
561936
561952
561955
561967
561968
561969
561995
562002
562011
562014
562015
562017
562018
562019
562021
562023
562024
562025
562029
562031
562032
562033
562034
562035
562036
562037
562038
562039
562040
562042
562043
562044
562045
562046
562047
562049
562050
562053
562085
562087
562088
562092
562093
562094
562095
562096
562098
562099
562100
562101
562102
562103
562104
562105
562106
562107
562108
562109
562112
562113
562114
562120
562123
562125
562127
562128
562129
562130
562131
562132
562133
562136
562137
562138
562140
562141
562143
562145
562146
562148
562150
562152
562153
562155
562156
562157
562158
562161
562162
562163
562166
562173
562199
562200
562201
562202
562203
562204
562205
562207
562208
562209
562210
562211
562213
562214

567145
567146
567148
567153
567154
567156
567157
567158
567159
567160
567161
567162
567163
567164
567165
567166
567167
567168
567169
567170
567171
567172
567173
567174
567175
567176
567178
567179
567180
567181
567182
567183
567184
567185
567186
567188
567189
567190
567191
567192
567193
567194
567195
567196
567197
567199
567200
567202
567203
567204
567205
567206
567211
567213
567236
567239
567252
567280
567287
567288
567290
567291
567292
567293
567294
567295
567296
567297
567298
567299
567300
567301
567302
567303
567304
567305
567306
567307
567308
567309
567310
567311
567312
567313
567323
567324
567325
567326
567327
567328
567329
567333
567334
567336
567338
567339
567340
567341
567342
567343
567344
567346
567353
567354
567356
567362
567368
567369
567370
567372
567373
567374
567375
567376
567377
567378
567379
567380
567381
567382
567383
567384
567385
567386
567387
567408
567423
567424
567425
567426
567427
567428
567429
567453
567454
567455
567456
567457
567458
567459
567460
567461
567462

569890
569892
569893
569894
569896
569897
569898
569899
569900
569901
569903
569904
569905
569906
569907
569910
569912
569914
569915
569917
569918
569919
569920
569921
569923
569932
569936
569943
569948
569962
569965
569968
569977
569983
569997
569998
569999
570000
570001
570002
570003
570007
570008
570009
570010
570011
570014
570016
570017
570018
570019
570020
570021
570022
570023
570024
570026
570027
570028
570030
570036
570049
570076
570082
570086
570087
570088
570089
570090
570091
570092
570093
570094
570095
570096
570097
570098
570100
570101
570102
570103
570108
570111
570113
570114
570115
570116
570117
570118
570119
570121
570123
570124
570125
570126
570127
570128
570129
570130
570144
570167
570168
570177
570178
570179
570180
570181
570183
570184
570186
570187
570188
570189
570190
570191
570192
570195
570196
570197
570200
570201
570202
570206
570208
570209
570210
570211
570212
570213
570214
570215
570216
570217
570218
570219
570220
570222
570223
570224
570225
570226
570227
570228

572753
572755
572756
572757
572758
572760
572762
572763
572764
572765
572766
572767
572768
572769
572770
572771
572772
572773
572774
572775
572792
572810
572811
572812
572830
572832
572833
572834
572835
572836
572837
572838
572843
572845
572846
572847
572848
572849
572851
572852
572853
572854
572856
572857
572858
572859
572860
572861
572862
572863
572865
572866
572867
572868
572869
572870
572871
572872
572873
572874
572875
572876
572877
572878
572879
572880
572881
572882
572883
572884
572885
572886
572887
572888
572889
572892
572893
572894
572895
572896
572897
572899
572900
572901
572902
572903
572904
572905
572906
572907
572908
572909
572910
572911
572912
572913
572914
572918
572921
572922
572923
572924
572925
572926
572928
572929
572930
572931
572932
572933
572934
572935
572946
572960
572964
572969
572986
572990
572992
572994
572997
572998
572999
573000
573001
573002
573003
573004
573005
573006
573007
573008
573019
573020
573022
573023
573025
573026
573027
573028
573029
573032
573033

575388
575389
575390
575391
575392
575410
575415
575418
575433
575475
575476
575477
575480
575481
575482
575483
575484
575485
575486
575487
575490
575491
575492
575493
575494
575495
575496
575497
575498
575499
575500
575501
575502
575504
575507
575508
575509
575510
575512
575514
575515
575516
575517
575519
575520
575525
575563
575574
575576
575579
575581
575582
575583
575584
575586
575587
575589
575591
575593
575595
575601
575602
575603
575604
575605
575607
575609
575610
575611
575612
575618
575619
575620
575621
575622
575623
575624
575627
575628
575629
575631
575632
575633
575634
575636
575637
575639
575640
575642
575643
575647
575649
575652
575654
575655
575656
575657
575658
575659
575660
575661
575668
575671
575673
575674
575675
575676
575677
575678
575680
575681
575682
575683
575684
575685
575686
575687
575688
575689
575690
575691
575692
575693
575694
575695
575696
575697
575698
575700
575701
575702
575703
575705
575706
575707
575708
575709
575710
575711
575712
575713
575718
575723

580719
580720
580721
580722
580723
580724
580725
580727
580728
580729
580730
580731
580732
580733
580734
580735
580736
580737
580738
580739
580742
580744
580745
580746
580748
580750
580752
580753
580754
580755
580756
580757
580758
580759
580760
580763
580765
580771
580772
580774
580775
580776
580777
580778
580808
580816
580819
580820
580822
580830
580831
580833
580848
580852
580865
580872
580873
580874
580875
580876
580877
580878
580879
580880
580881
580882
580883
580884
580885
580887
580888
580889
580890
580891
580892
580893
580894
580901
580902
580903
580904
580905
580906
580907
580908
580909
580910
580911
580914
580915
580916
580937
580939
580955
580956
580958
580959
580960
580961
580962
580963
580964
580965
580967
580969
580972
580973
580974
580975
580976
580977
580978
580979
580980
580981
580982
580983
580984
580985
580986
580987
580988
580992
580993
580996
580997
580998
580999
581000
581001
581002
581003
581004
581005
581006
581007
581008
581010
581011
581013
581014
581015
581016

In [19]:
# El modelo requiere formato lista de listas -> cada elemento de la lista es una lista con todos los
# productos de una factura
df_grouped = data.groupby('InvoiceNo')['Description'].apply(list)
invoiceItems = df_grouped.tolist()
invoiceItems

[['WHITE HANGING HEART T-LIGHT HOLDER',
  'WHITE METAL LANTERN',
  'CREAM CUPID HEARTS COAT HANGER',
  'KNITTED UNION FLAG HOT WATER BOTTLE',
  'RED WOOLLY HOTTIE WHITE HEART.',
  'SET 7 BABUSHKA NESTING BOXES',
  'GLASS STAR FROSTED T-LIGHT HOLDER'],
 ['HAND WARMER UNION JACK', 'HAND WARMER RED POLKA DOT'],
 ['ASSORTED COLOUR BIRD ORNAMENT',
  "POPPY'S PLAYHOUSE BEDROOM ",
  "POPPY'S PLAYHOUSE KITCHEN",
  'FELTCRAFT PRINCESS CHARLOTTE DOLL',
  'IVORY KNITTED MUG COSY ',
  'BOX OF 6 ASSORTED COLOUR TEASPOONS',
  'BOX OF VINTAGE JIGSAW BLOCKS ',
  'BOX OF VINTAGE ALPHABET BLOCKS',
  'HOME BUILDING BLOCK WORD',
  'LOVE BUILDING BLOCK WORD',
  'RECIPE BOX WITH METAL HEART',
  'DOORMAT NEW ENGLAND'],
 ['JAM MAKING SET WITH JARS',
  'RED COAT RACK PARIS FASHION',
  'YELLOW COAT RACK PARIS FASHION',
  'BLUE COAT RACK PARIS FASHION'],
 ['BATH BUILDING BLOCK WORD'],
 ['ALARM CLOCK BAKELIKE PINK',
  'ALARM CLOCK BAKELIKE RED ',
  'ALARM CLOCK BAKELIKE GREEN',
  'PANDA AND BUNNIES STICKER SHEET'

## Encoding

In [20]:
dataEncoder = mlxtend.preprocessing.TransactionEncoder()
dataEncoderArray = dataEncoder.fit_transform(invoiceItems)
dataEncoderArray

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [21]:
dataEncoderArray[0].size  # 475 * 2186 = 1038350

4026

In [22]:
dataEncoderDf = pd.DataFrame(
    dataEncoderArray, 
    columns=dataEncoder.columns_
)

dataEncoderDf.head()

Unnamed: 0,4 PURPLE FLOCK DINNER CANDLES,50'S CHRISTMAS GIFT BAG LARGE,DOLLY GIRL BEAKER,I LOVE LONDON MINI BACKPACK,I LOVE LONDON MINI RUCKSACK,NINE DRAWER OFFICE TIDY,OVAL WALL MIRROR DIAMANTE,RED SPOT GIFT BAG LARGE,SET 2 TEA TOWELS I LOVE LONDON,SPACEBOY BABY GIFT SET,...,ZINC STAR T-LIGHT HOLDER,ZINC SWEETHEART SOAP DISH,ZINC SWEETHEART WIRE LETTER RACK,ZINC T-LIGHT HOLDER STAR LARGE,ZINC T-LIGHT HOLDER STARS LARGE,ZINC T-LIGHT HOLDER STARS SMALL,ZINC TOP 2 DOOR WOODEN SHELF,ZINC WILLIE WINKIE CANDLE STICK,ZINC WIRE KITCHEN ORGANISER,ZINC WIRE SWEETHEART LETTER TRAY
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [23]:
#dataEncoderDf.shape

In [24]:
#dataEncoderDf.columns

In [25]:
# Comprobación valores True / False
#dataEncoderDf[dataEncoderDf['ZINC WILLIE WINKIE  CANDLE STICK']==True]

In [26]:
#dataEncoderDf.iloc[20]

In [27]:
# Modelo Previo
mod0 = apriori(
    dataEncoderDf, 
    min_support=0.01, # un artículo se considerará como 'frecuente' si aparece en un 1% de las facturas
    use_colnames=True                                   
)
mod0

Unnamed: 0,support,itemsets
0,0.013477,( SET 2 TEA TOWELS I LOVE LONDON )
1,0.015932,(10 COLOUR SPACEBOY PEN)
2,0.012575,(12 MESSAGE CARDS WITH ENVELOPES)
3,0.017786,(12 PENCIL SMALL TUBE WOODLAND)
4,0.018136,(12 PENCILS SMALL TUBE RED RETROSPOT)
5,0.017786,(12 PENCILS SMALL TUBE SKULL)
6,0.013427,(12 PENCILS TALL TUBE RED RETROSPOT)
7,0.012826,(12 PENCILS TALL TUBE SKULLS)
8,0.012475,(12 PENCILS TALL TUBE WOODLAND)
9,0.013076,(15CM CHRISTMAS GLASS BALL 20 LIGHTS)


In [28]:
mod0Sorted = mod0.sort_values(by='support', ascending=False)
mod0Sorted

Unnamed: 0,support,itemsets
777,0.113026,(WHITE HANGING HEART T-LIGHT HOLDER)
344,0.104659,(JUMBO BAG RED RETROSPOT)
556,0.099599,(REGENCY CAKESTAND 3 TIER)
463,0.084419,(PARTY BUNTING)
384,0.078357,(LUNCH BAG RED RETROSPOT)
50,0.072896,(ASSORTED COLOUR BIRD ORNAMENT)
625,0.069389,(SET OF 3 CAKE TINS PANTRY DESIGN )
442,0.066132,(PACK OF 72 RETROSPOT CAKE CASES)
376,0.063778,(LUNCH BAG BLACK SKULL.)
416,0.062575,(NATURAL SLATE HEART CHALKBOARD )


In [29]:
# http://rasbt.github.io/mlxtend/api_subpackages/mlxtend.frequent_patterns/

In [30]:
# Modelo 1
mod1 = association_rules(
    mod0, 
    metric='confidence',
    min_threshold=0.5,
    support_only=False                                    
)
mod1

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(LUNCH BAG SUKI DESIGN ),(LUNCH BAG RED RETROSPOT),0.054309,0.078357,0.027856,0.512915,6.545899,0.023600,1.892162
1,(HERB MARKER ROSEMARY),(HERB MARKER THYME),0.012024,0.011874,0.011072,0.920833,77.552039,0.010929,12.481595
2,(HERB MARKER THYME),(HERB MARKER ROSEMARY),0.011874,0.012024,0.011072,0.932489,77.552039,0.010929,14.634394
3,"(JUMBO STORAGE BAG SUKI, JUMBO BAG TOYS )",(JUMBO SHOPPER VINTAGE RED PAISLEY),0.014930,0.058868,0.010671,0.714765,12.141882,0.009792,3.299499
4,"(JUMBO SHOPPER VINTAGE RED PAISLEY, JUMBO BAG ...",(JUMBO STORAGE BAG SUKI),0.013327,0.059319,0.010671,0.800752,13.499162,0.009881,4.721156
5,(SET OF 3 WOODEN TREE DECORATIONS),(SET OF 3 WOODEN HEART DECORATIONS),0.014679,0.022295,0.010571,0.720137,32.300955,0.010244,3.493508
6,(RED HANGING HEART T-LIGHT HOLDER),(WHITE HANGING HEART T-LIGHT HOLDER),0.037124,0.113026,0.024800,0.668016,5.910285,0.020604,2.671739
7,"(JUMBO BAG RED RETROSPOT, JUMBO BAG TOYS )",(DOTCOM POSTAGE),0.017234,0.035371,0.010872,0.630814,17.834344,0.010262,2.612854
8,"(DOTCOM POSTAGE, JUMBO BAG TOYS )",(JUMBO BAG RED RETROSPOT),0.013677,0.104659,0.010872,0.794872,7.594850,0.009440,4.364786
9,(PACK OF 60 MUSHROOM CAKE CASES),(PACK OF 72 RETROSPOT CAKE CASES),0.019689,0.066132,0.011673,0.592875,8.964993,0.010371,2.293813


In [31]:
mod1Sorted = mod1.sort_values(by='support', ascending=False)
mod1Sorted

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
872,(JUMBO BAG PINK POLKADOT),(JUMBO BAG RED RETROSPOT),0.061022,0.104659,0.041333,0.677340,6.471855,0.034946,2.774873
599,(GREEN REGENCY TEACUP AND SAUCER),(ROSES REGENCY TEACUP AND SAUCER ),0.050752,0.053357,0.038427,0.757157,14.190472,0.035719,3.898169
600,(ROSES REGENCY TEACUP AND SAUCER ),(GREEN REGENCY TEACUP AND SAUCER),0.053357,0.050752,0.038427,0.720188,14.190472,0.035719,3.392448
416,(JUMBO STORAGE BAG SUKI),(JUMBO BAG RED RETROSPOT),0.059319,0.104659,0.036273,0.611486,5.842638,0.030064,2.304529
837,(JUMBO SHOPPER VINTAGE RED PAISLEY),(JUMBO BAG RED RETROSPOT),0.058868,0.104659,0.034068,0.578723,5.529593,0.027907,2.125304
88,(LUNCH BAG BLACK SKULL.),(LUNCH BAG RED RETROSPOT),0.063778,0.078357,0.032114,0.503535,6.426188,0.027117,1.856411
747,(ALARM CLOCK BAKELIKE GREEN),(ALARM CLOCK BAKELIKE RED ),0.049098,0.052655,0.032064,0.653061,12.402571,0.029479,2.730582
746,(ALARM CLOCK BAKELIKE RED ),(ALARM CLOCK BAKELIKE GREEN),0.052655,0.049098,0.032064,0.608944,12.402571,0.029479,2.431625
650,(GREEN REGENCY TEACUP AND SAUCER),(PINK REGENCY TEACUP AND SAUCER),0.050752,0.038327,0.031663,0.623889,16.278213,0.029718,2.556890
649,(PINK REGENCY TEACUP AND SAUCER),(GREEN REGENCY TEACUP AND SAUCER),0.038327,0.050752,0.031663,0.826144,16.278213,0.029718,5.459963


In [32]:
# Guarda resultados de mod0 en carpeta output
mod0Sorted.to_csv('../output/individualItems.csv')

In [33]:
# Guarda resultados de mod1 en carpeta output
mod1Sorted.to_csv('../output/associationRules.csv')