In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns
from pylab import rcParams
# Default size, in inches (width, height), for every matplotlib figure in this notebook.
rcParams['figure.figsize'] = (10, 5)

In [6]:
# Load the raw purchase records; the path is relative to the notebook's working directory.
df = pd.read_csv('BlackFriday.csv')

In [7]:
# Preview the first rows to check that the columns loaded as expected.
df.head(n=5)

Unnamed: 0,User_ID,Product_ID,Gender,Age,Occupation,City_Category,Stay_In_Current_City_Years,Marital_Status,Product_Category_1,Product_Category_2,Product_Category_3,Purchase
0,1000001,P00069042,F,0-17,10,A,2,0,3,,,8370
1,1000001,P00248942,F,0-17,10,A,2,0,1,6.0,14.0,15200
2,1000001,P00087842,F,0-17,10,A,2,0,12,,,1422
3,1000001,P00085442,F,0-17,10,A,2,0,12,14.0,,1057
4,1000002,P00285442,M,55+,16,C,4+,0,8,,,7969


In [8]:
# Build one transaction per user: the list of distinct products that user bought.
#
# A single groupby pass replaces the earlier approach of re-filtering the whole
# frame once per user, which scanned every row for every unique User_ID.
# sort=False keeps users in order of first appearance, i.e. the same order as
# df['User_ID'].unique().
products_by_user = df.groupby('User_ID', sort=False)['Product_ID'].unique()

# Item order inside a transaction does not matter to the encoder downstream;
# the length guard mirrors the original "skip empty baskets" check.
lst = [list(products) for products in products_by_user if len(products) > 0]

from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# One-hot encode the transactions: one row per entry of `lst`, one boolean
# column per distinct product seen while fitting.
te = TransactionEncoder()
te_data = te.fit_transform(lst)
df_x = pd.DataFrame(data=te_data, columns=te.columns_)

In [9]:
# Mine the frequent itemsets that association-rule generation will start from.
# min_support=0.03 keeps only itemsets present in at least 3% of the rows of df_x.
frequent_items = apriori(df_x, min_support=0.03, use_colnames=True)
frequent_items.head(n=5)

Unnamed: 0,support,itemsets
0,0.191818,(P00000142)
1,0.062977,(P00000242)
2,0.040401,(P00000342)
3,0.086912,(P00000642)
4,0.040401,(P00000742)


In [10]:
# Generate every rule whose lift is at least 1 (i.e. not negatively associated).
rules_all = association_rules(frequent_items, metric='lift', min_threshold=1)

# antecedents/consequents are frozensets. Unwrapping them with next(iter(...))
# is only meaningful for single-item sets: for a multi-item set it would keep
# one arbitrary member and present a compound rule as if it were a
# product-to-product rule. Restrict to one-to-one rules before unwrapping so
# every row (and later every graph edge) really describes a product pair.
is_one_to_one = (
    (rules_all['antecedents'].map(len) == 1)
    & (rules_all['consequents'].map(len) == 1)
)
rules = rules_all[is_one_to_one].copy()

# Replace each single-item frozenset with the product id it contains.
rules['antecedents'] = rules['antecedents'].map(lambda items: next(iter(items)))
rules['consequents'] = rules['consequents'].map(lambda items: next(iter(items)))

# Strongest associations first.
rules = rules.sort_values('lift', ascending=False)

rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
9621,P00106742,P00070942,0.081141,0.062977,0.030555,0.376569,5.979429,0.025445,1.503009
9620,P00070942,P00106742,0.062977,0.081141,0.030555,0.485175,5.979429,0.025445,1.7848
4292,P00220142,P00032042,0.072653,0.092344,0.03395,0.46729,5.060301,0.027241,1.703845
4293,P00032042,P00220142,0.092344,0.072653,0.03395,0.367647,5.060301,0.027241,1.466502
4269,P00032042,P00127442,0.092344,0.089798,0.041928,0.454044,5.056283,0.033636,1.667171


### Network Graph using Plotly

In [11]:
# Plotly draws the interactive figure; networkx holds the graph and its layout.
# NOTE(review): `plotly.plotly` is the online plotting module; I believe it was
# moved to the separate `chart_studio` package in plotly 4 — confirm against the
# installed plotly version before re-running this notebook.
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Initialise plotly's offline notebook mode for this session.
init_notebook_mode(connected=True)

import networkx as nx

# Only strongly associated product pairs (lift >= 3) become edges of the graph.
nx_data = rules[rules['lift'] >= 3]
GA = nx.from_pandas_edgelist(
    nx_data,
    source='antecedents',
    target='consequents',
    edge_attr='lift',
)

# Node coordinates come from Graphviz's 'dot' program. The layout is computed
# once: an earlier default-program call was overwritten immediately and only
# cost an extra Graphviz run.
# Alternative that does not need Graphviz: nx.kamada_kawai_layout(GA, weight='lift')
pos = nx.nx_agraph.graphviz_layout(GA, prog='dot')

# Collect the line-segment coordinates for every edge. Each edge contributes
# (start, end, None); the None entry separates one edge's segment from the next
# within the single x / y sequence.
edge_x = []
edge_y = []
for src, dst in GA.edges():
    x0, y0 = pos[src]
    x1, y1 = pos[dst]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

# Thin grey lines with hover disabled, so only the nodes respond to the cursor.
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    mode='lines',
    hoverinfo='none',
    line=dict(width=0.5, color='#888'),
)

# Gather per-node position, connection count and hover label in one pass.
# GA.adjacency() yields (node, neighbours) pairs, so no index is needed, and
# plain lists avoid re-assigning the trace properties once per node.
node_x = []
node_y = []
node_connections = []
node_labels = []
for node, neighbours in GA.adjacency():
    x, y = pos[node]
    n_connections = len(neighbours)
    node_x.append(x)
    node_y.append(y)
    node_connections.append(n_connections)
    node_labels.append(str(node) + ' - # of connections: ' + str(n_connections))

# Markers are coloured by connection count; the hover text shows the product id
# together with that count.
node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    text=node_labels,
    mode='markers',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        # colorscale options
        #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |
        #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |
        #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |
        colorscale='YlGnBu',
        reversescale=True,
        color=node_connections,
        size=10,
        colorbar=dict(
            thickness=15,
            title='Node Connections',
            xanchor='left',
            titleside='right'
        ),
        line=dict(width=2)))

# Footer annotation crediting the Plotly tutorial this figure is based on;
# it is positioned in paper coordinates, near the bottom-left corner.
source_note = dict(
    text="Python code: <a href='https://plot.ly/ipython-notebooks/network-graphs/'> https://plot.ly/ipython-notebooks/network-graphs/</a>",
    showarrow=False,
    xref="paper", yref="paper",
    x=0.005, y=-0.002)

# Both axes are hidden: node coordinates only carry layout, not data values.
hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)

layout = go.Layout(
    title='<br>Network graph',
    titlefont=dict(size=16),
    showlegend=False,
    hovermode='closest',
    margin=dict(b=20, l=5, r=5, t=40),
    annotations=[source_note],
    xaxis=dict(hidden_axis),
    yaxis=dict(hidden_axis))

# Edges are listed first so the node markers are drawn on top of them.
fig = go.Figure(data=[edge_trace, node_trace], layout=layout)

# NOTE(review): `py` is plotly.plotly (the online module), while the offline
# `iplot` is also imported in this cell — confirm the online call is intended.
py.iplot(fig, filename='networkx')