## Installing  necessary Dependencies

In [1]:
import numpy as np
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot

from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import  association_rules ,apriori,fpgrowth
import networkx as nx

## Loading and Reading Datasets

In [2]:
basket=pd.read_csv("mba.csv")
basket.head()

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit
3,1187,12-12-2015,other vegetables
4,3037,01-02-2015,whole milk


In [3]:
print("Database dimension :", basket.shape)
print("Database size      :", basket.size)

Database dimension : (38765, 3)
Database size      : 116295


In [4]:
basket.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38765 entries, 0 to 38764
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Member_number    38765 non-null  int64 
 1   Date             38765 non-null  object
 2   itemDescription  38765 non-null  object
dtypes: int64(1), object(2)
memory usage: 908.7+ KB


In [5]:
uniqueTransactions=basket['Member_number'].nunique()
print("No of unique Transactions =",uniqueTransactions)

No of unique Transactions = 3898


In [6]:
basket.describe(include=object)

Unnamed: 0,Date,itemDescription
count,38765,38765
unique,728,167
top,21-01-2015,whole milk
freq,96,2502


## Data Exploration and Visualization

In [7]:
itemFrequency = basket['itemDescription'].value_counts().sort_values(ascending=False)
itemFrequency.head(10)

itemDescription
whole milk          2502
other vegetables    1898
rolls/buns          1716
soda                1514
yogurt              1334
root vegetables     1071
tropical fruit      1032
bottled water        933
sausage              924
citrus fruit         812
Name: count, dtype: int64

In [8]:
fig = px.bar(itemFrequency.head(20), title='20 Most Frequent Items', color=itemFrequency.head(20), color_continuous_scale=px.colors.sequential.Mint)
fig.update_traces(texttemplate='%{y}', textposition='outside', hovertemplate = '<b>%{x}</b><br>No. of Transactions: %{y}')
fig.show()

In [9]:
dateTime=pd.to_datetime(basket['Date'])
basket['Day']=dateTime.dt.day_name()
basket['Month']=dateTime.dt.month_name()
basket['Year']=dateTime.dt.year
basket.head(5)





Unnamed: 0,Member_number,Date,itemDescription,Day,Month,Year
0,1808,21-07-2015,tropical fruit,Tuesday,July,2015
1,2552,05-01-2015,whole milk,Monday,January,2015
2,2300,19-09-2015,pip fruit,Saturday,September,2015
3,1187,12-12-2015,other vegetables,Saturday,December,2015
4,3037,01-02-2015,whole milk,Sunday,February,2015


In [10]:
MostProductiveDay= basket.groupby('Day')['itemDescription'].count().sort_values(ascending=False)
MostProductiveDay

Day
Thursday     5620
Friday       5562
Wednesday    5562
Tuesday      5558
Saturday     5551
Sunday       5530
Monday       5382
Name: itemDescription, dtype: int64

In [11]:
fig = px.bar(MostProductiveDay, title='Most Productive Day', color=MostProductiveDay, color_continuous_scale=px.colors.sequential.Mint)
#fig.update_layout(margin=dict(t=50, b=0, l=0, r=0), titlefont=dict(size=20), xaxis_tickangle=0, plot_bgcolor='white', coloraxis_showscale=False)
fig.update_yaxes(showticklabels=False, title=' ')
fig.update_xaxes(title=' ')
fig.update_traces(texttemplate='%{y}', textposition='outside', hovertemplate = '<b>%{x}</b><br>No. of Transactions: %{y}')
fig.show()

In [12]:
MostProductiveMonth = basket.groupby('Month')['itemDescription'].count().sort_values(ascending=False)
MostProductiveMonth

Month
August       3496
May          3408
January      3324
July         3300
June         3264
October      3261
April        3260
November     3254
March        3133
September    3059
December     3009
February     2997
Name: itemDescription, dtype: int64

In [13]:
fig = px.bar(MostProductiveMonth, title='Most Productive Month', color=MostProductiveMonth, color_continuous_scale=px.colors.sequential.Mint)
fig.update_layout(margin=dict(t=50, b=0, l=0, r=0), titlefont=dict(size=20), xaxis_tickangle=0, plot_bgcolor='white', coloraxis_showscale=False)
fig.update_traces(texttemplate='%{y}', textposition='outside', hovertemplate = '<b>%{x}</b><br>No. of Transactions: %{y}')
fig.show()

In [14]:
MostProductiveYear= basket.groupby('Year')['itemDescription'].count().sort_values(ascending=False)
MostProductiveYear

Year
2015    20488
2014    18277
Name: itemDescription, dtype: int64

In [15]:
fig = px.bar(MostProductiveYear, title='Most Productive Year', color=MostProductiveYear, color_continuous_scale=px.colors.sequential.Mint)
fig.update_layout(margin=dict(t=50, b=0, l=0, r=0), titlefont=dict(size=20), xaxis_tickangle=0, plot_bgcolor='white', coloraxis_showscale=False)
fig.update_yaxes(showticklabels=False, title=' ')
fig.update_xaxes(title=' ')
fig.update_traces(texttemplate='%{y}', textposition='outside', hovertemplate = '<b>%{x}</b><br>No. of Transactions: %{y}')
fig.show()

## Association Rules Generation

### Data Preparation for Association Rule Mining

##### List of all the transactions

In [16]:
transactions=[]
for item in basket['Member_number'].unique():
    lst=list(set(basket[basket['Member_number']==item]['itemDescription']))
    transactions.append(lst)

transactions[0:2]

[['rolls/buns',
  'napkins',
  'meat',
  'candy',
  'long life bakery product',
  'citrus fruit',
  'sugar',
  'semi-finished bread',
  'tropical fruit',
  'whole milk'],
 ['root vegetables',
  'shopping bags',
  'butter',
  'other vegetables',
  'chocolate',
  'tropical fruit',
  'hygiene articles',
  'whole milk',
  'coffee',
  'pot plants',
  'female sanitary products']]

##### One Hot encoding 

In [17]:
te = TransactionEncoder()
encodedData = te.fit(transactions).transform(transactions)
data = pd.DataFrame(encodedData, columns=te.columns_)
data.head()

Unnamed: 0,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,bags,baking powder,bathroom cleaner,beef,berries,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,True,False,True,False,False


## Association Rules Generation

#### Frequent items

In [18]:
frequentItems= apriori(data, use_colnames=True, min_support=0.02)
frequentItems.head()

Unnamed: 0,support,itemsets
0,0.078502,(UHT-milk)
1,0.031042,(baking powder)
2,0.119548,(beef)
3,0.079785,(berries)
4,0.062083,(beverages)


#### Association rule

In [19]:
rules = association_rules(frequentItems, metric="lift", min_threshold=1)
rules = rules.sort_values("lift",ascending=False).reset_index(drop= True)
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,"(whole milk, sausage)","(rolls/buns, yogurt)",0.106978,0.111339,0.022832,0.213429,1.916929,0.010921,1.129791,0.535633
1,"(rolls/buns, yogurt)","(whole milk, sausage)",0.111339,0.106978,0.022832,0.205069,1.916929,0.010921,1.123396,0.538262
2,"(yogurt, whole milk)","(rolls/buns, sausage)",0.15059,0.08235,0.022832,0.151618,1.841148,0.010431,1.081648,0.537856
3,"(rolls/buns, sausage)","(yogurt, whole milk)",0.08235,0.15059,0.022832,0.277259,1.841148,0.010431,1.175261,0.497859
4,"(sausage, whole milk)","(yogurt, other vegetables)",0.106978,0.120318,0.023089,0.215827,1.793806,0.010217,1.121796,0.495538


####  Rules Visualization

In [20]:
network_A = list(rules["antecedents"].unique())
network_B = list(rules["consequents"].unique())
node_list = list(set(network_A + network_B))
G = nx.Graph()
for i in node_list:
    G.add_node(i)
for i,j in rules.iterrows():
    G.add_edges_from([(j["antecedents"], j["consequents"])])
pos = nx.spring_layout(G, k=0.5, dim=2, iterations=400)
for n, p in pos.items():
    G.nodes[n]['pos'] = p

edge_trace = go.Scatter(x=[], y=[], line=dict(width=0.5, color='#888'), hoverinfo='none', mode='lines')

for edge in G.edges():
    x0, y0 = G.nodes[edge[0]]['pos']
    x1, y1 = G.nodes[edge[1]]['pos']
    edge_trace['x'] += tuple([x0, x1, None])
    edge_trace['y'] += tuple([y0, y1, None])

node_trace = go.Scatter(x=[], y=[], text=[], mode='markers', hoverinfo='text',
    marker=dict(showscale=True, colorscale='Burg', reversescale=True, color=[], size=15,
    colorbar=dict(thickness=10, title='Node Connections', xanchor='left', titleside='right')))

for node in G.nodes():
    x, y = G.nodes[node]['pos']
    node_trace['x'] += tuple([x])
    node_trace['y'] += tuple([y])

for node, adjacencies in enumerate(G.adjacency()):
    node_trace['marker']['color']+=tuple([len(adjacencies[1])])
    node_info = str(adjacencies[0]) +'<br>No of Connections: {}'.format(str(len(adjacencies[1])))
    node_trace['text']+=tuple([node_info])

fig = go.Figure(data=[edge_trace, node_trace], 
    layout=go.Layout(title='Item Connections Network', titlefont=dict(size=20),
    plot_bgcolor='white', showlegend=False, margin=dict(b=0,l=0,r=0,t=50),
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))

iplot(fig)


## Refining Rules

In [21]:
index_names = rules[rules['consequents'] == 'whole milk'].index
refinedRules = rules.drop(index_names).sort_values('lift', ascending=False)
refinedRules.drop(['leverage','conviction'], axis=1, inplace=True)
refinedRules = refinedRules.reset_index()
refinedRules

Unnamed: 0,index,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,zhangs_metric
0,0,"(whole milk, sausage)","(rolls/buns, yogurt)",0.106978,0.111339,0.022832,0.213429,1.916929,0.535633
1,1,"(rolls/buns, yogurt)","(whole milk, sausage)",0.111339,0.106978,0.022832,0.205069,1.916929,0.538262
2,2,"(yogurt, whole milk)","(rolls/buns, sausage)",0.150590,0.082350,0.022832,0.151618,1.841148,0.537856
3,3,"(rolls/buns, sausage)","(yogurt, whole milk)",0.082350,0.150590,0.022832,0.277259,1.841148,0.497859
4,4,"(sausage, whole milk)","(yogurt, other vegetables)",0.106978,0.120318,0.023089,0.215827,1.793806,0.495538
...,...,...,...,...,...,...,...,...,...
2951,2951,"(soda, tropical fruit)",(other vegetables),0.081837,0.376603,0.031042,0.379310,1.007188,0.007773
2952,2952,(root vegetables),(chocolate),0.230631,0.086455,0.020010,0.086763,1.003568,0.004621
2953,2953,(chocolate),(root vegetables),0.086455,0.230631,0.020010,0.231454,1.003568,0.003892
2954,2954,(brown bread),(whipped/sour cream),0.135967,0.154695,0.021036,0.154717,1.000144,0.000167


In [22]:
rules.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2956 entries, 0 to 2955
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   antecedents         2956 non-null   object 
 1   consequents         2956 non-null   object 
 2   antecedent support  2956 non-null   float64
 3   consequent support  2956 non-null   float64
 4   support             2956 non-null   float64
 5   confidence          2956 non-null   float64
 6   lift                2956 non-null   float64
 7   leverage            2956 non-null   float64
 8   conviction          2956 non-null   float64
 9   zhangs_metric       2956 non-null   float64
dtypes: float64(8), object(2)
memory usage: 231.1+ KB


In [23]:
rules.to_csv("Rules.csv")

In [24]:
def remove_from_list(y, item_to_search):
    newlist = list()
    for i in y:
        if i not in item_to_search:
            newlist.append(i)
    return newlist

In [25]:
def search_list(item_to_search, list_to_search = rules['antecedents']):
    print(item_to_search)
    max_lift = 0
    item_to_recommend = ''
    for i, item in enumerate(list_to_search):
        if set(list(item_to_search)).issubset(set(list(item))):
            if rules['lift'][i] > max_lift:
                max_lift = rules['lift'][i]
                y = list(rules['antecedents'][i])
                x = remove_from_list(y, item_to_search)
                item_to_recommend = list(rules['consequents'][i]) + x
    
    if item_to_recommend == '':
        item_to_recommend = []
        print(f"Oops! No product recommendations available right now!: {item_to_recommend}")
    else:
        print(f"People who bought this also bought: {item_to_recommend}")
    return item_to_search, item_to_recommend

In [26]:
product_catalog=basket["itemDescription"].unique()

In [27]:
dict_to_store = {}
for i in range(len(product_catalog)):
    key, value = search_list([product_catalog[i]])
    dict_to_store[key[0]] = value

['tropical fruit']
People who bought this also bought: ['yogurt', 'whole milk', 'other vegetables']
['whole milk']
People who bought this also bought: ['rolls/buns', 'yogurt', 'sausage']
['pip fruit']
People who bought this also bought: ['pastry', 'whole milk']
['other vegetables']
People who bought this also bought: ['sausage', 'whole milk', 'yogurt']
['rolls/buns']
People who bought this also bought: ['whole milk', 'sausage', 'yogurt']
['pot plants']
Oops! No product recommendations available right now!: []
['citrus fruit']
People who bought this also bought: ['other vegetables', 'sausage']
['beef']
People who bought this also bought: ['whole milk', 'root vegetables']
['frankfurter']
People who bought this also bought: ['margarine']
['chicken']
People who bought this also bought: ['citrus fruit']
['butter']
People who bought this also bought: ['yogurt', 'other vegetables']
['fruit/vegetable juice']
People who bought this also bought: ['yogurt', 'other vegetables']
['packaged fruit/ve

In [28]:
dict_to_store['tropical fruit']

['yogurt', 'whole milk', 'other vegetables']

In [29]:
import json

json_file = json.dumps(dict_to_store)
# open file for writing, "w" 
f = open("item_sets.json","w")
# write json object to file
f.write(json_file)

# close file
f.close()

In [30]:
 #Opening JSON file
with open('item_sets.json') as json_file:
    data = json.load(json_file)

In [31]:
for a in data['tropical fruit']:
    print(a)

yogurt
whole milk
other vegetables


In [32]:
te = TransactionEncoder()
encodedData = te.fit(transactions).transform(transactions)
data1= pd.DataFrame(encodedData, columns=te.columns_)
data1.head()

Unnamed: 0,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,bags,baking powder,bathroom cleaner,beef,berries,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,True,False,True,False,False


In [33]:
ft=fpgrowth(data1,use_colnames=True,min_support=0.02)
ft['length'] = ft['itemsets'].apply(lambda x: len(x))
ft.head()

Unnamed: 0,support,itemsets,length
0,0.458184,(whole milk),1
1,0.349666,(rolls/buns),1
2,0.23371,(tropical fruit),1
3,0.18548,(citrus fruit),1
4,0.081324,(napkins),1


In [34]:
rules1 = association_rules(ft, metric="lift", min_threshold=0.2).iloc[:,:-3]
rules1 = rules1.sort_values("lift",ascending=False).reset_index(drop= True)
rules1.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift
0,"(sausage, whole milk)","(rolls/buns, yogurt)",0.106978,0.111339,0.022832,0.213429,1.916929
1,"(rolls/buns, yogurt)","(sausage, whole milk)",0.111339,0.106978,0.022832,0.205069,1.916929
2,"(yogurt, whole milk)","(rolls/buns, sausage)",0.15059,0.08235,0.022832,0.151618,1.841148
3,"(rolls/buns, sausage)","(yogurt, whole milk)",0.08235,0.15059,0.022832,0.277259,1.841148
4,"(yogurt, other vegetables)","(sausage, whole milk)",0.120318,0.106978,0.023089,0.191898,1.793806


In [35]:
network_A = list(rules1["antecedents"].unique())
network_B = list(rules1["consequents"].unique())
node_list = list(set(network_A + network_B))
G = nx.Graph()
for i in node_list:
    G.add_node(i)
for i,j in rules.iterrows():
    G.add_edges_from([(j["antecedents"], j["consequents"])])
pos = nx.spring_layout(G, k=0.5, dim=2, iterations=400)
for n, p in pos.items():
    G.nodes[n]['pos'] = p

edge_trace = go.Scatter(x=[], y=[], line=dict(width=0.5, color='#888'), hoverinfo='none', mode='lines')

for edge in G.edges():
    x0, y0 = G.nodes[edge[0]]['pos']
    x1, y1 = G.nodes[edge[1]]['pos']
    edge_trace['x'] += tuple([x0, x1, None])
    edge_trace['y'] += tuple([y0, y1, None])

node_trace = go.Scatter(x=[], y=[], text=[], mode='markers', hoverinfo='text',
    marker=dict(showscale=True, colorscale='Burg', reversescale=True, color=[], size=15,
    colorbar=dict(thickness=10, title='Node Connections', xanchor='left', titleside='right')))

for node in G.nodes():
    x, y = G.nodes[node]['pos']
    node_trace['x'] += tuple([x])
    node_trace['y'] += tuple([y])

for node, adjacencies in enumerate(G.adjacency()):
    node_trace['marker']['color']+=tuple([len(adjacencies[1])])
    node_info = str(adjacencies[0]) +'<br>No of Connections: {}'.format(str(len(adjacencies[1])))
    node_trace['text']+=tuple([node_info])

fig = go.Figure(data=[edge_trace, node_trace], 
    layout=go.Layout(title='Item Connections Network', titlefont=dict(size=20),
    plot_bgcolor='white', showlegend=False, margin=dict(b=0,l=0,r=0,t=50),
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))

iplot(fig)


In [36]:
index_names = rules1[rules1['consequents'] == 'whole milk'].index
refinedRules1 = rules1.drop(index_names).sort_values('lift', ascending=False)
refinedRules1 = refinedRules1.reset_index()
refinedRules1

Unnamed: 0,index,antecedents,consequents,antecedent support,consequent support,support,confidence,lift
0,0,"(sausage, whole milk)","(rolls/buns, yogurt)",0.106978,0.111339,0.022832,0.213429,1.916929
1,1,"(rolls/buns, yogurt)","(sausage, whole milk)",0.111339,0.106978,0.022832,0.205069,1.916929
2,2,"(yogurt, whole milk)","(rolls/buns, sausage)",0.150590,0.082350,0.022832,0.151618,1.841148
3,3,"(rolls/buns, sausage)","(yogurt, whole milk)",0.082350,0.150590,0.022832,0.277259,1.841148
4,4,"(yogurt, other vegetables)","(sausage, whole milk)",0.120318,0.106978,0.023089,0.191898,1.793806
...,...,...,...,...,...,...,...,...
2985,2985,(soda),(salty snack),0.313494,0.069266,0.020523,0.065466,0.945142
2986,2986,(whipped/sour cream),(shopping bags),0.154695,0.168291,0.024371,0.157546,0.936148
2987,2987,(shopping bags),(whipped/sour cream),0.168291,0.154695,0.024371,0.144817,0.936148
2988,2988,(tropical fruit),(margarine),0.233710,0.116983,0.025398,0.108672,0.928953


In [37]:
rules1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2990 entries, 0 to 2989
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   antecedents         2990 non-null   object 
 1   consequents         2990 non-null   object 
 2   antecedent support  2990 non-null   float64
 3   consequent support  2990 non-null   float64
 4   support             2990 non-null   float64
 5   confidence          2990 non-null   float64
 6   lift                2990 non-null   float64
dtypes: float64(5), object(2)
memory usage: 163.6+ KB


In [38]:
rules1.to_csv("Rules1.csv")

In [39]:
def remove_from_list(y, item_to_search):
    newlist = list()
    for i in y:
        if i not in item_to_search:
            newlist.append(i)
    return newlist

In [72]:
def search_list(item_to_search, list_to_search = rules['antecedents']):
    print(item_to_search)
    max_lift = 0
    item_to_recommend = ''
    for i, item in enumerate(list_to_search):
        if set(list(item_to_search)).issubset(set(list(item))):
            if rules['lift'][i] > max_lift:
                max_lift = rules['lift'][i]
                y = list(rules['antecedents'][i])
                x = remove_from_list(y, item_to_search)
                item_to_recommend = list(rules['consequents'][i]) + x
    
    if item_to_recommend == '':
        item_to_recommend = []
        print(f"Oops! No product recommendations available right now!: {item_to_recommend}")
    else:
        print(f"People who bought this also bought: {item_to_recommend}")
    return item_to_search, item_to_recommend

In [73]:
product_catalog=basket["itemDescription"].unique()

In [74]:
store = {}
for i in range(len(product_catalog)):
    key, value = search_list([product_catalog[i]])
    store[key[0]] = value

['tropical fruit']
People who bought this also bought: ['yogurt', 'whole milk', 'other vegetables']
['whole milk']
People who bought this also bought: ['rolls/buns', 'yogurt', 'sausage']
['pip fruit']
People who bought this also bought: ['pastry', 'whole milk']
['other vegetables']
People who bought this also bought: ['sausage', 'whole milk', 'yogurt']
['rolls/buns']
People who bought this also bought: ['whole milk', 'sausage', 'yogurt']
['pot plants']
Oops! No product recommendations available right now!: []
['citrus fruit']
People who bought this also bought: ['other vegetables', 'sausage']
['beef']
People who bought this also bought: ['whole milk', 'root vegetables']
['frankfurter']
People who bought this also bought: ['margarine']
['chicken']
People who bought this also bought: ['citrus fruit']
['butter']
People who bought this also bought: ['yogurt', 'other vegetables']
['fruit/vegetable juice']
People who bought this also bought: ['yogurt', 'other vegetables']
['packaged fruit/ve

In [64]:
import json

json_file = json.dumps(dict_to_store)
# open file for writing, "w" 
f = open("fitem_sets.json","w")
# write json object to file
f.write(json_file)

# close file
f.close()

In [65]:
 #Opening JSON file
with open('fitem_sets.json') as json_file:
    data1 = json.load(json_file)

In [66]:
data1['tropical fruit']

['yogurt', 'whole milk', 'other vegetables']