In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from mlxtend.frequent_patterns import apriori, association_rules

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the transactions file. It is read without a header row, and the single
# column is given the name 'Products'.
groceryData = pd.read_csv(
    '../input/supermarket/GroceryStoreDataSet.csv',
    header=None,
    names=['Products'],
)

In [None]:
# Preview the first rows of the loaded 'Products' column as a sanity check.
groceryData.head()

In [None]:
# Print the frame summary (row count, column dtypes, non-null counts).
groceryData.info()

**Let's extract all unique items**

In [None]:
# Collect every distinct product name that appears in any transaction.
# Each 'Products' value is a comma-separated string, so it is split on ",".
#
# The result is sorted so that the order is reproducible: iterating a set of
# strings can yield a different order on every kernel restart (str hashing is
# randomised per process), and this list is later used as the column order of
# the one-hot encoded frame.
items = sorted({
    product
    for basket in groceryData['Products']
    for product in basket.split(",")
})
items

In [None]:
# One-hot encode the transactions: one row per transaction, one boolean column
# per product, True when that product is in the basket.
#
# Membership is tested against the exact comma-separated tokens of each basket.
# A substring/regex test (str.contains) would also flag a product whose name
# merely appears inside another product's name, and would misbehave on names
# containing regex metacharacters.
baskets = groceryData['Products'].str.split(",")
df = pd.DataFrame(
    {item: [item in basket for basket in baskets] for item in items},
    index=range(len(groceryData)),
    columns=items,  # keep the column order defined by `items`
)
df

The data frame is ready now. Let's dive into exploring the associations between items.
We will use 0.1 as the minimum support value. That will eliminate any itemset whose support is less than 0.1.

In [None]:
# Mine the frequent itemsets from the one-hot encoded transactions, using a
# minimum support of 0.1 and labelling itemsets with the column (product) names.
df_freq = apriori(
    df,
    use_colnames=True,
    min_support=0.1,
)
df_freq

In [None]:
# Derive association rules from the frequent itemsets using lift with a
# threshold of 1, then display the first 20 rules ordered by antecedent
# support and, within that, by confidence (both descending).
rules = association_rules(df_freq, metric="lift", min_threshold=1)
(
    rules
    .sort_values(by=['antecedent support', 'confidence'], ascending=False)
    .reset_index(drop=True)
    .head(20)
)

**Result**

* The "antecedents" column shows the items that are sold the most, with their support values in the "antecedent support" column. So 1) BREAD, 2) COFFEE and 3) TEA are sold the most.
* Given the first item in the "antecedents" column (with its frequency in "antecedent support"), the "consequents" column shows the items that are most likely to be sold together with it (with the "confidence" value as the frequency).
* "confidence" tells us how sure we are that the 2nd item is sold once the 1st item has been sold (which may or may not be due to the 1st item), while "lift" tells us how selling the 1st item affects the probability of selling the 2nd item.

So, based on the results, some strategic actions can be taken to increase sales. New products can also be tried and, after analysing the results, the best ones can replace the items that are not selling well.

Bonus

We can also count the items in each itemset and sort the itemsets by how frequently they have been sold as sets of 2/3/4 items.

In [None]:
# Record how many products each frequent itemset contains.
df_freq['item_count'] = df_freq['itemsets'].apply(len)

In [None]:
# Show the two-item itemsets whose support exceeds 0.1, most frequent first.
is_pair = df_freq['item_count'].eq(2)
above_min_support = df_freq['support'].gt(0.1)
df_freq.loc[is_pair & above_min_support].sort_values(by='support', ascending=False)
