In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install bs4

In [None]:
#First we get all the pictures from a web site
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re

# Download the article page; the context manager closes the HTTP response
# as soon as the document has been parsed.
with urlopen("https://www.producebluebook.com/2021/02/16/the-fresh-market-automates-fresh-ordering/#") as html:
    bs = BeautifulSoup(html, 'html.parser')

# Keep only <img> tags whose src contains a literal ".jpg". The dot is escaped
# because an unescaped "." matches any character (e.g. "xjpg" would match too).
images = bs.find_all('img', {'src': re.compile(r'\.jpg')})

# Collect the matching image URLs, echoing each one followed by a blank line.
image_list = []
for image in images:
    print(image['src'] + '\n')
    image_list.append(image['src'])

In [None]:
# Then we visualize the one picture we want to show.
url = "https://www.producebluebook.com/wp-content/uploads/2021/02/the-fresh-market.jpg"
from io import BytesIO

from PIL import Image
import requests
import matplotlib.pyplot as plt

# A timeout keeps the cell from hanging indefinitely, and raise_for_status()
# turns an HTTP error page into a clear exception instead of an obscure
# image-decoding failure further down.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Decode from the fully downloaded body: response.content has any
# Content-Encoding (gzip/deflate) already undone, whereas the raw stream does not.
img = Image.open(BytesIO(response.content))
plt.figure(figsize=(15,10))
plt.imshow(img)
plt.show()

## 1. General Information About Association Rule Learning

<font color= "blue">

* Association rule learning is a rule-based machine learning method for discovering interesting relations between variables in large datasets.
    
* For example, the rule {onions,potatoes} => {burger} found in the sales data of a supermarket would indicate that if a customer buys onions and potatoes together, they are likely to also buy hamburger meat. Such information can be used as the basis for decisions about marketing activities such as promotional pricing or product placement.

In [None]:
# Read the image file from disk, then draw it on the current axes.
capture_img = plt.imread("../input/capture1/Capture1.PNG")
plt.imshow(capture_img)

<font color= "blue">
The table above lists market transactions by user, together with the products purchased in each one. The aim is to derive potential association rules from the dataset. Websites such as Netflix, IMDb, and YouTube use more complex variants of this kind of model to make recommendations like “People who watched this movie also watched these”.

## 2. How Apriori Works

<font color= "blue">
1. Support: Support refers to the default popularity of an item. It is calculated as the number of transactions containing a particular item divided by the total number of transactions, i.e. the share of all transactions that include that item:
    
For example: the support of {apple} is 4 out of 8. 
                
                                                Support = 4/8 = 50%

In [None]:
# Open a larger canvas, then read the image file and draw it.
plt.figure(figsize=(15, 12))
capture2_img = plt.imread("../input/capture-2/Capture1.PNG")
plt.imshow(capture2_img)

<font color= "blue">
2. Confidence: Confidence refers to the likelihood that item B is also bought if item A is bought. It is calculated as the number of transactions where A and B are bought together, divided by the total number of transactions where A is bought.
    
Example: In the picture above, 3 of the 4 transactions that contain apple also contain beer. Therefore:
    
                Confidence = 3/4 = 75% (the probability of buying beer among apple buyers is 75%, for the apple-beer example in the figure above)

<font color= "blue">
3. Lift: Lift(A -> B) refers to the increase in the ratio of sale of B when A is sold. Lift(A -> B) is calculated by dividing Confidence(A -> B) by Support(B):
    
                            Lift = Confidence(A -> B) / Support(B) = 75% / 50% = 0.75 / 0.5 = 1.5 (apple-beer example; note that the denominator must be the support of beer, the consequent, not the support of apple)

Lift tells us how much more likely item B is to be bought when item A is bought, compared with how likely B is to be bought in general: a lift of 1.5 means beer is 1.5 times more likely to be bought by customers who buy apple than by customers overall. A lift of 1 means there is no association between products A and B. A lift greater than 1 means products A and B are bought together more often than would be expected by chance. Finally, a lift of less than 1 means the two products are unlikely to be bought together.

## 3. Data Analysis

In [None]:
# pd.read_csv treats the first row as a header by default; header=None stops it
# from consuming the first transaction as column names.
df = pd.read_csv(
    "../input/market-basket-optimization/Market_Basket_Optimisation.csv",
    header=None,
)
df.head()

In [None]:
# List the 70 most popular items, counted across every column of the basket
# table. Counting df[0] alone would only rank items by how often they appear in
# the first position of a transaction.
# stack() flattens the frame into a single Series; value_counts() skips NaN.
popular = df.stack().value_counts().head(70)
popular

In [None]:
# Bar chart of the 40 most frequent items, counted across every column of the
# basket table (df[0] alone would only count the first item of each basket).
top_items = df.stack().value_counts().head(40)
plt.figure(figsize=(18,8))
# One copper shade per bar, so the gradient spans exactly the bars drawn.
color = plt.cm.copper(np.linspace(0, 1, len(top_items)))
top_items.plot.bar(color = color)
plt.title('frequency of most popular items', fontsize = 20)
plt.xticks(rotation = 90 )
plt.grid()
plt.show()

In [None]:
# Wrap the item counts in a DataFrame so they render as a table.
# NOTE(review): this rebinds `popular`, so cells below see a DataFrame rather
# than the Series produced by value_counts().
popular = pd.DataFrame(popular)
popular

In [None]:
# Inspect the index of `popular`; it is passed as the labels of the treemap below.
popular.index

In [None]:
import squarify

# Treemap of the items in `popular`: one rectangle per item, sized by its count.
# ravel() flattens the counts to 1-D, so this works whether `popular` is a
# Series or a one-column DataFrame (whose .values is a 2-D array).
sizes = popular.values.ravel()
plt.figure(figsize=(18,12))
# Generate exactly one colour per rectangle so the gradient never wraps around.
color = plt.cm.cool(np.linspace(0, 1, len(sizes)))
squarify.plot(sizes = sizes, label = popular.index, alpha=.8, color = color)
plt.title('Visualization of Popular Items')
plt.axis('off')
plt.show()

## 4. Training the Apriori Model

<font color= "blue">
Apriori Algorithm contains 5 steps:

1) Transform Your Data into a List:
    The algorithm in the apyori package is implemented in such a way that the input to the algorithm is a list of lists rather than a data frame. So we need to convert the data into a list of lists.
    
2) Set minimum support and confidence
    
3) Take all the subsets of the transactions that have higher support than minimum support.
    
4) Take all the rules of the subsets that have higher confidence than minimum confidence.
    
5) Sort the rules by decreasing lift

In [None]:
# Transform the dataframe into a list of transactions: one list of item-name
# strings per row.
# Empty cells (rows with fewer items than the widest row are read as NaN) are
# skipped, so that str(NaN) == 'nan' never enters the data as a fake product.
# Iterating over df.values once also avoids hard-coding the table's shape and
# rebuilding the array on every single cell access.
products = [
    [str(item) for item in row if pd.notna(item)]
    for row in df.values
]



<font color= "blue">
The Apriori library we use requires our dataset to be in the form of a list of lists, where the whole dataset is a big list and each transaction in the dataset is an inner list within the outer big list. 

In [None]:
products[1] # Index 1 is the second transaction (indexing starts at 0), shown as a Python list

In [None]:
!pip install apyori

In [None]:
from apyori import apriori

<font color= "blue">
The apriori function requires some parameter values to work. The first parameter is the list of lists that you want to extract rules from. The second parameter is min_support, which is used to select the itemsets whose support is at least the specified value. Next, the min_confidence parameter keeps only those rules whose confidence is at least the specified threshold. Similarly, the min_lift parameter specifies the minimum lift value for the shortlisted rules, and max_length limits the number of items in an itemset. Note that apyori reads only these four keyword arguments; a min_length argument (intended as the minimum number of items in a rule) is accepted but silently ignored.

<font color= "blue">
Let's suppose that we want rules for only those items that are purchased at least 3 times a day, or 7 x 3 = 21 times in one week, since our dataset covers a one-week time period. The support for those items can be calculated as 21/7500 = 0.0028, which we round up to 0.003. The minimum confidence for the rules is 20%, or 0.2. Similarly, we specify the value for lift as 3, and finally min_length is 2 since we want at least two products in our rules. We will also use the max_length parameter because we only want to see pairwise relations between items. Let's assume that the market will run a marketing campaign that promotes one item as the target and gives the other one away as a free gift when the target item is bought.

In [None]:
# Support threshold estimate: 21 purchases per week out of roughly 7500 transactions.
21/7500

In [None]:
# Mine pairwise association rules.
# apyori reads only min_support, min_confidence, min_lift and max_length from
# its keyword arguments; a min_length argument is silently ignored, so it is
# not passed here. Single-item results have a lift of 1 and are already
# excluded by min_lift=3, so every returned rule involves two products.
model = apriori(
    transactions=products,  # list of transactions, each a list of item-name strings
    min_support=0.003,      # minimum support of the itemset
    min_confidence=0.2,     # minimum confidence of the rule
    min_lift=3,             # minimum lift of the rule
    max_length=2,           # maximum number of items in an itemset
)

In [None]:
# Materialise everything apriori() yields so the rules can be indexed and reused.
results = [record for record in model]
results

In [None]:
# Tabular view of the raw result records, one row per record.
pd.DataFrame(results)

In [None]:
# Flatten every rule of every mined itemset into five parallel lists.
# Each result is (items, support, ordered_statistics) and each ordered statistic
# is (items_base, items_add, confidence, lift). A single result can carry several
# statistics (e.g. both A -> B and B -> A when each passes the thresholds), so
# all of them are walked rather than only the first.
rules = [(result, stat) for result in results for stat in result[2]]

# Item sets are joined in sorted order: the label is deterministic, and a side
# containing several items is shown in full instead of one arbitrary member.
bought_item = [", ".join(sorted(stat[0])) for _, stat in rules]
will_buy_item = [", ".join(sorted(stat[1])) for _, stat in rules]
support_values = [result[1] for result, _ in rules]  # support of the whole itemset
confidences = [stat[2] for _, stat in rules]
lift_values = [stat[3] for _, stat in rules]

In [None]:
# Combine the five parallel lists into one tuple per rule.
new_data = [
    row
    for row in zip(bought_item, will_buy_item, support_values, confidences, lift_values)
]
new_data

In [None]:
# Tabulate the rules, one row per rule. The first header previously read
# "Boungt Item" (typo); the "Lift" and "Confidence" names are relied on by the
# ranking cells that follow.
new_df = pd.DataFrame(
    new_data,
    columns=["Bought Item", "Expected To Be Bought", "Support", "Confidence", "Lift"],
)
new_df

<font color="blue">
The support value for the first rule is 0.004533. This number is calculated by dividing the number of transactions containing both light cream and chicken by the total number of transactions. The confidence level for the rule is 0.290598, which shows that out of all the transactions that contain light cream, 29.0598% also contain chicken. Finally, the lift of 4.84 tells us that chicken is 4.84 times more likely to be bought by customers who buy light cream compared to the default likelihood of the sale of chicken.

In [None]:
# Ten rules with the highest lift: the first argument (n) is how many rows to
# return, the second (columns) is the column the ranking is based on.
new_df.nlargest(10, "Lift")

In [None]:
# Ten rules with the highest confidence.
# e.g. if a customer buys tomato sauce, the probability of buying ground beef is 37%
new_df.nlargest(10, "Confidence")

<font color="blue">
Now we will change some parameters

In [None]:
# Second run with stricter support/confidence and itemsets of up to three items.
# apyori reads only min_support, min_confidence, min_lift and max_length from
# its keyword arguments; a min_length argument is silently ignored, so it is
# not passed here.
model2 = apriori(
    transactions=products,  # list of transactions, each a list of item-name strings
    min_support=0.004,      # minimum support of the itemset
    min_confidence=0.5,     # minimum confidence of the rule
    min_lift=3,             # minimum lift of the rule
    max_length=3,           # maximum number of items in an itemset
)

In [None]:
# Materialise everything the second apriori() run yields, then display it.
results2 = [record for record in model2]
results2