# Imports

In [21]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Load Dataset 

In [22]:
# Read the cleaned dataset (a CSV in the working directory) into a DataFrame.
# NOTE(review): the line echoed in this cell's output looks like the tail of a
# pandas warning — confirm what it was before relying on the inferred dtypes.
dataset_path = 'cleaned_dataset.csv'
data = pd.read_csv(dataset_path)

  data = pd.read_csv('cleaned_dataset.csv')


# Select relevant categorical columns for pattern mining

In [23]:
# Columns whose values are turned into "column=value" items for pattern mining.
columns_of_interest = [
    'Category',
    'Free',
    'Content Rating',
    'Ad Supported',
    'In app purchases',
    'Editor Choice',
]

# Create a list of transactions

In [24]:
# Build one transaction per row of `data`: a list of "column=value" strings,
# one for each column in `columns_of_interest`. Missing values are kept as
# items too (they show up as e.g. "Ad Supported=nan").
#
# Only the selected columns are iterated, and itertuples() yields plain tuples,
# which avoids the per-row Series that iterrows() constructs over every column
# of `data` (slow on large frames, and it does not preserve dtypes across a row).
transactions = [
    [f"{col}={value}" for col, value in zip(columns_of_interest, values)]
    for values in data[columns_of_interest].itertuples(index=False, name=None)
]

# Use TransactionEncoder to transform the list of transactions into a one-hot encoded DataFrame

In [25]:
# Fit the encoder on the transactions and encode them in a single step, then
# wrap the encoded array in a DataFrame with one column per distinct item.
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
df = pd.DataFrame(data=te_ary, columns=te.columns_)

# Preview the first rows of the encoded frame
print("Transformed Data:")
print(df.head())

Transformed Data:
   Ad Supported=True  Ad Supported=nan  Category=Action  Category=Adventure  \
0               True             False            False               False   
1               True             False            False               False   
2               True             False            False               False   
3               True             False            False               False   
4               True             False            False               False   

   Category=Arcade  Category=Art & Design  Category=Auto & Vehicles  \
0            False                  False                     False   
1            False                  False                     False   
2            False                  False                     False   
3            False                  False                     False   
4            False                  False                     False   

   Category=Beauty  Category=Board  Category=Books & Reference  ...  \
0        

# Apply the Apriori algorithm to find frequent itemsets

In [27]:
# Mine frequent itemsets from the one-hot encoded frame with Apriori.
# MIN_SUPPORT is the support threshold passed to apriori(); itemsets below it
# are not returned. use_colnames=True reports itemsets by column name
# ("column=value") instead of by column index.
MIN_SUPPORT = 0.7
frequent_itemsets = apriori(df, min_support=MIN_SUPPORT, use_colnames=True)

# Display the frequent itemsets. The column-width limit is lifted for this
# print only, so longer itemsets are shown in full instead of being cut off
# with "...".
print("Frequent Itemsets:")
with pd.option_context("display.max_colwidth", None):
    print(frequent_itemsets)

Frequent Itemsets:
     support                                           itemsets
0   0.852162                          (Content Rating=Everyone)
1   0.998272                              (Editor Choice=False)
2   0.935996                                        (Free=True)
3   0.791676                           (In app purchases=False)
4   0.851147     (Content Rating=Everyone, Editor Choice=False)
5   0.795389               (Free=True, Content Rating=Everyone)
6   0.934352                   (Free=True, Editor Choice=False)
7   0.791355      (In app purchases=False, Editor Choice=False)
8   0.733936                (In app purchases=False, Free=True)
9   0.794404  (Free=True, Content Rating=Everyone, Editor Ch...
10  0.733674  (In app purchases=False, Free=True, Editor Cho...


# Extract association rules from the frequent itemsets


In [31]:
# Derive association rules from the frequent itemsets, filtering on the
# "lift" metric with a minimum threshold of 1.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1,
)

# Show only the columns needed to read each rule and its strength
rule_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
print("Association Rules:")
print(rules[rule_columns])

Association Rules:
                            antecedents                           consequents  \
0             (Content Rating=Everyone)                 (Editor Choice=False)   
1                 (Editor Choice=False)             (Content Rating=Everyone)   
2              (In app purchases=False)                 (Editor Choice=False)   
3                 (Editor Choice=False)              (In app purchases=False)   
4  (Free=True, Content Rating=Everyone)                 (Editor Choice=False)   
5                 (Editor Choice=False)  (Free=True, Content Rating=Everyone)   
6   (In app purchases=False, Free=True)                 (Editor Choice=False)   
7                 (Editor Choice=False)   (In app purchases=False, Free=True)   

    support  confidence      lift  
0  0.851147    0.998808  1.000538  
1  0.851147    0.852620  1.000538  
2  0.791355    0.999594  1.001325  
3  0.791355    0.792725  1.001325  
4  0.794404    0.998762  1.000491  
5  0.794404    0.795779  1.000491  

In [32]:
# Optional export (disabled): convert each DataFrame to a CSV string and then save it to disk
# frequent_itemsets_csv = frequent_itemsets.to_csv(index=False)
# with open('frequent_itemsets.csv', 'w', encoding='utf-8') as f:
#     f.write(frequent_itemsets_csv)

# rules_csv = rules.to_csv(index=False)
# with open('association_rules.csv', 'w', encoding='utf-8') as f:
#     f.write(rules_csv)