In [7]:
import pandas as pd
import numpy as np 
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Load the cleaned dataset.
# Forward slashes are accepted on Windows as well as POSIX systems, so the
# relative path resolves on every platform (hard-coded backslashes only
# work on Windows).
csv_file_path = '../dataset/international_top_terms_cleaned.csv'
df = pd.read_csv(csv_file_path)
df.head()

Unnamed: 0,country_code,region_name,region_code,week,score,rank,country_name,term,refresh_date
0,FR,Aquitaine,FR-B,2019-09-15,57.0,14,France,Apple,2024-06-14
1,FR,Auvergne,FR-C,2019-09-08,83.0,14,France,Apple,2024-06-14
2,FR,Burgundy,FR-D,2019-09-01,100.0,14,France,Apple,2024-06-14
3,FR,Burgundy,FR-D,2021-09-12,62.0,14,France,Apple,2024-06-14
4,FR,Burgundy,FR-D,2021-11-21,64.0,14,France,Apple,2024-06-14


In [3]:
# Inspect the column labels of the loaded frame.
df.columns

Index(['country_code', 'region_name', 'region_code', 'week', 'score', 'rank',
       'country_name', 'term', 'refresh_date'],
      dtype='object')

In [4]:
# Splitting the data in accordance to the country

def build_region_term_basket(frame, country, value_col='score'):
    """Pivot one country's rows into a region x term table of mean values.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must provide the columns 'country_name', 'region_name', 'term'
        and `value_col`.
    country : str
        Value of 'country_name' whose rows are kept.
    value_col : str, default 'score'
        Column that is averaged for every (region, term) pair.

    Returns
    -------
    pandas.DataFrame
        One row per region (index 'region_name') and one column per term.
        Cells without a value (no rows for that pair, or a NaN mean) are 0.
    """
    country_rows = frame[frame['country_name'] == country]
    mean_scores = country_rows.groupby(['region_name', 'term'])[value_col].mean()
    # unstack() moves 'term' into the columns and already leaves
    # 'region_name' as the index, so no index round trip is required.
    return mean_scores.unstack().fillna(0)


# Top Terms in Australia
basket_Australia = build_region_term_basket(df, "Australia")

print(basket_Australia)

term                          Alex de Minaur  Alexa Leary  Alice Springs  \
region_name                                                                
Australian Capital Territory       62.000000          0.0      57.000000   
New South Wales                    56.500000          0.0       0.000000   
Northern Territory                100.000000        100.0      77.589744   
Queensland                         75.333333          0.0      54.400000   
South Australia                    72.000000          0.0      64.214286   
Tasmania                           75.000000          0.0      61.000000   
Victoria                           59.000000          0.0      65.666667   
Western Australia                  82.000000          0.0      66.750000   

term                              Apple  Ash Barty  Bacchus Marsh Grammar  \
region_name                                                                 
Australian Capital Territory  65.750000        0.0                    0.0   
New Sout

In [5]:
def hot_encode(x):
    """Binarise a score into a presence flag.

    Parameters
    ----------
    x : int or float
        Score to encode.

    Returns
    -------
    int
        1 when `x` is strictly positive, otherwise 0. Every input maps to
        0 or 1: fractional scores in (0, 1) count as present, and NaN
        (for which `x > 0` is false) counts as absent, so the function
        never falls through and returns None.
    """
    return 1 if x > 0 else 0

# Encode every cell of the region x term table with hot_encode. The result
# is bound to both names because the model cell reads `basket_Australia`.
basket_Australia = basket_encoded = basket_Australia.map(hot_encode)

print(basket_encoded)

term                          Alex de Minaur  Alexa Leary  Alice Springs  \
region_name                                                                
Australian Capital Territory               1            0              1   
New South Wales                            1            0              0   
Northern Territory                         1            1              1   
Queensland                                 1            0              1   
South Australia                            1            0              1   
Tasmania                                   1            0              1   
Victoria                                   1            0              1   
Western Australia                          1            0              1   

term                          Apple  Ash Barty  Bacchus Marsh Grammar  \
region_name                                                             
Australian Capital Territory      1          0                      0   
New South Wales     

In [6]:
# Mine frequent itemsets from the boolean region x term table.
frq_items = apriori(
    basket_Australia.astype('bool'),
    min_support=0.8,
    use_colnames=True,
)

print("After frequent Items")

# Derive association rules and rank them: highest confidence first,
# ties broken by highest lift.
# NOTE(review): a lift threshold of 0.5 also keeps rules with lift < 1
# (negative association) — confirm this is intended.
rules = (
    association_rules(frq_items, metric="lift", min_threshold=0.5)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
display(rules)

After frequent Items


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
124,(Nvidia),(RBA),0.875,0.875,0.875,1.000,1.142857,0.109375,inf,1.0
125,(RBA),(Nvidia),0.875,0.875,0.875,1.000,1.142857,0.109375,inf,1.0
126,(The Boys),(Nvidia),0.875,0.875,0.875,1.000,1.142857,0.109375,inf,1.0
127,(Nvidia),(The Boys),0.875,0.875,0.875,1.000,1.142857,0.109375,inf,1.0
136,(The Boys),(RBA),0.875,0.875,0.875,1.000,1.142857,0.109375,inf,1.0
...,...,...,...,...,...,...,...,...,...,...
198265,(Apple),"(Cricket, Premier League, The Boys, UFC, Full ...",1.000,0.875,0.875,0.875,1.000000,0.000000,1.0,0.0
198266,(Full moon),"(Cricket, Premier League, The Boys, UFC, Apple...",1.000,0.875,0.875,0.875,1.000000,0.000000,1.0,0.0
198268,(Alex de Minaur),"(Cricket, Premier League, The Boys, UFC, Apple...",1.000,0.875,0.875,0.875,1.000000,0.000000,1.0,0.0
198269,(Champions League),"(Cricket, Premier League, The Boys, UFC, Apple...",1.000,0.875,0.875,0.875,1.000000,0.000000,1.0,0.0
