# Association Rules - Analysis

In [1]:
# Imports
import pickle
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

### Data

In [2]:
# Thresholds for the mining steps further down.
# Minimum support passed to apriori. Original note: "36 elements" -- presumably
# the absolute number of transactions this fraction corresponds to; TODO confirm.
support_min = 0.05
# Minimum confidence passed to association_rules.
confidence_min = 0.5

In [3]:
# Data
dataset_folder = 'data'

# Space-separated matrix without a header row; it becomes the transaction table below.
data_norm = pd.read_csv('%s/%s' % (dataset_folder, 'platform-game-without-class.txt'), header=None, sep=' ')

# Column labels later assigned to the transaction table.
# The context manager closes the file handle as soon as the pickle is read.
# NOTE(review): pickle.load can execute arbitrary code -- only load this file
# when it comes from a trusted source.
with open('%s/%s' % (dataset_folder, 'platform-game-column.bin'), 'rb') as columns_file:
    columns = pickle.load(columns_file)

# One row per cluster (see the preview below for the available columns).
df_clustering = pd.read_parquet('%s/%s' % (dataset_folder, 'cluster-items.parquet'))

df_clustering.head(10)

Unnamed: 0,cluster,coverage,dimension,dimension.set,interval,dimension.size
120,2989,25,"[harmful-breakable-flyBig, harmful-breakable-s...","[harmful-breakable-flyBig, friendly-help, frie...","[[0, 5], [0, 1], [0, 2], [6, 11], [0, 154], [0...",6
110,2489,26,"[harmful-breakable-shooterFlyBig, harmful-inde...","[friendly-coin, friendly-vehicle, harmful-brea...","[[0, 1], [0, 2], [6, 11], [0, 154], [0, 250]]",5
111,2532,26,"[harmful-breakable-flyBig, harmful-breakable-s...","[harmful-breakable-flyBig, friendly-help, frie...","[[0, 5], [0, 1], [6, 11], [0, 154], [0, 250]]",5
119,2542,25,"[harmful-breakable-flyBig, harmful-indestructi...","[harmful-breakable-flyBig, friendly-coin, frie...","[[0, 5], [0, 2], [6, 11], [0, 154], [0, 250]]",5
125,1306,24,"[harmful-breakable-flyBig, harmful-breakable-s...","[harmful-breakable-shooterBig, harmful-breakab...","[[0, 6], [0, 1], [35, 51], [0, 398]]",4
106,1347,27,"[harmful-breakable-flyBig, harmful-indestructi...","[harmful-breakable-flyBig, harmful-indestructi...","[[0, 5], [0, 2], [6, 11], [0, 250]]",4
76,1620,38,"[harmful-breakable-flyBig, harmful-breakable-s...","[harmful-breakable-shooterBig, friendly-telepo...","[[0, 4], [0, 3], [206, 399], [0, 39]]",4
80,1250,32,"[harmful-breakable-shooterBig, harmful-indestr...","[harmful-breakable-shooterBig, harmful-indestr...","[[0, 5], [25, 36], [0, 2], [0, 176]]",4
83,1421,31,"[harmful-breakable-flyBig, harmful-breakable-s...","[harmful-breakable-shooterBig, harmful-indestr...","[[0, 6], [0, 5], [25, 36], [0, 176]]",4
89,1123,30,"[harmful-breakable-shooterFlyBig, harmful-inde...","[friendly-vehicle, harmful-indestructible-shoo...","[[0, 1], [0, 2], [6, 11], [0, 154]]",4


## Cluster Analysis

In [4]:
# -- Without the dimensions (exclud)
def withoutDimension(df, exclud=None):
    """Build a boolean mask of the rows that contain none of the given dimensions.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'dimension' column whose cells support the ``in`` operator.
    exclud : iterable of str, optional
        Dimensions that must all be absent from a row. Defaults to no
        dimensions, in which case every row is selected.

    Returns
    -------
    pandas.Series
        Boolean Series indexed like ``df``; True where no entry of ``exclud``
        occurs in the row's 'dimension' value.

    Side effect: prints the number of selected rows followed by ``exclud``.
    """
    # None instead of a mutable [] default; behaves like the old default.
    exclud = [] if exclud is None else exclud
    conds = [all(dim not in dims for dim in exclud) for dims in df['dimension']]
    print('without: %d' % sum(conds), exclud)
    # dtype=bool keeps the mask usable for boolean indexing even when df is empty.
    return pd.Series(conds, index=df.index, dtype=bool)


# -- With the dimensions (includ)
def withDimension(df, includ=None):
    """Build a boolean mask of the rows that contain all of the given dimensions.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'dimension' column whose cells support the ``in`` operator.
    includ : iterable of str, optional
        Dimensions that must all be present in a row. Defaults to no
        dimensions, in which case every row is selected.

    Returns
    -------
    pandas.Series
        Boolean Series indexed like ``df``; True where every entry of
        ``includ`` occurs in the row's 'dimension' value.

    Side effect: prints the number of selected rows followed by ``includ``.
    """
    # None instead of a mutable [] default; behaves like the old default.
    includ = [] if includ is None else includ
    conds = [all(dim in dims for dim in includ) for dims in df['dimension']]
    print('with: %d' % sum(conds), includ)
    # dtype=bool keeps the mask usable for boolean indexing even when df is empty.
    return pd.Series(conds, index=df.index, dtype=bool)

In [5]:
def unique(data):
    """Collapse an iterable of item collections into its distinct itemsets.

    Each element of ``data`` is turned into a set; two elements count as
    duplicates when they hold the same items, regardless of order.

    Parameters
    ----------
    data : iterable of iterables of hashable items

    Returns
    -------
    list of set
        The distinct itemsets, in order of first appearance.

    Side effect: every duplicate element is printed as it appeared in ``data``.
    """
    # frozenset copies of what was already kept: O(1) duplicate detection
    # instead of rescanning the result list for every element.
    seen = set()
    unique_list = []
    for x in data:
        # Materialise once so that one-shot iterables are handled correctly too.
        key = frozenset(x)
        if key in seen:
            print(x)
            continue
        seen.add(key)
        unique_list.append(set(key))
    return unique_list

In [6]:
# Keep only the clusters whose dimensions do not include 'friendly-help'.
no_help_mask = withoutDimension(df_clustering, ['friendly-help'])
df_good = df_clustering[no_help_mask]

print()
# Distinct itemsets among the kept clusters; unique() prints any duplicate it drops.
cluster_itemset = unique(df_good['dimension.set'])

print('\nunique itemsets: %d' % len(cluster_itemset))

without: 18 ['friendly-help']

['harmful-breakable-flyBig' 'friendly-vehicle'
 'harmful-indestructible-fixedBig']
['friendly-vehicle' 'harmful-indestructible-fixedBig']
['harmful-indestructible-shooter' 'friendly-vehicle']

unique itemsets: 15


## Association Rules Analysis

### Apriori

Transactions

In [7]:
# Cast every cell to bool; astype already returns a new frame, so data_norm
# is left untouched when the column labels are replaced.
X = data_norm.astype(bool)
X.columns = columns
X.head(3)

Unnamed: 0,harmful-breakable-walk,harmful-breakable-walkBig,harmful-breakable-fly,harmful-breakable-flyBig,harmful-breakable-shooter,harmful-breakable-shooterBig,harmful-breakable-shooterFly,harmful-breakable-shooterFlyBig,harmful-indestructible-fixed,harmful-indestructible-fixedBig,...,harmful-indestructible-shooterFly,harmful-indestructible-shooterFlyBig,friendly-powerUp,friendly-vehicle,friendly-help,friendly-jump,friendly-move,friendly-teleport,friendly-puzzle,friendly-coin
0,True,True,False,True,False,False,False,False,True,False,...,False,False,False,False,False,False,True,False,False,False
1,False,False,False,False,False,False,False,False,True,True,...,False,False,False,False,True,False,True,True,False,True
2,True,False,False,False,False,False,False,False,False,False,...,False,False,True,False,True,True,True,True,True,True


In [8]:
# Mine the itemsets whose support reaches support_min, labelled by column name.
frequent_itemsets = apriori(X, min_support=support_min, use_colnames=True)
# Number of items in each itemset.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)

# Report how many itemsets were found, then show the five with the highest support.
# NOTE: read the "<=" in the message as "support_min <= support".
itemset_count = len(frequent_itemsets)
print('itemsets: %d (<= %.2f)' % (itemset_count, support_min))
frequent_itemsets.sort_values(by=['support'], ascending=False).head()

itemsets: 93825 (<= 0.05)


Unnamed: 0,support,itemsets,length
7,0.877778,(harmful-indestructible-fixed),1
20,0.845833,(friendly-teleport),1
17,0.830556,(friendly-help),1
19,0.826389,(friendly-move),1
22,0.777778,(friendly-coin),1


In [9]:
# Keep the frequent itemsets that also occur among the cluster itemsets.
# Series.isin() is not usable here: cluster_itemset holds plain sets, which are
# unhashable, so membership is tested with `in` (a frozenset and a set compare
# equal when they hold the same items).
is_common = (
    frequent_itemsets['itemsets']
    .apply(lambda itemset: itemset in cluster_itemset)
    .astype(bool)  # stays a valid mask even when no itemsets were mined
)
# Select in one step instead of growing an empty frame row by row: that was
# quadratic and turned every column into object dtype. The explicit copy makes
# the result safe to extend in later cells.
common_itemsets = (
    frequent_itemsets[is_common]
    .sort_values(by=['support'], ascending=False)
    .copy()
)

In [10]:
# Row count first, then the table itself as the cell's displayed value.
print('common itemsets: %d' % len(common_itemsets))
common_itemsets

common itemsets: 7


Unnamed: 0,support,itemsets,length
149,0.341667,"(harmful-indestructible-fixed, friendly-vehicle)",2
163,0.268056,"(harmful-indestructible-fixedBig, friendly-veh...",2
218,0.268056,"(harmful-indestructible-shooter, friendly-vehi...",2
770,0.077778,"(harmful-breakable-flyBig, harmful-indestructi...",3
782,0.070833,"(harmful-breakable-flyBig, harmful-indestructi...",3
833,0.070833,"(harmful-breakable-flyBig, friendly-coin, frie...",3
978,0.058333,"(harmful-breakable-shooterBig, harmful-indestr...",3


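Add all proper sub-itemsets of the common itemsets, so that `association_rules` can look up the support of every antecedent and consequent.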

In [11]:
from itertools import chain, combinations

# -- every subset of ss with 1 <= size < len(ss)
def all_subsets(ss):
    """Iterate over the non-empty proper subsets of ``ss``.

    Subsets are yielded as tuples, grouped by increasing size (1 up to
    ``len(ss) - 1``); ``ss`` itself and the empty subset are not produced.
    """
    per_size = [combinations(ss, size) for size in range(1, len(ss))]
    return chain.from_iterable(per_size)

# Map each frequent itemset to its row label in one pass, so a subset is found
# with a dict lookup instead of rescanning the whole frame each time.
# Itemsets produced by apriori are frozensets and therefore usable as keys;
# setdefault keeps the first row should an itemset ever appear twice.
index_by_itemset = {}
for idx, itemset in frequent_itemsets['itemsets'].items():
    index_by_itemset.setdefault(itemset, idx)

# Iterate over a snapshot: rows are added to common_itemsets inside the loop,
# and a frame should not be modified while it is being iterated.
for ss in list(common_itemsets['itemsets']):
    for subset in all_subsets(ss):
        idx2 = index_by_itemset[frozenset(subset)]
        # A subset shared by several itemsets only needs to be copied once.
        if idx2 not in common_itemsets.index:
            common_itemsets.loc[idx2] = frequent_itemsets.loc[idx2]

In [12]:
# Columns shown when the rules are displayed.
rules_columns = ['antecedents','consequents','support','confidence','lift','conviction','leverage']

# Derive rules from the common itemsets, filtered on confidence with
# confidence_min as the threshold.
rules = association_rules(
    common_itemsets,
    metric='confidence',
    min_threshold=confidence_min,
)

In [13]:
# Highest-confidence rules first, renumbered from 0.
rules = (
    rules
    .sort_values(by=['confidence'], ascending=False)
    .reset_index(drop=True)
)
print('rules: %d' % len(rules))
rules[rules_columns]

rules: 20


Unnamed: 0,antecedents,consequents,support,confidence,lift,conviction,leverage
0,"(harmful-breakable-shooterBig, friendly-vehicle)",(harmful-indestructible-fixed),0.058333,1.0,1.139241,inf,0.00713
1,(harmful-breakable-shooterBig),(harmful-indestructible-fixed),0.1125,0.975904,1.111789,5.072222,0.011312
2,"(harmful-breakable-flyBig, friendly-vehicle)",(harmful-indestructible-fixed),0.077778,0.965517,1.099956,3.544444,0.007068
3,(friendly-vehicle),(harmful-indestructible-fixed),0.341667,0.928302,1.057559,1.704678,0.018596
4,(harmful-breakable-flyBig),(harmful-indestructible-fixed),0.1375,0.916667,1.044304,1.466667,0.005833
5,"(harmful-breakable-flyBig, friendly-vehicle)",(friendly-coin),0.070833,0.87931,1.130542,1.84127,0.008179
6,"(harmful-breakable-flyBig, friendly-vehicle)",(harmful-indestructible-fixedBig),0.070833,0.87931,1.394501,3.061111,0.020039
7,(friendly-vehicle),(friendly-coin),0.315278,0.856604,1.101348,1.549708,0.029012
8,(harmful-breakable-flyBig),(friendly-coin),0.127778,0.851852,1.095238,1.5,0.011111
9,(harmful-breakable-flyBig),(harmful-indestructible-fixedBig),0.116667,0.777778,1.23348,1.6625,0.022083
