# COMM7380 Recommender Systems for Digital Media

In [None]:
# Install Matplotlib, Pandas, NumPy and apyori with pip.
# pip is invoked through sys.executable, i.e. the Python interpreter running the current Jupyter kernel.
import sys
!{sys.executable} -m pip install matplotlib
!{sys.executable} -m pip install pandas
!{sys.executable} -m pip install numpy
!{sys.executable} -m pip install apyori

# Non-personalised Recommendations
# Association Rules Discovery

## Importing and getting to know your data

In [None]:
import pandas as pd 
import numpy as np

In [None]:
# Read the collected interaction log into a DataFrame
dataDir = '../data/'
logFile = 'collector_log.csv'
evidence = pd.read_csv(dataDir + logFile)

In [None]:
# Check the type of the loaded object, then glance at its first five rows
evidenceType = type(evidence)
print(evidenceType)
evidence.head(n=5)

In [None]:
# List the column labels of the loaded log
evidence.columns

In [None]:
# Print a summary of the DataFrame (columns, dtypes and non-null counts)
evidence.info()

Number of users, content and sessions

In [None]:
# Distinct identifiers found in the log
users = evidence['user_id'].unique()
content = evidence['content_id'].unique()
sessions = evidence['session_id'].unique()

# Report how many distinct values each identifier has
for label, uniqueValues in (('# users: ', users), ('# content: ', content), ('# sessions: ', sessions)):
    print(label, len(uniqueValues))

# Session matrix

In [None]:
# Empty session-item matrix: one row per session, one column per content id.
# No data is supplied, so the cells hold missing values until they are filled in.
siMatrix = pd.DataFrame(index=sessions, columns=content)
siMatrix.head(n=2)

In [None]:
# Show the distinct event types recorded in the log
evidence['event'].unique()

Select only the "moreDetails" events

In [None]:
# Boolean mask selecting the rows whose event type is 'moreDetails'
isMoreDetails = evidence['event'] == 'moreDetails'
mdEvidence = evidence[isMoreDetails]
mdEvidence.head(n=5)

Populate the session-item matrix with the moreDetails events

In [None]:
# For every moreDetails event, write the content id into the cell addressed by
# (session id, content id) of the session-item matrix.
for _, logRow in mdEvidence.iterrows():
    sessionKey, contentKey = logRow['session_id'], logRow['content_id']
    siMatrix.at[sessionKey, contentKey] = contentKey

In [None]:
# Preview the first ten rows (sessions) of the session-item matrix
siMatrix.head(10)

## Association Rules Discovery

In [None]:
from apyori import apriori

Convert the entries in the session-item matrix into the appropriate format.

Remove `NaN` values and create a list of lists (one list for each session).

In [None]:
# Turn every row of the session-item matrix into one transaction: the list of
# content ids stored for that session, with the missing (NaN) cells dropped.
records = []

npMatrix = siMatrix.to_numpy()
for session in npMatrix:
    # pd.notna detects missing values directly, instead of comparing their
    # string representation with 'nan'
    cleanedList = [x for x in session if pd.notna(x)]

    # A session without any item would be an empty transaction; leave it out
    if cleanedList:
        records.append(cleanedList)

Generate the **Association Rules** specifying the parameters:
- min_support
- min_confidence
- min_lift
- max_length

In [None]:
# Thresholds and size limit handed to apriori, gathered in one place
miningParams = {
    'min_support': 0.001,
    'min_confidence': 0.05,
    'min_lift': 3,
    'max_length': 3,
}

# Run the search and materialise the results into a list so that they can be
# counted here and iterated over afterwards
association_rules = apriori(records, **miningParams)
association_results = list(association_rules)

# Print how many result records were produced
print(len(association_results))

Print the association rules

In [None]:
for item in association_results:
    # Each result is a record of the form (items, support, ordered statistics)
    supportValue = item[1]

    # item[0] is an unordered set, so its iteration order does not say which
    # items form the left-hand side of a rule. The direction of each rule, and
    # its confidence and lift, are stored together in the ordered statistics
    # (item[2]); print every one of them so the statistics match the rule shown.
    for baseItems, addItems, confidenceValue, liftValue in item[2]:
        print("Rule: ", list(baseItems), " -> ", list(addItems))

        # Statistics
        print("Support: ", supportValue)
        print("Confidence: ", confidenceValue)
        print("Lift: ", liftValue)
        print("=====================================")

- Course Instructor: Dr. Paolo Mengoni (Visiting Scholar, School of Communication, Hong Kong Baptist University) 
  - pmengoni@hkbu.edu.hk

- The code in this notebook takes inspiration from various sources. All code is for educational purposes only and released under the CC1.0. 