# COMM7380 Recommender Systems for Digital Media

In [None]:
# Install NetworkX, Matplotlib, Pandas, Numpy using pip package in the current Jupyter kernel
import sys
!{sys.executable} -m pip install matplotlib
!{sys.executable} -m pip install pandas
!{sys.executable} -m pip install numpy

# User Behaviour and the User-Item Matrix

## Importing and getting to know your data

In [None]:
import pandas as pd 
import numpy as np

In [None]:
# Load the collector log; the path is relative to the notebook's working directory
dataDir = '../data/'
logFile = 'collector_log.csv'
evidence = pd.read_csv(dataDir + logFile)

In [None]:
# Check the type of the loaded object and take a glance at its first five rows
print(type(evidence))
evidence.head()

## Examining the attributes of the Data Frame (standard procedures)

- ```df.shape``` ("dim" in R) 
- ```df.columns``` (check the variables, like "names" in R) 
- ```df.index``` (check the index of the "rows") 
- ```df.info()```
- ```df.describe()``` (descriptive statistics for numerical variables) 

In [None]:
# Dimensions of the log as a (rows, columns) tuple
evidence.shape 
# (the number of cases/observations, the number of variables)

In [None]:
# Column labels of the log, i.e. the names of the variables
evidence.columns

In [None]:
# Row labels (index) of the log
evidence.index

In [None]:
# Concise summary: dtype and non-null count of every column, plus memory usage
evidence.info()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, ...) of the numerical columns
evidence.describe() 

In [None]:
# Distinct items and distinct users that appear anywhere in the log;
# they become the columns and the rows of the user-item matrices below
content = evidence['content_id'].unique()
users = evidence['user_id'].unique()
print(type(content))
print(len(content))

# Implicit Ratings
## Binary Matrix
Let's create a user-item binary matrix from the "buy" events

In [None]:
# Empty user-item frame: one row per user, one column per item, every cell NaN
uiBuyMatrix = pd.DataFrame(index=users, columns=content)
uiBuyMatrix.iloc[:2]

In [None]:
# Distinct event types recorded in the log
evidence['event'].unique()

Select only the "buy" events

In [None]:
# Keep only the log rows whose event type is 'buy'
isBuy = evidence['event'] == 'buy'
buyEvidence = evidence[isBuy]
buyEvidence.head()

Create the user-item matrix `uiBuyMatrix` for the buy events

In [None]:
# Flag every purchased (user, item) pair with a 1 in the binary matrix
for buyer, boughtItem in zip(buyEvidence['user_id'], buyEvidence['content_id']):
    uiBuyMatrix.at[buyer, boughtItem] = 1

In [None]:
# Rich display of the binary matrix (pandas truncates large frames automatically)
uiBuyMatrix

## Behavioural Implicit Ratings

Using the formula introduced during the lecture

$${IR}_{i,u} = \left(w_1*{\#event}_1\right)+\left(w_2*{\#event}_2\right)+\dots+\left(w_n*{\#event}_n\right)$$

In [None]:
# Empty user-item frame for the implicit ratings: users as rows, items as columns, all NaN
uiMatrix = pd.DataFrame(index=users, columns=content)
uiMatrix.iloc[:2]

Types of events recorded in the logs

In [None]:
# Distinct event types found in the log; each one needs a weight below
eventTypes = evidence['event'].unique()
print(eventTypes)

Give a weight to each of them

In [None]:
# Weight added to the implicit rating for each occurrence of an event type
eventWeights = dict(
    details=15,
    moreDetails=50,
    genreView=0,
    addToList=0,
    buy=100,
)

Compute the Implicit Rating for each user-item combination.
Populate the user-item matrix `uiMatrix` with the IR values.

In [None]:
# Compute the implicit rating of every (user, item) pair as the sum of the
# weights of all the events logged for that pair.
#
# The matrix is rebuilt from `evidence` on every run, so re-executing this cell
# always gives the same result instead of adding the weights on top of the
# values left by the previous run.

# Fail loudly if the log contains an event type with no configured weight
# (same exception type as a failed dictionary lookup).
unweightedEvents = set(evidence['event'].unique()) - set(eventWeights)
if unweightedEvents:
    raise KeyError('No weight defined for event type(s): ' + repr(unweightedEvents))

# Weight of every logged event, aligned row by row with `evidence`
eventWeight = evidence['event'].map(eventWeights)

# Sum the weights per (user, item) pair, then pivot the items into columns.
# Note: groupby ignores rows whose user_id or content_id is missing.
ratingSums = (
    eventWeight
    .groupby([evidence['user_id'], evidence['content_id']], sort=False)
    .sum()
    .unstack('content_id')
)

# Restore the full users x content layout in its original order.
# Pairs with no logged event stay NaN; pairs with only zero-weight events are 0.
# The result is numeric (float) rather than an object-typed frame.
uiMatrix = ratingSums.reindex(index=users, columns=content)

In [None]:
# Rich display of the implicit-rating matrix (pandas truncates large frames automatically)
uiMatrix

- Course Instructor: Dr. Paolo Mengoni (Visiting Scholar, School of Communication, Hong Kong Baptist University) 
  - pmengoni@hkbu.edu.hk

- The code in this notebook takes inspiration from various sources. All code is for educational purposes only and is released under the CC1.0. 