# ALS [Alternating Least Square]

## Building Recommendation system using ALS on MovieLens Dataset

import the required libraries

In [1]:
import pandas as pd

In [2]:

ratings = pd.read_csv('data/ratings.csv')

In [3]:
ratings.head()

Unnamed: 0,user_id,movie_id,rating,unix_timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [4]:
ratings.shape

(100000, 4)

In [5]:
ratings.drop(['unix_timestamp'], axis=1, inplace=True)

In [6]:
ratings.head()

Unnamed: 0,user_id,movie_id,rating
0,196,242,3
1,186,302,3
2,22,377,1
3,244,51,2
4,166,346,1


In [7]:
ratings.describe()

Unnamed: 0,user_id,movie_id,rating
count,100000.0,100000.0,100000.0
mean,462.48475,425.53013,3.52986
std,266.61442,330.798356,1.125674
min,1.0,1.0,1.0
25%,254.0,175.0,3.0
50%,447.0,322.0,4.0
75%,682.0,631.0,4.0
max,943.0,1682.0,5.0


In [8]:
ratings['user_id'].nunique()

943

In [9]:
ratings['movie_id'].nunique()

1682

## Create Sparse User-Item Matrix

In [10]:
from scipy.sparse import csr_matrix

In [11]:
alpha = 40

In [12]:
ratings.shape[0]

100000

In [13]:
mis = ratings['movie_id'].unique()

import numpy as np
np.sort(mis)

array([   1,    2,    3, ..., 1680, 1681, 1682])

In [14]:
uis = ratings['user_id'].unique()

import numpy as np
np.sort(uis)

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
       105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
       118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130,
       131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
       144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156,
       157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
       170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 18

In [15]:
sparse_user_item = csr_matrix( ([alpha]*ratings.shape[0], (ratings['user_id'], ratings['movie_id']) ))

In [16]:
sparse_user_item

<944x1683 sparse matrix of type '<class 'numpy.int64'>'
	with 100000 stored elements in Compressed Sparse Row format>

In [17]:
sparse_user_item.indices, sparse_user_item.count_nonzero()

(array([   1,    2,    3, ..., 1188, 1228, 1330], dtype=int32), 100000)

### shape : 944x1683.  since Total No of users = 943, No of Movies = 1682

### Convert to Array

In [18]:
csr_user_array = sparse_user_item.toarray()

In [19]:
csr_user_array

array([[ 0,  0,  0, ...,  0,  0,  0],
       [ 0, 40, 40, ...,  0,  0,  0],
       [ 0, 40,  0, ...,  0,  0,  0],
       ...,
       [ 0, 40,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0, 40, ...,  0,  0,  0]])

In [20]:
len(csr_user_array), len(csr_user_array[0]), csr_user_array[1][1]

(944, 1683, 40)

In [21]:
max(csr_user_array[1])

40

### csr matrix only stores where value is 40 [non-zero]. (Compressed Sparse Row)

In [22]:
print(sparse_user_item)

  (1, 1)	40
  (1, 2)	40
  (1, 3)	40
  (1, 4)	40
  (1, 5)	40
  (1, 6)	40
  (1, 7)	40
  (1, 8)	40
  (1, 9)	40
  (1, 10)	40
  (1, 11)	40
  (1, 12)	40
  (1, 13)	40
  (1, 14)	40
  (1, 15)	40
  (1, 16)	40
  (1, 17)	40
  (1, 18)	40
  (1, 19)	40
  (1, 20)	40
  (1, 21)	40
  (1, 22)	40
  (1, 23)	40
  (1, 24)	40
  (1, 25)	40
  :	:
  (943, 739)	40
  (943, 756)	40
  (943, 763)	40
  (943, 765)	40
  (943, 785)	40
  (943, 794)	40
  (943, 796)	40
  (943, 808)	40
  (943, 816)	40
  (943, 824)	40
  (943, 825)	40
  (943, 831)	40
  (943, 840)	40
  (943, 928)	40
  (943, 941)	40
  (943, 943)	40
  (943, 1011)	40
  (943, 1028)	40
  (943, 1044)	40
  (943, 1047)	40
  (943, 1067)	40
  (943, 1074)	40
  (943, 1188)	40
  (943, 1228)	40
  (943, 1330)	40


### Create item-user sparse matrix

In [23]:
sparse_item_user = sparse_user_item.T.tocsr()

In [24]:
sparse_item_user

<1683x944 sparse matrix of type '<class 'numpy.int64'>'
	with 100000 stored elements in Compressed Sparse Row format>

### shape : 1683x944.  since Total No of Movies = 1682 & No of users = 943

In [25]:
csr_item_array = sparse_item_user.toarray()

In [26]:
csr_item_array

array([[ 0,  0,  0, ...,  0,  0,  0],
       [ 0, 40, 40, ..., 40,  0,  0],
       [ 0, 40,  0, ...,  0,  0, 40],
       ...,
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0]])

In [27]:
len(csr_item_array), len(csr_item_array[0]), csr_item_array[1][1]

(1683, 944, 40)

In [28]:
print(sparse_item_user)

  (1, 1)	40
  (1, 2)	40
  (1, 5)	40
  (1, 6)	40
  (1, 10)	40
  (1, 13)	40
  (1, 15)	40
  (1, 16)	40
  (1, 17)	40
  (1, 18)	40
  (1, 20)	40
  (1, 21)	40
  (1, 23)	40
  (1, 25)	40
  (1, 26)	40
  (1, 38)	40
  (1, 41)	40
  (1, 42)	40
  (1, 43)	40
  (1, 44)	40
  (1, 45)	40
  (1, 49)	40
  (1, 54)	40
  (1, 56)	40
  (1, 57)	40
  :	:
  (1662, 782)	40
  (1663, 782)	40
  (1664, 782)	40
  (1664, 839)	40
  (1664, 870)	40
  (1664, 880)	40
  (1665, 782)	40
  (1666, 782)	40
  (1667, 782)	40
  (1668, 782)	40
  (1669, 782)	40
  (1670, 782)	40
  (1671, 787)	40
  (1672, 828)	40
  (1672, 896)	40
  (1673, 835)	40
  (1674, 840)	40
  (1675, 851)	40
  (1676, 851)	40
  (1677, 854)	40
  (1678, 863)	40
  (1679, 863)	40
  (1680, 863)	40
  (1681, 896)	40
  (1682, 916)	40


## Create train, test data

In [29]:
from implicit.evaluation import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [30]:

train, test = train_test_split(sparse_user_item, train_percentage=0.8)

In [31]:
train

<944x1683 sparse matrix of type '<class 'numpy.int64'>'
	with 80055 stored elements in Compressed Sparse Row format>

In [32]:
test

<944x1683 sparse matrix of type '<class 'numpy.int64'>'
	with 19945 stored elements in Compressed Sparse Row format>

## Building ALS Model

In [33]:
import implicit

In [34]:
#! pip install implicit

In [35]:
model = implicit.als.AlternatingLeastSquares(factors=100, regularization=0.1, iterations=20, calculate_training_loss=False,random_state=42)
model

<implicit.cpu.als.AlternatingLeastSquares at 0x7ff3e1bc3040>

### Train the Model

In [36]:
model.fit(train)

100%|██████████| 20/20 [00:01<00:00, 15.65it/s]


## Generating recommendations for a user_id

In [37]:
user_id = 117
print(sparse_user_item[user_id].toarray())

[[ 0 40  0 ...  0  0  0]]


In [38]:
type(sparse_user_item)

scipy.sparse._csr.csr_matrix

In [39]:
model.recommend(user_id, sparse_user_item[user_id])# sparse_user_item)

(array([234, 294, 154, 204, 183,   8, 672,  89, 275,   4], dtype=int32),
 array([1.1052521 , 1.1038177 , 0.9211259 , 0.8891977 , 0.8810041 ,
        0.873855  , 0.8635683 , 0.85692716, 0.8518163 , 0.8181323 ],
       dtype=float32))

In [40]:
model.recommend(user_id, sparse_user_item[user_id], N=30)# sparse_user_item)

(array([234, 294, 154, 204, 183,   8, 672,  89, 275,   4, 685, 815, 273,
        217,  42, 343,  14,  82, 243,  22, 191, 455, 250,  69, 201, 269,
        127, 710,  24, 833], dtype=int32),
 array([1.1052521 , 1.1038177 , 0.9211259 , 0.8891977 , 0.8810041 ,
        0.873855  , 0.8635683 , 0.85692716, 0.8518163 , 0.8181323 ,
        0.796636  , 0.7830255 , 0.778002  , 0.7749587 , 0.7715314 ,
        0.7616451 , 0.7595427 , 0.7575339 , 0.75547767, 0.74347174,
        0.74268246, 0.7416284 , 0.74123704, 0.7364369 , 0.7221491 ,
        0.70483744, 0.697477  , 0.6931515 , 0.68981016, 0.6883787 ],
       dtype=float32))

In [41]:
output = model.recommend(user_id, sparse_user_item[user_id])# sparse_user_item)
output

(array([234, 294, 154, 204, 183,   8, 672,  89, 275,   4], dtype=int32),
 array([1.1052521 , 1.1038177 , 0.9211259 , 0.8891977 , 0.8810041 ,
        0.873855  , 0.8635683 , 0.85692716, 0.8518163 , 0.8181323 ],
       dtype=float32))

In [42]:
output

(array([234, 294, 154, 204, 183,   8, 672,  89, 275,   4], dtype=int32),
 array([1.1052521 , 1.1038177 , 0.9211259 , 0.8891977 , 0.8810041 ,
        0.873855  , 0.8635683 , 0.85692716, 0.8518163 , 0.8181323 ],
       dtype=float32))

In [43]:
output[0].tolist()

[234, 294, 154, 204, 183, 8, 672, 89, 275, 4]

In [44]:
'''output_df = pd.DataFrame(output, columns=['movie_id', 'als_score'])'''
d = {'movie_id': output[0].tolist(), 'als_score': output[1].tolist()}
output_df = pd.DataFrame(data=d);output_df

Unnamed: 0,movie_id,als_score
0,234,1.105252
1,294,1.103818
2,154,0.921126
3,204,0.889198
4,183,0.881004
5,8,0.873855
6,672,0.863568
7,89,0.856927
8,275,0.851816
9,4,0.818132


# Load Movies Data

In [45]:
movies = pd.read_csv('data/movie_genres.csv')
movies = movies[['movie_id', 'movie title']]
movies.head()

Unnamed: 0,movie_id,movie title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


# Merge recommendation output with Movies Data

In [46]:
merged = pd.merge(output_df, movies, how='left', on='movie_id')
merged

Unnamed: 0,movie_id,als_score,movie title
0,234,1.105252,Jaws (1975)
1,294,1.103818,Liar Liar (1997)
2,154,0.921126,Monty Python's Life of Brian (1979)
3,204,0.889198,Back to the Future (1985)
4,183,0.881004,Alien (1979)
5,8,0.873855,Babe (1995)
6,672,0.863568,Candyman (1992)
7,89,0.856927,Blade Runner (1982)
8,275,0.851816,Sense and Sensibility (1995)
9,4,0.818132,Get Shorty (1995)


## Generating recommendations for Movie_id

In [47]:

item_id = 11
n_similar = 10

In [48]:

similar = model.similar_items(item_id, n_similar)
type(similar),similar


(tuple,
 (array([ 11,  22, 470, 518,  39, 464, 198, 693,  92, 180], dtype=int32),
  array([1.        , 0.34592944, 0.32494295, 0.32035494, 0.3039211 ,
         0.2780625 , 0.273755  , 0.2712476 , 0.26885122, 0.262306  ],
        dtype=float32)))

In [49]:
similar[0]

array([ 11,  22, 470, 518,  39, 464, 198, 693,  92, 180], dtype=int32)

In [50]:
d = {'movie_id': similar[0].tolist(), 'score': similar[1].tolist()}
#similar_df = pd.DataFrame(similar, columns=['movie_id', 'score'])
similar_df = pd.DataFrame(data=d)
similar_df

Unnamed: 0,movie_id,score
0,11,1.0
1,22,0.345929
2,470,0.324943
3,518,0.320355
4,39,0.303921
5,464,0.278062
6,198,0.273755
7,693,0.271248
8,92,0.268851
9,180,0.262306


# Merge recommendation output with Movies Data

In [51]:
merged_similar = pd.merge(similar_df, movies, how='left', on='movie_id')
merged_similar

Unnamed: 0,movie_id,score,movie title
0,11,1.0,Seven (Se7en) (1995)
1,22,0.345929,Braveheart (1995)
2,470,0.324943,Tombstone (1993)
3,518,0.320355,Miller's Crossing (1990)
4,39,0.303921,Strange Days (1995)
5,464,0.278062,Vanya on 42nd Street (1994)
6,198,0.273755,Nikita (La Femme Nikita) (1990)
7,693,0.271248,Casino (1995)
8,92,0.268851,True Romance (1993)
9,180,0.262306,Apocalypse Now (1979)
