# ALS [Alternating Least Squares]

## Building a Recommendation System using ALS on the MovieLens Dataset

Import the required libraries

In [2]:
import pandas as pd

In [3]:

ratings = pd.read_csv('ratings.csv')


In [4]:
ratings.head()

Unnamed: 0,userId,movieId,rating
0,517,1,4.0
1,213,1,3.5
2,514,1,4.0
3,214,1,3.0
4,216,1,3.0


In [5]:
ratings.shape

(1999, 3)

In [5]:
# ratings.drop(['unix_timestamp'], axis=1, inplace=True)

In [6]:
ratings.head()

Unnamed: 0,userId,movieId,rating
0,517,1,4.0
1,213,1,3.5
2,514,1,4.0
3,214,1,3.0
4,216,1,3.0


In [7]:
ratings.describe()

Unnamed: 0,userId,movieId,rating
count,1999.0,1999.0,1999.0
mean,309.868934,17.981991,3.56003
std,183.021167,12.558559,1.002698
min,1.0,1.0,0.5
25%,150.0,6.0,3.0
50%,307.0,17.0,4.0
75%,470.0,31.0,4.0
max,610.0,42.0,5.0


In [8]:
ratings['userId'].nunique()

434

In [9]:
ratings['movieId'].nunique()

39

## Create Sparse User-Item Matrix

In [10]:
from scipy.sparse import csr_matrix

In [11]:
alpha = 40

In [12]:
ratings.shape[0]

1999

In [14]:
# Build the user x item matrix: rows are indexed by userId, columns by movieId,
# and every observed rating is stored with the same value `alpha`
# (the rating value itself is not used).
n_ratings = ratings.shape[0]
confidence = [alpha] * n_ratings
user_idx = ratings['userId']
movie_idx = ratings['movieId']
sparse_user_item = csr_matrix((confidence, (user_idx, movie_idx)))

In [15]:
sparse_user_item

<611x43 sparse matrix of type '<class 'numpy.intc'>'
	with 1999 stored elements in Compressed Sparse Row format>

### shape : 611x43, since the largest userId is 610 and the largest movieId is 42 (ids start at 1, so row 0 and column 0 are unused)

### Convert to Array

In [15]:
csr_user_array = sparse_user_item.toarray()

In [16]:
csr_user_array

array([[ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       ...,
       [ 0, 40, 40, ...,  0,  0,  0],
       [ 0, 40,  0, ...,  0,  0,  0],
       [ 0, 40,  0, ...,  0,  0,  0]], dtype=int32)

In [17]:
csr_user_array.shape

(611, 43)

In [18]:
len(csr_user_array), len(csr_user_array[0]), csr_user_array[1][1]

(611, 43, 0)

In [19]:
max(csr_user_array[1])

40

### The CSR (Compressed Sparse Row) matrix stores only the non-zero entries, i.e. the cells whose value is 40.

In [20]:
print(sparse_user_item)

  (1, 3)	40
  (1, 6)	40
  (3, 31)	40
  (4, 21)	40
  (4, 32)	40
  (5, 1)	40
  (5, 21)	40
  (5, 34)	40
  (5, 36)	40
  (5, 39)	40
  (6, 2)	40
  (6, 3)	40
  (6, 4)	40
  (6, 5)	40
  (6, 6)	40
  (6, 7)	40
  (6, 8)	40
  (6, 10)	40
  (6, 11)	40
  (6, 13)	40
  (6, 15)	40
  (6, 16)	40
  (6, 17)	40
  (6, 19)	40
  (6, 21)	40
  :	:
  (606, 32)	40
  (606, 36)	40
  (607, 1)	40
  (607, 11)	40
  (607, 25)	40
  (607, 34)	40
  (607, 36)	40
  (608, 1)	40
  (608, 2)	40
  (608, 3)	40
  (608, 10)	40
  (608, 16)	40
  (608, 19)	40
  (608, 21)	40
  (608, 24)	40
  (608, 31)	40
  (608, 32)	40
  (608, 34)	40
  (608, 39)	40
  (609, 1)	40
  (609, 10)	40
  (610, 1)	40
  (610, 6)	40
  (610, 16)	40
  (610, 32)	40


### Create item-user sparse matrix

In [21]:
# Swap the axes so that rows are items and columns are users, then make sure
# the result is stored in CSR format.
sparse_item_user = sparse_user_item.transpose().tocsr()

In [22]:
sparse_item_user

<43x611 sparse matrix of type '<class 'numpy.intc'>'
	with 1999 stored elements in Compressed Sparse Row format>

### shape : 43x611, since the largest movieId is 42 and the largest userId is 610 (ids start at 1, so row 0 and column 0 are unused)

In [23]:
csr_item_array = sparse_item_user.toarray()

In [24]:
csr_item_array

array([[ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ..., 40, 40, 40],
       [ 0,  0,  0, ..., 40,  0,  0],
       ...,
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0]], dtype=int32)

In [25]:
len(csr_item_array), len(csr_item_array[0]), csr_item_array[1][1]

(43, 611, 0)

In [26]:
print(sparse_item_user)

  (1, 5)	40
  (1, 7)	40
  (1, 15)	40
  (1, 17)	40
  (1, 18)	40
  (1, 19)	40
  (1, 21)	40
  (1, 27)	40
  (1, 31)	40
  (1, 32)	40
  (1, 33)	40
  (1, 40)	40
  (1, 43)	40
  (1, 44)	40
  (1, 45)	40
  (1, 46)	40
  (1, 50)	40
  (1, 54)	40
  (1, 57)	40
  (1, 63)	40
  (1, 64)	40
  (1, 66)	40
  (1, 68)	40
  (1, 71)	40
  (1, 73)	40
  :	:
  (39, 597)	40
  (39, 599)	40
  (39, 600)	40
  (39, 602)	40
  (39, 603)	40
  (39, 604)	40
  (39, 608)	40
  (40, 311)	40
  (40, 544)	40
  (41, 6)	40
  (41, 9)	40
  (41, 59)	40
  (41, 91)	40
  (41, 117)	40
  (41, 182)	40
  (41, 268)	40
  (41, 314)	40
  (41, 337)	40
  (41, 372)	40
  (41, 467)	40
  (41, 470)	40
  (41, 474)	40
  (41, 593)	40
  (41, 599)	40
  (42, 599)	40


## Create train, test data

In [27]:
# !pip install implicit



In [28]:
from implicit.evaluation import train_test_split

In [29]:

# Split the stored interactions of the item-user matrix into a training part
# and a held-out test part; train_percentage=0.8 requests about 80% for training.
# NOTE(review): no seed / random_state is passed, so the split is presumably
# different on every run — confirm against the installed implicit version.
train, test = train_test_split(sparse_item_user, train_percentage=0.8)

In [30]:
train

<43x611 sparse matrix of type '<class 'numpy.intc'>'
	with 1574 stored elements in Compressed Sparse Row format>

In [31]:
test

<43x611 sparse matrix of type '<class 'numpy.intc'>'
	with 425 stored elements in Compressed Sparse Row format>

## Building ALS Model

In [32]:
import implicit

In [33]:
#! pip install implicit

In [34]:

# Configure the ALS model; this only sets hyper-parameters, training happens
# later when fit() is called.
model = implicit.als.AlternatingLeastSquares(
    factors=100,
    regularization=0.1,
    iterations=20,
    calculate_training_loss=False,
)




In [35]:
model

<implicit.cpu.als.AlternatingLeastSquares at 0x22843115d48>

### Train the Model

In [36]:
model.fit(train)

  0%|          | 0/20 [00:00<?, ?it/s]

## Generating recommendations for a user_id

In [37]:
user_id = 117

In [38]:
# Ask the fitted model for recommendations for `user_id`. With no N given, the
# recorded output lists 10 (item index, score) pairs, highest score first.
# NOTE(review): the model was fitted on the item-user `train` matrix while the
# user-item matrix is passed here; that matches the older implicit API, whose
# recommend() returns a list of tuples — confirm for the installed version.
model.recommend(user_id, sparse_user_item)

[(24, 0.052831054),
 (27, 0.034657836),
 (25, 0.024127543),
 (22, 0.020914018),
 (18, 0.020339638),
 (42, 0.01994294),
 (13, 0.01943162),
 (8, 0.019272901),
 (16, 0.018863708),
 (38, 0.01717367)]

In [39]:
model.recommend(user_id, sparse_user_item, N=30)

[(24, 0.052831054),
 (27, 0.034657836),
 (25, 0.024127543),
 (22, 0.020914018),
 (18, 0.020339638),
 (42, 0.01994294),
 (13, 0.01943162),
 (8, 0.019272901),
 (16, 0.018863708),
 (38, 0.01717367),
 (14, 0.011968553),
 (1, 0.010829024),
 (23, 0.008776963),
 (9, 0.008674055),
 (28, 0.0068066046),
 (0, 0.0),
 (33, 0.0),
 (35, 0.0),
 (37, 0.0),
 (30, -0.008363962),
 (4, -0.010951325),
 (40, -0.011768699),
 (15, -0.012550831),
 (29, -0.01572914),
 (12, -0.01619339),
 (20, -0.016723514)]

In [40]:
output = model.recommend(user_id, sparse_user_item)

In [41]:
output

[(24, 0.052831054),
 (27, 0.034657836),
 (25, 0.024127543),
 (22, 0.020914018),
 (18, 0.020339638),
 (42, 0.01994294),
 (13, 0.01943162),
 (8, 0.019272901),
 (16, 0.018863708),
 (38, 0.01717367)]

In [42]:
# Turn the list of (item index, score) tuples into a two-column DataFrame; the
# 'movie_id' column is the key used later to join with the movie metadata.
output_df = pd.DataFrame(output, columns=['movie_id', 'als_score'])

In [43]:
output_df

Unnamed: 0,movie_id,als_score
0,24,0.052831
1,27,0.034658
2,25,0.024128
3,22,0.020914
4,18,0.02034
5,42,0.019943
6,13,0.019432
7,8,0.019273
8,16,0.018864
9,38,0.017174


# Load Movies Data

In [44]:
# Load the movie metadata (ids and titles).
# NOTE: the path is relative to the notebook's working directory; the recorded
# run failed with FileNotFoundError, so the cells that depend on `movies`
# produced no output. Point this at the actual location of the file before re-running.
movies = pd.read_csv('../hybrid/data/movie_genres.csv')

FileNotFoundError: [Errno 2] No such file or directory: '../hybrid/data/movie_genres.csv'

In [None]:
movies.head()

In [None]:
movies = movies[['movie_id', 'movie title']]

In [None]:
movies.head()

# Merge recommendation output with Movies Data

In [None]:
# Attach movie titles to the recommendations. A left join keeps every
# recommended movie even when no matching row exists in `movies`.
# NOTE(review): the ratings file names its key `movieId` while this metadata
# file uses `movie_id` / `movie title` — confirm both files share the same
# id space, otherwise the titles will not correspond to the recommended items.
merged = pd.merge(output_df, movies, how='left', on='movie_id')

In [None]:
merged

## Generating similar-item recommendations for a movie_id

In [None]:

item_id = 11
n_similar = 10

In [None]:

# Ask the fitted model for the `n_similar` items most similar to `item_id`.
similar = model.similar_items(item_id, n_similar)


In [None]:
similar

In [None]:
type(similar)

In [None]:
similar[0]

In [None]:
similar_df = pd.DataFrame(similar, columns=['movie_id', 'score'])

In [None]:
similar_df

# Merge recommendation output with Movies Data

In [None]:
merged_similar = pd.merge(similar_df, movies, how='left', on='movie_id')

In [None]:
merged_similar

In [None]:
import numpy as np

In [None]:
a = np.random.random((23,4))

In [None]:
a

In [None]:
from scipy.sparse import csr_matrix

In [None]:
csr = csr_matrix(a)

In [None]:
csr.toarray().shape