In [4]:
import pandas as pd
import numpy as np

In [5]:
import random
from collections import deque
import tensorflow as tf
from tensorflow import keras

In [6]:
movie = pd.read_csv('movies.csv')
rating = pd.read_csv('ratings.csv')

In [7]:
movie['genres_split'] = movie['genres'].str.split('|')
genres_dummies = movie['genres_split'].explode().str.get_dummies().groupby(level=0).sum()
movie = movie[['movieId', 'title']].join(genres_dummies)

In [8]:
rating.drop(columns=['timestamp'], inplace=True)
movie = movie.drop(columns=['(no genres listed)'])

In [9]:
movie_1 = movie

In [10]:
movie_v1 = movie_1.drop(columns=['title'])

In [11]:
movie_v1

Unnamed: 0,movieId,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0
1,2,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,3,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,4,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0
4,5,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62418,209157,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
62419,209159,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
62420,209163,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0
62421,209169,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [12]:
data_all = movie_v1.merge(rating, on='movieId')

In [13]:
user1 = data_all[data_all['userId'] == 1]
user1.shape

(70, 22)

In [14]:
user1.nunique()

movieId        70
Action          2
Adventure       2
Animation       2
Children        2
Comedy          2
Crime           2
Documentary     2
Drama           2
Fantasy         2
Film-Noir       2
Horror          2
IMAX            1
Musical         2
Mystery         2
Romance         2
Sci-Fi          2
Thriller        2
War             2
Western         2
userId          1
rating          9
dtype: int64

### Env

In [26]:
class DQL_model:
    def __init__(self, state_size, action_size, epsilon= 0.6,gamma= 0.95 ,epsilon_min=0.01, epsilon_decay=0.995, learning_rate=0.001, batch_size=20):
        self.state_size = state_size
        self.action_size = action_size
        self.epsilon = epsilon
        self.gamma = gamma
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        
        self.memory = deque(maxlen=2000)
        
        self.model = self.build_model()
        
    def build_model(self):
        model = keras.Sequential()
        model.add(keras.layers.Dense(64, input_dim= self.state_size, activation= 'relu'))
        model.add(keras.layers.Dense(32, activation= 'relu'))
        model.add(keras.layers.Dense(self.action_size, activation= 'linear'))
        
        model.compile(loss='mse', optimizer=keras.optimizers.Adam(learning_rate= self.learning_rate))
        
        return model   
    
    def remember(self, state, action, reward, next_state):
        self.memory.append((state, action, reward, next_state))
    
    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.sample(range(self.action_size), 1)[0]
        state = state.astype(np.float32)
        act_values = self.model.predict(np.reshape(state, [1,self.state_size]))
        return np.argsort(act_values[0])[-1]
    
    def train(self):
        if len(self.memory) < self.batch_size:
            return
        minibatch = random.sample(self.memory, self.batch_size)
        
        for state, action, reward, next_state in minibatch:
            
            target = reward + self.gamma*np.amax(self.model.predict(np.reshape(next_state, [1,self.state_size]))[0])
            
            target_f = self.model.predict(np.reshape(next_state, [1,self.state_size])) 
            
            # for act in action:
            #     target_f[0][act] = target
            target_f[0][action] = target
            self.model.fit(np.reshape(state, [1,self.state_size]), target_f, epochs=1, verbose=0)
            
            print(target)           
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [27]:
class Env:
    def __init__(self,data_movie, data, user_id, dqn_model):
        self.data = data
        self.user_id = user_id
        self.data_movie = data_movie
        self.dqn_model = dqn_model
        
        
        self.user_data = self.data[self.data['userId'] == self.user_id]
        self.user_data.drop(columns=['userId'], inplace=True)
        
        self.genres = self.user_data.columns[1:-1]
        self.user_data.drop(columns=['movieId'], inplace=True)
        
        self.storage = self.user_data[self.genres]
        self.memory = None
        self.movie_suggestions = None
        
        self.skip_count = 0
        self.max_skip = 10
        
        self.get_memory_from_user_data()
        self.reset()
        
    def reset(self): 
        self.movie_suggestions = self.data_movie.sample(10)
        self.skip_count = 0
        return self.movie_suggestions
    
    def suggest_next_movie(self):
        if self.movie_suggestions is not None:
            self.reset()
        
        current_state = self.memory[self.genres].values.astype(np.float32)
        
        suggested_genres_index = self.dqn_model.act(current_state)
        
        filtered_movie = self.data_movie[self.data_movie[self.genres[suggested_genres_index]] == 1]
        
        if not filtered_movie.empty:
            self.movie_suggestions = filtered_movie.sample(1).iloc[0]
        else:
            self.movie_suggestions = self.data_movie.sample(1)
            
        print(f"Genre: {suggested_genres_index}")
        return self.movie_suggestions
        
    
    def storage_data(self):
        if self.memory is not None:
            new_row = pd.DataFrame([self.memory[self.genres].values], columns=self.genres)
            self.storage = pd.concat([self.storage, new_row], ignore_index=True)
        return self.storage
    
    def reward_movie(self, rating):
        reward = rating
        
        if reward == 0:
            reward = 0.1
            
        elif reward < 3:
            reward = - (1 - reward/5)
        else:
            reward = reward/5
            
        return reward
    
    def rating_movie(self, rating= None):
        if rating is not None and self.memory is not None:
            
            self.memory = self.memory[self.genres]
            
            state = self.memory.values.astype(np.float32)
            
            action = self.dqn_model.act(state)
            
            reward = self.reward_movie(rating)
            
            next_state = state
            
            
            self.dqn_model.remember(state, action, reward, next_state)
            
            self.dqn_model.train()
            
            return self.memory
    
    def step(self, index = None):
        if index is not None and self.movie_suggestions is not None:
            self.selected_movie = self.movie_suggestions.iloc[index]
            
            self.memory = self.selected_movie
            return self.selected_movie
        else:
            self.skip_count += 1
            if self.skip_count == self.max_skip:
                self.reset()
            
        
    def get_memory_from_user_data(self):
        for _, row in self.user_data.iterrows():
            state = row[self.genres].values
            
            action = [index for index, value in enumerate(state) if value == 1]
            
            if action:
                action = random.choice(action)
            
            rating = row.max()
            
            reward = self.reward_movie(rating)
            
            random_index = np.random.randint(0, len(self.user_data))
            next_state = self.user_data.iloc[random_index][self.genres].values
            
            self.dqn_model.remember(state, action, reward, next_state)
            
        self.dqn_model.train()
    
    

In [28]:
state_size=19
action_size=19
dql = DQL_model(state_size, action_size)
env = Env(movie,data_all, user_id= 1, dqn_model= dql)
env.step(1)
title = movie[movie['movieId'] == env.selected_movie['movieId']]
print(title['title'].values[0])

0.7738192930817603
0.7310670837759972
0.8076434761285781
-0.5571697175502778
0.9345208190381528
-0.3202449530363083
1.1285548992455006
0.7990046434104442
0.8526514180004596
-0.2220457077026367
1.1679855071008205
0.7537739997729659
1.2598811969161035
0.7956901758909225
0.9455792278051376
1.083110511302948
1.1200225785374642
1.3009797886013985
0.9404252737760543
0.9396455273032188
Tall Men (2016)


In [29]:
env.rating_movie(rating=4)

1.2226899981498718
-0.2434864848852158
0.8866148546338082
1.296115128695965
1.0012978471815586
1.0131861433386802
1.0688846051692962
1.494694423675537
-0.320303063839674
1.0580862319096922
1.103770437836647
1.0362935334444046
1.5493227690458298
1.4762330263853074
1.06449790969491
1.3925448328256607
-0.14857769757509232
1.2051097616553306
1.310237243771553
1.4733243420720101


Action         0
Adventure      0
Animation      0
Children       0
Comedy         0
Crime          0
Documentary    0
Drama          0
Fantasy        1
Film-Noir      0
Horror         1
IMAX           0
Musical        0
Mystery        0
Romance        0
Sci-Fi         0
Thriller       1
War            0
Western        0
Name: 48674, dtype: object

In [30]:
env.memory.values

array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0],
      dtype=object)

In [31]:
env.storage_data()
env.storage

Unnamed: 0,Action,Adventure,Animation,Children,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0
1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0
4,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0,0
67,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0
68,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
69,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0


In [32]:
env.dqn_model.memory[-1]

(array([0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1.,
        0., 0.], dtype=float32),
 11,
 0.8,
 array([0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1.,
        0., 0.], dtype=float32))

In [33]:
env.suggest_next_movie()

Genre: 14


movieId                            153012
title          Aashiq Banaya Aapne (2005)
Action                                  0
Adventure                               0
Animation                               0
Children                                0
Comedy                                  0
Crime                                   0
Documentary                             0
Drama                                   1
Fantasy                                 0
Film-Noir                               0
Horror                                  1
IMAX                                    0
Musical                                 0
Mystery                                 0
Romance                                 1
Sci-Fi                                  1
Thriller                                1
War                                     0
Western                                 0
Name: 37602, dtype: object