In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('anime.csv')

In [3]:
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [4]:
data_missing_value = df.isnull().sum().reset_index()
data_missing_value.columns = ['feature','missing_value']
data_missing_value['percentage'] = round((data_missing_value['missing_value']/len(df))*100,2)
data_missing_value = data_missing_value.sort_values('percentage', ascending=False).reset_index(drop=True)
data_missing_value = data_missing_value[data_missing_value['percentage']>0]
data_missing_value

Unnamed: 0,feature,missing_value,percentage
0,rating,230,1.87
1,genre,62,0.5
2,type,25,0.2


In [5]:
df=df.dropna().reset_index(drop=True)

In [6]:
df.isna().sum()

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

# Content Based Recommendation System

## Cosine Similarity Illustrasions

In [7]:
df['genre'].value_counts()

Hentai                                                         816
Comedy                                                         521
Music                                                          297
Kids                                                           197
Comedy, Slice of Life                                          174
                                                              ... 
Drama, Fantasy, Sci-Fi, Supernatural                             1
Action, Drama, Game, Seinen                                      1
Fantasy, Magic, Music, Romance, School, Sci-Fi, Shoujo           1
Action, Drama, Horror, Mystery, Psychological, Supernatural      1
Adventure, Magic, Romance, Shoujo                                1
Name: genre, Length: 3229, dtype: int64

In [8]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [9]:
cvr = CountVectorizer(tokenizer = lambda x:x.split(', '))

In [10]:
genre_a=cvr.fit_transform(df['genre'])

In [11]:
print(len(cvr.get_feature_names()))
print(cvr.get_feature_names())

43
['action', 'adventure', 'cars', 'comedy', 'dementia', 'demons', 'drama', 'ecchi', 'fantasy', 'game', 'harem', 'hentai', 'historical', 'horror', 'josei', 'kids', 'magic', 'martial arts', 'mecha', 'military', 'music', 'mystery', 'parody', 'police', 'psychological', 'romance', 'samurai', 'school', 'sci-fi', 'seinen', 'shoujo', 'shoujo ai', 'shounen', 'shounen ai', 'slice of life', 'space', 'sports', 'super power', 'supernatural', 'thriller', 'vampire', 'yaoi', 'yuri']


In [12]:
df_cvr = pd.DataFrame(genre_a.toarray(), columns=cvr.get_feature_names())
df_cvr

Unnamed: 0,action,adventure,cars,comedy,dementia,demons,drama,ecchi,fantasy,game,...,shounen ai,slice of life,space,sports,super power,supernatural,thriller,vampire,yaoi,yuri
0,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,1,1,0,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12012,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12013,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12014,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12015,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [13]:
df['name']

0                                           Kimi no Na wa.
1                         Fullmetal Alchemist: Brotherhood
2                                                 Gintama°
3                                              Steins;Gate
4                                            Gintama&#039;
                               ...                        
12012         Toushindai My Lover: Minami tai Mecha-Minami
12013                                          Under World
12014                       Violence Gekiga David no Hoshi
12015    Violence Gekiga Shin David no Hoshi: Inma Dens...
12016                     Yasuji no Pornorama: Yacchimae!!
Name: name, Length: 12017, dtype: object

In [14]:
df_anime=pd.concat([df['name'],df_cvr],axis=1)
df_anime #Item Profile Matrix

Unnamed: 0,name,action,adventure,cars,comedy,dementia,demons,drama,ecchi,fantasy,...,shounen ai,slice of life,space,sports,super power,supernatural,thriller,vampire,yaoi,yuri
0,Kimi no Na wa.,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
1,Fullmetal Alchemist: Brotherhood,1,1,0,0,0,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
2,Gintama°,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Steins;Gate,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,Gintama&#039;,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12012,Toushindai My Lover: Minami tai Mecha-Minami,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12013,Under World,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12014,Violence Gekiga David no Hoshi,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
12015,Violence Gekiga Shin David no Hoshi: Inma Dens...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## test

In [15]:
df_anime[df_anime['name']=='Naruto'].drop('name',axis=1)

Unnamed: 0,action,adventure,cars,comedy,dementia,demons,drama,ecchi,fantasy,game,...,shounen ai,slice of life,space,sports,super power,supernatural,thriller,vampire,yaoi,yuri
841,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0


In [16]:
cos_score=cosine_similarity(df_anime[df_anime['name']=='Naruto'].drop('name',axis=1),df_anime.drop('name',axis=1))

In [17]:
cos_score.shape

(1, 12017)

In [18]:
df_anime.shape

(12017, 44)

In [19]:
df_score=pd.concat([df['name'],pd.DataFrame(cos_score,index=['Score']).T],axis=1)

In [20]:
df_score

Unnamed: 0,name,Score
0,Kimi no Na wa.,0.000000
1,Fullmetal Alchemist: Brotherhood,0.338062
2,Gintama°,0.507093
3,Steins;Gate,0.000000
4,Gintama&#039;,0.507093
...,...,...
12012,Toushindai My Lover: Minami tai Mecha-Minami,0.000000
12013,Under World,0.000000
12014,Violence Gekiga David no Hoshi,0.000000
12015,Violence Gekiga Shin David no Hoshi: Inma Dens...,0.000000


In [21]:
df_score[df_score['name']!='Naruto'].sort_values(by='Score',ascending=False)['name'][:5]

1103    Boruto: Naruto the Movie - Naruto ga Hokage ni...
1343                                          Naruto x UT
486                              Boruto: Naruto the Movie
615                                    Naruto: Shippuuden
2996    Naruto Soyokazeden Movie: Naruto to Mashin to ...
Name: name, dtype: object

In [22]:
def recommend():
    anime = input('Masukan anime yang disuka')

    import pandas as pd
    import numpy as np
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.metrics.pairwise import cosine_similarity
    
    #buat dataframe dengan judul anime sebagai feature
    df = pd.read_csv('anime.csv')
    df = df.dropna().reset_index(drop=True)
    #memecah kolom gender
    cvr = CountVectorizer(tokenizer=lambda x:x.split(', '))
    genre_a = cvr.fit_transform(df['genre'])
    #membuat kolom genre menjadi array
    genre_a = genre_a=pd.DataFrame(genre_a.toarray(),columns=cvr.get_feature_names())
    df_genre=pd.concat([df[['name']],genre_a],axis=1) #item profile

    cosScore = cosine_similarity(df_genre[df_genre['name']==anime].drop('name',axis=1),df_genre.drop('name',axis=1))
    df_score=pd.concat([df[['name']],pd.DataFrame(cosScore,index=['Score']).T],axis=1)
    return df_score[df_score['name']!=anime].sort_values('Score',ascending=False)['name'][:5]    

In [23]:
recommend()

Masukan anime yang disuka Fullmetal Alchemist: Brotherhood


1558    Fullmetal Alchemist: The Sacred Star of Milos
200                               Fullmetal Alchemist
402         Fullmetal Alchemist: Brotherhood Specials
9223                                  Kkomaeosa Ttori
879               Tales of Vesperia: The First Strike
Name: name, dtype: object

## Content Based Filtering
#### For One User

In [24]:
movies = ['Terminator', 'Interstellar', 'Ant Man 2', '3 Idiots']
scores = [7,9,8,9]
action = [1,0,1,0]
scifi = [1,1,1,0]
adventure = [0,1,1,0]
comedy = [0,0,1,1]
drama = [0,1,0,1]

df_movies = pd.DataFrame({
    'movies':movies,
    'scores':scores,
    'action':action,
    'scifi':scifi,
    'adventures':adventure,
    'comedy':comedy,
    'drama':drama
})

In [25]:
df_movies

Unnamed: 0,movies,scores,action,scifi,adventures,comedy,drama
0,Terminator,7,1,1,0,0,0
1,Interstellar,9,0,1,1,0,1
2,Ant Man 2,8,1,1,1,1,0
3,3 Idiots,9,0,0,0,1,1


In [26]:
df_movies2=df_movies.copy()
df_movies2=df_movies2.drop('movies',axis=1)
df_movies2

Unnamed: 0,scores,action,scifi,adventures,comedy,drama
0,7,1,1,0,0,0
1,9,0,1,1,0,1
2,8,1,1,1,1,0
3,9,0,0,0,1,1


In [27]:
df_movies2.columns

Index(['scores', 'action', 'scifi', 'adventures', 'comedy', 'drama'], dtype='object')

In [28]:
#mengkalikan antara score dengan item feature matrix
for i in ['action', 'scifi', 'adventures', 'comedy', 'drama']:
    df_movies2[i]=df_movies2['scores']*df_movies[i]

df_movies2

Unnamed: 0,scores,action,scifi,adventures,comedy,drama
0,7,7,7,0,0,0
1,9,0,9,9,0,9
2,8,8,8,8,8,0
3,9,0,0,0,9,9


In [29]:
df_movies2=df_movies2.drop('scores',axis=1)

In [30]:
#jumlahkan score pada setiap genre
df_movies2.sum()

action        15
scifi         24
adventures    17
comedy        17
drama         18
dtype: int64

In [31]:
df_movies2.sum().sum()

91

In [32]:
ufv = df_movies2.sum()/df_movies2.sum().sum()
ufv

action        0.164835
scifi         0.263736
adventures    0.186813
comedy        0.186813
drama         0.197802
dtype: float64

In [33]:
# Setelah mendapatkan user feature vector, mari kita ukur ketertarikan user terhadap 3 film baru
movies = ['Titanic', 'Martian', 'GOTG Vol 2']
action = [1,0,1]
scifi = [1,1,1]
adventure = [0,1,1]
comedy = [0,0,1]
drama = [0,1,0]

df_movies_recom = pd.DataFrame({
    'movies':movies,
    'action':action,
    'scifi':scifi,
    'adventures':adventure,
    'comedy':comedy,
    'drama':drama
})

In [34]:
#item feature matrix
df_movies_recom

Unnamed: 0,movies,action,scifi,adventures,comedy,drama
0,Titanic,1,1,0,0,0
1,Martian,0,1,1,0,1
2,GOTG Vol 2,1,1,1,1,0


In [35]:
#kombinasikan item feature matrix denngan user feature vector yg kita hitung sebelumnya
for i in ['action', 'scifi', 'adventures', 'comedy', 'drama']:
    df_movies_recom[i]=df_movies_recom[i]*ufv[i]

df_movies_recom

Unnamed: 0,movies,action,scifi,adventures,comedy,drama
0,Titanic,0.164835,0.263736,0.0,0.0,0.0
1,Martian,0.0,0.263736,0.186813,0.0,0.197802
2,GOTG Vol 2,0.164835,0.263736,0.186813,0.186813,0.0


In [36]:
df_movies_recom['rating prediction']=df_movies_recom.sum(axis=1)
df_movies_recom.sort_values(['rating prediction'], ascending = False)

Unnamed: 0,movies,action,scifi,adventures,comedy,drama,rating prediction
2,GOTG Vol 2,0.164835,0.263736,0.186813,0.186813,0.0,0.802198
1,Martian,0.0,0.263736,0.186813,0.0,0.197802,0.648352
0,Titanic,0.164835,0.263736,0.0,0.0,0.0,0.428571


In [37]:
#Mencoba anime dataset
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [38]:
#pretend that the rating is given by a user, and the user hasn't watch anything that isn't inputed by us
cvr = CountVectorizer(tokenizer=lambda x:x.split(', '))
genre_a=cvr.fit_transform(df['genre']) 

In [39]:
len(cvr.get_feature_names())

43

In [40]:
df_genre = pd.DataFrame(genre_a.toarray(),columns=cvr.get_feature_names())
df_genre.shape

(12017, 43)

In [41]:
df_anime_genre = pd.concat([df[['name', 'rating']],df_genre],axis=1)
df_anime_genre.head()

Unnamed: 0,name,rating,action,adventure,cars,comedy,dementia,demons,drama,ecchi,...,shounen ai,slice of life,space,sports,super power,supernatural,thriller,vampire,yaoi,yuri
0,Kimi no Na wa.,9.37,0,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
1,Fullmetal Alchemist: Brotherhood,9.26,1,1,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,Gintama°,9.25,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Steins;Gate,9.17,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,Gintama&#039;,9.16,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [42]:
# anime yang disukai Naruto, One Piece, Dragon Ball
df_anime_genre[df_anime_genre['name'].isin(['Naruto', 'One Piece', 'Dragon Ball'])]

Unnamed: 0,name,rating,action,adventure,cars,comedy,dementia,demons,drama,ecchi,...,shounen ai,slice of life,space,sports,super power,supernatural,thriller,vampire,yaoi,yuri
74,One Piece,8.58,1,1,0,1,0,0,1,0,...,0,0,0,0,1,0,0,0,0,0
346,Dragon Ball,8.16,0,1,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
841,Naruto,7.81,1,0,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0


In [43]:
df_anime=df_anime_genre[df_anime_genre['name'].isin(['Naruto', 'One Piece', 'Dragon Ball'])]

for i in cvr.get_feature_names():
    df_anime[i] = df_anime['rating']*df_anime[i]

In [44]:
df_anime

Unnamed: 0,name,rating,action,adventure,cars,comedy,dementia,demons,drama,ecchi,...,shounen ai,slice of life,space,sports,super power,supernatural,thriller,vampire,yaoi,yuri
74,One Piece,8.58,8.58,8.58,0.0,8.58,0.0,0.0,8.58,0.0,...,0.0,0.0,0.0,0.0,8.58,0.0,0.0,0.0,0.0,0.0
346,Dragon Ball,8.16,0.0,8.16,0.0,8.16,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,8.16,0.0,0.0,0.0,0.0,0.0
841,Naruto,7.81,7.81,0.0,0.0,7.81,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,7.81,0.0,0.0,0.0,0.0,0.0


In [45]:
df_anime.drop(['name', 'rating'],axis=1, inplace=True)

In [46]:
df_anime

Unnamed: 0,action,adventure,cars,comedy,dementia,demons,drama,ecchi,fantasy,game,...,shounen ai,slice of life,space,sports,super power,supernatural,thriller,vampire,yaoi,yuri
74,8.58,8.58,0.0,8.58,0.0,0.0,8.58,0.0,8.58,0.0,...,0.0,0.0,0.0,0.0,8.58,0.0,0.0,0.0,0.0,0.0
346,0.0,8.16,0.0,8.16,0.0,0.0,0.0,0.0,8.16,0.0,...,0.0,0.0,0.0,0.0,8.16,0.0,0.0,0.0,0.0,0.0
841,7.81,0.0,0.0,7.81,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,7.81,0.0,0.0,0.0,0.0,0.0


In [47]:
anime_score = df_anime.sum()/df_anime.sum().sum()
anime_score

action           0.110691
adventure        0.113055
cars             0.000000
comedy           0.165800
dementia         0.000000
demons           0.000000
drama            0.057946
ecchi            0.000000
fantasy          0.113055
game             0.000000
harem            0.000000
hentai           0.000000
historical       0.000000
horror           0.000000
josei            0.000000
kids             0.000000
magic            0.000000
martial arts     0.107854
mecha            0.000000
military         0.000000
music            0.000000
mystery          0.000000
parody           0.000000
police           0.000000
psychological    0.000000
romance          0.000000
samurai          0.000000
school           0.000000
sci-fi           0.000000
seinen           0.000000
shoujo           0.000000
shoujo ai        0.000000
shounen          0.165800
shounen ai       0.000000
slice of life    0.000000
space            0.000000
sports           0.000000
super power      0.165800
supernatural

In [48]:
df_anime_recom = df_anime_genre.copy()

df_anime_recom = df_anime_recom[~df_anime_recom['name'].isin(['Naruto', 'Dragon Ball', 'One Piece'])].drop('rating', axis = 1)

In [49]:
df_anime_recom.head()

Unnamed: 0,name,action,adventure,cars,comedy,dementia,demons,drama,ecchi,fantasy,...,shounen ai,slice of life,space,sports,super power,supernatural,thriller,vampire,yaoi,yuri
0,Kimi no Na wa.,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
1,Fullmetal Alchemist: Brotherhood,1,1,0,0,0,0,1,0,1,...,0,0,0,0,0,0,0,0,0,0
2,Gintama°,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Steins;Gate,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,Gintama&#039;,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [50]:
for i in cvr.get_feature_names():
    df_anime_recom[i]=df_anime_recom[i]*anime_score[i]

In [51]:
df_anime_recom.sum(axis=1)

0        0.057946
1        0.560546
2        0.442291
3        0.000000
4        0.442291
           ...   
12012    0.000000
12013    0.000000
12014    0.000000
12015    0.000000
12016    0.000000
Length: 12014, dtype: float64

In [52]:
df_anime_recom['prediction score'] = df_anime_recom.sum(axis=1)

In [53]:
df_anime_recom[['name', 'prediction score']]

Unnamed: 0,name,prediction score
0,Kimi no Na wa.,0.057946
1,Fullmetal Alchemist: Brotherhood,0.560546
2,Gintama°,0.442291
3,Steins;Gate,0.000000
4,Gintama&#039;,0.442291
...,...,...
12012,Toushindai My Lover: Minami tai Mecha-Minami,0.000000
12013,Under World,0.000000
12014,Violence Gekiga David no Hoshi,0.000000
12015,Violence Gekiga Shin David no Hoshi: Inma Dens...,0.000000


In [54]:
# hasil rekomendasi dari Dragon Ball, One Piece, Naruto
df_anime_recom[['name', 'prediction score']].sort_values('prediction score',ascending=False)[:20]

Unnamed: 0,name,prediction score
5997,Dragon Ball Z Movie 11: Super Senshi Gekiha!! ...,0.942054
1409,Dragon Ball Z Movie 15: Fukkatsu no F,0.942054
1931,Dragon Ball: Episode of Bardock,0.942054
1930,Dragon Ball Super,0.942054
3407,Dragon Ball Z: Zenbu Misemasu Toshi Wasure Dra...,0.942054
3202,Dragon Ball Z: Summer Vacation Special,0.942054
4312,Dragon Ball GT: Goku Gaiden! Yuuki no Akashi w...,0.942054
4273,Dragon Ball Z: Atsumare! Gokuu World,0.942054
588,Dragon Ball Kai,0.942054
206,Dragon Ball Z,0.942054


In [55]:
def recommendMe():
    anime_list=input('Masukkan judul-judul anime yang anda sukai, pisahkan dengan koma').split(', ')
    # anime_list=['Naruto', 'One Piece', 'Dragon Ball']
    import pandas as pd
    import numpy as np
    from sklearn.feature_extraction.text import CountVectorizer

    # Buat dataframe dengan judul anime dan genre sebagai features
    df = pd.read_csv('anime.csv')
    df=df.dropna()
    cvr= CountVectorizer(tokenizer= lambda x:x.split(', ')) #hanya koma untuk memecah kolom genre

    genre_a= cvr.fit_transform(df['genre'])

    df_genre=pd.DataFrame(genre_a.toarray(),columns=cvr.get_feature_names())
    df_anime_genre=pd.concat([df[['name','rating']],df_genre],axis=1) #df item features matrix

    df_anime=df_anime_genre[df_anime_genre['name'].isin(anime_list)] #dataframe yg berisi anime yg disukai
    for i in cvr.get_feature_names():
        df_anime[i] = df_anime['rating']*df_anime[i] 

    df_anime.drop(['name','rating'], axis = 1, inplace = True)
    anime_score = df_anime.sum()/df_anime.sum().sum() #user feature vector

    df_anime_recom=df_anime_genre[~df_anime_genre['name'].isin (anime_list)].drop('rating',axis=1)# dataframe dengan pilihan anime yg belum ditonton

    for i in cvr.get_feature_names():
        df_anime_recom[i] = df_anime_recom[i]*anime_score[i]
   
    
    df_anime_recom['rating prediction'] = df_anime_recom.sum(axis = 1) #hitung score

    # Recommended Anime

    return df_anime_recom[['name','rating prediction']].sort_values('rating prediction',ascending=False)[:20]

In [56]:
#recommendMe()

# Content Based Multiple User

In [57]:
movies = ['Terminator', 'Interstellar', 'Ant Man 2', '3 Idiots']
scores = [7,9,8,9]
action = [1,0,1,0]
scifi = [1,1,1,0]
adventure = [0,1,1,0]
comedy = [0,0,1,1]
drama = [0,1,0,1]

df_item_feature = pd.DataFrame({
    'movies':movies,
    'scores':scores,
    'action':action,
    'scifi':scifi,
    'adventures':adventure,
    'comedy':comedy,
    'drama':drama
})

In [58]:
df_item_feature

Unnamed: 0,movies,scores,action,scifi,adventures,comedy,drama
0,Terminator,7,1,1,0,0,0
1,Interstellar,9,0,1,1,0,1
2,Ant Man 2,8,1,1,1,1,0
3,3 Idiots,9,0,0,0,1,1


In [59]:
user=['user 1', 'user 2', 'user 3', 'user 4']
terminator =[7,8,9,0]
interstellar = [9,0,0,7]
ant_man_2 = [8,6,0,0]
three_idiots = [9,5,10,9]

df_user_item = pd.DataFrame({
    'user':user,
    'terminator':terminator,
    'interstellar':interstellar,
    'ant_man_2':ant_man_2,
    '3 idiots' : three_idiots
})
df_user_item

Unnamed: 0,user,terminator,interstellar,ant_man_2,3 idiots
0,user 1,7,9,8,9
1,user 2,8,0,6,5
2,user 3,9,0,0,10
3,user 4,0,7,0,9


In [60]:
# mencari user feature matrix

arr_user_items= np.array(df_user_item.drop('user',axis=1))
arr_item_features=np.array(df_item_feature.drop('movies',axis=1))

n_user= arr_user_items.shape[0]
n_item= arr_user_items.shape[1]
n_feature= arr_item_features.shape[1]

arr_user_items_score=np.empty((n_user,n_item))
arr_user_feature= np.empty((n_user,n_feature))

for i in range(n_user):
    #print(arr_user_items[i,:])
    user_feature= np.matmul(arr_user_items[i,:], arr_item_features) 
    #print(user_feature)
    user_feature=user_feature/user_feature.sum()
    arr_user_feature[i,:]=user_feature

In [61]:
df_user_feature=pd.DataFrame(arr_user_feature)
df_user_feature.columns = df_item_feature.columns[1:]
df_user_feature.index=user
df_user_feature

Unnamed: 0,scores,action,scifi,adventures,comedy,drama
user 1,0.751366,0.040984,0.065574,0.046448,0.046448,0.04918
user 2,0.748744,0.070352,0.070352,0.030151,0.055276,0.025126
user 3,0.801047,0.04712,0.04712,0.0,0.052356,0.052356
user 4,0.786885,0.0,0.038251,0.038251,0.04918,0.087432


In [63]:
arr_item_features_new= np.array(item_feature3.drop('anime_id',axis=1))
n_item_new=item_feature3.shape[0]

arr_user_items_score_new= np.empty((n_user,n_item_new))

for i in range(n_user):
    user_item_score=np.matmul(arr_item_features_new, arr_user_feature[i,:])
    arr_user_items_score_new[i,:]= user_item_score

df_user_item_score_new= pd.DataFrame(arr_user_items_score_new, columns=item_feature3['anime_id'], index=user_item.index)
df_user_item_score_new

NameError: name 'arr_item_feature' is not defined

4