In [1]:
import pandas as pd
import numpy as np
import itertools 
import matplotlib.pyplot as plt

In [None]:
# Execute the companion notebook inside this kernel. The cells below use
# `ratings`, `movies`, `users`, `U` and `M` without defining them, so they
# presumably come from this run — TODO confirm.
%run assignment_with_changes.ipynb

iteration: 1


In [None]:
# Keep only the movies that received at least one rating.
# `isin` performs a hashed membership test, replacing the original per-movie
# scan of the whole ratings column, and avoids shadowing the builtin `id`.
rated_id = movies['MovieID'].isin(ratings['MovieID']).to_numpy()
movies_rated = movies.loc[rated_id]

In [4]:
# Release year per rated movie: the four characters that precede the title's
# final character (presumably titles end in "(YYYY)" — TODO confirm).
dates = [title[-5:-1] for title in movies_rated['Title']]

In [17]:
# Primary genre per rated movie: the text before the first '|' in 'Genres'.
genres = [genre_field.partition('|')[0] for genre_field in movies_rated['Genres']]

In [6]:
# Show the distinct primary genres (sorted) found in `genres`.
np.unique(genres)

array(['Action', 'Adventure', 'Animation', "Children's", 'Comedy',
       'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
       'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War',
       'Western'], dtype='<U11')

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Project users onto two principal components.
# U is transposed before fitting so that each row is one sample
# (presumably one user — TODO confirm the orientation of U).
pca = PCA(n_components=2)
PCs = pca.fit_transform(U.T)

In [None]:
# PCA projection of users, one scatter series per gender.
plt.figure(figsize=(10, 10))

for gender in ('M', 'F'):
    # 1-D boolean mask selecting the rows of PCs for this gender.
    mask = (users['Gender'] == gender).to_numpy()
    plt.scatter(PCs[mask, 0], PCs[mask, 1], label=gender, s=25, alpha=1)

# Anchor the legend just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.00, 1), loc='upper left', fontsize=20)

In [None]:
# PCA projection of users, one scatter series per distinct 'Age' value.
plt.figure(figsize=(10, 10))
ages = np.unique(users['Age'])

for age in ages:
    # 1-D boolean mask selecting the rows of PCs for this age value.
    mask = (users['Age'] == age).to_numpy()
    plt.scatter(PCs[mask, 0], PCs[mask, 1], label=age, s=25, alpha=1)

# Anchor the legend just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.00, 1), loc='upper left', fontsize=20)

In [None]:
# Visualize movies labelled by genre (PCA).
# NOTE(review): assumes the rows of M follow the same order as `movies_rated`,
# from which `genres` was derived — TODO confirm.
# This rebinds `PCs`, replacing the user projection computed earlier; rerun
# the user PCA cell before re-plotting users.
pca = PCA(n_components=2)

PCs = pca.fit_transform(M)

plt.figure(figsize=(10, 10))

# Comparing the label array with a genre already yields a 1-D boolean mask, so
# it indexes PCs directly (the former `mask[i][0]` indexed a scalar and raised
# IndexError). Looping over the unique genres gives one scatter series and one
# legend entry per genre instead of one per movie.
genre_labels = np.asarray(genres)
for genre in np.unique(genre_labels):
    mask = genre_labels == genre
    plt.scatter(PCs[mask, 0], PCs[mask, 1], label=genre, s=25, alpha=1)

plt.legend(bbox_to_anchor=(1.00, 1), loc='upper left', fontsize=20)

In [None]:
# Visualize movies labelled by year of release (PCA).
# Uses `PCs` as left by the movie PCA cell above.
plt.figure(figsize=(10, 10))

# The comparison below is already a 1-D boolean mask and indexes PCs directly
# (the former `mask[i][0]` indexed a scalar and raised IndexError). Iterating
# over the unique years draws each year once rather than once per movie.
year_labels = np.asarray(dates)
for date in np.unique(year_labels):
    mask = year_labels == date
    plt.scatter(PCs[mask, 0], PCs[mask, 1], label=date, s=25, alpha=1)

# NOTE(review): one legend entry per distinct year can make this legend very
# tall at fontsize 20 — consider a smaller font or several columns.
plt.legend(bbox_to_anchor=(1.00, 1), loc='upper left', fontsize=20)

## UMAP

In [7]:
import umap

In [None]:
# Embed users in 2-D with UMAP (plotted in the following cells).
# The estimator gets its own name so the `umap` module is not overwritten:
# rebinding `umap` to an instance makes any later `umap.UMAP()` call fail.
# U is transposed to match the PCA and t-SNE cells, whose row masks are built
# from `users` and must line up with the rows of the embedding.
user_umap = umap.UMAP()
X_u = user_umap.fit_transform(U.transpose())

In [None]:
# UMAP embedding of users, one scatter series per gender.
plt.figure(figsize=(10, 10))

for gender in ('M', 'F'):
    # 1-D boolean mask selecting the rows of X_u for this gender.
    mask = (users['Gender'] == gender).to_numpy()
    plt.scatter(X_u[mask, 0], X_u[mask, 1], label=gender, s=25, alpha=1)

# Anchor the legend just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.00, 1), loc='upper left', fontsize=20)

In [None]:
# UMAP embedding of users, one scatter series per distinct 'Age' value.
plt.figure(figsize=(10, 10))
ages = np.unique(users['Age'])

for age in ages:
    # 1-D boolean mask selecting the rows of X_u for this age value.
    mask = (users['Age'] == age).to_numpy()
    plt.scatter(X_u[mask, 0], X_u[mask, 1], label=age, s=25, alpha=1)

# Anchor the legend just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.00, 1), loc='upper left', fontsize=20)

In [None]:
# Visualize movies labelled by genre (UMAP).
# Import the class directly: if an earlier cell rebound the name `umap` to an
# estimator instance, `umap.UMAP()` raises AttributeError, whereas this import
# still resolves the module.
from umap import UMAP

# NOTE(review): assumes the rows of M follow the same order as `movies_rated`,
# from which `genres` was derived — TODO confirm.
movie_umap = UMAP()
X_m = movie_umap.fit_transform(M)

plt.figure(figsize=(10, 10))

# Comparing the label array with a genre already yields a 1-D boolean mask, so
# it indexes X_m directly (the former `mask[i][0]` indexed a scalar and raised
# IndexError). Looping over the unique genres gives one scatter series and one
# legend entry per genre instead of one per movie.
genre_labels = np.asarray(genres)
for genre in np.unique(genre_labels):
    mask = genre_labels == genre
    plt.scatter(X_m[mask, 0], X_m[mask, 1], label=genre, s=25, alpha=1)

plt.legend(bbox_to_anchor=(1.00, 1), loc='upper left', fontsize=20)

In [None]:
# Visualize movies labelled by year of release (UMAP).
# Uses `X_m` as computed by the movie UMAP cell above.
plt.figure(figsize=(10, 10))

# The comparison below is already a 1-D boolean mask and indexes X_m directly
# (the former `mask[i][0]` indexed a scalar and raised IndexError). Iterating
# over the unique years draws each year once rather than once per movie.
year_labels = np.asarray(dates)
for date in np.unique(year_labels):
    mask = year_labels == date
    plt.scatter(X_m[mask, 0], X_m[mask, 1], label=date, s=25, alpha=1)

# NOTE(review): one legend entry per distinct year can make this legend very
# tall at fontsize 20 — consider a smaller font or several columns.
plt.legend(bbox_to_anchor=(1.00, 1), loc='upper left', fontsize=20)

## T-SNE

In [9]:
from sklearn.manifold import TSNE

In [None]:
# t-SNE embedding of users (U transposed so that rows are the samples),
# drawn as one scatter series per gender.
t_sne = TSNE(n_components=2)
U_tsne = t_sne.fit_transform(U.T)

plt.figure(figsize=(10, 10))

for gender in ('M', 'F'):
    # 1-D boolean mask selecting the rows of U_tsne for this gender.
    mask = (users['Gender'] == gender).to_numpy()
    plt.scatter(U_tsne[mask, 0], U_tsne[mask, 1], label=gender, s=25, alpha=1)

# Anchor the legend just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.00, 1), loc='upper left', fontsize=20)

In [None]:
# t-SNE embedding of users, one scatter series per distinct 'Age' value.
plt.figure(figsize=(10, 10))
ages = np.unique(users['Age'])

for age in ages:
    # 1-D boolean mask selecting the rows of U_tsne for this age value.
    mask = (users['Age'] == age).to_numpy()
    plt.scatter(U_tsne[mask, 0], U_tsne[mask, 1], label=age, s=25, alpha=1)

# Anchor the legend just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.00, 1), loc='upper left', fontsize=20)

In [None]:
# Visualize movies labelled by genre (t-SNE).
# NOTE(review): assumes the rows of M follow the same order as `movies_rated`,
# from which `genres` was derived — TODO confirm.
t_sne = TSNE(n_components=2)
M_tsne = t_sne.fit_transform(M)

plt.figure(figsize=(10, 10))

# Comparing the label array with a genre already yields a 1-D boolean mask, so
# it indexes M_tsne directly (the former `mask[i][0]` indexed a scalar and
# raised IndexError). Looping over the unique genres gives one scatter series
# and one legend entry per genre instead of one per movie.
genre_labels = np.asarray(genres)
for genre in np.unique(genre_labels):
    mask = genre_labels == genre
    plt.scatter(M_tsne[mask, 0], M_tsne[mask, 1], label=genre, s=25, alpha=1)

plt.legend(bbox_to_anchor=(1.00, 1), loc='upper left', fontsize=20)

In [None]:
# Visualize movies labelled by year of release (t-SNE).
# Uses `M_tsne` as computed by the movie t-SNE cell above.
plt.figure(figsize=(10, 10))

# The comparison below is already a 1-D boolean mask and indexes M_tsne
# directly (the former `mask[i][0]` indexed a scalar and raised IndexError).
# Iterating over the unique years draws each year once rather than once per
# movie.
year_labels = np.asarray(dates)
for date in np.unique(year_labels):
    mask = year_labels == date
    plt.scatter(M_tsne[mask, 0], M_tsne[mask, 1], label=date, s=25, alpha=1)

# NOTE(review): one legend entry per distinct year can make this legend very
# tall at fontsize 20 — consider a smaller font or several columns.
plt.legend(bbox_to_anchor=(1.00, 1), loc='upper left', fontsize=20)