## Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.sparse import coo_matrix
from sklearn.preprocessing import StandardScaler

%matplotlib inline

## Data loading

In [2]:
# The full rating matrix (user_rating_matrix.csv) is intentionally not loaded:
# it is not needed here, least of all in CSV form.

# User-item interactions; the first CSV column is used as the index.
data_inter = pd.read_csv(
    '/home/dustpelt/Desktop/AFA_ALDM_publish/data/user_rating.csv',
    index_col=[0],
)

# Users' features; the 'UserId' column is dropped right after loading, before
# the remaining columns are used as feature values.
data_users_features = pd.read_csv(
    '/home/dustpelt/Desktop/AFA_ALDM_publish/data/user.csv',
    index_col=[0],
).drop(columns=['UserId'])

# Movies' features, indexed by the 'ItemId' column.
data_movies_features = pd.read_csv(
    '/home/dustpelt/Desktop/AFA_ALDM_publish/data/movie_holiday.csv',
    index_col=['ItemId'],
)

## Let's build the full rating matrix

In [3]:
# Preview the first rows of the interaction table (UserId, ItemId, rating).
data_inter.head()

Unnamed: 0,UserId,ItemId,rating
1,196,242,3
2,186,302,3
3,22,377,1
4,244,51,2
5,166,346,1


In [4]:
# Transpose the interaction table so that each row of `interactions` is one
# column of data_inter: row 0 = UserId, row 1 = ItemId, row 2 = rating.
interactions = data_inter.values.T

# Shift the ids by one so they can be used as zero-based row/column indices.
user_ids, item_ids = interactions[0] - 1, interactions[1] - 1
ratings = interactions[2]

# The matrix is just large enough to hold the highest user / item index seen
# in the interactions.
n_users = user_ids.max() + 1
n_items = item_ids.max() + 1
R_shape = (n_users, n_items)

# Sparse (COO) rating matrix: users along rows, items along columns.
R_coo = coo_matrix(
    (ratings, (user_ids, item_ids)),
    shape=R_shape,
    dtype='float64',
)

## Let's build the users' feature matrix

In [5]:
# Preview the first rows of the users' feature table.
data_users_features.head()

Unnamed: 0,age,gender,adminstor,artist,doctor,educator,engineer,entertain,executive,healthcare,...,marketing,none,other,progrmer,retired,salesman,scientist,student,technician,writer
1,24,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,53,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
3,23,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,24,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
5,33,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


In [6]:
# Users' features as a float64 array.
users_features = data_users_features.values.astype('float64')

# Fit a StandardScaler (default settings) on the users' features and apply it
# to those same rows; `ss` keeps the fitted scaler.
ss = StandardScaler().fit(users_features)

X = ss.transform(users_features)

## Let's build the movies' feature matrix

In [7]:
# Preview the first rows of the movies' feature table (indexed by ItemId).
data_movies_features.head()

Unnamed: 0_level_0,Holiday,unknown,Action,Adventure,Animation,Childrens,Comedy,Crime,Documty,Drama,Fantasy,FilmNoir,Horror,Musical,Mystery,Romance,SciFi,Thriller,War,Western
ItemId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
4,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
5,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0


In [8]:
# Movies' features as a float64 array.
movies_features = data_movies_features.values.astype('float64')

# Fit a fresh StandardScaler (default settings) on the movies' features and
# apply it to those same rows. Note that this rebinds `ss` to the new scaler.
ss = StandardScaler().fit(movies_features)

Y = ss.transform(movies_features)

## Saving

In [9]:
data_path = '/home/dustpelt/Desktop/imc_exp_data/movielens/'

# Write the rating matrix and both scaled feature matrices into data_path.
# NOTE(review): R_coo is a scipy sparse matrix, not an ndarray, so np.save
# presumably stores it as a pickled object array (loading would then need
# allow_pickle=True, typically followed by .item()) — confirm against the code
# that reads R.npy; scipy.sparse.save_npz may be a better fit if that reader
# can be changed as well.
for file_name, array in (('R.npy', R_coo), ('X.npy', X), ('Y.npy', Y)):
    np.save(data_path + file_name, array)