### Import libraries (install `pandas` and `scikit-surprise` first)

In [None]:
import pandas as pd
import surprise as sp

### Constants

In [None]:
# Factor converting a byte count to megabytes (1 MB = 10**6 bytes).
# Despite the _DIV suffix, df_mem_usage applies it by multiplication.
BYTES_TO_MB_DIV = 1e-6

### Function to check the memory usage of the dataframe

In [None]:
def df_mem_usage(df):
    """Print the total memory footprint of ``df`` in megabytes.

    The per-column byte counts reported by pandas (index included) are
    summed, scaled by ``BYTES_TO_MB_DIV`` and rounded to three decimals.
    ``deep=True`` makes pandas inspect object-dtype columns, so frames
    holding Python strings are not under-reported.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to measure.

    Returns
    -------
    None
        The figure is only printed, preceded by a blank line.
    """
    print()
    total_bytes = df.memory_usage(deep=True).sum()
    mem = round(total_bytes * BYTES_TO_MB_DIV, 3)
    print(f"Memory usage is {mem} MB")

### Reading the dataset

In [None]:
%%time

cols = ['%%MatrixMarket','matrix','coordinate']

dtypes = {
    '%%MatrixMarket':'int32', 
    'matrix':'int16', 
    'coordinate':'int8'
}

df = pd.read_csv('data/netflix_mm', delim_whitespace=True, usecols=cols, dtype=dtypes, skiprows=range(1, 3))
df.columns = ['userID', 'itemID', 'rating']

print(df.head())
df_mem_usage(df)

In [None]:
# Keep only the first 100,000 rows for the rest of the notebook.
df = df.iloc[:100_000]

# Cosine similarity

**Documentation of scikit-surprise:**

- https://surprise.readthedocs.io/en/stable/matrix_factorization.html
- https://datascience.stackexchange.com/questions/6814/how-to-split-train-test-in-recommender-systems

### Declare a reader object to parse the ratings

In [None]:
# Ratings are declared to lie on a 1-to-5 scale.
reader = sp.Reader(rating_scale=(1, 5))

### Load the dataframe in scikit-surprise's format using the Reader

In [None]:
%%time

data = sp.Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)

### Divide the data into a training and test set

In [None]:
# Hold out 25% of the ratings for evaluation. The fixed random_state makes the
# shuffle, and therefore the reported RMSE, reproducible across runs.
trainset, testset = sp.model_selection.train_test_split(
    data,
    test_size=.25,
    random_state=42,
)

### Instantiate the KNN model with cosine similarity

In [None]:
# Basic k-NN collaborative filtering using the cosine similarity measure.
algo = sp.KNNBasic(
    sim_options={'name': 'cosine'},
)

### Train the model on the training set and evaluate on the test set

In [None]:
%time

algo.fit(trainset)

In [None]:
# Predict every held-out rating, then score the predictions.
predictions = algo.test(testset)

# accuracy.rmse prints "RMSE: <value>" itself (verbose defaults to True), so an
# extra print() would show the score twice. Keep the value for later use.
test_rmse = sp.accuracy.rmse(predictions)