## To be used from App: Obtain selected rows from the similarity matrix
**Note:** Requires two files in the working directory: "config_prod.json" (database URL, database name and user) and "db_credentials.txt", which contains the password of the MongoDB user. The password is in the pinned file in the Slack general group.

In [1]:
import pandas as pd
import json

# Read the configuration file (data directories plus MongoDB host, database and user).
# The password is read separately from db_credentials.txt further down.
with open('config_prod.json', 'r') as fp:
    config = json.load(fp)
print(config)

# Connect to MongoDB

db_url = config['db_url']
db_name = config['db_name']
db_user = config['db_user']

import pymongo
import ssl
from urllib.parse import quote_plus

try:
    # Close the connection left over from a previous run of this cell, if any.
    if 'conn' in globals():
        conn.close()
        print("Closing connection")

    # db_credentials.txt holds the MongoDB password on its first line.
    # Trailing blank lines are tolerated; an empty file is reported explicitly.
    with open("db_credentials.txt", 'r') as f:
        credential_lines = f.read().splitlines()
    if not credential_lines or not credential_lines[0]:
        raise ValueError("db_credentials.txt must contain the MongoDB password on its first line")
    db_password = credential_lines[0]

    # User name and password must be percent-escaped inside a MongoDB URI,
    # otherwise characters such as '@', ':', '/' or '%' corrupt the URI.
    # NOTE(review): this assumes the file stores the raw (not pre-escaped) password.
    mongo_uri = "mongodb+srv://{}:{}@{}".format(quote_plus(db_user), quote_plus(db_password), db_url)

    # NOTE(review): ssl.CERT_NONE disables TLS certificate verification. It is kept
    # unchanged so existing environments keep working, but it should be replaced by
    # proper CA verification when possible.
    conn = pymongo.MongoClient(mongo_uri, ssl_cert_reqs=ssl.CERT_NONE)

    # MongoClient connects lazily: force a round trip so that a connection problem
    # surfaces here instead of at the first query.
    conn.admin.command('ping')
    print ("Connected successfully to MongoDB")

except pymongo.errors.ConnectionFailure as e:
    print ("Could not connect to MongoDB: %s" % e)
    # Re-raise: nothing below can work without a live connection.
    raise

# Open database and collection
db = conn[db_name]
col_similarity = db['similarity']

# Read the movie ids from the DB into a list of {'movieId': ...} dicts.
# NOTE(review): no sort is requested; later cells pair this list positionally with
# the similarity values, so the returned order is presumably the matrix order - confirm.
movie_list = list(col_similarity.find( {}, {'movieId':1, '_id':0} ))
print(len(movie_list))
movie_list[:5]

{'dir_data_raw': '../data/raw', 'dir_data_input': '../data/input', 'db_url': 'cluster0.egjki.mongodb.net', 'db_name': 'gmam', 'db_user': 'getmeamovie_rw'}
Connected successfully to MongoDB
10326


[{'movieId': 1},
 {'movieId': 2},
 {'movieId': 3},
 {'movieId': 4},
 {'movieId': 5}]

In [6]:
def get_similarity_row(movieId, db_connection, num_digits=2):
    '''Return the similarities stored for one movie as a list of floats.

    The 'similarity' collection keeps, per movie, a 'similarities' string in
    which every value is encoded as ``num_digits`` consecutive digits. Each
    chunk is converted to ``int(chunk) / 10**num_digits``, i.e. a number in
    the interval [0, 1).

    Parameters
    ----------
    movieId : id of the movie whose row is requested (matched against the
        'movieId' field of the collection).
    db_connection : object indexable by database name, e.g. a
        pymongo.MongoClient. The database name is taken from the
        notebook-level variable ``db_name``.
    num_digits : number of digits used to encode each similarity value.

    Returns
    -------
    list of float, one entry per ``num_digits``-wide chunk of the stored
    string. The length is derived from the stored string itself, not from
    any notebook-level list.

    Raises
    ------
    IndexError : if no document exists for ``movieId``.
    ValueError : if ``num_digits`` is not positive, if the stored string
        length is not a multiple of ``num_digits``, or if a chunk is not
        numeric.
    '''
    if num_digits < 1:
        raise ValueError("num_digits must be a positive integer")

    col_similarity = db_connection[db_name]['similarity']

    # find_one returns None for a missing movie instead of failing with an
    # obscure cursor indexing error; IndexError is kept as the exception type.
    document = col_similarity.find_one({'movieId': movieId})
    if document is None:
        raise IndexError("movieId {} not found in the similarity collection".format(movieId))

    simil_string = document['similarities']
    if len(simil_string) % num_digits:
        raise ValueError(
            "similarities string of length {} is not a multiple of num_digits={}".format(
                len(simil_string), num_digits))

    scale = 10 ** num_digits  # hoisted: identical for every chunk
    return [int(simil_string[start:start + num_digits]) / scale
            for start in range(0, len(simil_string), num_digits)]

# Fetch the similarity rows of the two selected movies (movieIds 4963 and 58559),
# decoding two digits per similarity value.
simil_1 = get_similarity_row(4963, conn, 2)
simil_2 = get_similarity_row(58559, conn, 2)


In [7]:
# Pair every movieId with its two similarity values; the frames are joined
# side by side, so rows are matched by their position in each list.
df_movie_ids = pd.DataFrame(movie_list)
df_simil_values = pd.DataFrame({'simil_1': simil_1, 'simil_2': simil_2})
df_simil = pd.concat([df_movie_ids, df_simil_values], axis=1)
df_simil.head()

Unnamed: 0,movieId,simil_1,simil_2
0,1,0.05,0.98
1,2,0.92,0.46
2,3,0.74,0.59
3,4,0.92,0.17
4,5,0.95,0.7
