In [162]:
# aiohttp is the async HTTP client used by the benchmark requests further down.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install aiohttp

Note: you may need to restart the kernel to use updated packages.


In [None]:
import asyncio
import json
import sqlite3
import urllib
from datetime import datetime

import aiohttp
from pandas.io import sql
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, text

# Performance Testing

In [1]:
class ConfigReader:
    """Reads top-level settings from a JSON configuration file.

    Args:
        path: Location of the JSON file. Defaults to ``config.json`` in the
            current working directory, which is what the notebook has always
            used, so ``ConfigReader()`` keeps working unchanged.
    """

    def __init__(self, path='config.json'):
        self.path = path

    def get_value(self, key):
        """Return the value stored under the top-level ``key``.

        The file is re-read on every call, so edits to it are picked up
        without recreating the reader.

        Raises:
            FileNotFoundError: if the configuration file does not exist.
            json.JSONDecodeError: if the file is not valid JSON.
            KeyError: if ``key`` is not present at the top level.
        """
        # JSON is UTF-8 by specification; do not depend on the locale default
        # encoding (which differs between platforms).
        with open(self.path, encoding='utf-8') as json_data_file:
            data = json.load(json_data_file)
        return data[key]

In [4]:
class DbManager:
    """Data-access helper for the movies/ratings database.

    Connection settings are read from the ``db_connection`` entry of the
    configuration file via ``ConfigReader``. All read methods return a pandas
    DataFrame.

    Every statement uses bound parameters instead of string formatting, so
    caller-supplied values can never alter the SQL text, and all calls share a
    single SQLAlchemy engine instead of building a new one per query.
    """

    def __init__(self):
        reader = ConfigReader()
        db_connection = reader.get_value("db_connection")
        self.conn_string = '{db_engine}{connector}://{user}:{password}@{server}/{database}'.format(
            db_engine=db_connection['db_engine'],
            connector=db_connection['connector'],
            user=db_connection['user'],
            password=db_connection['password'],
            server=db_connection['server'],
            database=db_connection['database'])
        # One engine (and therefore one connection pool) for the lifetime of
        # this object; creating the engine does not open a connection yet.
        self.engine = create_engine(self.conn_string)

    def _read(self, query, params=None):
        """Run a SELECT with optional named bind parameters; return a DataFrame."""
        return sql.read_sql(text(query), self.engine, params=params)

    def _execute(self, statement, params):
        """Run a write statement with named bind parameters inside a transaction."""
        # engine.begin() commits on success and rolls back if the block raises.
        with self.engine.begin() as connection:
            connection.execute(text(statement), params)

    def get_ratings(self):
        """Return every row of ``ratings`` (userId, itemId, rating, timestamp)."""
        return self._read("SELECT userId, itemId, rating, timestamp FROM ratings;")

    def get_movies(self, term):
        """Return movies whose title contains ``term``.

        ``term`` is placed between ``%`` wildcards, so any ``%`` or ``_`` inside
        it still acts as a LIKE wildcard, as it did before.
        """
        return self._read(
            "SELECT movieId, title, genres FROM movies WHERE title LIKE :pattern;",
            {"pattern": "%{term}%".format(term=term)})

    def get_movie(self, id):
        """Return title and genres of the movie with the given ``movieId``.

        Raises:
            ValueError / TypeError: if ``id`` cannot be converted to an integer.
        """
        # int() also unwraps numpy integers, which some DB drivers cannot bind.
        return self._read(
            "SELECT title, genres FROM movies WHERE movieId = :movie_id;",
            {"movie_id": int(id)})

    def get_links(self):
        """Return every row of ``links`` (movieId, imdbId, tmdbId)."""
        return self._read("SELECT movieId, imdbId, tmdbId FROM links;")

    def get_users(self):
        """Return the distinct ``userId`` values that appear in ``ratings``."""
        return self._read("SELECT distinct userId FROM ratings;")

    def insert_rating(self, user_id, movie_id, rating_value):
        """Insert one rating stamped with the current time.

        The timestamp is bound as the string form of the POSIX timestamp,
        matching the quoted literal the statement used previously.
        NOTE(review): ids are coerced with int() and the rating with float();
        this assumes numeric columns - confirm against the schema.
        """
        self._execute(
            "INSERT INTO ratings VALUES (:user_id, :item_id, :rating, :timestamp)",
            {
                "user_id": int(user_id),
                "item_id": int(movie_id),
                "rating": float(rating_value),
                "timestamp": str(datetime.timestamp(datetime.now())),
            })

    def remove_ratings(self, user_id):
        """Delete every rating that belongs to ``user_id``."""
        self._execute(
            "DELETE FROM ratings WHERE userId = :user_id",
            {"user_id": int(user_id)})

    def insert_and_get_min_user_id(self):
        """Insert a user whose id is one below the current minimum and return it.

        NOTE(review): the read and the insert are separate statements, so two
        concurrent callers could compute the same id. An empty ``users`` table
        presumably yields a NULL minimum and makes the arithmetic below fail -
        confirm whether that case can occur.
        """
        db_list = self._read("SELECT MIN(userId) as userId FROM users;")
        min_user_id = db_list.iloc[0]['userId']
        # Plain int so the driver can bind it and callers get a Python int.
        user_id = int(min_user_id - 1)
        self._execute("INSERT INTO users(userId) VALUES (:user_id)", {"user_id": user_id})
        return user_id
    

## Get random users

In [5]:
# Load every user id that has at least one rating; this is the population the
# benchmark sample is drawn from.
dbManager = DbManager()
db_users = dbManager.get_users()

# Row count of the result, shown as the cell output.
n_users = db_users.shape[0]
n_users

162541

In [204]:
# Number of users to benchmark, capped at the number available so that
# sampling without replacement cannot fail on a small database.
n_sample = min(1000, len(db_users))

# Fixed seed: re-running the notebook benchmarks the same users, which keeps
# timing runs comparable. `n_rand_users` is only ever the sampled DataFrame
# (the name is read by get_recs / get_preds).
n_rand_users = db_users.sample(n=n_sample, random_state=42)
n_rand_users.head()

Unnamed: 0,userId
122370,122371
96646,96647
11476,11477
134814,134815
146974,146975


## Test recommendation and prediction endpoints

In [195]:
# Root URL of the service under test.
base_url = 'http://127.0.0.1:8000'

# Algorithm names: one for the recommendations benchmark, one for predictions.
algo_rec, algo_pred = 'popular', 'biasedmf'

# Recommendations requested per user.
n_recs = 5

# Comma-separated item ids sent with every prediction request.
items = "10,20,30,40,50"

In [196]:
async def _run_benchmark(algo, nr_recs, item_ids, verbose=True):
    """Send one request per sampled user, sequentially, and print timing stats.

    Reads the module-level ``n_rand_users`` DataFrame (column ``userId``) and
    delegates every request to ``get_user_results``.

    Args:
        algo: algorithm name forwarded to ``get_user_results``.
        nr_recs: number of recommendations to ask for (recommendation requests).
        item_ids: comma-separated item ids (prediction requests).
        verbose: when True, print the response time of every single request.
    """
    num_requests = len(n_rand_users)
    print(f'Number of requests: {num_requests}')
    if num_requests == 0:
        # Nothing to time; the averages and the peak below would otherwise
        # fail with ZeroDivisionError / max() of an empty list.
        print('No users sampled; nothing to measure.')
        return

    times = []
    start_all = datetime.now()
    for user_id in n_rand_users['userId']:
        start = datetime.now()
        await get_user_results(user_id, nr_recs, algo, item_ids)
        time_taken = datetime.now() - start
        times.append(time_taken)
        if verbose:
            print(f'Response time: {time_taken}')
    time_taken_all = datetime.now() - start_all

    print(f'Total response time: {time_taken_all}')
    print(f'Average load time: {time_taken_all / num_requests}')
    print(f'Requests per second: {num_requests / time_taken_all.total_seconds()}')
    print(f'Peak response time: {max(times)}')


async def get_recs(verbose=True):
    """Benchmark the recommendations endpoint using ``algo_rec`` and ``n_recs``."""
    await _run_benchmark(algo_rec, n_recs, None, verbose)


async def get_preds(verbose=True):
    """Benchmark the predictions endpoint using ``algo_pred`` and ``items``."""
    await _run_benchmark(algo_pred, None, items, verbose)

    
async def _fetch_json(session, url):
    """GET ``url`` on ``session`` and return the decoded JSON body."""
    async with session.get(url) as resp:
        # Fail loudly on 4xx/5xx so an error page is never timed as a success.
        resp.raise_for_status()
        return await resp.json()


async def get_user_results(userId, nr_recs, algo, items, session=None):
    """Request recommendations or predictions for one user.

    ``algo`` values 'popular' and 'topn' hit the recommendations endpoint
    (using ``nr_recs``); every other value hits the predictions endpoint
    (using ``items``). The server root is the module-level ``base_url``.

    Args:
        userId: id of the user to query for.
        nr_recs: number of recommendations (recommendation requests only).
        algo: algorithm name; also selects the endpoint as described above.
        items: comma-separated item ids (prediction requests only).
        session: optional ``aiohttp.ClientSession`` to reuse. When omitted, a
            session is opened and closed for this single request, as before.

    Returns:
        The decoded JSON response.

    Raises:
        aiohttp.ClientResponseError: if the server answers with an error status.
    """
    is_a_rec_request = algo in ('popular', 'topn')
    if is_a_rec_request:
        url = f'{base_url}/algorithms/{algo}/recommendations?user_id={userId}&num_recs={nr_recs}'
    else:
        url = f'{base_url}/algorithms/{algo}/predictions?user_id={userId}&items={items}'

    if session is not None:
        return await _fetch_json(session, url)
    async with aiohttp.ClientSession() as own_session:
        return await _fetch_json(own_session, url)

In [205]:
print('Predictions results')
loop = asyncio.get_event_loop()
await loop.create_task(get_preds())

Predictions results
Number of requests: 1000
Total response time: 0:01:10.849874
Average load time: 0:00:00.070850
Requests per second: 14.114351141965333
Peak response time: 0:00:00.250720


In [206]:
print('Recomendation results')
loop = asyncio.get_event_loop()
await loop.create_task(get_recs())

Recomendation results
Number of requests: 1000
Total response time: 0:01:15.281620
Average load time: 0:00:00.075282
Requests per second: 13.283454845950445
Peak response time: 0:00:00.242544
