# MovieLens: Carga en base de datos

In [6]:
%load_ext autoreload
%autoreload 2

In [7]:
import sys
sys.path.append('../../src')

In [8]:
import logging
import os

import mysql.connector
import numpy as np
import pandas as pd

In [9]:
# Directory, relative to this notebook, that holds items.csv and interactions.csv.
DATASET_PATH = '../../datasets'

## Funciones

In [2]:
def execute(connection, query, params=None):
    """Run a single SQL statement on ``connection`` and commit it.

    Best-effort by design: any exception raised while opening the cursor,
    executing or committing is caught and printed together with the query
    instead of being propagated, so a bulk load keeps going after a bad row.

    Args:
        connection: Open DB-API connection (the notebook passes a
            ``mysql.connector`` connection).
        query: SQL text. Use ``%s`` placeholders for values when ``params``
            is supplied.
        params: Optional sequence of values the driver binds to the
            placeholders. ``None`` (the default) runs ``query`` verbatim.

    Returns:
        None.
    """
    cursor = None
    try:
        cursor = connection.cursor()
        cursor.execute(query, params)
        connection.commit()
    except Exception as error:
        print(error)
        print(f'Query: {query}')
        if params is not None:
            # With bound parameters the values are no longer part of the
            # query text, so show them separately for debugging.
            print(f'Params: {params}')
    finally:
        # Release the cursor on every path; previously one cursor was leaked
        # per statement (i.e. per inserted row).
        if cursor is not None:
            try:
                cursor.close()
            except Exception as error:
                # Keep the best-effort contract even if closing fails.
                print(error)


def insert_item(connection, row):
    """Insert one item into ``recsys.recsysweb_item``.

    Values are bound as query parameters instead of being spliced into the
    SQL text, so quotes, backslashes or placeholder-like text inside a name,
    description or image URL can neither break nor alter the statement.
    As a consequence double quotes in the description are now stored as-is
    rather than stripped.

    Args:
        connection: Open DB-API connection, forwarded to ``execute``.
        row: Mapping or ``pandas.Series`` with the keys ``id``, ``name``,
            ``description`` and ``image``.
    """
    query = """
        INSERT INTO
            recsys.recsysweb_item(id, name, description, image)
        VALUES (%s, %s, %s, %s);
        """
    # Convert to built-in types: the driver cannot bind numpy scalars.
    params = (
        int(row['id']),
        str(row['name']),
        str(row['description']),
        str(row['image']),
    )
    execute(connection, query, params)


def insert_interaction(connection, row):
    """Insert one user-item rating into ``recsys.recsysweb_interaction``.

    Args:
        connection: Open DB-API connection, forwarded to ``execute``.
        row: Mapping or ``pandas.Series`` with the keys ``item_id``,
            ``user_id`` and ``rating``. The ids are truncated to ``int``
            because they may arrive as floats.
    """
    query = """
        INSERT INTO
            recsys.recsysweb_interaction(item_id, user_id, rating)
        VALUES (%s, %s, %s);
        """
    # Convert to built-in types: the driver cannot bind numpy scalars.
    params = (
        int(row['item_id']),
        int(row['user_id']),
        float(row['rating']),
    )
    execute(connection, query, params)

## Procedimiento

**Step 1**: Cargamos los datasets.

In [5]:
# Load the items (movies) and the user-item interactions from the CSV files
# stored under DATASET_PATH.
movies      = pd.read_csv(f'{DATASET_PATH}/items.csv')
interactions = pd.read_csv(f'{DATASET_PATH}/interactions.csv')

In [6]:
# Preview the first three rows of the items dataframe.
movies.head(3)

Unnamed: 0,id,name,description,image
0,1,Toy Story (1995),"Led by Woody, Andy's toys live happily in his ...",https://image.tmdb.org/t/p/w500//uXDfjJbdP4ijW...
1,10,GoldenEye (1995),James Bond must unmask the mysterious head of ...,https://image.tmdb.org/t/p/w500//bFzjdy6ucvNlX...
2,11,"American President, The (1995)","Widowed U.S. president Andrew Shepherd, one of...",https://image.tmdb.org/t/p/w500//yObOAYFIHXHkF...
3,14,Nixon (1995),An all-star cast powers this epic look at Amer...,https://image.tmdb.org/t/p/w500//ORlTkdDwWU94O...
4,15,Cutthroat Island (1995),"Morgan Adams and her slave, William Shaw, are ...",https://image.tmdb.org/t/p/w500//hYdeBZ4BFXivd...


In [5]:
# Preview the first three rows of the interactions dataframe.
interactions.head(3)

NameError: name 'interactions' is not defined

**Step 2**: Creamos una conexión a la base de datos.

In [8]:
# Connection settings are read from the environment so real credentials never
# have to be committed with the notebook. The fallbacks reproduce the previous
# hard-coded values and are only meant for the local development database.
connection = mysql.connector.connect(
  host     = os.environ.get("MYSQL_HOST", "localhost"),
  user     = os.environ.get("MYSQL_USER", "root"),
  password = os.environ.get("MYSQL_PASSWORD", "1234")
)

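**Step 3**: Vaciamos la tabla de items (se borran todas sus filas, no la tabla en sí) para cargar la nueva versión de items desde el dataframe de items. Antes se redefine la columna `id` como `int(11) NOT NULL`.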

In [10]:
# Redefine the id column as `int(11) NOT NULL` (presumably to drop
# AUTO_INCREMENT so the dataset ids can be inserted explicitly — TODO confirm),
# then remove every existing row from the items table.
# NOTE(review): if recsysweb_interaction has a foreign key to recsysweb_item,
# this DELETE would fail while interactions still exist, and execute() only
# prints the error — confirm the schema / statement order.
execute(connection, 'ALTER TABLE recsys.recsysweb_item MODIFY id int(11) NOT NULL')
execute(connection, 'DELETE FROM recsys.recsysweb_item')

**Step 4**: Se insertan los nuevos items en la tabla `recsys.recsysweb_item`.

In [11]:
# Insert the items one dataframe row at a time.
for _, row in movies.iterrows():
    insert_item(connection, row)

**Step 5**: Se realiza el mismo procedimiento con la tabla de interacciones (`recsys.recsysweb_interaction`). Se vacía la tabla y se vuelven a insertar todas las interacciones.

In [12]:
# Remove every existing row from the interactions table before reloading it.
execute(connection, 'DELETE FROM recsys.recsysweb_interaction')

In [13]:
# Insert the interactions one dataframe row at a time.
for _, row in interactions.iterrows():
    insert_interaction(connection, row)

**Step 6**: Se cierra la conexión con la base de datos.

In [14]:
# Release the database connection once the load is done.
connection.close()

**Step 7**: Se consulta el mayor `user_id` del dataset de interacciones, que coincide con la cantidad de usuarios solo si los ids son consecutivos y comienzan en 1.

In [15]:
# Largest user id in the interactions. This equals the number of users only if
# ids are contiguous and start at 1; `interactions.user_id.nunique()` gives the
# exact count of distinct users.
interactions.user_id.max()