In [None]:
import argparse
import datetime
import os
import subprocess

import numpy as np
import pandas as pd
import requests
from requests.structures import CaseInsensitiveDict

The API key and Domain ID below need to be copied from the Uptrendz platform:

In [None]:
# Credentials for the Uptrendz API. Each value is taken from an environment
# variable when that variable is set, so real secrets do not have to be saved
# inside the notebook; otherwise the placeholder is used and must be replaced
# with the value shown in the Uptrendz platform.
API_KEY = os.environ.get('UPTRENDZ_API_KEY', '<INSERT YOUR API KEY>')
DOMAIN_ID = os.environ.get('UPTRENDZ_DOMAIN_ID', '<INSERT YOUR DOMAIN ID>')

### Download MovieLens user data

In [None]:
# Location of the MovieLens 100k user file. It is read as pipe-separated text
# without a header row, so the column names are supplied explicitly.
user_url = "http://files.grouplens.org/datasets/movielens/ml-100k/u.user"

colnames = ['id', 'age', 'gender', 'occupation', 'zip']
user_df = pd.read_csv(
    user_url,
    names=colnames,
    header=None,
    sep='|',
)
# Preview the first row as the cell output.
user_df.head(1)

### Prepare user data to be in the appropriate format

Example request:
```bash
curl --location --request POST 'https://app.v2.uptrendz.ai/di/users/user' \
--header 'x-api-key: <INSERT YOUR API KEY>' \
--header 'x-domain-id: <INSERT YOUR DOMAIN ID>' \
--header 'Content-Type: application/json' \
--data-raw '[{
    "zip": "some-zip-value",
    "occupation": "some-occupation-value",
    "gender": "some-gender-value",
    "id": "some-id-value",
    "age": 1
}]'
```

In [None]:
# Build one JSON-ready dict per row of user_df. The id is converted to a
# string; the remaining fields are passed through unchanged.
user_data = [
    {
        "id": str(row["id"]),
        "age": row["age"],
        "gender": row["gender"],
        "occupation": row["occupation"],
        "zip": row["zip"],
    }
    for _, row in user_df.iterrows()
]
print(f"Prepared {len(user_data)} users")

### Send user data to the Uptrendz platform

Use the API key and Domain ID that are displayed in the platform.

In [None]:
# Headers carrying the API key, the domain id and the payload content type.
headers = {
    'x-api-key': API_KEY,
    'x-domain-id': DOMAIN_ID,
    'Content-Type': 'application/json',
}

# POST every prepared user in a single request; requests serialises the list
# passed via `json=` into the request body.
response = requests.post(
    'https://app.v2.uptrendz.ai/di/users/user',
    json=user_data,
    headers=headers,
)
# Show the raw response body as the cell output.
response.text

### Download MovieLens item data and clean it up

Aggregate genres to have a list of genres for each movie <br/>
Remove unused columns (i.e., those that have not been defined in the Uptrendz platform)

In [None]:
item_url = "http://files.grouplens.org/datasets/movielens/ml-100k/u.item"

# Column layout used to read u.item: five descriptive fields followed by one
# flag column per genre (this cell treats a non-zero flag as "movie has genre").
genres = ["unknown", "action", "adventure", "animation", "children", "comedy", "crime", "documentary", "drama",
          "fantasy", "noir", "horror", "musical", "mystery", "romance", "scifi", "thriller", "war", "western"]
colnames = ['id', 'title', 'release_date', 'video_release_date', 'imdb_url']
colnames.extend(genres)

item_df = pd.read_csv(item_url, sep='|', header=None, names=colnames, encoding='latin-1')

# Merge the genre flags into a single list-valued column: for every movie keep
# the names of the genres whose flag is set. The list is built once, directly
# from the flag matrix, rather than being recomputed after rewriting each
# genre column in turn.
item_df['genres'] = [
    [name for name, flag in zip(genres, flags) if flag != 0]
    for flags in item_df[genres].to_numpy()
]

# Keep only the item id, title, release date and the newly created list of genres.
item_df = item_df.drop(columns=genres + ["video_release_date", "imdb_url"])

item_df.head(2)

### Prepare item data to be in the appropriate format

Date values need to be in the *%Y-%m-%dT%H:%M:%SZ* format <br/>
Id needs to be of type string

Example request:
```bash
curl --location --request POST 'https://app.v2.uptrendz.ai/di/items/movie' \
--header 'x-api-key: <INSERT YOUR API KEY>' \
--header 'x-domain-id: <INSERT YOUR DOMAIN ID>' \
--header 'Content-Type: application/json' \
--data-raw '[{
    "release_date": "2022-10-09T15:41:38Z",
    "genres": ["some-genres-value"],
    "id": "some-id-value",
    "title": "some-title-value"
}]'
```

In [None]:
# Release date substituted for movies whose release date is missing.
default_date = "01-Jan-1995"
item_data = []

for idx, row in item_df.iterrows():
    # Handle missing values. pd.isna recognises every missing-value marker
    # (NaN, None, pd.NA), whereas an identity test against np.nan only matches
    # that one exact object. A local variable is used so the row itself is
    # left untouched.
    release_date = default_date if pd.isna(row.release_date) else row.release_date
    # create json object to send
    item_data.append({
        # Parse the "%d-%b-%Y" source string and re-format it the way the API
        # expects. Note that %b (abbreviated month name) is locale-dependent.
        "release_date": datetime.datetime.strptime(release_date, "%d-%b-%Y").strftime("%Y-%m-%dT%H:%M:%SZ"),
        "genres": row.genres,
        "id": str(row.id),
        "title": row.title
    })
print("Prepared " + str(len(item_data)) + " items")

### Send item data to the Uptrendz platform

Use the API key and Domain ID that are displayed in the platform.

In [None]:
# Headers carrying the API key, the domain id and the payload content type.
headers = {
    'x-api-key': API_KEY,
    'x-domain-id': DOMAIN_ID,
    'Content-Type': 'application/json',
}

# POST every prepared movie in a single request; requests serialises the list
# passed via `json=` into the request body.
response = requests.post(
    'https://app.v2.uptrendz.ai/di/items/movie',
    json=item_data,
    headers=headers,
)
# Show the raw response body as the cell output.
response.text

### Download MovieLens interaction data

Convert timestamp to date

In [None]:
interaction_url = "http://files.grouplens.org/datasets/movielens/ml-100k/u.data"

# u.data is read as tab-separated text without a header row.
colnames = ['u_id', 'i_id', 'rating', 'timestamp']
interaction_df = pd.read_csv(interaction_url, sep='\t', header=None, names=colnames)

# Convert the timestamp column (seconds since the Unix epoch) to datetimes in
# one vectorised call instead of invoking pd.to_datetime once per row.
interaction_df['timestamp'] = pd.to_datetime(interaction_df['timestamp'], unit='s', origin='unix')

interaction_df.head(1)

### Prepare interaction data to be in the appropriate format

Date values need to be in the *%Y-%m-%dT%H:%M:%SZ* format <br/>
User and item ids need to be of type string

Example request:
```bash
curl --location --request POST 'https://app.v2.uptrendz.ai/di/interactions' \
--header 'x-api-key: <INSERT YOUR API KEY>' \
--header 'x-domain-id: <INSERT YOUR DOMAIN ID>' \
--header 'Content-Type: application/json' \
--data-raw '[{
    "item_id": "some-item-id",
    "user_id": "some-user-id",
    "type": "rating",
    "value": 1,
    "timestamp": "2022-10-09T15:41:38Z"
}]'
```

In [None]:
interaction_data = []

# itertuples yields one lightweight namedtuple per row, avoiding the pandas
# Series that iterrows constructs for every row; the field values are the same
# Python scalars / Timestamps either way.
for row in interaction_df.itertuples(index=False):
    # create json object to send
    interaction_data.append({
        # format the timestamp the way the API expects
        "timestamp": row.timestamp.strftime("%Y-%m-%dT%H:%M:%SZ"),
        # ids are sent as strings
        "user_id": str(row.u_id),
        "item_id": str(row.i_id),
        "type": "rating", # needs to have the value as defined in the dashboard
        "value": row.rating
    })
print("Prepared " + str(len(interaction_data)) + " interactions")

### Send interaction data to the Uptrendz platform

Use the API key and Domain ID that are displayed in the platform.

* Note, we can submit a maximum of 40,000 interactions per request

In [None]:
# Headers carrying the API key, the domain id and the payload content type.
headers = {
    'x-api-key': API_KEY,
    'x-domain-id': DOMAIN_ID,
    'Content-Type': 'application/json',
}

# Split the interaction data into consecutive chunks of 5,000 interactions.
batch_size = 5000
chunks = []
for start in range(0, len(interaction_data), batch_size):
    chunks.append(interaction_data[start:start + batch_size])

# Send one request per chunk and print each response body.
for chunk in chunks:
    response = requests.post(
        'https://app.v2.uptrendz.ai/di/interactions',
        json=chunk,
        headers=headers,
    )
    print(response.text)