In [None]:
import pickle

import pandas as pd
from getpass import getpass

from melitk.fda2 import inventory

from app.data.utils.sparksql import SparkSQL
from app.data.utils.load_query import load_format
from app.conf.settings import DEFAULT_PARAMS

# Access to the databases

In [None]:
# Melidata credentials: the user name is fixed in the notebook, while the
# password is typed interactively so it is never stored in the file.
melidata_user = "niacevedo"
melidata_pass = getpass()

# SQL client shared by every query cell in this notebook.
spark = SparkSQL(
    user=melidata_user,
    password=melidata_pass,
)

# Data

## Read

In [None]:
# Date (month/day of 2022) whose first hour is probed by the count below.
month, day = '01', '01'

# Half-open `ds` window [00, 01) for that date.
ds_from = f"2022-{month}-{day} 00"
ds_until = f"2022-{month}-{day} 01"

# User agents matching any of these fragments are excluded from the count.
# NOTE(review): the agent is lower-cased before matching, so the mixed-case
# 'damnBot' alternative presumably never matches by itself (assuming RLIKE is
# case-sensitive); such agents are still caught by the plain 'bot' alternative.
bot_agents = '.*(libwww|wget|lwp|damnBot|bbbike|java|spider|crawl|slurp|bot|feedburner|googleimageproxy|google web preview).*'

# Count MLA `display_prints` events inside the window, bots excluded.
query = f"""
SELECT COUNT(*) AS `N`
FROM advertising.adv_lake_raw
WHERE ds >= "{ds_from}" AND ds < "{ds_until}"
    AND `site` = 'MLA'
    AND `event` = 'display_prints'
    AND (
        LOWER(`device`.user_agent) NOT RLIKE '{bot_agents}'
    )
"""
df = spark.run_query(query)

In [None]:
# Display the result of the COUNT(*) query (column `N`).
df

In [None]:
# Build the performance query from its SQL file and the project's default
# parameters. `query` is rebound here and no longer holds the count query.
# NOTE(review): presumably load_format reads the file and fills its
# placeholders from DEFAULT_PARAMS - confirm in app.data.utils.load_query.
# The path is relative, so it depends on the notebook's working directory.
filename = '../src/app/data/creatives_performance/performance.sql'
query = load_format(filename, DEFAULT_PARAMS)

In [None]:
# Run the performance query. `df` is rebound here: from this cell on it holds
# the performance data rather than the earlier count result.
df = spark.run_query(query)
# Preview the first rows only instead of rendering the whole result.
df.head()

In [None]:
# Size of the query result (presumably a pandas DataFrame: rows, columns).
df.shape

In [None]:
# Order rows by day, then by campaign, line item and creative ids.
# The sorted frame is rebound to `df` rather than sorted with `inplace=True`:
# the result is the same, but no object is mutated behind earlier cells'
# outputs and the expression stays chainable.
df = df.sort_values(['cday', 'campaign_id', 'line_item_id', 'creative_id'])

## Processing

In [None]:
# Keep only rows where all three identifier columns are populated; a row
# missing any one of them is dropped.
df = df.dropna(subset=['campaign_id', 'line_item_id', 'creative_id'])

## Save

In [None]:
# Ask for the Fury token at run time instead of embedding it in the notebook,
# so the secret is never saved or committed together with the file (same
# approach as the Melidata password).
# NOTE(review): the token that used to be hardcoded in this cell should be
# treated as leaked and revoked/rotated.
my_fury_new_token = getpass('Fury token: ')
inventory.init(token = my_fury_new_token)

In [None]:
# Identity of the artifact that will hold the processed data.
artifact_name = 'clicks_prints_per_day'
version = '1.0.0'

# Pickle the frame into an in-memory byte string.
# NOTE(review): consumers will need to unpickle this; only load it from a
# trusted inventory, since unpickling can execute arbitrary code.
data_bytes = pickle.dumps(df)

# Create the 'fda.Bytes' artifact and store the pickled payload in it.
artifact = inventory.create_artifact(
    artifact_name,
    version=version,
    type_='fda.Bytes',
)
artifact.save_from_bytes(data=data_bytes)