In [1]:
import sys
# Put the parent directory on the module search path.
# NOTE(review): presumably needed so the `app` package imported in the next
# cell resolves when the notebook runs from its own folder — confirm.
sys.path.append('../')

In [2]:
import json

import pandas as pd

from app.models import *

In [3]:
def extract_feature_value(feature_name: str):
    """Build a lookup function for one named feature.

    The returned callable takes an iterable of feature records (mappings
    with 'name' and 'value' keys) and returns the 'value' of the first record
    whose 'name' equals ``feature_name``, or ``None`` if no record matches.
    It is shaped for use with ``Series.map``.
    """
    def mapper(features):
        # Lazily scan the records; only the first match is ever evaluated.
        matching_values = (
            entry['value'] for entry in features if entry['name'] == feature_name
        )
        return next(matching_values, None)
    return mapper

def extract_primary_ean(ean_list):
    """Return the EAN of the first record that is not tied to a variant.

    Args:
        ean_list: iterable of mappings with 'variant_id' and 'ean' keys;
            ``None`` or an empty list is accepted.

    Returns:
        The 'ean' of the first record whose 'variant_id' is None, or ``None``
        when there is no such record. (The previous fallback returned
        ``len(ean_list)``, which placed a record count into the EAN column.)
    """
    # `or ()` lets a missing (None) list behave like an empty one.
    for obj in ean_list or ():
        if obj['variant_id'] is None:
            return obj['ean']
    return None

In [4]:
# Location of the database auth file, relative to this notebook.
# NOTE(review): judging by the path this holds ArangoDB credentials — keep it
# out of version control.
db_auth_path = '../config/secrets/arangodb.json'
# `Repository` is not defined in this notebook; presumably it comes from the
# `from app.models import *` star import — confirm.
repo = Repository(db_auth_path=db_auth_path)

In [5]:
# Load everything returned by repo.get_all_products() into a DataFrame.
df_products = pd.DataFrame(repo.get_all_products())

In [6]:
# Sanity check: (rows, columns) of the raw products frame.
df_products.shape

(7969, 31)

In [7]:
# Create a new products DataFrame where JSON is flattened.
# These columns are carried over unchanged; the ean, variants, country_markets
# and features columns are handled separately in the following cells.
columns = [
    # identifiers and classification
    'id', 'supplier_id', 'supplier_name', 'category_id', 'category_name',
    # names and descriptive text
    'title', 'model_name',
    'description_short', 'description_middle', 'description_long',
    'summary_short', 'summary_long', 'warranty',
    # status / quality
    'is_limited', 'on_market', 'quality',
    # links
    'url_details', 'url_manual', 'url_pdf',
    # timestamps and lifecycle dates
    'created_at', 'updated_at', 'released_on', 'end_of_life_on',
]

# Take an independent copy so columns added later never touch df_products.
df_products_flattened = df_products.loc[:, columns].copy()

In [8]:
# Flatten simple lists: reduce each list-valued column to one scalar per product.
# 'ean' is whatever extract_primary_ean selects from the product's EAN records.
df_products_flattened['ean'] = df_products['ean'].map(extract_primary_ean)
# Number of variants per product.
df_products_flattened['n_variants'] = df_products['variants'].map(len)
# Sorted, comma-separated market codes; an empty list joins to ''.
df_products_flattened['countries'] = df_products['country_markets'].map(
    lambda markets: ','.join(sorted(markets))
)

In [9]:
# Flatten product features: one column per distinct feature name.
# NOTE(review): this reaches into the private `repo._db` handle; prefer a
# public Repository method if one exists.
aql = "FOR p IN products FOR f IN p.features COLLECT name = f.name  RETURN name"
result = repo._db.AQLQuery(aql, rawResults=True)
feature_names = list(result)

# Build every feature column first and attach them with a single concat.
# Assigning them one by one (over a thousand columns, per the recorded shape)
# fragments the frame and makes pandas emit a PerformanceWarning.
feature_columns = {
    feature_name: df_products.features.map(extract_feature_value(feature_name))
    for feature_name in feature_names
}
df_features = pd.DataFrame(feature_columns, index=df_products.index)

# Drop same-named columns before concatenating: a feature still overrides an
# existing column of the same name (as direct assignment did, though the
# column now lands at the end), and re-running this cell adds no duplicates.
overlap = df_products_flattened.columns.intersection(df_features.columns)
df_products_flattened = pd.concat(
    [df_products_flattened.drop(columns=overlap), df_features],
    axis=1,
)

In [10]:
# Sanity check: (rows, columns) after adding the flattened columns.
df_products_flattened.shape

(7969, 1230)

In [11]:
# Persist the flattened table. pandas infers gzip compression from the '.gz'
# suffix, and the DataFrame index is written as the first (unnamed) column.
df_products_flattened.to_csv(path_or_buf='../data/ice-cat-office-products.csv.gz')