In [29]:
import re, os, json

def label_to_price_per_kg(label):
    """Convert a unit-price label into a price per kilogram.

    The label is expected to start with text such as ``'Euro 7,49 / kg'``:
    the literal ``Euro``, a price written with a decimal comma (the decimal
    part is optional), a slash and a lowercase unit. Litres are treated as
    kilograms and millilitres as grams.

    Args:
        label: The unit-price label text.

    Returns:
        The price scaled to one kilogram as a float, or ``None`` when the
        label is not a string, does not match the expected format, or uses
        a unit that cannot be converted to a weight/volume price (for
        example ``'pz'``, a per-piece price).
    """
    # Multiplier that turns "price per <unit>" into "price per kg".
    # Units missing from this table yield None instead of being silently
    # reported as if they were already per kg.
    unit_factors = {'kg': 1, 'l': 1, 'hg': 10, 'g': 1000, 'ml': 1000}

    # re.match raises TypeError on non-string input (e.g. a missing label).
    if not isinstance(label, str):
        return None

    match = re.match(r'Euro (\d+(?:,\d+)?) \/ ([a-z]+)', label)
    if not match:
        return None

    amount, unit = match.groups()
    factor = unit_factors.get(unit)
    if factor is None:
        return None

    # Labels use a decimal comma; float() needs a decimal point.
    return float(amount.replace(',', '.')) * factor

def parse_har(har_filename):
    """Extract the priced products from one HAR capture stored in ``har/``.

    Each entry of the HAR log whose response body is a JSON object with an
    ``entities`` list contributes those entities. Entries with no body
    text, a body that is not valid JSON, or no ``entities`` are skipped.

    Args:
        har_filename: Name of the capture file inside the ``har``
            directory. The name without its extension is used as the
            product category.

    Returns:
        A dict mapping each entity's ``id`` to a dict with the keys
        ``description``, ``category``, ``price``, ``qty`` and
        ``price_per_kg``. Entities whose price per kg is falsy (``None``
        or zero) are left out. When an id occurs more than once, the last
        occurrence wins.
    """
    # Read explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    with open('har/{}'.format(har_filename), encoding='utf-8') as f:
        js = json.load(f)

    # Strip the extension whatever its length (not just 4 characters).
    category = os.path.splitext(har_filename)[0]

    # Iterate over responses and collect all the entities
    entities = []
    for entry in js['log']['entries']:
        text = entry.get('response', {}).get('content', {}).get('text')
        if not text:
            # Response without a captured body.
            continue
        try:
            response = json.loads(text)
        except ValueError:
            # Body is not JSON (json.JSONDecodeError subclasses ValueError).
            continue
        if isinstance(response, dict):
            entities.extend(response.get('entities') or [])

    # Build a dictionary with the product id as key, keeping only the
    # interesting fields of products that have a price per kg.
    products = {}
    for entity in entities:
        price_per_kg = label_to_price_per_kg(entity['label'])
        if not price_per_kg:
            continue

        products[entity['id']] = {
            'description': entity['description'].strip(),
            'category': category,
            'price': entity['price'],
            'qty': ' '.join([entity['unitValue'], entity['unitText']]),
            'price_per_kg': price_per_kg,
        }

    return products

In [31]:
# Database creation

entities = {}
limit = 100

# os.walk yields file names in arbitrary order; sort them so that the
# capture winning on duplicate product ids (and the products printed
# below) are the same on every run.
for har_filename in sorted(next(os.walk('har'))[2]):
    entities.update(parse_har(har_filename))

# Print the first `limit` products
for product_id in list(entities)[:limit]:
    print(entities[product_id])

{'description': 'Esselunga, suino salsiccia', 'category': 'carne', 'price': 7.49, 'qty': '1 kg', 'price_per_kg': 7.49}
{'description': 'Esselunga, macinata mista di bovino e suino', 'category': 'carne', 'price': 7.59, 'qty': '1 kg', 'price_per_kg': 7.59}
{'description': 'Esselunga Naturama, petto di pollo a fette allevato senza uso di antibiotici', 'category': 'carne', 'price': 12.49, 'qty': '1 kg', 'price_per_kg': 9.99}
{'description': 'Esselunga, vitello fegato a fette', 'category': 'carne', 'price': 17.14, 'qty': '1 kg', 'price_per_kg': 11.99}
{'description': 'Esselunga Naturama, petto di pollo fettine sottili allevato senza uso di antibiotici', 'category': 'carne', 'price': 14.2, 'qty': '1 kg', 'price_per_kg': 14.2}
{'description': 'Esselunga Naturama, mini filetti di pollo allevato senza uso di antibiotici', 'category': 'carne', 'price': 10.99, 'qty': '1 kg', 'price_per_kg': 10.99}
{'description': 'Esselunga Naturama, sovracosce di pollo senza pelle allevato senza uso di antibioti