In [74]:
import pandas as pd
from pathlib import Path
import requests

In [75]:
# Set the path to the datasets directory
datasets_location = Path("__file__").parent / "data"
data1 = "customers.csv"
data2 = "purchases.csv"

# Check if the datasets exists
if not ((datasets_location / data1 ).exists() or (datasets_location / data2 ).exists()):
    print(f"please include both {data1} and {data2} files in the data folder.")
    exit(1)

# Load the datasets
customer_df = pd.read_csv(datasets_location / data1, delimiter=";")
purchases_df = pd.read_csv(datasets_location / data2, delimiter=";")

In [76]:
customer_df

Unnamed: 0,customer_id,title,lastname,firstname,postal_code,city,email
0,1,2,Norris,Chuck,83600.0,Fréjus,chuck@norris.com
1,2,1,Galante,Marie,,,marie-galante@france.fr
2,3,2,Barbier,Christophe,75009.0,Paris,christophe@fake.email
3,4,1,,,,,
4,5,2,Dupont,Eric,21000.0,Dijon,eric.dupont@bourgogne.fr


In [77]:
purchases_df

Unnamed: 0,purchase_identifier,customer_id,product_id,quantity,price,currency,date
0,2/01,2,1221,1,10,EUR,2017-12-31
1,1/01,1,4324,1,10,EUR,2030-12-31
2,3/01,3,75672,1,10,USD,2050-12-31
3,3/02,3,2123,1,10,EUR,2017-08-01
4,2/02,2,3213,1,10,EUR,2030-12-31


In [78]:
customer_df = customer_df.assign(
    civility = lambda x: x['title'].map({1: 'Male', 2: 'Female'}).fillna(''),
    salutation = lambda x: x['title'].map({1: 'Mr', 2: 'Mrs'}).fillna(''),
)
customer_df

Unnamed: 0,customer_id,title,lastname,firstname,postal_code,city,email,civility,salutation
0,1,2,Norris,Chuck,83600.0,Fréjus,chuck@norris.com,Female,Mrs
1,2,1,Galante,Marie,,,marie-galante@france.fr,Male,Mr
2,3,2,Barbier,Christophe,75009.0,Paris,christophe@fake.email,Female,Mrs
3,4,1,,,,,,Male,Mr
4,5,2,Dupont,Eric,21000.0,Dijon,eric.dupont@bourgogne.fr,Female,Mrs


In [79]:
purchases_df = purchases_df.assign(
    currency = lambda x: x['currency'].map({'EUR': 'euro', 'USD': 'dollars'}))

In [80]:
joined = pd.merge(customer_df, purchases_df, on='customer_id', how='right')
joined

Unnamed: 0,customer_id,title,lastname,firstname,postal_code,city,email,civility,salutation,purchase_identifier,product_id,quantity,price,currency,date
0,2,1,Galante,Marie,,,marie-galante@france.fr,Male,Mr,2/01,1221,1,10,euro,2017-12-31
1,1,2,Norris,Chuck,83600.0,Fréjus,chuck@norris.com,Female,Mrs,1/01,4324,1,10,euro,2030-12-31
2,3,2,Barbier,Christophe,75009.0,Paris,christophe@fake.email,Female,Mrs,3/01,75672,1,10,dollars,2050-12-31
3,3,2,Barbier,Christophe,75009.0,Paris,christophe@fake.email,Female,Mrs,3/02,2123,1,10,euro,2017-08-01
4,2,1,Galante,Marie,,,marie-galante@france.fr,Male,Mr,2/02,3213,1,10,euro,2030-12-31


In [81]:
def purchased_objects(data):
    return data.apply(lambda x: {'product_id': x['product_id'], 'price': x['price'], 'currency': x['currency'], 'quantity': x['quantity'], 'purchased_at': x['date']},axis=1).tolist()

json = joined.groupby(['customer_id', 'salutation', 'lastname', 'firstname', 'email']).apply(purchased_objects, include_groups=False).reset_index(name='purchases')


In [82]:
payload = json.to_dict(orient='records')

for customer in payload:
    del customer['customer_id']

payload

[{'salutation': 'Mrs',
  'lastname': 'Norris',
  'firstname': 'Chuck',
  'email': 'chuck@norris.com',
  'purchases': [{'product_id': 4324,
    'price': 10,
    'currency': 'euro',
    'quantity': 1,
    'purchased_at': '2030-12-31'}]},
 {'salutation': 'Mr',
  'lastname': 'Galante',
  'firstname': 'Marie',
  'email': 'marie-galante@france.fr',
  'purchases': [{'product_id': 1221,
    'price': 10,
    'currency': 'euro',
    'quantity': 1,
    'purchased_at': '2017-12-31'},
   {'product_id': 3213,
    'price': 10,
    'currency': 'euro',
    'quantity': 1,
    'purchased_at': '2030-12-31'}]},
 {'salutation': 'Mrs',
  'lastname': 'Barbier',
  'firstname': 'Christophe',
  'email': 'christophe@fake.email',
  'purchases': [{'product_id': 75672,
    'price': 10,
    'currency': 'dollars',
    'quantity': 1,
    'purchased_at': '2050-12-31'},
   {'product_id': 2123,
    'price': 10,
    'currency': 'euro',
    'quantity': 1,
    'purchased_at': '2017-08-01'}]}]