In [86]:
import sys
sys.path.append('../../')
import os

import pandas as pd
from utils import fill_empty, VColumns
from dotenv import load_dotenv
load_dotenv()

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load input file

In [87]:
# Read the v1 export from the notebook's working directory into `dfraw`.
raw_csv_path = "v1-start.csv"
dfraw = pd.read_csv(raw_csv_path)

# Start elaborating columns

Create a new dataframe with empty values, so that we can start filling in values from the input dataframe, without overriding columns.

In [88]:
# Show the first five raw rows to inspect the v1 column layout.
dfraw.head(5)

Unnamed: 0,external_id,type,name_1,name_2,name_3,internal_notes,vintage,info,bottle_price,magnum_price,demi_price,bottle_qty,magnum_qty,demi_qty,purchase_price,to_archive
0,,,Passito,Château d´Yquem,,Sauternes,2004.0,,,,,2.0,,,,
1,,,Passito,Château d´Yquem,,Sauternes,2001.0,,,,,4.0,,,,
2,,,Passito,Cashmere Gewürztraminer,Weingut Elena Walch,PASSITO,2020.0,,19.00 €,,,0.0,,,,1.0
3,,,Passito,Ornus dell Ornelaia,Petit Manseng,PASSITO,2016.0,,,110.00 €,,26.0,,,,
4,,,Passito,Ornus dell Ornelaia,Petit Manseng,PASSITO,2015.0,,,,,4.0,,,,


In [89]:
# Expand every v1 row into one record per bottle format (standard, magnum, half bottle).

# (quantity column, price column, size label) for each format present in the v1 file.
SIZE_COLUMNS = [
    ('bottle_qty', 'bottle_price', 'BOTTLE'),
    ('magnum_qty', 'magnum_price', 'MAGNUM'),
    ('demi_qty', 'demi_price', 'HALF_BOTTLE'),
]

# Column order of the expanded frame. Passing it explicitly keeps these columns
# available to later cells even when no row produces a record.
EXPANDED_COLUMNS = [
    'external_id', 'type', 'full_name', 'vintage', 'info',
    'internal_notes', 'purchase_price', 'size', 'price', 'quantity',
]


def build_full_name(row):
    """Join the non-missing name_1..name_3 values with single spaces.

    Missing parts are skipped instead of contributing an empty string, so the
    result has no leading, trailing or doubled separator spaces.
    """
    parts = []
    for column in ('name_1', 'name_2', 'name_3'):
        value = row[column]
        if pd.isna(value):
            continue
        text = str(value).strip()
        if text:
            parts.append(text)
    return ' '.join(parts)


def is_archived(row):
    """Return True when the row's to_archive value is present and equal to 1."""
    flag = row['to_archive']
    return pd.notna(flag) and float(flag) == 1


def expand_row(row):
    """Build the per-size records for a single v1 row.

    For each entry of SIZE_COLUMNS a record is produced when the quantity is
    positive, or - with quantity 0 - when the row is flagged to_archive.
    Sizes with no positive quantity on a non-archived row are skipped.
    Records are returned in SIZE_COLUMNS order.
    """
    base_row = {
        'external_id': row['external_id'],
        'type': row['type'],
        'full_name': build_full_name(row),
        'vintage': row['vintage'],
        'info': row['info'],
        'internal_notes': row['internal_notes'],
        'purchase_price': row['purchase_price'],
    }

    records = []
    for qty_column, price_column, size in SIZE_COLUMNS:
        # A missing quantity counts as zero stock.
        stock = 0 if pd.isna(row[qty_column]) else float(row[qty_column])
        if stock > 0:
            quantity = stock
        elif is_archived(row):
            # Archived rows keep an entry for this size, always with quantity 0.
            quantity = 0
        else:
            continue
        records.append({
            **base_row,
            'size': size,
            'price': row[price_column],
            'quantity': quantity,
        })
    return records


# Collect the records of every raw row, then build the frame once.
expanded_rows = []
for _, row in dfraw.iterrows():
    expanded_rows.extend(expand_row(row))

# Create the new dataframe with expanded rows
df1 = pd.DataFrame(expanded_rows, columns=EXPANDED_COLUMNS)

df1.head(26)

Unnamed: 0,external_id,type,full_name,vintage,info,internal_notes,purchase_price,size,price,quantity
0,,,Passito Château d´Yquem,2004.0,,Sauternes,,BOTTLE,,2.0
1,,,Passito Château d´Yquem,2001.0,,Sauternes,,BOTTLE,,4.0
2,,,Passito Cashmere Gewürztraminer Weingut Elena...,2020.0,,PASSITO,,BOTTLE,19.00 €,0.0
3,,,Passito Cashmere Gewürztraminer Weingut Elena...,2020.0,,PASSITO,,MAGNUM,,0.0
4,,,Passito Cashmere Gewürztraminer Weingut Elena...,2020.0,,PASSITO,,HALF_BOTTLE,,0.0
5,,,Passito Ornus dell Ornelaia Petit Manseng,2016.0,,PASSITO,,BOTTLE,,26.0
6,,,Passito Ornus dell Ornelaia Petit Manseng,2015.0,,PASSITO,,BOTTLE,,4.0
7,,,Passito Ornus dell Ornelaia Petit Manseng,2011.0,,PASSITO,,BOTTLE,19.00 €,4.0
8,,,Passito Ornus dell Ornelaia Petit Manseng,2010.0,,PASSITO,,BOTTLE,,3.0
9,,,Passito Ornus dell Ornelaia Petit Manseng,2009.0,,PASSITO,,BOTTLE,,1.0


In [90]:
# Build an empty frame whose columns come from VColumns.v2(); the following cells fill it.
v2_columns = VColumns.v2()
df = pd.DataFrame(columns=v2_columns)
df.head()

Unnamed: 0,external_id,type,name,winery_name,info,size,vintage,price,purchase_price,quantity,storage_area,internal_notes,visible


In [91]:
# Copy the expanded v1 values into the v2 columns. Columns without a v1
# counterpart are set to None.
# NOTE(review): df1 also has an external_id column, but it is not copied here - confirm that is intended.
df['external_id'] = None
df['type'] = df1['type']
# Series.replace only matches whole cell values, so replace(' nan ', '') never
# changed a name. Collapse whitespace runs and trim both ends instead, so the
# name is clean regardless of how full_name was assembled.
df['name'] = df1['full_name'].str.split().str.join(' ')
df['winery_name'] = None
df["size"] = df1["size"]
df["vintage"] = df1["vintage"]
df["quantity"] = df1["quantity"]
df["storage_area"] = None
df["info"] = df1["info"]
df["internal_notes"] = df1["internal_notes"]
df["visible"] = True

df.head()

Unnamed: 0,external_id,type,name,winery_name,info,size,vintage,price,purchase_price,quantity,storage_area,internal_notes,visible
0,,,Passito Château d´Yquem,,,BOTTLE,2004.0,,,2.0,,Sauternes,True
1,,,Passito Château d´Yquem,,,BOTTLE,2001.0,,,4.0,,Sauternes,True
2,,,Passito Cashmere Gewürztraminer Weingut Elena...,,,BOTTLE,2020.0,,,0.0,,PASSITO,True
3,,,Passito Cashmere Gewürztraminer Weingut Elena...,,,MAGNUM,2020.0,,,0.0,,PASSITO,True
4,,,Passito Cashmere Gewürztraminer Weingut Elena...,,,HALF_BOTTLE,2020.0,,,0.0,,PASSITO,True


In [92]:
def to_cents(value):
    """Convert a price such as '19.00 €' (or a bare number) to integer cents.

    The euro sign and any commas are removed before parsing, so commas are
    treated as grouping characters rather than decimal marks. The product is
    rounded instead of truncated: with binary floats 19.99 * 100 evaluates to
    1998.9999999999998, which int() alone would cut down to 1998.
    """
    amount = float(str(value).replace('€', '').replace(',', ''))
    return int(round(amount * 100))


# Missing prices are stored as 0 before conversion, i.e. 0 cents.
df1['price'] = df1['price'].fillna(0)
df['price'] = df1['price'].apply(to_cents)

df1['purchase_price'] = df1['purchase_price'].fillna(0)
df['purchase_price'] = df1['purchase_price'].apply(to_cents)

df.head(12)

Unnamed: 0,external_id,type,name,winery_name,info,size,vintage,price,purchase_price,quantity,storage_area,internal_notes,visible
0,,,Passito Château d´Yquem,,,BOTTLE,2004.0,0,0,2.0,,Sauternes,True
1,,,Passito Château d´Yquem,,,BOTTLE,2001.0,0,0,4.0,,Sauternes,True
2,,,Passito Cashmere Gewürztraminer Weingut Elena...,,,BOTTLE,2020.0,1900,0,0.0,,PASSITO,True
3,,,Passito Cashmere Gewürztraminer Weingut Elena...,,,MAGNUM,2020.0,0,0,0.0,,PASSITO,True
4,,,Passito Cashmere Gewürztraminer Weingut Elena...,,,HALF_BOTTLE,2020.0,0,0,0.0,,PASSITO,True
5,,,Passito Ornus dell Ornelaia Petit Manseng,,,BOTTLE,2016.0,0,0,26.0,,PASSITO,True
6,,,Passito Ornus dell Ornelaia Petit Manseng,,,BOTTLE,2015.0,0,0,4.0,,PASSITO,True
7,,,Passito Ornus dell Ornelaia Petit Manseng,,,BOTTLE,2011.0,1900,0,4.0,,PASSITO,True
8,,,Passito Ornus dell Ornelaia Petit Manseng,,,BOTTLE,2010.0,0,0,3.0,,PASSITO,True
9,,,Passito Ornus dell Ornelaia Petit Manseng,,,BOTTLE,2009.0,0,0,1.0,,PASSITO,True


In [93]:
# Run the project helper over the frame using the v2 column list.
# NOTE(review): fill_empty lives in utils; the meaning of the third argument
# (False) is not visible from this notebook - confirm against the helper.
v2_layout = VColumns.v2()
df = fill_empty(df, v2_layout, False)
df.head()

Unnamed: 0,external_id,type,name,winery_name,info,size,vintage,price,purchase_price,quantity,storage_area,internal_notes,visible
0,,,Passito Château d´Yquem,,,BOTTLE,2004,0,0,2,,Sauternes,True
1,,,Passito Château d´Yquem,,,BOTTLE,2001,0,0,4,,Sauternes,True
2,,,Passito Cashmere Gewürztraminer Weingut Elena...,,,BOTTLE,2020,1900,0,0,,PASSITO,True
3,,,Passito Cashmere Gewürztraminer Weingut Elena...,,,MAGNUM,2020,0,0,0,,PASSITO,True
4,,,Passito Cashmere Gewürztraminer Weingut Elena...,,,HALF_BOTTLE,2020,0,0,0,,PASSITO,True


# Merge files and write output

If there are multiple files, merge them into one and create a single output file.

In [94]:
# Not the last expression of the cell, so this preview is not rendered.
df.head()

# Only one frame is merged here; concat with ignore_index=True renumbers the rows from 0.
frames_to_merge = [df]
df_out = pd.concat(frames_to_merge, ignore_index=True)
df_out.to_csv("v2-cleaned.csv", index=False)

# NOTE(review): stray expression; as the last line it is what makes the cell display False.
bool()

False