In [41]:
# Extend the module search path two directories up *before* importing `utils`
# (presumably that is where the `utils` module lives — confirm).
import sys
sys.path.append('../../')
import os

import pandas as pd
from utils import fill_empty, VColumns
from dotenv import load_dotenv
# Load environment variables via python-dotenv (by default from a .env file).
load_dotenv()

# IPython autoreload in mode 2: imported modules are reloaded before each cell
# executes, so edits to helper modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load input file

In [42]:
# Read the input CSV into df1, the frame every later cell starts from.
df1 = pd.read_csv(filepath_or_buffer="v1-start.csv")

# Start processing columns

Create a new, empty dataframe with the target columns, so that we can fill it with values from the input dataframe without overwriting the input columns.

In [43]:
# Preview the first five rows of the input frame.
df1.head(n=5)

Unnamed: 0,type,name,size,vintage,winery,qty,purchase_price_eur,sales_price_eur
0,,Prosecco DOCG Extra Dry,,,La Tordera,156.0,6.96 €,28.0
1,,(Prosecco Valdobbiadene),,,,,,
2,,Prosecco DOCG Extra Dry Magnum,,,La Tordera,13.0,15.50 €,58.0
3,,(Prosecco Valdobbiadene),,,,,,
4,,Prosecco DOCG Extra Dry 3L,JEROBOAM,,La Tordera,3.0,49.95 €,120.0


In [44]:
# Write to file every df1 row that is missing a quantity OR a purchase price
# (presumably for manual completion, judging by the file name — confirm).
missing_qty = df1['qty'].isna()
missing_purchase_price = df1['purchase_price_eur'].isna()
df1.loc[missing_qty | missing_purchase_price].to_csv("v5-insert-manual.csv", index=False)

In [35]:
# Keep only the rows that have both a quantity and a purchase price; a row
# missing either value is dropped.
has_qty = df1['qty'].notna()
has_purchase_price = df1['purchase_price_eur'].notna()
# .copy() detaches the result from the frame it was filtered from, so later
# column assignments on df1 cannot raise SettingWithCopyWarning.
df1 = df1.loc[has_qty & has_purchase_price].copy()
df1.head()

Unnamed: 0,type,name,size,vintage,winery,qty,purchase_price_eur,sales_price_eur
0,,Prosecco DOCG Extra Dry,,,La Tordera,156.0,6.96 €,28.0
2,,Prosecco DOCG Extra Dry Magnum,,,La Tordera,13.0,15.50 €,58.0
4,,Prosecco DOCG Extra Dry 3L,JEROBOAM,,La Tordera,3.0,49.95 €,120.0
6,,Lamm 12 Extra Brut - Südt. Sekt,,,Winkler Sektmanifaktur,15.0,28.50 €,54.0
7,,Haderburg Pas Dosè Millesimato – Südt. Sekt,,,Haderburg,3.0,25.50 €,62.0


In [36]:
# Build the (still empty) output frame whose columns are the list returned by
# VColumns.v2().
target_columns = VColumns.v2()
df = pd.DataFrame(columns=target_columns)
df.head()

Unnamed: 0,external_id,type,name,winery_name,info,size,vintage,price,purchase_price,quantity,storage_area,internal_notes,visible


In [37]:
import re

# No external id is available in the input.
df['external_id'] = None

# Columns taken over unchanged from the input frame: target name -> source name.
# Assigning a Series aligns on the index, so df picks up df1's row labels.
copied_columns = {
    'type': 'type',
    'name': 'name',
    'winery_name': 'winery',
    'vintage': 'vintage',
    'quantity': 'qty',
}
for target_name, source_name in copied_columns.items():
    df[target_name] = df1[source_name]

# Columns that get the same constant value on every row.
constant_columns = (
    ('storage_area', None),
    ('internal_notes', None),
    ('visible', True),
)
for column_name, constant in constant_columns:
    df[column_name] = constant

df.head()

Unnamed: 0,external_id,type,name,winery_name,info,size,vintage,price,purchase_price,quantity,storage_area,internal_notes,visible
0,,,Prosecco DOCG Extra Dry,La Tordera,,,,,,156.0,,,True
2,,,Prosecco DOCG Extra Dry Magnum,La Tordera,,,,,,13.0,,,True
4,,,Prosecco DOCG Extra Dry 3L,La Tordera,,,,,,3.0,,,True
6,,,Lamm 12 Extra Brut - Südt. Sekt,Winkler Sektmanifaktur,,,,,,15.0,,,True
7,,,Haderburg Pas Dosè Millesimato – Südt. Sekt,Haderburg,,,,,,3.0,,,True


In [38]:
def _eur_to_cents(value):
    """Convert a euro amount such as ``28.0`` or ``"6.96 €"`` to integer cents.

    The amount is rounded to the nearest cent instead of being truncated:
    binary floats can land just below the intended value (for example
    ``4.35 * 100 == 434.99999999999994``), and ``int()`` would then lose a cent.
    """
    euros = float(str(value).replace('€', ''))
    return round(euros * 100)


# size: rows without an explicit size are recorded as "BOTTLE"
df['size'] = df1["size"].fillna("BOTTLE")

# fill empty prices with 0; assign() rebinds df1 to a new frame instead of
# writing into the filtered frame in place (avoids SettingWithCopyWarning)
df1 = df1.assign(
    sales_price_eur=df1['sales_price_eur'].fillna(0),
    purchase_price_eur=df1['purchase_price_eur'].fillna(0),
)

# convert prices to cents
df['price'] = df1['sales_price_eur'].apply(_eur_to_cents)
df['purchase_price'] = df1['purchase_price_eur'].apply(_eur_to_cents)

df.head()

Unnamed: 0,external_id,type,name,winery_name,info,size,vintage,price,purchase_price,quantity,storage_area,internal_notes,visible
0,,,Prosecco DOCG Extra Dry,La Tordera,,BOTTLE,,2800,696,156.0,,,True
2,,,Prosecco DOCG Extra Dry Magnum,La Tordera,,BOTTLE,,5800,1550,13.0,,,True
4,,,Prosecco DOCG Extra Dry 3L,La Tordera,,JEROBOAM,,12000,4995,3.0,,,True
6,,,Lamm 12 Extra Brut - Südt. Sekt,Winkler Sektmanifaktur,,BOTTLE,,5400,2850,15.0,,,True
7,,,Haderburg Pas Dosè Millesimato – Südt. Sekt,Haderburg,,BOTTLE,,6200,2550,3.0,,,True


In [39]:
# Write to file the rows of df whose quantity is zero or negative.
non_positive_qty = df['quantity'].le(0)
df.loc[non_positive_qty].to_csv("v1-manual-insert-qty.csv", index=False)


In [19]:
# Keep only the rows with a strictly positive quantity.
positive_qty = df["quantity"].gt(0)
df = df.loc[positive_qty]

# Hand the frame, the v2 column list and the flag False to the project helper
# fill_empty (judging by the displayed result it fills the remaining empty
# cells with defaults — confirm against utils).
df = fill_empty(df, VColumns.v2(), False)
df.head()

Unnamed: 0,external_id,type,name,winery_name,info,size,vintage,price,purchase_price,quantity,storage_area,internal_notes,visible
0,,,Prosecco DOCG Extra Dry,La Tordera,,BOTTLE,0,2800,696,156,,,True
2,,,Prosecco DOCG Extra Dry Magnum,La Tordera,,BOTTLE,0,5800,1550,13,,,True
4,,,Prosecco DOCG Extra Dry 3L,La Tordera,,JEROBOAM,0,12000,4995,3,,,True
6,,,Lamm 12 Extra Brut - Südt. Sekt,Winkler Sektmanifaktur,,BOTTLE,0,5400,2850,15,,,True
7,,,Haderburg Pas Dosè Millesimato – Südt. Sekt,Haderburg,,BOTTLE,0,6200,2550,3,,,True


# Merge files and write output

If there are multiple files, merge them into one and create a single output file.

In [20]:
# Frames to merge into the output; with several input files, add each cleaned
# frame to this list.
frames = [df]

# ignore_index=True renumbers the rows 0..n-1 in the merged frame.
df_out = pd.concat(frames, ignore_index=True)
df_out.to_csv("v2-cleaned.csv", index=False)