In [31]:
import sys
sys.path.append('../../')
import os

import pandas as pd
from utils import fill_empty, VColumns
from dotenv import load_dotenv
load_dotenv()

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load input file

In [32]:
df1 = pd.read_csv("v1-start.csv")

# Start elaborating columns

Create a new dataframe with empty values, so that we can start filling in values from the input dataframe, without overriding columns.

In [33]:
df1.head()

Unnamed: 0,type,winery,name,size,vintage,supplier,price_eur,qty,Storage_1,Storage_2,Storage_3,purchase_price_inclvat
0,RED,Agricoltori del Chianti Geografico,Chianti Classico,,1971.0,,,1.0,Collezione,,,1 €
1,RED,Allegrini,Amarone della Valpolicella Classico docg,,2017.0,,150 €,4.0,s2 - legno,,,76 €
2,RED,Allegrini,Amarone della Valpolicella Classico docg,,2019.0,,120 €,5.0,s2 - legno,,,53 €
3,RED,Allegrini,La Grola igt Rosso Veronese,,2018.0,,,,,,,
4,RED,Allegrini,La Grola igt Rosso Veronese,,2019.0,,45 €,17.0,s4,bar005,,18 €


In [34]:
# create new empty dataframe
df = pd.DataFrame(columns=VColumns.v2())
df.head()

Unnamed: 0,external_id,type,name,winery_name,info,size,vintage,price,purchase_price,quantity,storage_area,internal_notes,visible


In [35]:
# copy values from original columns to new columns
df['external_id'] = None
df['type'] = df1['type']
df['name'] = df1['name']
df['winery_name'] = df1['winery']
df["size"] = df1["size"]
df["vintage"] = df1["vintage"]
df["quantity"] = df1["qty"]
df["storage_area"] = df1.apply(lambda x: str(x["Storage_1"]) + ("+" + str(x["Storage_2"]) if pd.notna(x["Storage_2"]) else "") + ("+" + str(x["Storage_3"]) if pd.notna(x["Storage_3"]) else ""), axis=1)
df["info"] = None
df["internal_notes"] = df1.apply(lambda x: "Fornitore: " + x["supplier"] if pd.notna(x["supplier"]) else None, axis=1)
df["visible"] = True

df.head()

Unnamed: 0,external_id,type,name,winery_name,info,size,vintage,price,purchase_price,quantity,storage_area,internal_notes,visible
0,,RED,Chianti Classico,Agricoltori del Chianti Geografico,,,1971.0,,,1.0,Collezione,,True
1,,RED,Amarone della Valpolicella Classico docg,Allegrini,,,2017.0,,,4.0,s2 - legno,,True
2,,RED,Amarone della Valpolicella Classico docg,Allegrini,,,2019.0,,,5.0,s2 - legno,,True
3,,RED,La Grola igt Rosso Veronese,Allegrini,,,2018.0,,,,,,True
4,,RED,La Grola igt Rosso Veronese,Allegrini,,,2019.0,,,17.0,s4+bar005,,True


In [36]:
# fill size with BOTTLE
df['size'] = df['size'].fillna('BOTTLE')

In [37]:
df1['price_eur'] = df1['price_eur'].fillna(0)
df['price'] = df1['price_eur'].apply(lambda x: int(float(str(x).replace('€', '').replace(',', ''))*100)) # convert to cents

df['purchase_price_inclvat'] = df1['purchase_price_inclvat'].fillna(0)
df['purchase_price'] = df1['purchase_price_inclvat'].apply(lambda x: int(float(str(x).replace('€', '').replace(',', '').strip())*100/1.22) if pd.notna(x) else 0) # convert to cents and remove 22% VAT

df.head()

Unnamed: 0,external_id,type,name,winery_name,info,size,vintage,price,purchase_price,quantity,storage_area,internal_notes,visible,purchase_price_inclvat
0,,RED,Chianti Classico,Agricoltori del Chianti Geografico,,BOTTLE,1971.0,0,81,1.0,Collezione,,True,1 €
1,,RED,Amarone della Valpolicella Classico docg,Allegrini,,BOTTLE,2017.0,15000,6229,4.0,s2 - legno,,True,76 €
2,,RED,Amarone della Valpolicella Classico docg,Allegrini,,BOTTLE,2019.0,12000,4344,5.0,s2 - legno,,True,53 €
3,,RED,La Grola igt Rosso Veronese,Allegrini,,BOTTLE,2018.0,0,0,,,,True,0
4,,RED,La Grola igt Rosso Veronese,Allegrini,,BOTTLE,2019.0,4500,1475,17.0,s4+bar005,,True,18 €


In [38]:
df = fill_empty(df, VColumns.v2(), False)
df.head()

Unnamed: 0,external_id,type,name,winery_name,info,size,vintage,price,purchase_price,quantity,storage_area,internal_notes,visible,purchase_price_inclvat
0,,RED,Chianti Classico,Agricoltori del Chianti Geografico,,BOTTLE,1971,0,81,1,Collezione,,True,1 €
1,,RED,Amarone della Valpolicella Classico docg,Allegrini,,BOTTLE,2017,15000,6229,4,s2 - legno,,True,76 €
2,,RED,Amarone della Valpolicella Classico docg,Allegrini,,BOTTLE,2019,12000,4344,5,s2 - legno,,True,53 €
3,,RED,La Grola igt Rosso Veronese,Allegrini,,BOTTLE,2018,0,0,0,,,True,0
4,,RED,La Grola igt Rosso Veronese,Allegrini,,BOTTLE,2019,4500,1475,17,s4+bar005,,True,18 €


# Merge files and write output

If there are multiple files, merge them into one and create single output file.

In [39]:
df.head()

df_out = pd.concat([df], ignore_index=True)
df_out.to_csv("v2-cleaned.csv", index=False)

bool()

False