In [10]:
import numpy as np
import pandas as pd

# Seed NumPy's legacy global RNG so any random draws in this notebook repeat.
np.random.seed(7)

# Toy menu, written one tuple per row: (dish, cuisine, label, base_price).
# Row 3 is an exact repeat of row 0, and the labels mix several languages
# and spellings.
menu = pd.DataFrame(
    [
        ("taco", "mex", "veggie", 5.0),
        ("ramen", "jpn", "picante", 9.5),
        ("falafel", "me", "vegano", 7.0),
        ("taco", "mex", "veggie", 5.0),
        ("pierogi", "pl", "mięsne", 6.0),
        ("gelato", "it", "dolce", 4.5),
        ("falafel", "me", "vegetarian", 7.0),
    ],
    columns=["dish", "cuisine", "label", "base_price"],
)

In [11]:
# Raw menu as entered, before de-duplication or label clean-up.
menu

Unnamed: 0,dish,cuisine,label,base_price
0,taco,mex,veggie,5.0
1,ramen,jpn,picante,9.5
2,falafel,me,vegano,7.0
3,taco,mex,veggie,5.0
4,pierogi,pl,mięsne,6.0
5,gelato,it,dolce,4.5
6,falafel,me,vegetarian,7.0


In [12]:
# One rating per original menu row (default 0..6 index); rows 1 and 5 are unrated.
popularity = pd.Series(
    data=[4.2, np.nan, 3.8, 4.2, 4.0, np.nan, 3.8],
    name="popularity",
    dtype="float64",
)
popularity

0    4.2
1    NaN
2    3.8
3    4.2
4    4.0
5    NaN
6    3.8
Name: popularity, dtype: float64

In [13]:
# Number of rows that exactly repeat an earlier row (the first occurrence is not counted).
duplicates_count = menu.duplicated(keep="first").sum()
duplicates_count

np.int64(1)

In [14]:
# Show the repeated rows themselves; each first occurrence stays out of this view.
duplicates = menu[menu.duplicated(keep="first")]
duplicates

Unnamed: 0,dish,cuisine,label,base_price
3,taco,mex,veggie,5.0


In [15]:
# Drop exact repeat rows, keeping the first occurrence of each.
# Rebinding replaces `inplace=True`, so the frame object shown by earlier
# cells is not mutated behind the reader's back.
# The original index labels are kept on purpose (no reset_index): the
# popularity ratings are attached later by index label, and renumbering
# here would pair ratings with the wrong dishes.
menu = menu.drop_duplicates()
menu

Unnamed: 0,dish,cuisine,label,base_price
0,taco,mex,veggie,5.0
1,ramen,jpn,picante,9.5
2,falafel,me,vegano,7.0
4,pierogi,pl,mięsne,6.0
5,gelato,it,dolce,4.5
6,falafel,me,vegetarian,7.0


In [16]:
# Normalise the multilingual label spellings to four English categories.
# The variants are grouped by target category and then inverted into the
# variant -> category lookup that Series.replace takes; labels not listed
# here are left unchanged.
# NOTE(review): "vegano" is folded into "vegetarian" — confirm that losing
# the vegan/vegetarian distinction is intended.
_label_groups = {
    "vegetarian": ["veggie", "vegano", "vegetarian"],
    "spicy": ["picante", "piccante"],
    "meat": ["mięsne"],
    "dessert": ["dolce"],
}
mapping = {
    variant: category
    for category, variants in _label_groups.items()
    for variant in variants
}
menu["label"] = menu["label"].replace(to_replace=mapping)
menu

Unnamed: 0,dish,cuisine,label,base_price
0,taco,mex,vegetarian,5.0
1,ramen,jpn,spicy,9.5
2,falafel,me,vegetarian,7.0
4,pierogi,pl,meat,6.0
5,gelato,it,dessert,4.5
6,falafel,me,vegetarian,7.0


In [17]:
# Fill the missing ratings with 0, then attach the ratings to the table.
# Column assignment from a Series aligns on index labels, so a rating whose
# label is no longer in `menu` is simply not used.
# NOTE(review): after this fill an unrated dish is indistinguishable from
# one rated 0 — confirm that is the intended meaning.
popularity = popularity.fillna(0)
menu["popularity"] = popularity
menu

Unnamed: 0,dish,cuisine,label,base_price,popularity
0,taco,mex,vegetarian,5.0,4.2
1,ramen,jpn,spicy,9.5,0.0
2,falafel,me,vegetarian,7.0,3.8
4,pierogi,pl,meat,6.0,4.0
5,gelato,it,dessert,4.5,0.0
6,falafel,me,vegetarian,7.0,3.8


In [18]:
# Flat per-cuisine surcharge, looked up by cuisine code.
# A cuisine code absent from this dict would map to NaN and carry through
# into final_price.
service_fee_dict = dict(mex=1.5, jpn=2.0, me=1.0, pl=0.8, it=1.2)
menu["service_fee"] = menu["cuisine"].map(service_fee_dict)
# Customer-facing price = base price + cuisine surcharge.
menu["final_price"] = menu["base_price"].add(menu["service_fee"])
menu

Unnamed: 0,dish,cuisine,label,base_price,popularity,service_fee,final_price
0,taco,mex,vegetarian,5.0,4.2,1.5,6.5
1,ramen,jpn,spicy,9.5,0.0,2.0,11.5
2,falafel,me,vegetarian,7.0,3.8,1.0,8.0
4,pierogi,pl,meat,6.0,4.0,0.8,6.8
5,gelato,it,dessert,4.5,0.0,1.2,5.7
6,falafel,me,vegetarian,7.0,3.8,1.0,8.0


In [21]:
# Rename two columns and turn the integer row labels into "rNNN" strings.
#
# The label map is built from the labels actually present in the index.
# The previous range(0, len(menu) + 1) covered them only because exactly
# one row had been dropped from seven; with any other number of dropped
# rows some labels would have been left unrenamed.
# Only integer labels are mapped, so re-running this cell after the index
# already holds "rNNN" strings is a harmless no-op rather than a format error.
index_dict = {
    label: f"r{label:03d}"
    for label in menu.index
    if isinstance(label, (int, np.integer))
}
menu = menu.rename(
    columns={"dish": "item", "base_price": "price_base"},
    index=index_dict,
)

In [22]:
# Final table: renamed columns and "rNNN" row labels (r003 is absent because
# that row was the dropped duplicate).
menu

Unnamed: 0,item,cuisine,label,price_base,popularity,service_fee,final_price
r000,taco,mex,vegetarian,5.0,4.2,1.5,6.5
r001,ramen,jpn,spicy,9.5,0.0,2.0,11.5
r002,falafel,me,vegetarian,7.0,3.8,1.0,8.0
r004,pierogi,pl,meat,6.0,4.0,0.8,6.8
r005,gelato,it,dessert,4.5,0.0,1.2,5.7
r006,falafel,me,vegetarian,7.0,3.8,1.0,8.0
