In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the raw price list. The path is relative, so it is resolved against
# the kernel's current working directory.
DATA_FILE = "pizza_v1.csv"
pizza = pd.read_csv(DATA_FILE)

In [3]:
# Preview the first ten rows to see the column names and raw value formats.
pizza.head(n=10)

Unnamed: 0,company,price_rupiah,diameter,topping,variant,size,extra_sauce,extra_cheese
0,A,"Rp235,000",22.0,chicken,double_signature,jumbo,yes,yes
1,A,"Rp198,000",20.0,papperoni,double_signature,jumbo,yes,yes
2,A,"Rp120,000",16.0,mushrooms,double_signature,reguler,yes,yes
3,A,"Rp155,000",14.0,smoked beef,double_signature,reguler,yes,no
4,A,"Rp248,000",18.0,mozzarella,double_signature,jumbo,yes,no
5,A,"Rp140,000",18.5,black papper,american_favorite,jumbo,no,no
6,A,"Rp110,000",16.0,smoked beef,american_favorite,jumbo,no,yes
7,A,"Rp70,000",8.0,papperoni,american_favorite,reguler,no,no
8,A,"Rp90,000",12.0,mushrooms,american_favorite,reguler,yes,no
9,A,"Rp90,000",12.0,smoked beef,american_favorite,reguler,no,no


In [4]:
# (rows, columns) of the loaded frame.
pizza.shape

(129, 8)

In [5]:
# Look at one raw price: it is a string with an "Rp" prefix and a comma
# separating the thousands, so it must be parsed before any arithmetic.
pizza.loc[0, "price_rupiah"]

'Rp235,000'

## Convert prices from rupiah to US dollars

In [6]:
# Swap the thousands comma for a dot so that float() can parse the number in
# the next cell. Side effect: "Rp235,000" becomes "Rp235.000", which parses as
# 235.0, i.e. the price ends up expressed in THOUSANDS of rupiah.
# NOTE(review): this relies on every price containing exactly one comma; a
# value such as "Rp1,250,000" would become "Rp1.250.000" and fail to parse.
def comma_to_dot(x):
    """Return the string ``x`` with every "," replaced by "."."""
    return x.replace(",", ".")


pizza["price_rupiah"] = pizza["price_rupiah"].apply(comma_to_dot)

In [7]:
# Remove the leading "Rp" currency marker and convert the rest to a float.
# After the comma-to-dot step the strings look like "Rp235.000", so the result
# is the price in thousands of rupiah (235.0).
def floatize(x):
    """Convert a price string such as "Rp235.000" to the float 235.0.

    The "Rp" prefix is stripped only when it is actually present, so a value
    without the prefix is parsed whole instead of silently losing its first
    two characters.

    Raises:
        ValueError: if the remaining text is not a valid float literal.
    """
    number_text = x[2:] if x.startswith("Rp") else x
    return float(number_text)


pizza["price_rupiah"] = pizza["price_rupiah"].apply(floatize)

In [8]:
# Check the parsed column: it should now be float64, with values in thousands
# of rupiah (the raw 'Rp235,000' shows up as 235.0).
pizza.price_rupiah

0      235.0
1      198.0
2      120.0
3      155.0
4      248.0
       ...  
124     39.0
125     72.0
126     99.0
127     44.0
128     78.0
Name: price_rupiah, Length: 129, dtype: float64

In [9]:
# Exchange rate on 2021-08-14: 1 US dollar = 14,383 rupiah.
# price_rupiah holds THOUSANDS of rupiah at this point (the parsing cells
# above turned "Rp235,000" into 235.0), so the divisor is the rate per
# thousand rupiah: 14,383 / 1,000 = 14.383.
THOUSAND_RUPIAH_PER_USD = 14.383


def rupiah_to_usd(x):
    """Convert a price given in thousands of rupiah to US dollars.

    Works on a single number as well as on a whole pandas Series.
    """
    return x / THOUSAND_RUPIAH_PER_USD


# Plain division is already vectorised over the whole column, so no
# element-wise .apply() is needed.
# NOTE: this cell is not idempotent - running it twice divides twice.
pizza["price_rupiah"] = rupiah_to_usd(pizza["price_rupiah"])

In [10]:
# The column now holds dollar amounts, so give it a name that says so.
pizza = pizza.rename(columns={"price_rupiah": "price_usd"})

In [20]:
# Round the stored prices to two decimals (whole cents). This changes the
# values held in the frame, not merely how they are displayed.
pizza["price_usd"] = pizza["price_usd"].round(2)

In [21]:
# Preview the converted prices.
# NOTE(review): the saved output below already shows "regular" in the size
# column although the typo fix only appears in the following cell, and the
# execution counter jumps from 10 to 20. This suggests the notebook was run
# out of order - restart the kernel and run all cells before trusting the
# saved outputs.
pizza.head()

Unnamed: 0,company,price_usd,diameter,topping,variant,size,extra_sauce,extra_cheese
0,A,16.34,22.0,chicken,double_signature,jumbo,yes,yes
1,A,13.77,20.0,papperoni,double_signature,jumbo,yes,yes
2,A,8.34,16.0,mushrooms,double_signature,regular,yes,yes
3,A,10.78,14.0,smoked beef,double_signature,regular,yes,no
4,A,17.24,18.0,mozzarella,double_signature,jumbo,yes,no


## Correcting a typo in the size column ("reguler" to "regular")

In [22]:
# Fix the misspelt size label; every other value is left untouched.
pizza["size"] = pizza["size"].replace({"reguler": "regular"})

In [23]:
# Confirm that the size column now reads "regular".
pizza.head()

Unnamed: 0,company,price_usd,diameter,topping,variant,size,extra_sauce,extra_cheese
0,A,16.34,22.0,chicken,double_signature,jumbo,yes,yes
1,A,13.77,20.0,papperoni,double_signature,jumbo,yes,yes
2,A,8.34,16.0,mushrooms,double_signature,regular,yes,yes
3,A,10.78,14.0,smoked beef,double_signature,regular,yes,no
4,A,17.24,18.0,mozzarella,double_signature,jumbo,yes,no


## How many different toppings, variants and sizes?

In [24]:
# nunique() counts the distinct non-null values of a column directly.
topping_count = pizza["topping"].nunique()
variant_count = pizza["variant"].nunique()
size_count = pizza["size"].nunique()
print(f"There are {topping_count} different toppings, {variant_count} different variants and {size_count} different sizes.")

There are 12 different toppings, 20 different variants and 6 different sizes.


## The average diameter of the pizzas:

In [25]:
# Mean diameter over all pizzas (missing values, if any, are skipped by default).
pizza["diameter"].mean()

12.976744186046512

## The company with the highest prices

In [29]:
# Total price and total diameter per company.
# The numeric columns are selected explicitly: since pandas 2.0, sum() no
# longer drops non-numeric columns by default, so an unrestricted sum() would
# also "add up" (concatenate) the text columns such as topping and variant.
# NOTE(review): a total depends on how many pizzas each company lists; if the
# question is which company is priciest per pizza, consider .mean() instead.
pizza.groupby("company")[["price_usd", "diameter"]].sum()

Unnamed: 0_level_0,price_usd,diameter
company,Unnamed: 1_level_1,Unnamed: 2_level_1
A,228.49,377.0
B,123.0,302.0
C,139.3,345.0
D,136.81,276.0
E,154.08,374.0
