In [None]:
# STEP 1: Upload the dataset
import io

import pandas as pd
from google.colab import files

# Expected workbook name and the location where it is read from when nothing
# is uploaded in this run (e.g. the file is already present in the session).
DATA_FILENAME = 'Data Model - Pizza Sales.xlsx'
DATA_PATH = f'/content/{DATA_FILENAME}'

# files.upload() opens the Colab file picker and returns a dict that maps each
# uploaded file name to the uploaded bytes.
uploaded = files.upload()

if uploaded:
    # Read the bytes that were just uploaded rather than a hard-coded path:
    # the path only works when the file has exactly the expected name, and it
    # could silently pick up a stale copy left in the session by an earlier
    # upload.  Prefer the expected name if several files were selected.
    uploaded_name = DATA_FILENAME if DATA_FILENAME in uploaded else next(iter(uploaded))
    df = pd.read_excel(io.BytesIO(uploaded[uploaded_name]))
else:
    # Nothing was uploaded (dialog cancelled): fall back to the file on disk.
    df = pd.read_excel(DATA_PATH)

In [None]:
# STEP 2: Data Cleaning
# A notebook cell only renders the value of its last bare expression, so the
# missing-value summary used to be computed and then discarded.  Both checks
# are printed explicitly so each result is visible.
missing_per_column = df.isnull().sum()   # number of missing values in each column
duplicate_rows = df.duplicated().sum()   # number of fully duplicated rows

print("Missing values per column:")
print(missing_per_column)
print(f"Duplicated rows: {duplicate_rows}")

In [None]:
# STEP 3: EDA
# Only the last bare expression of a cell is rendered, so head(), describe(),
# shape and dtypes used to be evaluated without ever being shown.  Each summary
# is printed explicitly; df.info() already writes its report to stdout.
print("First 5 rows:")
print(df.head())

print("\nStructure:")
df.info()

print("\nSummary statistics:")
print(df.describe())

print("\nShape (rows, columns):")
print(df.shape)

print("\nColumn dtypes:")
print(df.dtypes)

print("\nColumn names:")
print(df.columns)

In [None]:
# Analysis 1: top 5 best-selling pizzas
import matplotlib.pyplot as plt
import seaborn as sns

# Count the rows per pizza name and keep the five most frequent names as a
# two-column frame: pizza_name, quantity.
top5_pizza = (
    df['pizza_name']
    .value_counts()
    .head(5)
    .rename_axis('pizza_name')
    .reset_index(name='quantity')
)
print(top5_pizza)

# Visualization
pizza_color = ['#E16E03']
sns.set(style="ticks")

fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(
    data=top5_pizza,
    x='pizza_name',
    y='quantity',
    color=pizza_color[0],
    ax=ax,
)

# Write each count in white just below the top edge of its bar; bar i is drawn
# at x position i because the frame has a default 0..n-1 index.
for bar_pos, order_count in enumerate(top5_pizza['quantity']):
    ax.text(
        bar_pos,
        order_count - 100,
        order_count,
        ha='center',
        va='top',
        color='white',
        fontweight='bold',
    )

ax.set_title('Top 5 Pizza Terlaris Berdasarkan Jumlah Pesanan')
ax.set_xlabel('Nama Pizza')
ax.set_ylabel('Jumlah Pesanan')

# Lay out once, tilt the tick labels, then lay out again with the tilted labels.
fig.tight_layout()
plt.xticks(rotation=10)
fig.tight_layout()
plt.show()


In [None]:
# Analysis 2: top 3 pizzas by revenue
import matplotlib.pyplot as plt
import seaborn as sns

# Adds a 'revenue' column to df that holds the same values as 'total_price'.
df['revenue'] = df['total_price']

# Sum revenue per pizza name and keep the three largest totals.
revenue_by_pizza = df.groupby('pizza_name')['revenue'].sum()
top3_revenue = revenue_by_pizza.sort_values(ascending=False).head(3).reset_index()
print(top3_revenue)

# Visualization
pizza_color = ['#536304']
sns.set(style="ticks")

fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(
    data=top3_revenue,
    x='pizza_name',
    y='revenue',
    color=pizza_color[0],
    ax=ax,
)

# Write each total (rounded to 2 decimals) in white, anchored by its bottom
# edge 5000 units below the top of the bar.
for bar_pos, pizza_revenue in enumerate(top3_revenue['revenue']):
    ax.text(
        bar_pos,
        pizza_revenue - 5000,
        round(pizza_revenue, 2),
        ha='center',
        va='bottom',
        color='white',
        fontweight='bold',
    )

ax.set_title('Top 3 Pizza dengan Revenue Tertinggi')
ax.set_xlabel('Nama Pizza')
ax.set_ylabel('Revenue (USD)')

# Lay out once, tilt the tick labels, then lay out again with the tilted labels.
fig.tight_layout()
plt.xticks(rotation=10)
fig.tight_layout()
plt.show()





In [None]:
# Analysis 3: pizza size distribution
import matplotlib.pyplot as plt
import seaborn as sns

# Number of rows per pizza size, most frequent size first.
size_pizza = df['pizza_size'].value_counts().reset_index()
size_pizza.columns = ['size_pizza', 'quantity']
print(size_pizza)

# Visualization
pizza_color = ['#B92F17']
sns.set(style="ticks")

# The bars are drawn in alphabetical size order, whereas size_pizza is ordered
# by descending count.  The value labels must therefore be positioned from the
# same order list as the bars; using the frame's row index would put a number
# on the wrong bar whenever the two orders differ.
size_order = sorted(size_pizza['size_pizza'])
quantity_by_size = size_pizza.set_index('size_pizza')['quantity']

plt.figure(figsize=(6, 5))
sns.barplot(
    data=size_pizza,
    x='size_pizza',
    y='quantity',
    color=pizza_color[0],
    order=size_order,
)

# Bar i shows size_order[i]; write that size's count just above the bar.
for bar_position, size_label in enumerate(size_order):
    size_quantity = quantity_by_size[size_label]
    plt.text(
        x=bar_position,
        y=size_quantity + 700,
        s=size_quantity,
        ha='center',
        va='top',
        color='black',
        fontweight='bold')

plt.title('Distribusi Ukuran Pizza')
plt.xlabel('Ukuran Pizza')
plt.ylabel('Jumlah Pesanan')
plt.tight_layout()
plt.show()

In [None]:
# Analysis 4: which pizzas are ordered most often in size large?
import matplotlib.pyplot as plt
import seaborn as sns

# Keep only the rows whose size is 'L', then total the 'quantity' column per
# pizza name and keep the five largest totals.
is_large = df['pizza_size'] == 'L'
large_pizza = df[is_large].reset_index()
large_pizza_quantity = (
    large_pizza
    .groupby('pizza_name')['quantity']
    .sum()
    .sort_values(ascending=False)
    .head(5)
)
print(large_pizza_quantity)

# Visualization
pizza_color = ['#EA9109']
sns.set(style="ticks")

fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    x=large_pizza_quantity.index,
    y=large_pizza_quantity.values,
    color=pizza_color[0],
    ax=ax,
)

# Write each total as an integer in white, anchored by its bottom edge 100
# units below the top of the bar.
for bar_pos, sold in enumerate(large_pizza_quantity.values):
    ax.text(
        bar_pos,
        sold - 100,
        int(sold),
        ha='center',
        va='bottom',
        color='white',
        fontweight='bold',
    )

ax.set_title('Top 5 Pizza Terlaris (Ukuran Large)')
ax.set_xlabel('Nama Pizza')
ax.set_ylabel('Jumlah Terjual')
plt.xticks(rotation=10)
fig.tight_layout()
plt.show()