## Pizza Sales Data Cleaning.

#### Importing Python libraries for data analysis.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#### Importing pizza sales data into Python.

In [None]:
# Load the raw pizza sales workbook into a DataFrame.
# The path is relative, so the file must sit in the working directory.
DATA_FILE = "Pizza Sales Data.xlsx"
orders = pd.read_excel(DATA_FILE)

#### Previewing the first 5 rows of the data.

In [None]:
# Preview the first five rows to sanity-check the load.
orders.head(n=5)

#### Getting info on the data.

In [None]:
# Print the DataFrame summary: column names, non-null counts and dtypes.
orders.info()

In [None]:
# Show the column labels exactly as they were read from the workbook.
orders.columns

### 1. Checking for duplicated rows.

In [None]:
# Count rows that are exact copies of an earlier row across all columns.
is_repeat_row = orders.duplicated()
is_repeat_row.sum()

In [None]:
# Number of distinct pizza_id values. If this equals the row count, pizza_id
# is unique per row and would hide duplicates in a full-row comparison.
distinct_pizza_ids = orders["pizza_id"].nunique()
distinct_pizza_ids

In [None]:
# Count duplicates again, comparing every column except pizza_id
# (presumably a per-row identifier, so it is left out of the comparison).
comparison_columns = [
    'order_id',
    'pizza_name_id',
    'quantity',
    'order_date',
    'order_time',
    'unit_price',
    'total_price',
    'pizza_size',
    'pizza_category',
    'pizza_ingredients',
    'pizza_name',
]
orders.duplicated(subset=comparison_columns).sum()

### 2. Data formatting & standardisation.

In [None]:
# Distinct order_id values in ascending order, for a visual sanity check.
(
    orders["order_id"]
    .sort_values()
    .unique()
)

In [None]:
# Distinct pizza_name_id values, sorted, to spot inconsistent spellings.
(
    orders["pizza_name_id"]
    .sort_values()
    .unique()
)

In [None]:
# Distinct quantity values, sorted, to spot implausible amounts.
(
    orders["quantity"]
    .sort_values()
    .unique()
)

In [None]:
# Distinct order_date values, sorted, to inspect the raw date format.
(
    orders["order_date"]
    .sort_values()
    .unique()
)

In [None]:
# Distinct order_time values, sorted, to inspect the raw time format.
(
    orders["order_time"]
    .sort_values()
    .unique()
)

In [None]:
# Merge the separate date and time columns into one timestamp column.
# NOTE(review): the concatenation assumes order_date holds text values;
# confirm against how the workbook is read.
timestamp_text = orders["order_date"] + " " + orders["order_time"].astype(str)
orders["order_datetime"] = pd.to_datetime(
    timestamp_text,
    format="%d-%m-%Y %H:%M:%S",  # day-month-year date, 24-hour time
)

In [None]:
# Display the new timestamp column to confirm the parse worked.
orders["order_datetime"]

In [None]:
# Distinct unit_price values, sorted, to spot outliers or malformed prices.
(
    orders["unit_price"]
    .sort_values()
    .unique()
)

In [None]:
# Distinct total_price values, sorted, to spot outliers or malformed totals.
(
    orders["total_price"]
    .sort_values()
    .unique()
)

In [None]:
# Distinct pizza_size labels, sorted, to check for inconsistent codes.
(
    orders["pizza_size"]
    .sort_values()
    .unique()
)

In [None]:
# Distinct pizza_category labels, sorted, to check for inconsistent naming.
(
    orders["pizza_category"]
    .sort_values()
    .unique()
)

In [None]:
# Distinct pizza_ingredients strings, sorted, to look for garbled characters.
(
    orders["pizza_ingredients"]
    .sort_values()
    .unique()
)

In [None]:
# Replace every literal "?" in the ingredient text with "N".
# NOTE(review): presumably "?" is a mis-encoded character in the source data;
# confirm that no ingredient legitimately contains a question mark.
# regex=False is required for portability: "?" is a regex metacharacter and
# the default of `regex` differs between pandas versions (True before 2.0).
orders["pizza_ingredients"] = orders["pizza_ingredients"].str.replace(
    "?", "N", regex=False
)

In [None]:
# Distinct pizza_name values, sorted, to check for inconsistent spellings.
(
    orders["pizza_name"]
    .sort_values()
    .unique()
)

### 3. Checking for blank/null values.

In [None]:
# Number of missing (NaN/None) entries in each column.
missing_per_column = orders.isna().sum()
missing_per_column

## Pizza Sales Data Analysis.

#### 1. Total Revenue

In [None]:
# Total revenue: the sum of total_price over every row.
total_revenue = orders["total_price"].sum()
total_revenue

#### 2. Total Pizzas Sold

In [None]:
# Total number of pizzas sold: the sum of quantity over every row.
pizzas_sold = orders["quantity"].sum()
pizzas_sold

#### 3. Total Orders

In [None]:
# Total orders: distinct order_id values (one order_id may span several rows).
order_count = orders["order_id"].nunique()
order_count

#### 4. Average Order Value

In [None]:
# Average order value: total revenue divided by the number of distinct
# orders, rounded to two decimals.
total_revenue = orders["total_price"].sum()
order_count = orders["order_id"].nunique()
(total_revenue / order_count).round(2)

#### 5. Average Pizza Price

In [None]:
# Average price per pizza: total revenue divided by the total quantity sold,
# rounded to two decimals.
total_revenue = orders["total_price"].sum()
pizzas_sold = orders["quantity"].sum()
(total_revenue / pizzas_sold).round(2)

#### 6. Average Pizzas per Order

In [None]:
# Average pizzas per order: total quantity sold divided by the number of
# distinct orders, rounded to two decimals.
pizzas_sold = orders["quantity"].sum()
order_count = orders["order_id"].nunique()
(pizzas_sold / order_count).round(2)