In [18]:
import pandas
import numpy
import datetime

# Load the raw pizza sales export (one row per order line item) that the
# cells below work from. The path is relative to the notebook's directory.
SALES_CSV_PATH = "Data Model - Pizza Sales.csv"
sales = pandas.read_csv(SALES_CSV_PATH)

In [19]:
# Show the column labels of the loaded sales table.
sales.columns

Index(['order_details_id', 'order_id', 'pizza_id', 'quantity', 'order_date',
       'order_time', 'unit_price', 'total_price', 'pizza_size',
       'pizza_category', 'pizza_ingredients', 'pizza_name'],
      dtype='object')

In [20]:
# Print the distinct values found in each column of the sales table
# (one print call per column, in column order).
for column_name in sales.columns:
    distinct_values = sales[column_name].unique()
    print(distinct_values)

[    1     2     3 ... 48618 48619 48620]
[    1     2     3 ... 21348 21349 21350]
['hawaiian_m' 'classic_dlx_m' 'five_cheese_l' 'ital_supr_l' 'mexicana_m'
 'thai_ckn_l' 'ital_supr_m' 'prsc_argla_l' 'bbq_ckn_s' 'the_greek_s'
 'spinach_supr_s' 'classic_dlx_s' 'green_garden_s' 'ital_cpcllo_l'
 'ital_supr_s' 'mexicana_s' 'spicy_ital_l' 'spin_pesto_l' 'veggie_veg_s'
 'mexicana_l' 'southw_ckn_l' 'bbq_ckn_l' 'cali_ckn_l' 'cali_ckn_m'
 'pepperoni_l' 'cali_ckn_s' 'ckn_pesto_l' 'big_meat_s' 'soppressata_l'
 'four_cheese_l' 'napolitana_s' 'calabrese_m' 'four_cheese_m'
 'ital_veggie_s' 'mediterraneo_m' 'peppr_salami_s' 'spinach_fet_l'
 'napolitana_l' 'sicilian_l' 'ital_cpcllo_m' 'southw_ckn_s' 'bbq_ckn_m'
 'pepperoni_m' 'prsc_argla_s' 'sicilian_m' 'veggie_veg_l' 'ckn_alfredo_s'
 'pepperoni_s' 'green_garden_l' 'green_garden_m' 'pep_msh_pep_l'
 'hawaiian_s' 'peppr_salami_m' 'ckn_alfredo_m' 'peppr_salami_l'
 'spin_pesto_s' 'thai_ckn_m' 'classic_dlx_l' 'ckn_pesto_m' 'the_greek_xl'
 'hawaiian_l' 'pep

In [21]:
#convert order dates to a nicer format
def changeDateFormat(date: str) -> str:
    """Normalise an order date string to ``YYYY-MM-DD``.

    Accepts the raw ``MM/DD/YYYY`` form and also the already-normalised
    ``YYYY-MM-DD`` form, so applying this function to a column twice (for
    example by re-running the cell) is harmless instead of raising.

    Args:
        date: The date text to convert.

    Returns:
        The same calendar date formatted as ``YYYY-MM-DD``.

    Raises:
        ValueError: If ``date`` matches neither supported format.
    """
    # The two formats cannot both match one string (slashes vs. dashes),
    # so the order of the attempts does not affect the result.
    for known_format in ("%m/%d/%Y", "%Y-%m-%d"):
        try:
            parsed = datetime.datetime.strptime(date, known_format)
        except ValueError:
            continue
        return parsed.strftime("%Y-%m-%d")
    raise ValueError(
        f"unrecognised order date {date!r}; expected MM/DD/YYYY or YYYY-MM-DD"
    )

# Rewrite every order date in place using changeDateFormat.
sales["order_date"] = sales["order_date"].map(changeDateFormat)

#convert order dates to an indexed list for graphing/counting purposes
def dateToIndex(date: str) -> int:
    """Return the 1-based day of the year for a ``YYYY-MM-DD`` date string.

    Args:
        date: Date text in ``YYYY-MM-DD`` form.

    Returns:
        1 for January 1st, up to 365 (or 366 in a leap year) for December 31st.

    Raises:
        ValueError: If ``date`` is not in ``YYYY-MM-DD`` form.
    """
    day = datetime.datetime.strptime(date, "%Y-%m-%d").date()
    first_of_year = datetime.date(day.year, 1, 1)
    # Days elapsed since January 1st, shifted so that January 1st itself is 1.
    return (day - first_of_year).days + 1

def dateToWeekday(date: str) -> str:
    """Return the English weekday name for a ``YYYY-MM-DD`` date string.

    Args:
        date: Date text in ``YYYY-MM-DD`` form.

    Returns:
        One of ``"Monday"`` through ``"Sunday"``.

    Raises:
        ValueError: If ``date`` is not in ``YYYY-MM-DD`` form.
    """
    weekday_names = (
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    )
    parsed = datetime.datetime.strptime(date, "%Y-%m-%d")
    # isoweekday() numbers Monday as 1, so shift down to a zero-based position.
    return weekday_names[parsed.isoweekday() - 1]

# Derive the day-of-year index and the weekday name from each order date.
sales["order_date_index"] = sales["order_date"].map(dateToIndex)
sales["order_date_weekday"] = sales["order_date"].map(dateToWeekday)

In [22]:
# Collapse the line-item table into one row per order, indexed by order_id.
#
# A single grouped aggregation is used instead of appending rows one at a
# time with .loc: it avoids repeatedly enlarging the frame, gives correct
# totals even if the rows of an order are not adjacent in `sales`, and
# yields an empty frame (rather than a bogus row) when `sales` is empty.
orders = (
    sales
    # sort=False keeps orders in the order they first appear in `sales`.
    .groupby("order_id", sort=False)
    .agg(
        # total number of pizzas in the order
        item_count=("quantity", "sum"),
        # number of line items (rows) in the order
        unique_item_count=("quantity", "size"),
        total_price=("total_price", "sum"),
        # date/time fields are taken from the order's first line item
        order_date_index=("order_date_index", "first"),
        order_date=("order_date", "first"),
        order_date_weekday=("order_date_weekday", "first"),
        order_time=("order_time", "first"),
    )
    # Leave the index unnamed so the exported CSV header is unchanged.
    .rename_axis(None)
)

orders

Unnamed: 0,item_count,unique_item_count,total_price,order_date_index,order_date,order_date_weekday,order_time
1,1,1,13.25,1,2015-01-01,Thursday,11:38:36
2,5,5,92.00,1,2015-01-01,Thursday,11:57:40
3,2,2,37.25,1,2015-01-01,Thursday,12:12:28
4,1,1,16.50,1,2015-01-01,Thursday,12:16:31
5,1,1,16.50,1,2015-01-01,Thursday,12:21:30
...,...,...,...,...,...,...,...
21346,4,4,62.25,365,2015-12-31,Thursday,20:51:07
21347,4,4,66.50,365,2015-12-31,Thursday,21:14:37
21348,3,3,46.70,365,2015-12-31,Thursday,21:23:10
21349,1,1,20.25,365,2015-12-31,Thursday,22:09:54


In [23]:
#create a separate dataframe to help count daily statistics
#
# One row per day, indexed by the day-of-year value in order_date_index.
# A grouped aggregation replaces the row-by-row .loc appends: it avoids
# repeatedly enlarging the frame, gives correct totals even if a day's
# orders are not adjacent in `orders`, and yields an empty frame (rather
# than a bogus row) when `orders` is empty.
daily_sales = (
    orders
    # sort=False keeps days in the order they first appear in `orders`.
    .groupby("order_date_index", sort=False)
    .agg(
        item_count=("item_count", "sum"),
        # number of orders placed that day
        order_count=("item_count", "size"),
        revenue=("total_price", "sum"),
        # date and weekday are taken from the day's first order
        date=("order_date", "first"),
        weekday=("order_date_weekday", "first"),
    )
    # Leave the index unnamed so the exported CSV header is unchanged.
    .rename_axis(None)
)

#remove all the days with no sales
# (days without any order never get a row; this only drops days whose
# orders add up to zero items)
daily_sales = daily_sales.drop(daily_sales.index[daily_sales["item_count"] == 0])

daily_sales

Unnamed: 0,item_count,order_count,revenue,date,weekday
1,162,69,2713.85,2015-01-01,Thursday
2,165,67,2731.90,2015-01-02,Friday
3,158,66,2662.40,2015-01-03,Saturday
4,106,52,1755.45,2015-01-04,Sunday
5,125,54,2065.95,2015-01-05,Monday
...,...,...,...,...,...
361,89,35,1419.00,2015-12-27,Sunday
362,102,39,1637.20,2015-12-28,Monday
363,80,27,1353.25,2015-12-29,Tuesday
364,82,32,1337.80,2015-12-30,Wednesday


In [24]:
# Write the three tables to CSV files in the working directory.
for frame, csv_path in (
    (sales, "sales.csv"),
    (daily_sales, "daily_sales.csv"),
    (orders, "orders.csv"),
):
    frame.to_csv(csv_path)