# Origami Sales Dataset

In [151]:
# import libraries
import pandas as pd
import numpy as np
import datetime


In [152]:
# Define the dataset parameters
dates = pd.date_range(start="2015-01-01", end="2019-12-31")
countries = ['Canada', 'United Kingdom', 'United States', 'France', 'Germany', 'Italy', 'Japan', 'China', 'India']
# Each cost_data row is [product name, revenue per unit, cost per unit],
# in the same order as cost_columns below.
cost_data = [['Origami Crane', 7.0, 4.0],
             ['Origami Flower', 9.0, 5.5],
             ['Origami Boat', 6.0, 4.0],
             ['Origami Dragon', 10.25, 9.75],
             ['Origami Penguin', 8.5, 3.5]]
products = [item[0] for item in cost_data]
df_columns = ['Date', 'Country', 'Product']
cost_columns = ['Product', 'Revenue per unit', 'Cost per unit']
# Seed NumPy's global generator so the sampled unit counts are repeatable.
np.random.seed(42)

# Construct the dataframe
# One row per (date, country, product) combination; dates vary slowest and
# products fastest, which fixes which random draw lands on which row.
permutations = [[i, j, k] for i in dates for j in countries for k in products]
df = pd.DataFrame(data=permutations, columns=df_columns)

# Log-normal draws (underlying normal: mean 1, sigma 2), rounded to whole units.
# np.rint rounds halves to even, the same rule as the built-in round().
units_sold = np.random.lognormal(1, 2, len(df))
df['Units sold'] = np.rint(units_sold).astype('int64')

# Attach per-unit revenue and cost. The key is named explicitly so that adding
# another shared column later cannot silently change the join, and
# validate= raises if a product is listed twice in cost_data.
cost_df = pd.DataFrame(data=cost_data, columns=cost_columns)
df = pd.merge(df, cost_df, on='Product', validate='many_to_one')
assert len(df) == len(permutations), "merge dropped or duplicated rows"

# The row order produced by an inner merge has differed between pandas
# releases (grouped by key vs. left-frame order). Pin it to: products in
# cost_data order, then the original date/country order within each product,
# so the exported file is the same whichever pandas version runs this cell.
row_order = np.argsort(pd.Categorical(df['Product'], categories=products).codes, kind='stable')
df = df.iloc[row_order].reset_index(drop=True)

# Derived columns: profit per row and calendar parts of the date.
df['Profit'] = (df['Revenue per unit'] - df['Cost per unit']) * df['Units sold']
df['Day'] = df.Date.dt.day
df['Month'] = df.Date.dt.month
df['Month name'] = df.Date.dt.month_name()
df['Year'] = df.Date.dt.year

df

Unnamed: 0,Date,Country,Product,Units sold,Revenue per unit,Cost per unit,Profit,Day,Month,Month name,Year
0,2015-01-01,Canada,Origami Crane,7,7.0,4.0,21.0,1,1,January,2015
1,2015-01-01,United Kingdom,Origami Crane,2,7.0,4.0,6.0,1,1,January,2015
2,2015-01-01,United States,Origami Crane,1,7.0,4.0,3.0,1,1,January,2015
3,2015-01-01,France,Origami Crane,1,7.0,4.0,3.0,1,1,January,2015
4,2015-01-01,Germany,Origami Crane,51,7.0,4.0,153.0,1,1,January,2015
...,...,...,...,...,...,...,...,...,...,...,...
82165,2019-12-31,Germany,Origami Penguin,1,8.5,3.5,5.0,31,12,December,2019
82166,2019-12-31,Italy,Origami Penguin,9,8.5,3.5,45.0,31,12,December,2019
82167,2019-12-31,Japan,Origami Penguin,7,8.5,3.5,35.0,31,12,December,2019
82168,2019-12-31,China,Origami Penguin,98,8.5,3.5,490.0,31,12,December,2019


In [153]:
# TODO: add errors to the dataset (not implemented yet; this cell is an empty placeholder)


In [154]:
# Write the finished dataset to a CSV file in the working directory.
# index=False keeps the DataFrame's row index out of the file.
output_path = 'origami_sales.csv'
df.to_csv(output_path, index=False)