In [1]:
# Packages/Libraries that we will need.

import pandas as pd
import numpy as np
import datetime

# Show every column when a DataFrame is displayed, so wide frames can be
# inspected without pandas eliding the middle columns.

pd.options.display.max_columns = None

In [2]:
# The input is a single Excel workbook; the first three sheets (by position)
# hold the sales, the sales percentages and the lookup table respectively.
# Passing a list to sheet_name reads the workbook once and returns a dict
# keyed by sheet position, instead of re-opening the file for every sheet.

WORKBOOK_PATH = 'Prepping_w12.xlsx'

_sheets = pd.read_excel(WORKBOOK_PATH, sheet_name=[0, 1, 2])
sales = _sheets[0]
sales_percent = _sheets[1]
lookup = _sheets[2]

In [3]:
# Derive the week/year parts of 'Week Commencing' up front.
# Series.dt.week was deprecated in pandas 1.1 and removed in 2.0, so use
# dt.isocalendar(), which returns the same ISO-8601 week number.
iso_calendar = sales_percent['Week Commencing'].dt.isocalendar()

# Zero-pad the week to two digits so that e.g. week 2 becomes '02'.
sales_percent['Week of Year'] = iso_calendar['week'].astype(str).str.zfill(2)

# Take the year from the same ISO calendar as the week: pairing an ISO week
# with the calendar year gives a wrong key for dates around New Year
# (e.g. 2019-12-30 is ISO week 01 of 2020, not of 2019).
# NOTE(review): assumes the sales sheet's key follows ISO year-week; confirm
# if the data ever spans a year boundary.
sales_percent['Year'] = iso_calendar['year']

# Build the year-week key as '<year><zero-padded week>', e.g. '202002'.

sales_percent['Year Week Number'] = sales_percent['Year'].astype(str) + sales_percent['Week of Year']

# Cast the sales table's key to string as well so both sides of the later
# merge on 'Year Week Number' have the same dtype.

sales['Year Week Number'] = sales['Year Week Number'].astype(str)

In [4]:
# The Scent text is not written the same way in the sales and lookup sheets,
# so build a normalized key on each side: spaces removed, everything lowercased.

def _scent_key(scent):
    """Return the given Scent series with spaces stripped out and lowercased."""
    return scent.str.replace(' ','').str.lower()

lookup['Join'] = _scent_key(lookup['Scent'])

# On the sales side the original (messy) Scent column is not needed once the
# key exists, so add the key and drop the raw column in a single chain.

sales = sales.assign(Join=_scent_key(sales['Scent'])).drop(columns='Scent')

In [5]:
# Keep only rows whose share of sales is strictly greater than 0.
# .copy() makes the filtered result an independent frame, so adding columns
# to it afterwards does not raise SettingWithCopyWarning.

sales_percent = sales_percent.loc[sales_percent['Percentage of Sales'] > 0].copy()

In [6]:
# Display the filtered percentage table to confirm the zero-percent rows are
# gone (the original author's run reported 13 rows removed).

sales_percent

Unnamed: 0,Product ID,Week Commencing,Size,Product Type,Percentage of Sales,Week of Year,Year,Year Week Number
0,0c60c126,2020-01-06,0.5l,Liquid,0.33,02,2020,202002
1,0c60c126,2020-01-06,100g,Bar,0.13,02,2020,202002
2,0c60c126,2020-01-06,250ml,Liquid,0.20,02,2020,202002
3,0c60c126,2020-01-06,50g,Bar,0.34,02,2020,202002
4,0c60c126,2020-01-13,0.5l,Liquid,0.80,03,2020,202003
...,...,...,...,...,...,...,...,...
315,bd51102bfa9620233d72512f66,2020-03-02,50g,Bar,0.11,10,2020,202010
316,bd51102bfa9620233d72512f66,2020-03-09,0.5l,Liquid,0.13,11,2020,202011
317,bd51102bfa9620233d72512f66,2020-03-09,100g,Bar,0.24,11,2020,202011
318,bd51102bfa9620233d72512f66,2020-03-09,250ml,Liquid,0.60,11,2020,202011


In [7]:
# Per the original author, the lookup table's 'Product' column is the Product ID
# and Size concatenated into a single string. Rather than splitting that column
# apart, rebuild the same concatenation on the percentage table to join on.
# assign() returns a new frame, so this is safe even though sales_percent was
# produced by a boolean filter (no SettingWithCopyWarning).

sales_percent = sales_percent.assign(
    Product=sales_percent['Product ID'] + sales_percent['Size']
)

In [8]:
# First join: attach the lookup columns to each percentage row by matching on
# the concatenated 'Product' key (inner join, so unmatched rows are dropped).

join_1 = sales_percent.merge(lookup, how='inner', on='Product')

In [9]:
# Second join: match the sales rows to the result of the first join on both
# the year-week key and the normalized scent key (inner join).

join_keys = ['Year Week Number','Join']
join_2 = sales.merge(join_1, how='inner', on=join_keys)

In [10]:
# Remove the helper 'Product' key now that the joins are done. The 'Join'
# helper column is left in place here; it is excluded later when the final
# columns are selected.

join_2 = join_2.drop('Product', axis=1)

In [11]:
# Actual sales per row = the scent's total sales multiplied by that row's
# share of sales, rounded to 2 decimal places.
join_2['Sales'] = (
    join_2['Total Scent Sales']
    .mul(join_2['Percentage of Sales'])
    .round(2)
)

In [12]:
# Narrow the joined frame down to just the columns required in the output,
# in the order they should appear.

solution_columns = ['Year Week Number', 'Scent','Size','Product Type','Sales']
solution = join_2.loc[:, solution_columns]

In [13]:
# Display the final solution table as this cell's output.

solution

Unnamed: 0,Year Week Number,Scent,Size,Product Type,Sales
0,202002,Coconut,0.5l,Liquid,0.20
1,202002,Coconut,100g,Bar,0.81
2,202002,Coconut,250ml,Liquid,15.28
3,202002,Coconut,50g,Bar,4.07
4,202002,Honey,0.5l,Liquid,1141.49
...,...,...,...,...,...
302,202011,Tea Tree,50g,Bar,298.21
303,202011,Vanilla,0.5l,Liquid,252.97
304,202011,Vanilla,100g,Bar,469.80
305,202011,Vanilla,250ml,Liquid,1084.16
