#### Create an extract of actual sales by store and trading week, for the whole store (aggregated across OLGs)
##### This is for the timeframe after the new layout was put in place

In [22]:
import pandas as pd

# Base directory; the file names below are appended to it directly, so it keeps its trailing slash
PATH = '/home/tbrownex/data/Hackett/JLP/'

# Input files, relative to PATH
FILE = 'All_Data_for_10_Treatment_Branches.csv'   # master file
OLGFILE = "mapStore-LG.csv"                        # each row: store first, then its LGs
BS = "UseCase1/Banstead/Banstead_summary.csv"      # Banstead has imputed values (not in the main file)

# Stores belonging to each wave. Banstead is processed separately.
WAVE1 = [
    "Twickenham",
    "Thame",
    "Wokingham",
    "Harrogate",
]
WAVE2 = [
    "Chandlers Ford",
    "Sidmouth",
    "Barry",
    "Westbury Park",
    "Monmouth",
]

# (start, end) Trading Week labels bounding each wave
WEEKS = dict(
    wave1=("2017(16)", "2017(35)"),
    wave2=("2017(22)", "2017(35)"),
)

In [23]:
# Only these four columns of the master file are needed downstream
cols = [
    "Business Unit Long Name",
    "Trading Week",
    "Layout Group",
    "Line Sales £",
]
df = pd.read_csv(PATH+FILE, usecols=cols)

# Report how many rows were loaded
row_count = df.shape[0]
print("{:,.0f} rows in the master file".format(row_count))

649,291 rows in the master file


##### Create a dictionary mapping Store:OLGs

In [24]:
# Build {store: [LG, LG, ...]} from the mapping file.
# Each line is comma-separated: the first entry holds the store, the rest are its LGs.
with open(PATH+OLGFILE, "r") as mapping:
    rows = (line.rstrip().split(",") for line in mapping)
    lgDict = {row[0]: row[1:] for row in rows}

##### Process Wave 1

In [25]:
# Restrict the master data to Wave 1's trading-week window.
# NOTE(review): both bounds are strict, so the start and end weeks themselves are
# dropped -- confirm that is intended.
# NOTE(review): the comparison uses the raw column values; if Trading Week holds
# labels like "2017(16)" the ordering is lexicographic -- TODO confirm week numbers
# are zero-padded.
first_week, last_week = WEEKS["wave1"]
in_window = (df["Trading Week"] > first_week) & (df["Trading Week"] < last_week)
df1 = df.loc[in_window]

# For each store in the wave keep only rows whose Layout Group is listed for that
# store in lgDict (a store missing from lgDict raises KeyError).
dfList = [
    df1.loc[
        (df1["Business Unit Long Name"] == store)
        & df1["Layout Group"].isin(lgDict[store])
    ]
    for store in WAVE1
]
wave1 = pd.concat(dfList)

##### Process Wave 2

In [26]:
# Restrict the master data to Wave 2's trading-week window.
# NOTE(review): both bounds are strict, so the start and end weeks themselves are
# dropped -- confirm that is intended.
# NOTE(review): the comparison uses the raw column values; if Trading Week holds
# labels like "2017(22)" the ordering is lexicographic -- TODO confirm week numbers
# are zero-padded.
first_week, last_week = WEEKS["wave2"]
in_window = (df["Trading Week"] > first_week) & (df["Trading Week"] < last_week)
df2 = df.loc[in_window]

# For each store in the wave keep only rows whose Layout Group is listed for that
# store in lgDict (a store missing from lgDict raises KeyError).
dfList = [
    df2.loc[
        (df2["Business Unit Long Name"] == store)
        & df2["Layout Group"].isin(lgDict[store])
    ]
    for store in WAVE2
]
wave2 = pd.concat(dfList)

In [27]:
# Stack the rows from both waves into a single frame
combined = pd.concat([wave1, wave2])

# Sum Line Sales across all retained Layout Groups for each store and trading week,
# then turn the group keys back into ordinary columns
summary = (
    combined
    .groupby(["Business Unit Long Name", "Trading Week"])["Line Sales £"]
    .sum()
    .reset_index()
)

##### Add Banstead

In [28]:
# Load the separately prepared Banstead summary.
# NOTE(review): no trading-week filter is applied to this file here -- presumably it
# already covers only the post-layout weeks; verify.
bs = pd.read_csv(PATH+BS)

# Use the imputed figures as the sales value: move them into "Line Sales £"
# (overwriting that column if the file already has one) and drop the imputed column,
# so the frame matches the format of "summary"
bs["Line Sales £"] = bs.pop("Line Sales - imputed")

# Tag every row with the store name
bs["Business Unit Long Name"] = "Banstead"

# Keep just the columns "summary" has, in the same order
cols = ["Business Unit Long Name", "Trading Week", "Line Sales £"]
bs = bs[cols]

In [30]:
# Append the Banstead rows beneath the per-store weekly totals
final = pd.concat([summary, bs])

# Write the extract with a header row and without the index.
# NOTE(review): PATH already ends with "/", so this path contains "//"; harmless on
# POSIX, but inconsistent with how the other file names are joined.
out_file = PATH+"/UseCase1/salesForecast/store_sales_by_OLG-post.csv"
final.to_csv(out_file, header=True, index=False)