In [20]:
# Import appropriate libraries

import pyodbc
import os.path
import glob
import pandas as pd
import numpy as np
from datetime import date
from datetime import datetime as dt
from dateutil.relativedelta import relativedelta
import getpass

import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation notices from pandas and other
# libraries used below — consider narrowing the filter.
warnings.simplefilter("ignore")

# Do not truncate the column list when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

In [21]:
# Create some date variables

# Read the clock once and derive every date from that single reading, so the
# values cannot disagree if the cell happens to run across midnight.
start = dt.now()
today_date = start.date()

# First day of the current month.
tm_day_one = today_date.replace(day=1)

# First day of the month eleven months before the current one.
# NOTE(review): the name suggests "this month last year" — confirm that an
# 11-month (rather than 12-month) offset is the intended window.
tmly_day_one = tm_day_one - relativedelta(months=11)

# First day of the month two months before the current one.
replenish = tm_day_one - relativedelta(months=2)

In [22]:
# Open the ODBC connection to the CBQ2 database on sql-2-db and create a
# cursor on it. No later cell shown here uses either object, because the
# read_sql_query call below is commented out.
conn = pyodbc.connect(
    'Driver={SQL Server};Server=sql-2-db;Database=CBQ2;'
)
cursor = conn.cursor()

In [23]:
# SQL text for the item lookup.
# - Reads ebs.Item, left-joined to ship dates taken from pm.ItemInfo rows where
#   IMPRESSION = 1 and SHIPDATE is not null.
# - Collapses the listed BAR-* publishing groups into the single group 'BAR'.
# - Keeps only the publisher codes listed in the WHERE clause.
# The f-prefix has no placeholders, so the text is used exactly as written.
# The only consumer visible in this notebook is the commented-out
# pd.read_sql_query call in the next cell.
# NOTE(review): the select list exposes ITEM_TITLE under the alias ISBN while
# the join condition uses i.ISBN — confirm both are intended.
query_item = f'''
SELECT
    i.PUBLISHER_CODE Publisher
    ,i.PRODUCT_TYPE pt
    ,i.REPORTING_CATEGORY cat
    ,case
        when i.PUBLISHING_GROUP in('BAR-ENT','BAR-ART','BAR-FWN','BAR-LIF','BAR-CHL') then 'BAR'
        else i.PUBLISHING_GROUP
    end pgrp
    ,i.ITEM_TITLE ISBN
    ,i.SHORT_TITLE title
    ,i.PRICE_AMOUNT price
    ,convert(char,coalesce(convert(varchar,i.AMORTIZATION_DATE,101),shdt.shipdate),101) pub
    ,case
        when i.AMORTIZATION_DATE is not null then year(i.AMORTIZATION_DATE)
        when substring(i.season,1,4) <> 'No S' then substring(i.season,1,4)
        else year(getdate())
    end [year]
    ,[AVAILABILITY_STATUS] avail
    ,[FORMAT_DESCRIPTION] Product_Type
FROM ebs.Item i
    left join (SELECT [ISBN],[SHIPDATE] FROM [CBQ2].[pm].[ItemInfo] ii WHERE ii.IMPRESSION = 1 and ii.SHIPDATE is not null) shdt on shdt.ISBN = i.ISBN 
WHERE
    i.PUBLISHER_CODE in('Chronicle','Hardie Grant Publishing','Levine Querido', 'Sierra Club', 'Princeton', 'Laurence King', 'Tourbillon', 'Galison', 'Creative Company', 'Do Books', 'AMMO Books')
'''

In [24]:
# df_item = pd.read_sql_query(query_item,conn)

In [25]:
# df_item.columns

In [26]:
# Find the newest "Detail" and "Summary" sales-diagnostic exports in the archive folder.
folder_path = r'G:\SALES\Amazon\rbh\weekly_customer_order\sales_diagnostic_archive'
file_glob_detail = r'\*Detail*csv'
file_glob_summary = r'\*Summary*csv'


def _latest_match(folder, pattern):
    """Return the file matching ``folder + pattern`` with the largest ``os.path.getctime``.

    ``getctime`` is the creation time on Windows and the last metadata-change
    time on Unix.

    Raises:
        FileNotFoundError: if nothing matches, instead of the opaque
            ``ValueError`` that ``max()`` raises on an empty sequence.
    """
    full_pattern = folder + pattern
    matches = glob.glob(full_pattern)
    if not matches:
        raise FileNotFoundError(f"No file matches {full_pattern!r}")
    return max(matches, key=os.path.getctime)


file_detail = _latest_match(folder_path, file_glob_detail)
file_summary = _latest_match(folder_path, file_glob_summary)

print(f"The latest file is:\n{file_detail}")
print()
print(f"The latest summary file is:\n{file_summary}")

The latest file is:
G:\SALES\Amazon\rbh\weekly_customer_order\sales_diagnostic_archive\US_Detail View_Sales Diagnostic.csv

The latest summary file is:
G:\SALES\Amazon\rbh\weekly_customer_order\sales_diagnostic_archive\US_Summary View_Sales Diagnostic.csv


In [27]:
# Columns kept from the detail export.
cols = ['ASIN','Conversion Rate','EAN','Glance Views','ISBN-13','Ordered Units','Ordered Units - Prior Period'
        ,'Brand','Binding','Product Title','Release Date','Ordered Revenue']

# For both exports the first row is skipped and an em dash is read as missing.
# 'Release Date' is parsed to datetime because later cells use .dt and date
# comparisons on it.
# The deprecated infer_datetime_format keyword is no longer passed; since
# pandas 2.0 it has no effect, and False was the default before that.
df_detail = pd.read_csv(
    file_detail,
    skiprows=1,
    na_values='—',
    usecols=cols,
    parse_dates=['Release Date'],
)

df_summary = pd.read_csv(
    file_summary,
    skiprows=1,
    na_values='—',
    usecols=['KPIs', 'Reported', 'Prior Period', 'Last Year'],
)

# # Probably should be updated every few months
# catalog = 'G:\SALES\Amazon\RBH\weekly_customer_order\sales_diagnostic_archive\Product Catalog_US.csv'

# df_catalog = pd.read_csv(catalog
#                         ,skiprows = 1
#                         ,na_values = '—'
#                         ,usecols=['ASIN', 'ISBN-13', 'EAN'])

In [28]:
# Creating a new column called "ISBN" that grabs the EAN or ISBN-13, whichever one isn't "UNKNOWN"
# condlist = [(df_detail['EAN']=="UNKNOWN"),(df_detail['ISBN-13']=="UNKNOWN"),(df_detail['ISBN-13']!="UNKNOWN")\
#             &(df_detail['ISBN-13']!="UNKNOWN")]
# choicelist = [df_detail['ISBN-13'],df_detail['EAN'],df_detail['EAN']]

# df_detail['ISBN'] = np.select(condlist,choicelist)

In [29]:
def _to_float(series, strip_chars):
    """Return ``series`` as float, removing each of ``strip_chars`` from text values first.

    A column that is already numeric is converted directly, because the
    ``.str`` accessor is only available on text columns. This covers re-running
    the cell and exports where pandas already parsed the column as numbers.
    """
    if pd.api.types.is_numeric_dtype(series):
        return series.astype(float)
    for char in strip_chars:
        # regex=False: "$" must be removed literally, never read as a regex anchor,
        # whatever the default of the installed pandas version is.
        series = series.str.replace(char, "", regex=False)
    return series.astype(float)


print(df_detail.shape)
df_detail['Ordered Units'] = _to_float(df_detail['Ordered Units'], strip_chars=(",",))
df_detail['Ordered Revenue'] = _to_float(df_detail['Ordered Revenue'], strip_chars=(",", "$"))
# Row filter kept disabled, as in the original cell:
# df_detail = df_detail[(df_detail['EAN']!= 'UNKNOWN') & (df_detail['ISBN-13']!= 'UNKNOWN')]
print(df_detail.shape)

(8002, 12)
(8002, 12)


In [30]:
# Spot check: show the detail rows for one ASIN.
df_detail[df_detail['ASIN'].eq('1399600915')]

Unnamed: 0,ASIN,EAN,ISBN-13,Product Title,Brand,Binding,Release Date,Ordered Revenue,Ordered Units,Ordered Units - Prior Period,Glance Views,Conversion Rate
68,1399600915,9781399600910,,The World of Agatha Christie: 1000 Piece Jigsa...,Laurence King Publishing,game_puzzle,2022-08-16,4655.47,242.0,27.37%,2033,11.90%


In [31]:
# Preview the first five rows of the summary KPIs.
df_summary.head(5)

Unnamed: 0,KPIs,Reported,Prior Period,Last Year
0,Ordered Revenue,"$1,979,109.04",14.90%,-24.27%
1,Ordered Units,138316,11.86%,-23.81%
2,Average Sales Price,$14.31,2.72%,-0.61%
3,Glance Views,1300316,1.22%,-15.18%
4,Conversion Rate,10.64%,10.52%,-10.17%


# Look at Tables

In [32]:
# # Creating a df with only the ISBN and the availability fields
# df_avail = df_item.loc[:,['ISBN','avail','Product_Type','Publisher']]

# Top 20 Pre-Ordered NYP Titles – All Publishers

In [33]:
# df_top20_nyp = df_detail.merge(df_avail,how = 'left',left_on = 'ISBN',right_on = 'ISBN')

# Despite its name, today_date_str holds a numpy datetime64, not a string.
today_date_str = np.datetime64(today_date)
nyp_columns = ['ASIN','EAN','ISBN-13','Product Title','Brand','Binding','Release Date','Ordered Units']

# Titles releasing today or later, ranked by ordered units; keep the first 20.
df_top20_nyp = (
    df_detail.loc[df_detail['Release Date'] >= today_date_str, nyp_columns]
    .sort_values(by='Ordered Units', ascending=False)
    .head(20)
)

# Top 20 Ordered Frontlist Titles - All Publishers

In [34]:
# df_top20_fl = df_detail.merge(df_item,how = 'left',left_on = 'ISBN',right_on = 'ISBN')

# Re-assigned here as in the original cell; the filter below does not use it.
today_date_str = np.datetime64(today_date)

fl_columns = ['ASIN','EAN','ISBN-13','Product Title','Brand','Binding','Release Date',
              'Ordered Units','Ordered Units - Prior Period','Glance Views',
              'Conversion Rate']

# Titles released in the current calendar year or later, ranked by ordered
# units; keep the first 20.
released_this_year_or_later = df_detail['Release Date'].dt.year >= today_date.year
df_top20_fl = (
    df_detail.loc[released_this_year_or_later, fl_columns]
    .sort_values(by='Ordered Units', ascending=False)
    .head(20)
)
df_top20_fl.head(5)

Unnamed: 0,ASIN,EAN,ISBN-13,Product Title,Brand,Binding,Release Date,Ordered Units,Ordered Units - Prior Period,Glance Views,Conversion Rate
1,1797216260,9781797216263,9781797216263,How We Heal: Uncover Your Power and Set Yourse...,Chronicle Books,hardcover,2022-11-08,2259.0,-3.46%,14052,16.08%
2,157805236X,9781578052363,9781578052363,Sierra Club Engagement Calendar 2023,SIERRA CLUB,calendar,2022-08-02,1498.0,10.64%,3549,42.21%
6,1578052351,9781578052356,9781578052356,Sierra Club Wilderness Calendar 2023,SIERRA CLUB,calendar,2022-08-02,1197.0,11.45%,3568,33.55%
16,1797213873,9781797213873,9781797213873,"Construction Site: Farming Strong, All Year Lo...",Chronicle Books,hardcover,2022-11-15,722.0,57.30%,3183,22.68%
5,1797208012,9781797208015,9781797208015,Men in Blazers Present Gods of Soccer: The Pan...,Chronicle Prism,hardcover,2022-10-11,590.0,58.18%,2798,21.09%


# Top 30 Ordered - Chronicle

In [35]:
# df_top30_CB = df_detail.merge(df_item,how = 'left',left_on = 'ISBN',right_on = 'ISBN')

cb_columns = ['ASIN','EAN','ISBN-13','Product Title','Brand','Binding','Release Date',
              'Ordered Units','Ordered Units - Prior Period','Glance Views',
              'Conversion Rate']

# Rows whose Brand is exactly 'Chronicle Books', ranked by ordered units;
# keep the first 30.
is_chronicle = df_detail['Brand'] == 'Chronicle Books'
df_top30_CB = (
    df_detail.loc[is_chronicle, cb_columns]
    .sort_values(by='Ordered Units', ascending=False)
    .head(30)
)
df_top30_CB.head(5)

Unnamed: 0,ASIN,EAN,ISBN-13,Product Title,Brand,Binding,Release Date,Ordered Units,Ordered Units - Prior Period,Glance Views,Conversion Rate
0,1452179611,9781452179612,9781452179612,From Crook to Cook: Platinum Recipes from Tha ...,Chronicle Books,hardcover,2018-10-23,5982.0,32.79%,47712,12.54%
13,1452111731,9781452111735,9781452111735,"Goodnight, Goodnight Construction Site (Board ...",Chronicle Books,board_book,2017-02-14,2758.0,-22.16%,6615,41.69%
3,1452139113,9781452139111,9781452139111,Construction Site on Christmas Night: (Christm...,Chronicle Books,hardcover,2018-10-16,2333.0,-2.30%,6517,35.80%
1,1797216260,9781797216263,9781797216263,How We Heal: Uncover Your Power and Set Yourse...,Chronicle Books,hardcover,2022-11-08,2259.0,-3.46%,14052,16.08%
4,811864952,9780811864954,9780811864954,Ivy and Bean's Treasure Box: (Beginning Chapte...,Chronicle Books,paperback,2008-10-01,1642.0,-19.71%,10117,16.23%


In [36]:
# df_top_DP_main[df_top_DP_main['Brand'] == 'Laurence King Publishing'] #.sort_values(by='Ordered Units',ascending=False)

# df_top_DP_main[df_top_DP_main['ASIN'] == '1399600915'] #.sort_values(by='Ordered Units',ascending=False)

# Top 5 Ordered Units by Distribution

In [37]:
# Combining today's file with the ebs.item table
# df_top_DP_main = df_detail.merge(df_item,how = 'left',left_on = 'ISBN',right_on = 'ISBN')

# Author's note: some ISBNs look like CB/DP titles but are missing from the
# ebs.item table, so this selection works from the export's Brand column only.
dp_columns = ['ASIN','EAN','ISBN-13','Product Title','Brand','Binding','Release Date',
              'Ordered Units','Ordered Units - Prior Period','Glance Views',
              'Conversion Rate']

# DP backlist rows: a brand is present, the brand is not Chronicle Books, and
# the title was released before the current calendar year.
has_brand = df_detail['Brand'].notnull()
not_chronicle = df_detail['Brand'] != 'Chronicle Books'
is_backlist = df_detail['Release Date'].dt.year < today_date.year
df_top_DP_main = df_detail[has_brand & not_chronicle & is_backlist]

# Brands present in today's file, in order of first appearance.
dp_list = list(df_top_DP_main['Brand'].unique())
print(dp_list)
print()
print(f"There are {len(dp_list)} DP accounts.")

# One frame per brand, in the same order as dp_list: its five best-selling rows.
dp_list_df = []
for dp in dp_list:
    df_top5_DP = (
        df_top_DP_main.loc[df_top_DP_main['Brand'] == dp, dp_columns]
        .sort_values(by='Ordered Units', ascending=False)
        .head()
    )
    dp_list_df.append(df_top5_DP)

['Hardie Grant', 'Galison', 'Tourbillon', 'Princeton Architectural Press', 'LEVINE QUERIDO', 'Mudpuppy', "Ridley's", 'Laurence King Publishing', 'Twirl', 'The Do Book Company', 'Danerhouse', 'Petit Collage', 'Chronicle Chroma', 'Chronicle Prism', 'UNKNOWN', 'LEGO', 'Games Room', 'Brass Monkey', 'Christian Lacroix', 'Disabled - Do not use', 'The Creative Company', 'Do Books', 'AMMO', 'Moleskine Publishing', 'BIS Publishers', 'Hardie Grant Books', 'Creative Paperbacks Inc', 'Creative Editions', 'SIERRA CLUB', 'Seastar Pub Co', "Ridley's Games", 'Moleskine', 'Laurence King', 'Hyphen', 'PQ Blackwell']

There are 35 DP accounts.


# Saving off the File

In [38]:
path = r'G:\SALES\Amazon\RBH\weekly_customer_order\atelier\amazon_weekly_customer_order_py.xlsx'


def _excel_sheet_name(raw_name, taken):
    """Return a worksheet name Excel accepts, unique within ``taken``.

    Excel sheet names are limited to 31 characters, may not contain any of
    ``[ ] : * ? / \\``, may not start or end with an apostrophe, and must be
    unique ignoring case. Names that already satisfy the rules are returned
    unchanged. ``taken`` is a set of lower-cased names already used; the chosen
    name is added to it.
    """
    forbidden = str.maketrans({char: '_' for char in '[]:*?/\\'})
    base = str(raw_name).translate(forbidden).strip("'")[:31] or 'sheet'
    candidate = base
    counter = 1
    while candidate.lower() in taken:
        counter += 1
        suffix = f'_{counter}'
        candidate = base[:31 - len(suffix)] + suffix
    taken.add(candidate.lower())
    return candidate


# Fixed report sheets, written first and in this order.
report_sheets = [
    ('summary', df_summary),
    ('top20_nyp', df_top20_nyp),
    ('top20_fl', df_top20_fl),
    ('top30_cb', df_top30_CB),
]

# The context manager saves and closes the workbook on exit, including when a
# write fails. ExcelWriter.save() was deprecated in pandas 1.5 and removed in 2.0.
with pd.ExcelWriter(path, engine='xlsxwriter') as writer:
    used_names = set()
    for sheet_name, frame in report_sheets:
        frame.to_excel(writer, sheet_name=_excel_sheet_name(sheet_name, used_names), index=False)

    # One sheet per DP brand, named after the brand.
    for dp_name, dp_df in zip(dp_list, dp_list_df):
        dp_df.to_excel(writer, sheet_name=_excel_sheet_name(dp_name, used_names), index=False)