### McCain Sell-In
### 1. Libraries and constants
Run cell below

In [1]:
import teradatasql
import pandas as pd
import numpy as np
import datetime
from datetime import datetime as dt
from datetime import timedelta

#path where dictionary file can be found
#Neil
DICTIONARY = r'C:\Users\NEWATTER\OneDrive - McCain Foods Limited\Distributor Sell-Out Dictionaries\\'
#Joe
#DICTIONARY = r'C:\Users\jcronk\McCain Foods Limited\GNA Data Strategy & Analytics - COVID Recovery\Distributor Sell-Out Dictionaries\\'

#main path
#Neil
PATH = r'C:\Users\NEWATTER\OneDrive - McCain Foods Limited\Historical Sell-Out Sales\\'
#Joe
#PATH = r'C:\Users\jcronk\McCain Foods Limited\GNA Data Strategy & Analytics - COVID Recovery\Historical Sell-Out Sales\\'

#time dictionary table: one row per calendar week (YYYYWW) with its start/end dates
TIME = pd.read_excel(DICTIONARY + 'Time Definitions.xlsx')

#the current week is pulled from the time dictionary table
#compare on the calendar date only: with the time of day included, a week-ending value
#stamped at midnight would no longer match during its own last day
_today = pd.Timestamp.now().normalize()
_current_week = TIME[(TIME['Week Starting (Mon)'] <= _today) & (TIME['Week Ending (Sun)'] >= _today)]

#fail with a readable message instead of an opaque int() conversion error
if len(_current_week) != 1:
    raise ValueError(
        f'Expected exactly one Time Definitions row covering {_today.date()}, found {len(_current_week)}')

WEEK = int(_current_week['Calendar Week Year'].iloc[0])

#WEEK = 202218

### 2. Calculation Functions
Run cell below

In [3]:
def add_rolling(df, _list):
    """Add lagged values and simple moving averages of the LBS metrics.

    Parameters
    ----------
    df : DataFrame holding the columns in ``_list`` plus 'LBS', 'LBS_LY'
        and 'LBS_Baseline'.
    _list : list of column names; every item but the last identifies a
        segment, the last item is the period column the rows are ordered by.

    Returns
    -------
    DataFrame with one row per ``_list`` combination (metrics summed) and the
    added columns LBS_Lag_1..4, SMA_{4,8,12}[_LY|_Baseline],
    LBS_Baseline_Lag_1, LBS_LY_Lag_1 and SMA_4[_LY|_Baseline]_Lag_1.
    """
    segment_keys = _list[0:-1]

    #one row per segment/period; groupby sorts the keys, so rows within a
    #segment are in ascending period order for the shift/rolling calls below
    df = df.groupby(_list, dropna = False)[['LBS','LBS_LY','LBS_Baseline']].sum().reset_index()
    df = df.set_index(_list)

    def _by_segment(column):
        #every lag/average is computed inside a segment, never across segments
        return df.groupby(level=segment_keys)[column]

    for periods in (1, 2, 3, 4):
        df[f'LBS_Lag_{periods}'] = _by_segment('LBS').shift(periods = periods)

    #simple moving averages over 4, 8 and 12 periods (partial windows allowed)
    #transform keeps the original index, so the result aligns row-for-row
    for column, suffix in (('LBS', ''), ('LBS_LY', '_LY'), ('LBS_Baseline', '_Baseline')):
        for window in (4, 8, 12):
            df[f'SMA_{window}{suffix}'] = _by_segment(column).transform(
                lambda s, w=window: s.rolling(w, min_periods=1).mean())

    df['LBS_Baseline_Lag_1'] = _by_segment('LBS_Baseline').shift(periods = 1)
    #fixed: this lag was previously taken from 'LBS', duplicating LBS_Lag_1
    df['LBS_LY_Lag_1'] = _by_segment('LBS_LY').shift(periods = 1)

    df['SMA_4_Lag_1'] = _by_segment('SMA_4').shift(periods = 1)
    df['SMA_4_LY_Lag_1'] = _by_segment('SMA_4_LY').shift(periods = 1)
    df['SMA_4_Baseline_Lag_1'] = _by_segment('SMA_4_Baseline').shift(periods = 1)

    return df.reset_index()


def add_last_year(df, _list):
    """Attach last-year and baseline LBS to every segment/week row.

    ``df`` is summed by ``_list`` (segment columns followed by
    'Calendar Week Year'). Each row then gets the 'YOY Week' and
    'Baseline Week' of its week from the module-level TIME table, and the
    summed LBS of those two weeks for the same segment is joined back on as
    'LBS_LY' and 'LBS_Baseline'. Weeks with no match are left as NaN.

    Returns a DataFrame with the segment columns, 'Calendar Week Year', 'LBS',
    'YOY Week', 'Baseline Week', 'LBS_LY' and 'LBS_Baseline'.
    """
    #list of groupby columns
    #last item in list is Calendar Week Year which is used to pull previous history (Baseline Week = Calendar Week Year) of copied dataframe
    _groupby = _list.copy()
    
    #left-hand join keys for the year-over-year lookup: segment columns + 'YOY Week'
    _merge_yoy = _list.copy()[0:-1]
    _merge_yoy.extend(['YOY Week'])
    
    #left-hand join keys for the baseline lookup: segment columns + 'Baseline Week'
    _merge_baseline = _list.copy()[0:-1]
    _merge_baseline.extend(['Baseline Week'])
    
    #lookup table of LBS per segment/week, used as the right side of both joins
    df1 = df.groupby(_list, dropna = False)['LBS'].sum().reset_index()
    
    #groupby _list
    df_new = df.groupby(_list, dropna = False)['LBS'].sum().reset_index()
    
    #add week dimensions to main dataframe
    df_new = df_new.merge(TIME[['Calendar Week Year','YOY Week','Baseline Week']], how = 'left', left_on = 'Calendar Week Year', right_on = 'Calendar Week Year')
    
    #both sides carry 'Calendar Week Year' and 'LBS', so pandas suffixes them _x (current) / _y (looked-up);
    #the looked-up week is dropped and its LBS becomes LBS_LY
    df_new = df_new.merge(df1, how='left', left_on=_merge_yoy, right_on=_groupby).drop(columns={'Calendar Week Year_y'}).rename(columns={'LBS_y':'LBS_LY'})
    
    #the left side still holds the _x names, so the right side arrives unsuffixed:
    #drop its week, rename its LBS to LBS_Baseline, then restore the _x columns to their plain names
    df_new = df_new.merge(df1, how='left', left_on=_merge_baseline, right_on=_groupby).drop(columns={'Calendar Week Year'}).rename(columns={
        'LBS':'LBS_Baseline','Calendar Week Year_x':'Calendar Week Year','LBS_x':'LBS'})
    
    return df_new


def add_precovid(df, _list, begin, end):
    """Add 'LBS_PRECOVID': each segment's LBS total inside [begin, end] divided by 52.

    The last item of ``_list`` is the date/week column used for the window
    filter; the remaining items identify the segment. Segments with no rows
    in the window get 0.
    """
    date_column = _list[-1]
    segment_keys = _list[0:-1]

    #rows that fall inside the reference window (both ends inclusive)
    in_window = (df[date_column] >= begin) & (df[date_column] <= end)

    #window total per segment, spread over a fixed 52 periods
    window_average = df[in_window].groupby(segment_keys)['LBS'].sum() / 52

    #both sides have an 'LBS' column, so the merge suffixes them _x / _y
    merged = df.merge(window_average, how = 'left', left_on = segment_keys, right_on = segment_keys)
    merged = merged.rename(columns = {'LBS_x':'LBS', 'LBS_y':'LBS_PRECOVID'})

    return merged.fillna(value = {'LBS_PRECOVID': 0})


def add_time(df):
    """Left-join the week attributes from the module-level TIME table onto ``df``.

    Adds 'Week Starting (Mon)', 'Week Ending (Sun)', 'COVID Week', 'YOY Week'
    and 'Baseline Week', matched on 'Calendar Week Year'.
    """
    week_dates = TIME[['Calendar Week Year','Week Starting (Mon)','Week Ending (Sun)', 'COVID Week']]
    week_links = TIME[['Calendar Week Year','YOY Week','Baseline Week']]

    #two separate joins, in this order, so the added columns keep their order
    with_dates = df.merge(week_dates, how = 'left', on = 'Calendar Week Year')
    return with_dates.merge(week_links, how = 'left', on = 'Calendar Week Year')

def add_weight(df, _list):
    """Return each ``_list`` group's share of its week's total LBS.

    The result is a Series named 'Wt' indexed 0..n-1 in sorted ``_list``
    order (the order produced by the groupby).
    """
    grouped = df.groupby(_list)[['LBS','LBS_Baseline']].sum().reset_index()

    #denominator: total LBS of all groups in the same calendar week
    week_total = grouped.groupby(['Calendar Week Year'])['LBS'].transform('sum')
    grouped['Wt'] = grouped['LBS'] / week_total

    return grouped['Wt']

def analyze_1(df, _list, begin, end):
    """Build the full metric table for one segmentation.

    Parameters
    ----------
    df : sales DataFrame containing the ``_list`` columns, 'Calendar Week Year'
        and 'LBS'.
    _list : segment column names. 'Calendar Week Year' is appended to this
        list in place when missing, so the list the caller passed in also
        gains that entry.
    begin, end : inclusive 'Calendar Week Year' bounds of the pre-COVID
        reference window handed to ``add_precovid``.

    Returns
    -------
    DataFrame with one row per segment/week, the last-year, baseline, rolling,
    lag and pre-COVID metrics (rounded to 2 decimals, missing values as 0) and,
    when the first segment column is 'Brand Desc', the weekly share 'Wt'.
    """
    if 'Calendar Week Year' not in _list:
        _list.extend(['Calendar Week Year'])

    #make sure every segment has a row for every week
    df = full_dataframe(df, _list)

    #add last year lbs
    df = add_last_year(df, _list)

    #add rolling calculation
    df = add_rolling(df, _list)

    #add preCOVID baseline
    df = add_precovid(df, _list, begin, end)

    if _list[0] == 'Brand Desc':
        df['Wt'] = add_weight(df, _list)

    #every metric gets the same treatment: 2 decimals, missing -> 0
    #(LBS_Baseline_Lag_1 and LBS_LY_Lag_1 were previously filled with 2 by mistake)
    metric_columns = [
        'LBS', 'SMA_4', 'SMA_8', 'SMA_12',
        'LBS_LY', 'SMA_4_LY', 'SMA_8_LY', 'SMA_12_LY',
        'LBS_Baseline', 'SMA_4_Baseline', 'SMA_8_Baseline', 'SMA_12_Baseline',
        'LBS_PRECOVID',
        'LBS_Lag_1', 'LBS_Lag_2', 'LBS_Lag_3', 'LBS_Lag_4',
        'LBS_Baseline_Lag_1', 'LBS_LY_Lag_1',
        'SMA_4_Lag_1', 'SMA_4_LY_Lag_1', 'SMA_4_Baseline_Lag_1',
    ]

    df = df.round({column: 2 for column in metric_columns}).fillna(
        value = {column: 0 for column in metric_columns})

    return df


def save_backup(df, file_name):
    """Write ``df`` as CSV to ``BACKUP + file_name``; returns None."""
    #NOTE(review): BACKUP is not defined in the visible part of this notebook - confirm it is set before use
    target = BACKUP + file_name
    df.to_csv(target)


def td_to_pandas(query, cur, title=''):
    """Run ``query`` on an open cursor and return the result as a DataFrame.

    Parameters
    ----------
    query : SQL text to execute.
    cur : DB-API style cursor providing ``execute``, ``fetchall`` and
        ``description``.
    title : prefix for the progress messages printed to stdout.

    Returns
    -------
    DataFrame whose column names are the cursor's column names with 'SAP_'
    removed and lower-cased. An empty result set yields an empty frame that
    still carries those columns.
    """
    _start=dt.now()
    print(dt.now().strftime('%m/%d/%Y %r'))
    print(f'{title} Execution started...', end='', flush=True)
    cur.execute (query)
    print(f'finished. {dt.now() - _start}', flush=True) 
    _start_fetch=dt.now()
    print(f'{title} Fetching data started...', end='', flush=True)
    _data = list(cur.fetchall())
    print(f'finished. {dt.now() - _start_fetch}', flush=True) 
    _start=dt.now()
    print(f'{title} Creating DataFrame for started...', end='', flush=True)
    _columns = [x[0].replace('SAP_', '').lower() for x in cur.description]
    #passing the names to the constructor keeps this working for zero rows,
    #where assigning .columns afterwards raises a length-mismatch error
    _df = pd.DataFrame(_data, columns=_columns)
    print(f'finished. {dt.now() - _start}', flush=True)
    return _df


def td_dataframe(select_db, query):
    """Open a Teradata connection, select a database and return ``query`` as a DataFrame.

    Connection settings come from the environment: TERADATA_HOST and
    TERADATA_USER (falling back to the previous defaults) and
    TERADATA_PASSWORD. When the password variable is not set the user is
    prompted once and the value is kept in this process's environment for
    later calls.

    Parameters
    ----------
    select_db : statement executed first to pick the default database.
    query : SQL text whose result set is returned.
    """
    import os
    from getpass import getpass

    host = os.environ.get('TERADATA_HOST', '172.29.3.43')
    user = os.environ.get('TERADATA_USER', 'PNWATTERS')

    #the password used to be a literal in this notebook; it must not live in
    #source control (and the previously committed value should be rotated)
    password = os.environ.get('TERADATA_PASSWORD')
    if not password:
        password = getpass(f'Teradata password for {user}: ')
        os.environ['TERADATA_PASSWORD'] = password

    with teradatasql.connect(None,
                             host=host,
                             user=user,
                             password=password) as con:
        with con.cursor() as cur:
            cur.execute (select_db)
            print('Database selected!', flush=True)            
            dim_df = td_to_pandas(query, cur, 'Query:')
            print('Dim:', dim_df.shape)
    
    return dim_df


def process_list(df, work_list):
    """Run the full analysis for one segmentation and return the standard output columns.

    Parameters
    ----------
    df : sales DataFrame passed to ``analyze_1``.
    work_list : segment column names. The list is copied, so the caller's
        list is left untouched and the function can be called again with it.

    Returns
    -------
    DataFrame with the segment columns, 'Calendar Week Year', 'Country' and
    the metric/time columns in a fixed order; 'Wt' is appended when the first
    segment column is 'Brand Desc'.
    """
    #work on a private copy and add the week key here, instead of relying on
    #analyze_1 appending it to the caller's list as a side effect
    columns = list(work_list)
    if 'Calendar Week Year' not in columns:
        columns.append('Calendar Week Year')

    #201910-202009 is the pre-COVID reference window
    _process = analyze_1(df, columns, 201910, 202009)

    _process['Country'] = 'US'

    _process = add_time(_process)

    #for standardizing output
    columns.extend(['Country','LBS','SMA_4','SMA_8','SMA_12',
                    'YOY Week','LBS_LY','SMA_4_LY','SMA_8_LY','SMA_12_LY',
                    'Baseline Week','LBS_Baseline','SMA_4_Baseline','SMA_8_Baseline','SMA_12_Baseline',
                    'LBS_Lag_1','LBS_Lag_2','LBS_Lag_3','LBS_Lag_4','LBS_Baseline_Lag_1','LBS_LY_Lag_1',
                    'SMA_4_Lag_1', 'SMA_4_LY_Lag_1', 'SMA_4_Baseline_Lag_1',
                    'LBS_PRECOVID','Week Starting (Mon)','Week Ending (Sun)','COVID Week'])

    if columns[0] == 'Brand Desc':
        columns.extend(['Wt'])

    return _process[columns]


def full_dataframe(df, _list):
    """Expand ``df`` so every segment has a row for every week.

    Segments are the distinct combinations of ``_list[0:-1]`` and weeks the
    distinct 'Calendar Week Year' values found in ``df``. The cross product
    of the two is left-joined with ``df`` on ``_list``, so combinations
    without data appear with missing values.
    """
    segment_keys = _list[0:-1]

    #distinct weeks / segments: group, then discard the size column (named 0)
    week_grid = df.groupby(['Calendar Week Year']).size().reset_index().drop(columns=[0])
    segment_grid = df.groupby(segment_keys).size().reset_index().drop(columns=[0])

    #cross join through a constant helper key
    left = segment_grid.assign(key=1)
    right = week_grid.assign(key=1)
    scaffold = left.merge(right, how='outer', on='key').drop(columns=['key'])

    return scaffold.merge(df, how = 'left', on = _list)

In [3]:
from azureml.core import Workspace, Datastore, Run
from azureml.data import TabularDataset
from azureml.data.azure_sql_database_datastore import AzureSqlDatabaseDatastore
from azureml.data.dataset_factory import TabularDatasetFactory

In [46]:
# Sell-in query at invoice-date grain (T-SQL): volume and financials for Sales Org US01,
# Distribution Channel '10', from 2019-01-01 up to a date derived from GETDATE().
# NOTE: the cells below assign query_str again, so in top-to-bottom execution order this
# version is overwritten before the dataset is loaded. It also names the category column
# `cat`, whereas the later versions (and transform_azure) use `consolidated_category`.
# NOTE(review): T-SQL CHARINDEX takes (expressionToFind, expressionToSearch), so
# CHARINDEX(a2.[Category], 'Potato') looks for the Category value inside the literal
# 'Potato'. A category such as 'Sweet Potato' would therefore not match - confirm this is intended.
# NOTE(review): DATEPART(WEEKDAY, ...) depends on the session's DATEFIRST setting - confirm.
query_str = '''
--DECLARE @MyDate DATE;
--SET @MyDate = DATEADD(DAY, 7-DATEPART(WEEKDAY, GETDATE())-7,GETDATE());
--SET DATEFIRST 1;

SELECT 
      a1.[Invoice Date],
      DATEADD(DAY, 7-DATEPART(WEEKDAY, a1.[Invoice Date]), a1.[Invoice Date]) as week_end_date
      ,CONCAT(FORMAT(a3.[YearNumber], '0000'),FORMAT(a3.[WeekNumberOfYear], '0#')) as calendar_week_number
      --,a1.[CustomerID]
      ,a1.[Sales Org] as sales_org
      ,a2.[Division] as division_name
      ,a2.[Brand] as brand_desc
      ,a2.[Family] as family_desc
      ,a4.[Material Pricing Group] as material_pricing_group_description
      ,CASE WHEN CHARINDEX(a2.[Category], 'Potato') > 0 THEN 'Potato' ELSE 'Prepared Foods' END as cat
      ,SUM(a1.[Weight Lbs]) as actual_volume_lbs
      ,SUM(a1.[Gross Sales]) as gross_sales
      ,SUM(a1.[Net Sales]) as net_sales
      ,SUM(a1.[Gross Profit]) as gross_profit
      ,SUM(a1.[Cost Of Goods Sold]) as cost_of_goods_sold
      ,SUM(a1.[Cost Of Sales]) as cost_of_sales
  FROM [BI].[Factsellin] as a1
  JOIN [BI].[DimProduct] as a2
    ON a1.[ProductID] = a2.[ProductID]
  JOIN [BI].[DimDate] as a3
    ON a1.[Invoice Date] = a3.[FullDate]
  JOIN [BI].[DimProductSalesData] as a4
    ON a1.[ProductID] = a4.[ProductID]
    AND a1.[Sales Org] = a4.[Sales Org]
  WHERE a1.[Distribution Channel] = '10'
  and a1.[Invoice Date] BETWEEN '2019-01-01' AND DATEADD(DAY, 7-DATEPART(WEEKDAY, GETDATE())-7,GETDATE())
  and a1.[Sales Org] in ('US01')
  GROUP BY
    a1.[Invoice Date],
    DATEADD(DAY, 7-DATEPART(WEEKDAY, a1.[Invoice Date]), a1.[Invoice Date])
    ,CONCAT(FORMAT(a3.[YearNumber], '0000'),FORMAT(a3.[WeekNumberOfYear], '0#'))
    --,a1.[CustomerID]
    ,a1.[Sales Org]
    ,a2.[Division]
    ,a2.[Brand]
    ,a2.[Family]
    ,a4.[Material Pricing Group]
    ,CASE WHEN CHARINDEX(a2.[Category], 'Potato') > 0 THEN 'Potato' ELSE 'Prepared Foods' END'''

In [5]:
# Weekly sell-in query (T-SQL): volume and financials per week-ending date, division, brand,
# family, pricing group and consolidated category for Sales Org US01, Distribution Channel '10'.
# week_end_date folds @@DATEFIRST into the weekday arithmetic; the upper bound in the WHERE
# clause still uses a plain DATEPART(WEEKDAY, GETDATE()).
# NOTE(review): the upper bound therefore depends on the session's DATEFIRST - confirm.
# NOTE: the following cell appears to assign the same text to query_str again.
# NOTE(review): T-SQL CHARINDEX takes (expressionToFind, expressionToSearch), so
# CHARINDEX(a2.[Category], 'Potato') looks for the Category value inside the literal
# 'Potato'. A category such as 'Sweet Potato' would therefore not match - confirm this is intended.
query_str = '''
SELECT 
      DATEADD(DAY, 7-((DatePart(WEEKDAY, a1.[Invoice Date]) + @@DATEFIRST + 6 - 1 ) % 7), a1.[Invoice Date]) as week_end_date
      ,CONCAT(FORMAT(a3.[YearNumber], '0000'),FORMAT(a3.[WeekNumberOfYear], '0#')) as calendar_week_number
      --,a1.[CustomerID]
      ,a1.[Sales Org] as sales_org
      ,a2.[Division] as division_name
      ,a2.[Brand] as brand_desc
      ,a2.[Family] as family_desc
      ,a4.[Material Pricing Group] as material_pricing_group_description
      ,CASE WHEN CHARINDEX(a2.[Category], 'Potato') > 0 THEN 'Potato' ELSE 'Prepared Foods' END as consolidated_category
      ,SUM(a1.[Weight Lbs]) as actual_volume_lbs
      ,SUM(a1.[Gross Sales]) as gross_sales
      ,SUM(a1.[Net Sales]) as net_sales
      ,SUM(a1.[Gross Profit]) as gross_profit
      ,SUM(a1.[Cost Of Goods Sold]) as cost_of_goods_sold
      ,SUM(a1.[Cost Of Sales]) as cost_of_sales
  FROM [BI].[Factsellin] as a1
  JOIN [BI].[DimProduct] as a2
    ON a1.[ProductID] = a2.[ProductID]
  JOIN [BI].[DimDate] as a3
    ON a1.[Invoice Date] = a3.[FullDate]
  JOIN [BI].[DimProductSalesData] as a4
    ON a1.[ProductID] = a4.[ProductID]
    AND a1.[Sales Org] = a4.[Sales Org]
  WHERE a1.[Distribution Channel] = '10'
  and a1.[Invoice Date] BETWEEN '2019-01-01' AND DATEADD(DAY, 7-DATEPART(WEEKDAY, GETDATE())-7,GETDATE())
  and a1.[Sales Org] in ('US01')
  GROUP BY
    DATEADD(DAY, 7-((DatePart(WEEKDAY, a1.[Invoice Date]) + @@DATEFIRST + 6 - 1 ) % 7), a1.[Invoice Date])
    ,CONCAT(FORMAT(a3.[YearNumber], '0000'),FORMAT(a3.[WeekNumberOfYear], '0#'))
    --,a1.[CustomerID]
    ,a1.[Sales Org]
    ,a2.[Division]
    ,a2.[Brand]
    ,a2.[Family]
    ,a4.[Material Pricing Group]
    ,CASE WHEN CHARINDEX(a2.[Category], 'Potato') > 0 THEN 'Potato' ELSE 'Prepared Foods' END'''

In [None]:
# Weekly sell-in query (T-SQL) used to load the Azure dataset: volume and financials per
# week-ending date, division, brand, family, pricing group and consolidated category for
# Sales Org US01, Distribution Channel '10', from 2019-01-01 up to a date derived from GETDATE().
# NOTE: this cell appears to repeat the previous cell's query_str verbatim.
# NOTE(review): T-SQL CHARINDEX takes (expressionToFind, expressionToSearch), so
# CHARINDEX(a2.[Category], 'Potato') looks for the Category value inside the literal
# 'Potato'. A category such as 'Sweet Potato' would therefore not match - confirm this is intended.
# NOTE(review): the WHERE upper bound uses DATEPART(WEEKDAY, GETDATE()) without the
# @@DATEFIRST adjustment applied to week_end_date - confirm the session's DATEFIRST.
query_str = '''
SELECT 
      DATEADD(DAY, 7-((DatePart(WEEKDAY, a1.[Invoice Date]) + @@DATEFIRST + 6 - 1 ) % 7), a1.[Invoice Date]) as week_end_date
      ,CONCAT(FORMAT(a3.[YearNumber], '0000'),FORMAT(a3.[WeekNumberOfYear], '0#')) as calendar_week_number
      --,a1.[CustomerID]
      ,a1.[Sales Org] as sales_org
      ,a2.[Division] as division_name
      ,a2.[Brand] as brand_desc
      ,a2.[Family] as family_desc
      ,a4.[Material Pricing Group] as material_pricing_group_description
      ,CASE WHEN CHARINDEX(a2.[Category], 'Potato') > 0 THEN 'Potato' ELSE 'Prepared Foods' END as consolidated_category
      ,SUM(a1.[Weight Lbs]) as actual_volume_lbs
      ,SUM(a1.[Gross Sales]) as gross_sales
      ,SUM(a1.[Net Sales]) as net_sales
      ,SUM(a1.[Gross Profit]) as gross_profit
      ,SUM(a1.[Cost Of Goods Sold]) as cost_of_goods_sold
      ,SUM(a1.[Cost Of Sales]) as cost_of_sales
  FROM [BI].[Factsellin] as a1
  JOIN [BI].[DimProduct] as a2
    ON a1.[ProductID] = a2.[ProductID]
  JOIN [BI].[DimDate] as a3
    ON a1.[Invoice Date] = a3.[FullDate]
  JOIN [BI].[DimProductSalesData] as a4
    ON a1.[ProductID] = a4.[ProductID]
    AND a1.[Sales Org] = a4.[Sales Org]
  WHERE a1.[Distribution Channel] = '10'
  and a1.[Invoice Date] BETWEEN '2019-01-01' AND DATEADD(DAY, 7-DATEPART(WEEKDAY, GETDATE())-7,GETDATE())
  and a1.[Sales Org] in ('US01')
  GROUP BY
    DATEADD(DAY, 7-((DatePart(WEEKDAY, a1.[Invoice Date]) + @@DATEFIRST + 6 - 1 ) % 7), a1.[Invoice Date])
    ,CONCAT(FORMAT(a3.[YearNumber], '0000'),FORMAT(a3.[WeekNumberOfYear], '0#'))
    --,a1.[CustomerID]
    ,a1.[Sales Org]
    ,a2.[Division]
    ,a2.[Brand]
    ,a2.[Family]
    ,a4.[Material Pricing Group]
    ,CASE WHEN CHARINDEX(a2.[Category], 'Potato') > 0 THEN 'Potato' ELSE 'Prepared Foods' END'''

In [12]:
%%time

# Look up the Azure ML workspace that owns the registered SQL datastore.
ws = Workspace.get(
        name='Azure-ML-workspace-01-dev',
        subscription_id='54d8e51a-03ad-4e44-b067-d6f54ef09a15',
        resource_group='MF_GDA-ENT-ML-RG'
    )

#print(query_str)

# Run query_str (defined in the cell above) against the 'synapse_sql_datastore' datastore
# and materialize the result as a pandas DataFrame named df.
datastore: AzureSqlDatabaseDatastore = Datastore.get(ws, 'synapse_sql_datastore')
query = (datastore, query_str)
# NOTE(review): query_timeout=0 presumably disables the timeout - confirm against the azureml docs
ds: TabularDataset = TabularDatasetFactory().from_sql_query(query, query_timeout=0)
df = ds.to_pandas_dataframe()

Wall time: 4min 46s


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63921 entries, 0 to 63920
Data columns (total 14 columns):
 #   Column                              Non-Null Count  Dtype         
---  ------                              --------------  -----         
 0   week_end_date                       63921 non-null  datetime64[ns]
 1   calendar_week_number                63921 non-null  object        
 2   sales_org                           63921 non-null  object        
 3   division_name                       63920 non-null  object        
 4   brand_desc                          63921 non-null  object        
 5   family_desc                         63921 non-null  object        
 6   material_pricing_group_description  63921 non-null  object        
 7   consolidated_category               63921 non-null  object        
 8   actual_volume_lbs                   63921 non-null  float64       
 9   gross_sales                         63921 non-null  float64       
 10  net_sales             

In [13]:
df['week_end_date'].max()

Timestamp('2022-05-30 00:00:00')

In [10]:
df.head()

Unnamed: 0,week_end_date,calendar_week_number,sales_org,division_name,brand_desc,family_desc,material_pricing_group_description,cat,actual_volume_lbs,gross_sales,net_sales,gross_profit,cost_of_goods_sold,cost_of_sales
0,2020-03-10,202010,US01,QSR,Portillo's,App Onion-Regular Rings,NAT'L CHAIN SNACKS,Prepared Foods,29760.0,40176.0,38554.08,9696.86,27777.22,28857.22
1,2020-12-15,202050,US01,Retail,Wegmans,App Onion-Regular Rings,RT PRIV LBL SNACKS,Prepared Foods,8400.0,12978.0,12487.44,42.06,10635.78,12445.38
2,2021-01-05,202153,US01,Food Service,McCain,SweetPotSpec-CutWhole Regular,PRIMARY BRANDED,Prepared Foods,7920.0,15444.0,9353.74,3604.58,4747.86,5749.16
3,2021-01-26,202103,US01,Food Service,Alpine,Pot Fries-Conventional,PRIVATE LABEL POTATO,Potato,217404.0,108702.0,102777.74,774.0,83699.54,102003.74
4,2019-10-22,201942,US01,Retail,Hy-Vee,SweetPot Fries-Battered,RT PL POTATO GRADE A,Prepared Foods,2280.0,2848.0,2755.12,890.32,1659.88,1864.8


In [29]:
def transform_azure(df):
    """Return a copy of the Azure query result with the notebook's standard column names.

    'calendar_week_number' is cast to int64 and the metric/segment columns
    are renamed (e.g. 'actual_volume_lbs' -> 'LBS'). Names in the mapping
    that are not present in ``df`` are ignored by the rename.
    """
    standard_names = {
        'actual_volume_lbs': 'LBS',
        'calendar_week_number': 'Calendar Week Year',
        'division_name': 'Division Name',
        'material_pricing_group_description': 'Material Pricing Group Description',
        'customer_hier_lvl_1_name': 'Customer L1 Name',
        'brand_desc': 'Brand Desc',
        'consolidated_category': 'Consolidated Category',
        'family_desc': 'Family Desc',
    }

    #week keys arrive as text (YYYYWW); later functions compare them numerically
    typed = df.astype({'calendar_week_number': 'int64'})

    return typed.rename(columns=standard_names)

In [30]:
df_test = transform_azure(df)

df_test.head()

Unnamed: 0,week_end_date,Calendar Week Year,sales_org,Division Name,Brand Desc,Family Desc,Material Pricing Group Description,Consolidated Category,LBS,gross_sales,net_sales,gross_profit,cost_of_goods_sold,cost_of_sales
0,2020-03-10,202010,US01,Retail,Wakefern,Pot Fries-Battered,RT PL POTATO GRADE A,Potato,3456.0,2950.56,2866.48,583.54,1857.9,2282.94
1,2019-12-17,201950,US01,Food Service,Anchor,App Cheese-Regular Sticks,ANCHOR SNACKS,Prepared Foods,421692.0,1665315.82,1137441.5,569036.78,517871.56,568404.72
2,2019-09-10,201936,US01,Food Service,Quaker Steak and Lube,App Onion-Regular Rings,KEY ACCOUNT SNACKS,Prepared Foods,7104.0,10559.12,10152.58,851.68,7703.54,9300.9
3,2021-01-26,202103,US01,Food Service,Idaho Valley,Pot Fries-Conventional,SECONDARY BRANDED,Potato,458940.0,282668.4,230091.76,69476.94,145793.04,160614.82
4,2019-10-22,201942,US01,Retail,Hy-Vee,SweetPot Fries-Battered,RT PL POTATO GRADE A,Prepared Foods,2280.0,2848.0,2755.12,890.32,1659.88,1864.8


### 3. Teradata Queries
Run cell below

In [6]:
def teradata_sales():
    """Pull weekly sell-in volume by customer, division, category, pricing group and material.

    Runs the query below against DL_GBL_TAS_BI for sales organisation 'US01',
    distribution channel '10', fiscal years FY2019-FY2022 and calendar weeks
    201901 through ``WEEK - 1`` (module-level constant), then returns the
    result passed through ``transform_teradata``.
    """
    #SET QUERY_BAND = 'ApplicationName=MicroStrategy;Version=9.0;ClientUser=NEWATTER;Source=Vantage; Action=Sysco COVID Performance;StartTime=20200901T131109;JobID=68215;Importance=666;'  FOR SESSION;
    select_db = "DATABASE DL_GBL_TAS_BI"

    #NOTE(review): a19 (FACT_OM_ORDER_FULFILLMENT) is left-joined but never referenced in the
    #select, where or group by; if several fulfillment rows can match one sales row the summed
    #volume would be inflated - confirm the join is needed
    #NOTE(review): the fiscal-year list is hard-coded and stops at FY2022 - confirm it is kept current
    query = '''
        select a14.FISCAL_WEEK_NUMBER as FISCAL_WEEK_NUMBER,
            (a14.FISCAL_WEEK_NUMBER_DESCR || ' ' || a14.START_DATE_OF_SAPYW) as FISCAL_WEEK,
            a14.CALENDAR_WEEK_NAME as CALENDAR_WEEK_NUMBER,
            (a14.CALENDAR_WEEK_LONG_DESCRIPTION || ' ' || a14.START_DATE_OF_SAPYW) as CALENDAR_WEEK,
            RIGHT(a16.CUSTOMER_HIER_LVL_1,CAST(10 AS INTEGER)) as CUSTOMER_HIER_LVL_1,
            a16.CUSTOMER_HIER_LVL_1_NAME as CUSTOMER_HIER_LVL_1_NAME,
            a13.DIVISION_ID as DIVISION,
            a17.DIVISION_NAME as DIVISION_NAME,
            a12.CATEGORY_SHORT_CODE as CATEGORY_SHORT_CODE,
            a12.CATEGORY_DESC as CATEGORY_DESC,
            a12.SUB_CATEGORY_SHORT_CODE as SUB_CATEGORY_SHORT_CODE,
            a12.SUB_CATEGORY_DESC as SUB_CATEGORY_DESC,
            a15.MATERIAL_PRICING_GROUP_ID as MATERIAL_PRICING_GROUP_ID,
            a18.MATERIAL_PRICING_GROUP_DESCRIPTION as MATERIAL_PRICING_GROUP_DESCRIPTION,
            TRIM (LEADING '0' FROM a13.MATERIAL_ID) as MATERIAL_ID,
            a13.MATERIAL_DESCRIPTION as MATERIAL_NAME,
            sum(a11.SALES_VOLUME_WEIGHT_LBS) as ACTUAL_VOLUME_LBS
        from DL_GBL_TAS_BI.FACT_SALES_ACTUAL as a11
        join DL_GBL_TAS_BI.VW_H_PRODUCT_ALL_SALES as a12
        on (a11.MATERIAL_ID = a12.MATERIAL_ID)
        join DL_GBL_TAS_BI.D_MATERIAL_DN_ALL as a13
        on (a11.MATERIAL_ID = a13.MATERIAL_ID)
        join DL_GBL_TAS_BI.D_TIME_FY_V6 as a14
        on (a11.ACCOUNTING_PERIOD_DATE = a14.DAY_CALENDAR_DATE)
        join DL_GBL_TAS_BI.D_MATERIAL_SALES_DATA as a15
        on (a11.DISTRIBUTION_CHANNEL_ID = a15.DISTRIBUTION_CHANNEL_ID and 
        a11.MATERIAL_ID = a15.MATERIAL_ID and 
        a11.SALES_ORGANISATION_ID = a15.SALES_ORGANISATION_ID)
        join DL_GBL_TAS_BI.VW_H_CUSTOMER_ALL_DIVISION00 as a16
        on (a11.CUSTOMER_ID = a16.CUSTOMER and 
        a11.DISTRIBUTION_CHANNEL_ID = a16.DISTRIBUTION_CHANNEL and 
        a11.SALES_ORGANISATION_ID = a16.SALES_ORGANISATION)
        join DL_GBL_TAS_BI.D_DIVISION as a17
        on (a13.DIVISION_ID = a17.DIVISION_ID)
        join DL_GBL_TAS_BI.D_MATERIAL_PRICING_GROUP as a18
        on (a15.MATERIAL_PRICING_GROUP_ID = a18.MATERIAL_PRICING_GROUP_ID)
        left join DL_GBL_TAS_BI.FACT_OM_ORDER_FULFILLMENT as a19
        on (a11.SALES_ORDER_ID = a19.SALES_ORDER_ID) and (a11.MATERIAL_ID = a19.MATERIAL_ID)
        where (a14.FISCAL_YEAR_CODE in ('FY2019', 'FY2020', 'FY2021','FY2022')
        and a11.SALES_ORGANISATION_ID in ('US01')
        and a11.DISTRIBUTION_CHANNEL_ID in ('10'))
        and a14.CALENDAR_WEEK_NAME between 201901 and ''' + str(WEEK - 1) + ''' 
        group by a14.FISCAL_WEEK_NUMBER,
        (a14.FISCAL_WEEK_NUMBER_DESCR || ' ' || a14.START_DATE_OF_SAPYW),
        RIGHT(a16.CUSTOMER_HIER_LVL_1,CAST(10 AS INTEGER)),
        a14.CALENDAR_WEEK_NAME,
        (a14.CALENDAR_WEEK_LONG_DESCRIPTION || ' ' || a14.START_DATE_OF_SAPYW),
        a16.CUSTOMER_HIER_LVL_1_NAME,
        a13.DIVISION_ID,
        a17.DIVISION_NAME,
        a12.CATEGORY_SHORT_CODE,
        a12.CATEGORY_DESC,
        a12.SUB_CATEGORY_SHORT_CODE,
        a12.SUB_CATEGORY_DESC,
        a15.MATERIAL_PRICING_GROUP_ID,
        a18.MATERIAL_PRICING_GROUP_DESCRIPTION,
        TRIM (LEADING '0' FROM a13.MATERIAL_ID),
        a13.MATERIAL_DESCRIPTION
        '''
    
    #build dataframe from teradata query
    df = td_dataframe(select_db, query)
    
    #return transformed dataframe
    return transform_teradata(df)


def teradata_brand():
    """Pull weekly sell-in volume for a fixed list of brands.

    Runs the query below against DL_GBL_TAS_BI for sales organisation 'US01',
    distribution channel '10', fiscal years FY2019-FY2022, the brand short
    codes listed in the where clause and calendar weeks 201901 through
    ``WEEK - 1`` (module-level constant), then returns the result passed
    through ``transform_teradata``.
    """
    #SET QUERY_BAND = 'ApplicationName=MicroStrategy;Version=9.0;ClientUser=NEWATTER;Source=Vantage; Action=Brand COVID Performance;StartTime=20200901T113649;JobID=55922;Importance=666;'  FOR SESSION;
    select_db = "DATABASE DL_GBL_TAS_BI"

    #NOTE(review): the fiscal-year list and the brand short codes are hard-coded - confirm they are kept current
    query = '''
    select a14.FISCAL_WEEK_NUMBER as FISCAL_WEEK_NUMBER,
    (a14.FISCAL_WEEK_NUMBER_DESCR || ' ' || a14.START_DATE_OF_SAPYW) as FISCAL_WEEK,
    a14.CALENDAR_WEEK_NAME as CALENDAR_WEEK_NUMBER,
    (a14.CALENDAR_WEEK_LONG_DESCRIPTION || ' ' || a14.START_DATE_OF_SAPYW) as CALENDAR_WEEK,
    RIGHT(a16.CUSTOMER_HIER_LVL_1,CAST(10 AS INTEGER)) as CUSTOMER_HIER_LVL_1,
    a16.CUSTOMER_HIER_LVL_1_NAME as CUSTOMER_HIER_LVL_1_NAME,
    a13.DIVISION_ID as DIVISION,
    a17.DIVISION_NAME as DIVISION_NAME,
    a12.CATEGORY_SHORT_CODE as CATEGORY_SHORT_CODE,
    a12.CATEGORY_DESC as CATEGORY_DESC,
    a15.MATERIAL_PRICING_GROUP_ID as MATERIAL_PRICING_GROUP_ID,
    a18.MATERIAL_PRICING_GROUP_DESCRIPTION as MATERIAL_PRICING_GROUP_DESCRIPTION,
    TRIM (LEADING '0' FROM a13.MATERIAL_ID) as MATERIAL_ID,
    a13.MATERIAL_DESCRIPTION as MATERIAL_NAME,
    a12.BRAND_SHORT_CODE as BRAND_SHORT_CODE,
    a12.BRAND_DESC as BRAND_DESC,
    sum(a11.SALES_VOLUME_WEIGHT_LBS) as ACTUAL_VOLUME_LBS
    from DL_GBL_TAS_BI.FACT_SALES_ACTUAL as a11
    join DL_GBL_TAS_BI.VW_H_PRODUCT_ALL_SALES as a12
    on (a11.MATERIAL_ID = a12.MATERIAL_ID)
    join DL_GBL_TAS_BI.D_MATERIAL_DN_ALL as a13
    on (a11.MATERIAL_ID = a13.MATERIAL_ID)
    join DL_GBL_TAS_BI.D_TIME_FY_V6 as a14
    on (a11.ACCOUNTING_PERIOD_DATE = a14.DAY_CALENDAR_DATE)
    join DL_GBL_TAS_BI.D_MATERIAL_SALES_DATA as a15
    on (a11.DISTRIBUTION_CHANNEL_ID = a15.DISTRIBUTION_CHANNEL_ID and 
    a11.MATERIAL_ID = a15.MATERIAL_ID and 
    a11.SALES_ORGANISATION_ID = a15.SALES_ORGANISATION_ID)
    join DL_GBL_TAS_BI.VW_H_CUSTOMER_ALL_DIVISION00 as a16
    on (a11.CUSTOMER_ID = a16.CUSTOMER and 
    a11.DISTRIBUTION_CHANNEL_ID = a16.DISTRIBUTION_CHANNEL and 
    a11.SALES_ORGANISATION_ID = a16.SALES_ORGANISATION)
    join DL_GBL_TAS_BI.D_DIVISION as a17
    on (a13.DIVISION_ID = a17.DIVISION_ID)
    join DL_GBL_TAS_BI.D_MATERIAL_PRICING_GROUP as a18
    on (a15.MATERIAL_PRICING_GROUP_ID = a18.MATERIAL_PRICING_GROUP_ID)
    where (a14.FISCAL_YEAR_CODE in ('FY2019', 'FY2020', 'FY2021','FY2022')
    and a11.SALES_ORGANISATION_ID in ('US01')
    and a11.DISTRIBUTION_CHANNEL_ID in ('10')
    and a12.BRAND_SHORT_CODE in ('002', '005', '042', '536', '544', '545', '638', '659', '688', '694', '093'))
    and a14.CALENDAR_WEEK_NAME between 201901 and ''' + str(WEEK - 1) + ''' 
    group by a14.FISCAL_WEEK_NUMBER,
    (a14.FISCAL_WEEK_NUMBER_DESCR || ' ' || a14.START_DATE_OF_SAPYW),
    a14.CALENDAR_WEEK_NAME,
    (a14.CALENDAR_WEEK_LONG_DESCRIPTION || ' ' || a14.START_DATE_OF_SAPYW),
    RIGHT(a16.CUSTOMER_HIER_LVL_1,CAST(10 AS INTEGER)),
    a16.CUSTOMER_HIER_LVL_1_NAME,
    a13.DIVISION_ID,
    a17.DIVISION_NAME,
    a12.CATEGORY_SHORT_CODE,
    a12.CATEGORY_DESC,
    a15.MATERIAL_PRICING_GROUP_ID,
    a18.MATERIAL_PRICING_GROUP_DESCRIPTION,
    TRIM (LEADING '0' FROM a13.MATERIAL_ID),
    a13.MATERIAL_DESCRIPTION,
    a12.BRAND_SHORT_CODE,
    a12.BRAND_DESC
    ;
    '''

    #create dataframe using both functions td_to_pandas and td_dataframe
    df = td_dataframe(select_db, query)
    
    return transform_teradata(df)


def transform_teradata(df):
    """Return a standardized copy of a Teradata query result.

    * adds 'Consolidated Category': 'Potato' when ``category_desc`` is
      'Potato' or 'Sweet Potato', otherwise 'Prepared Foods' (missing
      categories included);
    * converts ``calendar_week_number`` to numeric (unparseable values become NaN);
    * casts ``actual_volume_lbs`` to float64;
    * renames the columns to the names used by the analysis functions.

    The input frame is not modified, so the function can safely be re-run on
    the same raw result.
    """
    is_potato = df['category_desc'].isin(['Potato', 'Sweet Potato'])

    #assign builds a new frame: the new column goes last, the week column is replaced in position
    transformed = df.assign(**{
        'Consolidated Category': np.where(is_potato, 'Potato', 'Prepared Foods'),
        #update calendar_week_name to numeric for future functions
        'calendar_week_number': pd.to_numeric(df['calendar_week_number'], errors = 'coerce'),
    })

    transformed = transformed.astype({'actual_volume_lbs':'float64'})

    return transformed.rename(columns={'actual_volume_lbs':'LBS',
                                       'calendar_week_number':'Calendar Week Year',
                                       'division_name':'Division Name',
                                       'material_pricing_group_description':'Material Pricing Group Description',
                                       'customer_hier_lvl_1_name':'Customer L1 Name',
                                       'brand_desc':'Brand Desc'})

### 4. Execute Analysis
Run cell below

In [7]:
#pull sales data from teradata
_sales = teradata_sales()
#_sales = df_test
#pull sales data by brand from teradata
#separate from the query above because of the brand filter
_brand = teradata_brand()


Database selected!
07/08/2022 07:49:38 AM
Query: Execution started...finished. 0:03:44.464201
Query: Fetching data started...finished. 0:17:22.561265
Query: Creating DataFrame for started...finished. 0:00:00.733931
Dim: (531537, 17)
Database selected!
07/08/2022 08:10:52 AM
Query: Execution started...finished. 0:01:31.837933
Query: Fetching data started...finished. 0:00:14.721962
Query: Creating DataFrame for started...finished. 0:00:00.023049
Dim: (19809, 17)


In [8]:
#blank list to add lists to
_list = []

#Output 1: Division Name - List 0
_list.append(['Division Name','Consolidated Category'])

#Output 2: MPG - List 1
_list.append(['Material Pricing Group Description', 'Consolidated Category'])

#Output 3: Customer L1 - List 2
_list.append(['Division Name','Customer L1 Name'])

#Create dataframes
output1 = process_list(_sales, _list[0])
output2 = process_list(_sales, _list[1])
output3 = process_list(_sales, _list[2])


#Output 4: Brand - List 3
_list.append(['Brand Desc'])

output4 = process_list(_brand, _list[3])


print('All done')

All done


In [41]:
#table_list = ['SELLIN_DIVISION','SELLIN_MPG','SELLIN_BRAND','SELLIN_RETAIL','SELLIN_CUSTOMER_L1']
table_list = ['SELLIN_DIVISION','SELLIN_MPG','SELLIN_BRAND']

# Write one sheet per output with lower-cased column names.
# The lower-casing is applied to a renamed copy: changing output*.columns in place
# would turn 'Country' into 'country' and break the upload step, which looks the
# column up by its original name.
# The context manager saves and closes the workbook (ExcelWriter.save() no longer
# exists in pandas 2.x).
with pd.ExcelWriter(PATH + 'Weekly Sellin Data.xlsx', engine='xlsxwriter') as writer:
    for sheet_name, frame in zip(table_list, (output1, output2, output4)):
        frame.rename(columns=str.lower).to_excel(writer, sheet_name = sheet_name, index = False)

print('All done', flush = True)

All done


### 5. Teradata Update
Run cell below

In [9]:
def td_upload(select_db, df, table_name):
    """Replace the rows of ``table_name`` for the frame's country with the contents of ``df``.

    Opens a Teradata connection, selects the database with ``select_db``,
    deletes the existing rows via ``delete_from_td`` and inserts ``df`` via
    ``insert_into_td``.

    Connection settings come from the environment: TERADATA_HOST and
    TERADATA_USER (falling back to the previous defaults) and
    TERADATA_PASSWORD. When the password variable is not set the user is
    prompted once and the value is kept in this process's environment for
    later calls.
    """
    import os
    from getpass import getpass

    host = os.environ.get('TERADATA_HOST', '172.29.3.43')
    user = os.environ.get('TERADATA_USER', 'PNWATTERS')

    #the password used to be a literal in this notebook; it must not live in
    #source control (and the previously committed value should be rotated)
    password = os.environ.get('TERADATA_PASSWORD')
    if not password:
        password = getpass(f'Teradata password for {user}: ')
        os.environ['TERADATA_PASSWORD'] = password

    with teradatasql.connect(None,
                             host=host,
                             user=user,
                             password=password) as con:
        with con.cursor() as cur:
            cur.execute (select_db)
            d = dt.now().strftime('%m/%d/%Y %H:%M:%S %p')
            print(f'Database selected! {d}', flush=True)           

            #delete first, then insert: the table ends up holding only this run's rows for the country
            delete_from_td(df, table_name, cur)
            insert_into_td(df, table_name, cur)

def delete_from_td(df, table_name, cur):
    """Delete the rows of ``table_name`` whose "Country" equals the frame's country.

    The country is the first value (in sorted order) of ``df['Country']``;
    the DELETE statement is executed on the supplied cursor.
    """
    #distinct countries, sorted by the groupby; only the first one is used
    countries = df.groupby('Country').size().reset_index().drop(columns=0)
    distributor = countries.to_numpy()[0][0]

    print(f'Deleting records for: {distributor} in table: {table_name}', flush = True)

    statement = '''
    DELETE FROM ''' + table_name  + ''' 
    WHERE "Country" = ''' + "'" + distributor + "'"

    cur.execute (statement)
    
def insert_into_td(df, table_name, cur):
    """Insert every row of ``df`` into ``table_name`` with one batched INSERT.

    The statement has one ``?`` placeholder per DataFrame column and the rows
    are passed as a list of lists, so column order in ``df`` must match the
    table's column order. An empty frame is skipped instead of sending a
    parameterised statement with no rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to insert.
    table_name : str
        Target table name (interpolated into the SQL; must be trusted).
    cur : cursor
        Open database cursor; only its ``execute`` method is used.
    """
    if df.empty:
        print(f'No records to insert into {table_name}', flush = True)
        return

    # NOTE(review): NaN cells are passed through as float NaN, not None -
    # confirm the target columns/driver accept that.
    insert_list = df.values.tolist()

    #one "?" placeholder per column: "?, ?, ..."
    insert_columns = ', '.join(['?'] * len(df.columns))

    print(f'Inserting records into {table_name}', flush = True)

    query = "INSERT INTO " + table_name  + " (" + insert_columns + ")"
    #fastload variant, currently disabled:
    #query = "{fn teradata_try_fastload}INSERT INTO " + table_name  + " (" + insert_columns + ")"

    cur.execute (query, insert_list)

    print(f'Inserted {len(insert_list)} records', flush = True)
    

# Statement handed to td_upload to choose the default database
select_db = 'DATABASE DL_NA_PROTOTYPING'

# (frame, destination table) pairs, uploaded one after another in this order
uploads = [
    (output1, 'SELLIN_DIVISION'),
    (output2, 'SELLIN_MPG'),
    (output3, 'SELLIN_CUSTOMER_L1'),
    (output4, 'SELLIN_BRAND'),
]

for frame, target_table in uploads:
    td_upload(select_db, frame, target_table)

print('All done', flush = True)

Database selected! 07/08/2022 08:12:50 AM
Deleting records for: US in table: SELLIN_DIVISION
Inserting records into SELLIN_DIVISION
Inserted 2013 records
Database selected! 07/08/2022 08:12:56 AM
Deleting records for: US in table: SELLIN_MPG
Inserting records into SELLIN_MPG
Inserted 7137 records
Database selected! 07/08/2022 08:13:07 AM
Deleting records for: US in table: SELLIN_CUSTOMER_L1
Inserting records into SELLIN_CUSTOMER_L1
Inserted 36783 records
Database selected! 07/08/2022 08:13:51 AM
Deleting records for: US in table: SELLIN_BRAND
Inserting records into SELLIN_BRAND
Inserted 2013 records
All done


### 6. Output to Excel
Only used for Brand

In [10]:
# Teradata tables to export; each one becomes a sheet of the same name
table_list = ['SELLIN_DIVISION','SELLIN_MPG','SELLIN_BRAND','SELLIN_RETAIL','SELLIN_CUSTOMER_L1']

select_db = 'DATABASE DL_NA_PROTOTYPING'

# Create a Pandas Excel writer using XlsxWriter as the engine.
# The context manager saves and closes the workbook on exit - including when a
# query or sheet write raises - and replaces writer.save(), which no longer
# exists in pandas 2.x.
with pd.ExcelWriter(PATH + 'Weekly Sellin Data.xlsx', engine='xlsxwriter') as writer:
    for table_name in table_list:
        print(f'Exporting table: {table_name}', flush = True)
        # Same SQL text as before (leading/trailing newline and indent kept)
        query = '\n    SELECT * FROM ' + table_name + '\n    '
        df = td_dataframe(select_db, query)
        df.to_excel(writer, sheet_name = table_name, index = False)

print('All done', flush = True)

Exporting table: SELLIN_DIVISION
Database selected!
07/08/2022 08:20:46 AM
Query: Execution started...finished. 0:00:05.402885
Query: Fetching data started...finished. 0:00:00.344795
Query: Creating DataFrame for started...finished. 0:00:00.027756
Dim: (3300, 31)
Exporting table: SELLIN_MPG
Database selected!
07/08/2022 08:20:55 AM
Query: Execution started...finished. 0:00:07.401392
Query: Fetching data started...finished. 0:00:00.742142
Query: Creating DataFrame for started...finished. 0:00:00.045977
Dim: (7137, 31)
Exporting table: SELLIN_BRAND
Database selected!
07/08/2022 08:21:07 AM
Query: Execution started...finished. 0:00:05.051469
Query: Fetching data started...finished. 0:00:00.378976
Query: Creating DataFrame for started...finished. 0:00:00.027109
Dim: (3821, 31)
Exporting table: SELLIN_RETAIL
Database selected!
07/08/2022 08:21:16 AM
Query: Execution started...finished. 0:00:01.511803
Query: Fetching data started...finished. 0:00:00.196695
Query: Creating DataFrame for start

# Fill Rates

In [4]:
def teradata_fillrate(start_date='2021-01-01'):
    """Pull weekly ordered vs. delivered case totals from Teradata.

    Builds one aggregate query over FACT_OM_ORDER_FULFILLMENT joined to the
    material and customer tables, filtered to PGI-complete 'ZOR' documents on
    distribution channel 10 for sales organisations US01 and CA01, and grouped
    by sales organisation, customer L1/L2, product attributes, material and
    week ending. The query is run through ``td_dataframe``.

    Parameters
    ----------
    start_date : str or date-like, default '2021-01-01'
        Only rows whose ACTUAL_OR_PLANNED_PGI_DATE is strictly after this date
        are included. Strings must be in ``YYYY-MM-DD`` form; objects with a
        ``strftime`` method (date, datetime, Timestamp) are also accepted.

    Returns
    -------
    Whatever ``td_dataframe(select_db, query)`` returns.

    Raises
    ------
    ValueError
        If ``start_date`` is a string that is not a valid ``YYYY-MM-DD`` date.
    """
    #SET QUERY_BAND = 'ApplicationName=MicroStrategy;Version=9.0;ClientUser=NEWATTER;Source=Vantage; Action=Brand COVID Performance;StartTime=20200901T113649;JobID=55922;Importance=666;'  FOR SESSION;
    select_db = "DATABASE DL_GBL_TAS_BI"

    # Normalise the cutoff to YYYY-MM-DD. Parsing it first guarantees that only
    # a plain date is ever interpolated into the SQL text below.
    if hasattr(start_date, 'strftime'):
        cutoff = start_date.strftime('%Y-%m-%d')
    else:
        cutoff = dt.strptime(str(start_date), '%Y-%m-%d').strftime('%Y-%m-%d')

    query = f'''
    SELECT
    a1.SALES_ORGANISATION_ID,
    a3.CUSTOMER_HIER_LVL_1_NAME as CUSTOMER_L1,
    a3.CUSTOMER_HIER_LVL_2_NAME as CUSTOMER_L2,
    a2.DIVISION_DESCRIPTION,
    a2.CATEGORY_DESC,
    a2.PRODUCT_GROUP_FORMAT_DESC,
    a2.PRODUCT_GROUP_SUB_FORMAT_DESC,
    a2.MATERIAL_ID,
    a2.MATERIAL_DESCRIPTION,
    TD_WEEK_END(a1.ACTUAL_OR_PLANNED_PGI_DATE)+1 as WEEK_ENDING,
    sum(cast(a1.SUM_ORDER_QUANTITY as float)) as ORDERED_CASES,
    sum(cast(a1.DELIVERED_QUANTITY as float)) as DELIVERED_CASES
    FROM DL_GBL_TAS_BI.FACT_OM_ORDER_FULFILLMENT as a1
    JOIN DL_GBL_TAS_BI.D_MATERIAL_DN_ALL as a2
        on a1.MATERIAL_ID = a2.MATERIAL_ID
    JOIN DL_GBL_TAS_BI.H_CUSTOMER as a3
        on a1.SOLD_TO_CUSTOMER_ID = a3.CUSTOMER_ID
        and a1.SALES_ORGANISATION_ID = a3.SALES_ORGANISATION
        and a1.DISTRIBUTION_CHANNEL_ID = a3.DISTRIBUTION_CHANNEL
    WHERE a1.PGI_COMPLETE = 'PGIED ORDER'
    and a1.DOCUMENT_TYPE = 'ZOR'
    and a1.DISTRIBUTION_CHANNEL_ID = 10
    and a1.SALES_ORGANISATION_ID in ('US01', 'CA01')
    and a1.ACTUAL_OR_PLANNED_PGI_DATE > '{cutoff}'

    GROUP BY
    a1.SALES_ORGANISATION_ID,
    a3.CUSTOMER_HIER_LVL_1_NAME,
    a3.CUSTOMER_HIER_LVL_2_NAME,
    a2.DIVISION_DESCRIPTION,
    a2.CATEGORY_DESC,
    a2.PRODUCT_GROUP_FORMAT_DESC,
    a2.PRODUCT_GROUP_SUB_FORMAT_DESC,
    a2.MATERIAL_ID,
    a2.MATERIAL_DESCRIPTION,
    TD_WEEK_END(a1.ACTUAL_OR_PLANNED_PGI_DATE)+1
    ;'''

    #run the query and build the dataframe via td_dataframe
    df = td_dataframe(select_db, query)
    
    return df

# Run the fill-rate extract once and keep the result for the cells that follow.
# The run recorded below spent roughly five minutes on query execution plus fetch.
fill_rate = teradata_fillrate()

Database selected!
08/17/2022 10:42:33 AM
Query: Execution started...finished. 0:02:54.793075
Query: Fetching data started...finished. 0:02:20.240372
Query: Creating DataFrame for started...finished. 0:00:00.439261
Dim: (489352, 12)


In [5]:
# True where the category text mentions 'Potato'; a missing category stays missing
is_potato = fill_rate['category_desc'].str.contains('Potato')

fill_rate = fill_rate.assign(
    # pd.to_datetime replaces astype('datetime64'): the unit-less dtype string
    # is rejected by pandas 2.x.
    week_ending = pd.to_datetime(fill_rate['week_ending']),
    # Collapse category_desc to two buckets from a single mask. Values absent
    # from the mapping (i.e. missing categories) map to NaN instead of breaking
    # the boolean .loc mask used previously.
    category_desc = is_potato.map({True: 'Potato', False: 'Prepared Foods'}),
)

display(fill_rate.head())

fill_rate.info()

Unnamed: 0,sales_organisation_id,customer_l1,customer_l2,division_description,category_desc,product_group_format_desc,product_group_sub_format_desc,material_id,material_description,week_ending,ordered_cases,delivered_cases
0,US01,BOR NATIONAL L1,BOR US - CENTRAL L2,Food Service,Potato,SPECIALTY,CUT,OIF252A,FS OREIDA DICED RNDM CT HB 6X5,2021-07-04,45.0,45.0
1,US01,ENDICO L1,ENDICO L2,Food Service,Potato,CONVENTIONAL FRIES,FROZEN FRIES,000000001000000529,"DD_END 6/5 3/8"" STRAIGHT CUT A GRADE FRY",2021-10-10,1150.0,1150.0
2,US01,CASH WA DISTRIBUTING L1,CASH WA DISTRIBUTING L2,Food Service,Prepared Foods,APPETIZERS,WHOLE VEGETABLE,000000000080008473,GCP BAT WHLE MSHRMS 6X2LB,2021-04-04,60.0,60.0
3,US01,KROGER L1,KROGER L2,Retail,Prepared Foods,LOCAL PORTFOLIO,LOCAL PORTFOLIO OTHER,000000001000007301,RT KRO BCN TWBE POT 8X10OZ,2021-07-25,1104.0,74.0
4,US01,CASH WA DISTRIBUTING L1,CASH WA DISTRIBUTING L2,Food Service,Prepared Foods,APPETIZERS,ONION RINGS,000000000070010011,BCI 5/8 BRB THCK ON RINGS 6X2.5LB,2021-08-15,480.0,0.0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 489352 entries, 0 to 489351
Data columns (total 12 columns):
 #   Column                         Non-Null Count   Dtype         
---  ------                         --------------   -----         
 0   sales_organisation_id          489352 non-null  object        
 1   customer_l1                    489352 non-null  object        
 2   customer_l2                    489352 non-null  object        
 3   division_description           489352 non-null  object        
 4   category_desc                  489352 non-null  object        
 5   product_group_format_desc      489352 non-null  object        
 6   product_group_sub_format_desc  489351 non-null  object        
 7   material_id                    489352 non-null  object        
 8   material_description           489352 non-null  object        
 9   week_ending                    489352 non-null  datetime64[ns]
 10  ordered_cases                  489351 non-null  float64       
 11  

In [6]:
# Save the fill-rate table as FILL_RATE.csv. The path is relative, so the file
# lands in the current working directory. to_csv is called with its defaults,
# so the DataFrame index is written as an unnamed first column.
fill_rate.to_csv('FILL_RATE.csv')