### McCain Sell-In
### 1. Libraries and constants
Run cell below

In [1]:
import teradatasql
import pandas as pd
import datetime
from datetime import datetime as dt
from datetime import timedelta

#path where dictionary file can be found
#Neil
DICTIONARY = r'C:\Users\NEWATTER\OneDrive - McCain Foods Limited\Distributor Sell-Out Dictionaries\\'
#Joe
#DICTIONARY = r'C:\Users\jcronk\McCain Foods Limited\GNA Data Strategy & Analytics - COVID Recovery\Distributor Sell-Out Dictionaries\\'

#main path
#Neil
PATH = r'C:\Users\NEWATTER\OneDrive - McCain Foods Limited\Historical Sell-Out Sales\\'
#Joe
#PATH = r'C:\Users\jcronk\McCain Foods Limited\GNA Data Strategy & Analytics - COVID Recovery\Historical Sell-Out Sales\\'

#current fiscal year and week (YYYYWW)
TIME = pd.read_excel(DICTIONARY + 'Time Definitions.xlsx')

#the current week is pulled from the time dictionary table
WEEK = int(TIME[(TIME['Week Starting (Mon)'] <= dt.now()) & (TIME['Week Ending (Sun)'] >= dt.now())]['Calendar Week Year'].values)


### 2. Calculation Functions
Run cell below

In [2]:
def add_rolling(df, _list):
    #groupby _list
    df = df.groupby(_list, dropna = False)[['LBS','LBS_LY','LBS_Baseline']].sum().reset_index()
    
    #set index to all but last column in list
    df = df.set_index(_list)
    
    #add new metric SMA_4 (simple moving average - 4 periods)
    #level = all but last 2 items in list
    df['LBS_Lag_1'] = df.groupby(level=_list[0:-1])['LBS'].shift(periods = 1)
    df['LBS_Lag_2'] = df.groupby(level=_list[0:-1])['LBS'].shift(periods = 2)
    df['LBS_Lag_3'] = df.groupby(level=_list[0:-1])['LBS'].shift(periods = 3)
    df['LBS_Lag_4'] = df.groupby(level=_list[0:-1])['LBS'].shift(periods = 4)
    
    df['SMA_4'] = df.groupby(level=_list[0:-1])['LBS'].apply(lambda x: x.rolling(4, min_periods=1).mean())
    df['SMA_8'] = df.groupby(level=_list[0:-1])['LBS'].apply(lambda x: x.rolling(8, min_periods=1).mean())
    df['SMA_12'] = df.groupby(level=_list[0:-1])['LBS'].apply(lambda x: x.rolling(12, min_periods=1).mean())
    
    df['SMA_4_LY'] = df.groupby(level=_list[0:-1])['LBS_LY'].apply(lambda x: x.rolling(4, min_periods=1).mean())
    df['SMA_8_LY'] = df.groupby(level=_list[0:-1])['LBS_LY'].apply(lambda x: x.rolling(8, min_periods=1).mean())
    df['SMA_12_LY'] = df.groupby(level=_list[0:-1])['LBS_LY'].apply(lambda x: x.rolling(12, min_periods=1).mean())
    
    df['SMA_4_Baseline'] = df.groupby(level=_list[0:-1])['LBS_Baseline'].apply(lambda x: x.rolling(4, min_periods=1).mean())
    df['SMA_8_Baseline'] = df.groupby(level=_list[0:-1])['LBS_Baseline'].apply(lambda x: x.rolling(8, min_periods=1).mean())
    df['SMA_12_Baseline'] = df.groupby(level=_list[0:-1])['LBS_Baseline'].apply(lambda x: x.rolling(12, min_periods=1).mean())
    
    df['LBS_Baseline_Lag_1'] = df.groupby(level=_list[0:-1])['LBS_Baseline'].shift(periods = 1)
    df['LBS_LY_Lag_1'] = df.groupby(level=_list[0:-1])['LBS'].shift(periods = 1)
    
    df['SMA_4_Lag_1'] = df.groupby(level=_list[0:-1])['SMA_4'].shift(periods = 1)
    df['SMA_4_LY_Lag_1'] = df.groupby(level=_list[0:-1])['SMA_4_LY'].shift(periods = 1)
    df['SMA_4_Baseline_Lag_1'] = df.groupby(level=_list[0:-1])['SMA_4_Baseline'].shift(periods = 1)
    
    return df.reset_index()


def add_last_year(df, _list):
    #list of groupby columns
    #last item in list is Calendar Week Year which is used to pull previous history (Baseline Week = Calendar Week Year) of copied dataframe
    _groupby = _list.copy()
    
    _merge_yoy = _list.copy()[0:-1]
    _merge_yoy.extend(['YOY Week'])
    
    _merge_baseline = _list.copy()[0:-1]
    _merge_baseline.extend(['Baseline Week'])
    
    df1 = df.groupby(_list, dropna = False)['LBS'].sum().reset_index()
    
    #groupby _list
    df_new = df.groupby(_list, dropna = False)['LBS'].sum().reset_index()
    
    #add week dimensions to main dataframe
    df_new = df_new.merge(TIME[['Calendar Week Year','YOY Week','Baseline Week']], how = 'left', left_on = 'Calendar Week Year', right_on = 'Calendar Week Year')
    
    df_new = df_new.merge(df1, how='left', left_on=_merge_yoy, right_on=_groupby).drop(columns={'Calendar Week Year_y'}).rename(columns={'LBS_y':'LBS_LY'})
    
    df_new = df_new.merge(df1, how='left', left_on=_merge_baseline, right_on=_groupby).drop(columns={'Calendar Week Year'}).rename(columns={
        'LBS':'LBS_Baseline','Calendar Week Year_x':'Calendar Week Year','LBS_x':'LBS'})
    
    return df_new


def add_precovid(df, _list, begin, end):
    #datefield should be last in _list
    datefield = _list[-1]
          
    #remove datefield from list
    _list = _list[0:-1]
    
    #filter data not using last and rename columns
    _df = df[(df[datefield] >= begin) & (df[datefield] <= end)].groupby(_list)['LBS'].sum() / 52
    
    return df.merge(
        _df, how = 'left', left_on = _list, right_on = _list).rename(
        columns = {'LBS_x':'LBS', 'LBS_y':'LBS_PRECOVID'}).fillna(
        value = {'LBS_PRECOVID': 0})


def add_time(df):
    df = df.merge(TIME[['Calendar Week Year','Week Starting (Sun)','Week Ending (Sat)', 'COVID Week']],
                   how = 'left', 
                   on = 'Calendar Week Year')
    
    df = df.merge(TIME[['Calendar Week Year','YOY Week','Baseline Week']], how = 'left', left_on = 'Calendar Week Year', right_on = 'Calendar Week Year')
    
    return df

def add_weight(df, _list):
    test = df.groupby(_list)[['LBS','LBS_Baseline']].sum().reset_index()
    test['Wt'] = test['LBS'] / test.groupby(['Calendar Week Year'])['LBS'].transform('sum')
    
    return test['Wt']

def analyze_1(df, _list, begin, end):
    if 'Calendar Week Year' not in _list:
        _list.extend(['Calendar Week Year'])
    
     #add last year lbs
    df = add_last_year(df, _list)
    
    #add rolling calculation
    df = add_rolling(df, _list)
        
    #add preCOVID baseline
    df = add_precovid(df, _list, begin, end)
    
    if _list[0] == 'Brand Desc':
        df['Wt'] = add_weight(df, _list)
    
    df = df.round({
        'LBS' : 2,    
        'SMA_4' : 2,
        'SMA_8' : 2,
        'SMA_12' : 2,
        'LBS_LY' : 2,    
        'SMA_4_LY' : 2,
        'SMA_8_LY' : 2,
        'SMA_12_LY' : 2,
        'LBS_Baseline' : 2,    
        'SMA_4_Baseline' : 2,
        'SMA_8_Baseline' : 2,
        'SMA_12_Baseline' : 2,
        'LBS_PRECOVID' : 2,
        'LBS_Lag_1' : 2,
        'LBS_Lag_2' : 2,
        'LBS_Lag_3' : 2,
        'LBS_Lag_4' : 2,
        'LBS_Baseline_Lag_1': 2,
        'LBS_LY_Lag_1': 2,
        'SMA_4_Lag_1' : 2,
        'SMA_4_LY_Lag_1' : 2,
        'SMA_4_Baseline_Lag_1' : 2
        
    }).fillna(value = {
        'LBS' : 0,    
        'SMA_4' : 0,
        'SMA_8' : 0,
        'SMA_12' : 0,
        'LBS_LY' : 0,    
        'SMA_4_LY' : 0,
        'SMA_8_LY' : 0,
        'SMA_12_LY' : 0,
        'LBS_Baseline' : 0,    
        'SMA_4_Baseline' : 0,
        'SMA_8_Baseline' : 0,
        'SMA_12_Baseline' : 0,
        'LBS_PRECOVID' : 0,
        'LBS_Lag_1' : 0,
        'LBS_Lag_2' : 0,
        'LBS_Lag_3' : 0,
        'LBS_Lag_4' : 0,
        'LBS_Baseline_Lag_1': 2,
        'LBS_LY_Lag_1': 2,
        'SMA_4_Lag_1' : 0,
        'SMA_4_LY_Lag_1' : 0,
        'SMA_4_Baseline_Lag_1' : 0
    })
    
    return df


def save_backup(df, file_name):
    
    df.to_csv(BACKUP + file_name)
    
    return


def td_to_pandas(query, cur, title=''):
    _data = []
    _start=dt.now()
    print(dt.now().strftime('%m/%d/%Y %r'))
    print(f'{title} Execution started...', end='', flush=True)
    cur.execute (query)
    print(f'finished. {dt.now() - _start}', flush=True) 
    _start_fetch=dt.now()
    print(f'{title} Fetching data started...', end='', flush=True)
    for row in cur.fetchall():
        _data.append(row) 
    print(f'finished. {dt.now() - _start_fetch}', flush=True) 
    _start=dt.now()
    print(f'{title} Creating DataFrame for started...', end='', flush=True)
    _df = pd.DataFrame(_data)
    _df.columns = [x[0].replace('SAP_', '').lower() for x in cur.description]
    print(f'finished. {dt.now() - _start}', flush=True)
    return _df


def td_dataframe(select_db, query):
    with teradatasql.connect(None, 
                         host='172.29.3.43',
                         user='PNWATTERS',
                         password='teradata123') as con:
        with con.cursor() as cur:
            cur.execute (select_db)
            print('Database selected!', flush=True)            
            dim_df = td_to_pandas(query, cur, 'Query:')
            print('Dim:', dim_df.shape)
    
    return dim_df


def process_list(df, work_list):
    
    _process = analyze_1(df, work_list, 201910, 202009)
    
    _process['Country'] = 'Canada'
    
    _process = add_time(_process)
    
    #for standardizing output
    work_list.extend(['Country','LBS','SMA_4','SMA_8','SMA_12',
                      'YOY Week','LBS_LY','SMA_4_LY','SMA_8_LY','SMA_12_LY',
                      'Baseline Week','LBS_Baseline','SMA_4_Baseline','SMA_8_Baseline','SMA_12_Baseline',
                      'LBS_Lag_1','LBS_Lag_2','LBS_Lag_3','LBS_Lag_4','LBS_Baseline_Lag_1','LBS_LY_Lag_1',
                      'SMA_4_Lag_1', 'SMA_4_LY_Lag_1', 'SMA_4_Baseline_Lag_1',
                      'LBS_PRECOVID','Week Starting (Sun)','Week Ending (Sat)','COVID Week'])
    
    if work_list[0] == 'Brand Desc':
        work_list.extend(['Wt'])
        
    return _process[work_list]

### 3. Teradata Queries
Run cell below

In [3]:

#exclude Wong Wing - 4/7/21
#('0048','0052','0053','0054','0055','0079','0546','0547')

def teradata_sales():
    #SET QUERY_BAND = 'ApplicationName=MicroStrategy;Version=9.0;ClientUser=NEWATTER;Source=Vantage; Action=Sysco COVID Performance;StartTime=20200901T131109;JobID=68215;Importance=666;'  FOR SESSION;
    select_db = "DATABASE DL_GBL_TAS_BI"

    query ='''select a14.FISCAL_WEEK_NUMBER as FISCAL_WEEK_NUMBER,
    (a14.FISCAL_WEEK_NUMBER_DESCR || ' ' || a14.START_DATE_OF_SAPYW) as FISCAL_WEEK,
    a14.CALENDAR_WEEK_NAME as CALENDAR_WEEK_NUMBER,
    (a14.CALENDAR_WEEK_LONG_DESCRIPTION || ' ' || a14.START_DATE_OF_SAPYW) as CALENDAR_WEEK,
    RIGHT(a15.CUSTOMER_HIER_LVL_1,CAST(10 AS INTEGER)) as CUSTOMER_HIER_LVL_1,
    a15.CUSTOMER_HIER_LVL_1_NAME as CUSTOMER_HIER_LVL_1_NAME,
    RIGHT(a15.CUSTOMER_HIER_LVL_2,CAST(10 AS INTEGER)) as CUSTOMER_HIER_LVL_2,
    a15.CUSTOMER_HIER_LVL_2_NAME as CUSTOMER_HIER_LVL_2_NAME,
    a11.CUSTOMER_ID as CUSTOMER_ID,
    a17.CUSTOMER_NAME as CUSTOMER_NAME,
    a13.DIVISION_ID as DIVISION,
    a16.DIVISION_NAME as DIVISION_NAME,
    a12.CATEGORY_SHORT_CODE as CATEGORY_SHORT_CODE,
    a12.CATEGORY_DESC as CATEGORY_DESC,
    a12.SUB_CATEGORY_SHORT_CODE as SUB_CATEGORY_SHORT_CODE,
    a12.SUB_CATEGORY_DESC as SUB_CATEGORY_DESC,
    a12.FAMILY_SHORT_CODE as FAMILY_SHORT_CODE,
    a12.FAMILY_DESC as FAMILY_DESC,
    TRIM (LEADING '0' FROM a13.MATERIAL_ID) as MATERIAL_ID,
    a13.MATERIAL_DESCRIPTION as MATERIAL_NAME,
    sum(a11.SALES_VOLUME_WEIGHT_LBS) as ACTUAL_VOLUME_LBS
    from DL_GBL_TAS_BI.FACT_SALES_ACTUAL as a11
    join DL_GBL_TAS_BI.VW_H_PRODUCT_ALL_SALES as a12
     on (a11.MATERIAL_ID = a12.MATERIAL_ID)
    join DL_GBL_TAS_BI.D_MATERIAL_DN_ALL as a13
     on (a11.MATERIAL_ID = a13.MATERIAL_ID)
    join DL_GBL_TAS_BI.D_TIME_FY_V6 as a14
     on (a11.ACCOUNTING_PERIOD_DATE = a14.DAY_CALENDAR_DATE)
    join DL_GBL_TAS_BI.VW_H_CUSTOMER_ALL_DIVISION00 as a15
     on (a11.CUSTOMER_ID = a15.CUSTOMER and 
    a11.DISTRIBUTION_CHANNEL_ID = a15.DISTRIBUTION_CHANNEL and 
    a11.SALES_ORGANISATION_ID = a15.SALES_ORGANISATION)
    join DL_GBL_TAS_BI.D_DIVISION as a16
     on (a13.DIVISION_ID = a16.DIVISION_ID)
    join DL_GBL_TAS_BI.D_CUSTOMER as a17
     on (a11.CUSTOMER_ID = a17.CUSTOMER_ID)
    where(a14.FISCAL_YEAR_CODE in ('FY2019', 'FY2020', 'FY2021','FY2022')
     and a11.SALES_ORGANISATION_ID in ('CA01')
     and a11.DISTRIBUTION_CHANNEL_ID in ('10'))
     and a12.FAMILY_SHORT_CODE not in ('0048','0052','0053','0054','0055','0079','0546','0547')
     and a14.CALENDAR_WEEK_NAME between 201901 and ''' + str(WEEK - 1) + ''' 
    group by a14.FISCAL_WEEK_NUMBER,
    (a14.FISCAL_WEEK_NUMBER_DESCR || ' ' || a14.START_DATE_OF_SAPYW),
    a14.CALENDAR_WEEK_NAME,
    (a14.CALENDAR_WEEK_LONG_DESCRIPTION || ' ' || a14.START_DATE_OF_SAPYW),
    RIGHT(a15.CUSTOMER_HIER_LVL_1,CAST(10 AS INTEGER)),
    a15.CUSTOMER_HIER_LVL_1_NAME,
    RIGHT(a15.CUSTOMER_HIER_LVL_2,CAST(10 AS INTEGER)),
    a15.CUSTOMER_HIER_LVL_2_NAME,
    a11.CUSTOMER_ID,
    a17.CUSTOMER_NAME,
    a13.DIVISION_ID,
    a16.DIVISION_NAME,
    a12.CATEGORY_SHORT_CODE,
    a12.CATEGORY_DESC,
    a12.SUB_CATEGORY_SHORT_CODE,
    a12.SUB_CATEGORY_DESC,
    a12.FAMILY_SHORT_CODE,
    a12.FAMILY_DESC,
    TRIM (LEADING '0' FROM a13.MATERIAL_ID),
    a13.MATERIAL_DESCRIPTION;
    '''
    
    #build dataframe from teradata query
    df = td_dataframe(select_db, query)
    
    #return transformed dataframe
    return transform_teradata(df)


def teradata_brand():
    #SET QUERY_BAND = 'ApplicationName=MicroStrategy;Version=9.0;ClientUser=NEWATTER;Source=Vantage; Action=Brand COVID Performance;StartTime=20200901T113649;JobID=55922;Importance=666;'  FOR SESSION;
    select_db = "DATABASE DL_GBL_TAS_BI"

    query ='''select a14.FISCAL_WEEK_NUMBER as FISCAL_WEEK_NUMBER,
    (a14.FISCAL_WEEK_NUMBER_DESCR || ' ' || a14.START_DATE_OF_SAPYW) as FISCAL_WEEK,
    a14.CALENDAR_WEEK_NAME as CALENDAR_WEEK_NUMBER,
    (a14.CALENDAR_WEEK_LONG_DESCRIPTION || ' ' || a14.START_DATE_OF_SAPYW) as CALENDAR_WEEK,
    RIGHT(a15.CUSTOMER_HIER_LVL_1,CAST(10 AS INTEGER)) as CUSTOMER_HIER_LVL_1,
    a15.CUSTOMER_HIER_LVL_1_NAME as CUSTOMER_HIER_LVL_1_NAME,
    a11.CUSTOMER_ID as CUSTOMER_ID,
    a17.CUSTOMER_NAME as CUSTOMER_NAME,
    a13.DIVISION_ID as DIVISION,
    a16.DIVISION_NAME as DIVISION_NAME,
    a12.CATEGORY_SHORT_CODE as CATEGORY_SHORT_CODE,
    a12.CATEGORY_DESC as CATEGORY_DESC,
    a12.BRAND_SHORT_CODE as BRAND_SHORT_CODE,
    a12.BRAND_DESC as BRAND_DESC,
    TRIM (LEADING '0' FROM a13.MATERIAL_ID) as MATERIAL_ID,
    a13.MATERIAL_DESCRIPTION  MATERIAL_NAME,
    sum(a11.SALES_VOLUME_WEIGHT_LBS) as ACTUAL_VOLUME_LBS
    from DL_GBL_TAS_BI.FACT_SALES_ACTUAL as a11
    join DL_GBL_TAS_BI.VW_H_PRODUCT_ALL_SALES as a12
     on (a11.MATERIAL_ID = a12.MATERIAL_ID)
    join DL_GBL_TAS_BI.D_MATERIAL_DN_ALL as a13
     on (a11.MATERIAL_ID = a13.MATERIAL_ID)
    join DL_GBL_TAS_BI.D_TIME_FY_V6 as a14
     on (a11.ACCOUNTING_PERIOD_DATE = a14.DAY_CALENDAR_DATE)
    join DL_GBL_TAS_BI.VW_H_CUSTOMER_ALL_DIVISION00 as a15
     on (a11.CUSTOMER_ID = a15.CUSTOMER and 
    a11.DISTRIBUTION_CHANNEL_ID = a15.DISTRIBUTION_CHANNEL and 
    a11.SALES_ORGANISATION_ID = a15.SALES_ORGANISATION)
    join DL_GBL_TAS_BI.D_DIVISION as a16
     on (a13.DIVISION_ID = a16.DIVISION_ID)
    join DL_GBL_TAS_BI.D_CUSTOMER as a17
     on (a11.CUSTOMER_ID = a17.CUSTOMER_ID)
    where (a14.FISCAL_YEAR_CODE in ('FY2019', 'FY2020', 'FY2021','FY2022')
     and a11.SALES_ORGANISATION_ID in ('CA01')
     and a11.DISTRIBUTION_CHANNEL_ID in ('10')
     and a12.BRAND_SHORT_CODE in ('042', '068', '104', '112', '156', '225', '240', '500', '005', '002'))
     and a14.CALENDAR_WEEK_NAME between 201901 and ''' + str(WEEK - 1) + '''
     and a11.CUSTOMER_ID not in ('1000076341','1000076333','1000076214','1000076325','1000076175','1000086306','1000086305','1000086301')
    group by a14.FISCAL_WEEK_NUMBER,
    (a14.FISCAL_WEEK_NUMBER_DESCR || ' ' || a14.START_DATE_OF_SAPYW),
    a14.CALENDAR_WEEK_NAME,
    (a14.CALENDAR_WEEK_LONG_DESCRIPTION || ' ' || a14.START_DATE_OF_SAPYW),
    RIGHT(a15.CUSTOMER_HIER_LVL_1,CAST(10 AS INTEGER)),
    a15.CUSTOMER_HIER_LVL_1_NAME,
    a11.CUSTOMER_ID,
    a17.CUSTOMER_NAME,
    a13.DIVISION_ID,
    a16.DIVISION_NAME,
    a12.CATEGORY_SHORT_CODE,
    a12.CATEGORY_DESC,
    a12.BRAND_SHORT_CODE,
    a12.BRAND_DESC,
    TRIM (LEADING '0' FROM a13.MATERIAL_ID),
    a13.MATERIAL_DESCRIPTION;
    '''

    #create dataframe using both functions td_to_pandas and td_dataframe
    df = td_dataframe(select_db, query)
    
    return transform_teradata(df)


def transform_teradata(df):

    #update category_desc values based on row qualifiers
    df['Consolidated Category'] = df['category_desc']
    df.loc[df['Consolidated Category'] == 'Sweet Potato' , 'Consolidated Category'] = 'Potato'
    df.loc[df['Consolidated Category'] != 'Potato' , 'Consolidated Category'] = 'Prepared Foods'
    
    #update calendar_week_name to numeric for future functions
    df['calendar_week_number'] = pd.to_numeric(df['calendar_week_number'], errors = 'coerce')
    
    df = df.astype({'actual_volume_lbs':'float64'})

    df = df.rename(columns={'actual_volume_lbs':'LBS',
                          'calendar_week_number':'Calendar Week Year',
                          'division_name':'Division Name',
                          'material_pricing_group_description':'Material Pricing Group Description',
                          'customer_hier_lvl_1_name':'Customer L1 Name',
                          'brand_desc':'Brand Desc',
                          'family_desc': 'Family Desc'})
    
    return df

### 4. Execute Analysis
Run cell below

In [4]:


#pull sales data from teradata
_sales = teradata_sales()

#blank list to add lists to
_list = []

#Output 1: Division Name - List 0
_list.append(['Division Name','Consolidated Category'])

#Output 2: MPG - List 1
_list.append(['Family Desc'])

#Create dataframes
output1 = process_list(_sales, _list[0])
output2 = process_list(_sales[_sales['Division Name'] == 'Retail'], _list[1])

#pull sales data by brand from teradata
#seperate from the query above becaues of filters
_brand = teradata_brand()

#Output 3: Brand - List 2
_list.append(['Brand Desc'])

output3 = process_list(_brand, _list[2])

print('All done')

Database selected!
05/19/2022 02:57:18 PM
Query: Execution started...finished. 0:02:15.746866
Query: Fetching data started...finished. 0:04:10.319712
Query: Creating DataFrame for started...finished. 0:00:01.311164
Dim: (561999, 21)
Database selected!
05/19/2022 03:03:53 PM
Query: Execution started...finished. 0:03:23.073494
Query: Fetching data started...finished. 0:00:01.642879
Query: Creating DataFrame for started...finished. 0:00:00.019830
Dim: (12124, 17)
All done


### 5. Teradata Update
Run cell below

In [5]:
def td_upload(select_db, df, table_name):
    with teradatasql.connect(None, 
                         host='172.29.3.43',
                         user='PNWATTERS',
                         password='teradata123') as con:
        with con.cursor() as cur:
            cur.execute (select_db)
            d = dt.now().strftime('%m/%d/%Y %r')
            print(f'Database selected! {d}', flush=True)            

            delete_from_td(df, table_name, cur)
            insert_into_td(df, table_name, cur)

def delete_from_td(df, table_name, cur):
    distributor = df.groupby('Country').size().reset_index().drop(columns=0).to_numpy()[0][0]
    
    print(f'Deleting records for: {distributor} in table: {table_name}', flush = True)          
        
    query = '''
    DELETE FROM ''' + table_name  + ''' 
    WHERE "Country" = ''' + "'" + distributor + "'"
    
    cur.execute (query)
    
def insert_into_td(df, table_name, cur):
    insert_list = df.values.tolist()
    
    #creates ?, ?,.... string used in query for teradata fastload
    insert_columns = ('?, ' * len(df.columns)).rstrip(', ')

    print(f'Inserting records into {table_name}', flush = True)
    
    query = "INSERT INTO " + table_name  + " (" + insert_columns + ")"
    #query = "{fn teradata_try_fastload}INSERT INTO " + table_name  + " (" + insert_columns + ")"
    
    cur.execute (query, insert_list)
    
    print(f'Inserted {df.shape[0]} records', flush = True)
    

select_db = 'DATABASE DL_NA_PROTOTYPING'

td_upload(select_db, output1, 'SELLIN_DIVISION')
td_upload(select_db, output2, 'SELLIN_RETAIL')
td_upload(select_db, output3, 'SELLIN_BRAND')

print('All done', flush = True)

Database selected! 05/19/2022 03:07:24 PM
Deleting records for: Canada in table: SELLIN_DIVISION
Inserting records into SELLIN_DIVISION
Inserted 1238 records
Database selected! 05/19/2022 03:11:45 PM
Deleting records for: Canada in table: SELLIN_RETAIL
Inserting records into SELLIN_RETAIL
Inserted 1968 records
Database selected! 05/19/2022 03:13:39 PM
Deleting records for: Canada in table: SELLIN_BRAND
Inserting records into SELLIN_BRAND
Inserted 1740 records
All done


### 6. Output to Excel
Only used for Brand

In [6]:
%%time

table_list = ['SELLIN_DIVISION','SELLIN_MPG','SELLIN_BRAND','SELLIN_RETAIL','SELLIN_CUSTOMER_L1']

select_db = 'DATABASE DL_NA_PROTOTYPING'

# Create a Pandas Excel writer using XlsxWriter as the engine.
#writer = pd.ExcelWriter(PATH + str(dt.now().strftime('%m-%d-%Y')) + 'Weekly Sellin Data.csv', engine='xlsxwriter')

for table_name in table_list:
    print(f'Exporting table: {table_name}', flush = True)
    query = '''
    SELECT * FROM ''' + table_name + '''
    '''
    df = td_dataframe(select_db, query)
    
    df.to_csv(PATH + str(dt.now().strftime('%Y%m%d')) + ' ' + table_name + '.csv')
    #df.to_excel(writer, sheet_name = table_name, index = False)
    
writer.save()

print('All done', flush = True)

Exporting table: SELLIN_DIVISION
Database selected!
04/06/2022 09:49:48 PM
Query: Execution started...finished. 0:00:04.607016
Query: Fetching data started...finished. 0:00:00.318056
Query: Creating DataFrame for started...finished. 0:00:00.013547
Dim: (3066, 31)
Exporting table: SELLIN_MPG
Database selected!
04/06/2022 09:49:56 PM
Query: Execution started...finished. 0:00:04.552704
Query: Fetching data started...finished. 0:00:00.667789
Query: Creating DataFrame for started...finished. 0:00:00.031000
Dim: (6460, 31)
Exporting table: SELLIN_BRAND
Database selected!
04/06/2022 09:50:04 PM
Query: Execution started...finished. 0:00:04.403383
Query: Fetching data started...finished. 0:00:00.359017
Query: Creating DataFrame for started...finished. 0:00:00.014447
Dim: (3554, 31)
Exporting table: SELLIN_RETAIL
Database selected!
04/06/2022 09:50:11 PM
Query: Execution started...finished. 0:00:02.472207
Query: Fetching data started...finished. 0:00:00.186778
Query: Creating DataFrame for start

NameError: name 'writer' is not defined

In [6]:
table_list = ['SELLOUT_REGION']

select_db = 'DATABASE DL_NA_PROTOTYPING'

for table_name in table_list:
    print(f'Exporting table: {table_name}', flush = True)
    query = '''
    SELECT * FROM ''' + table_name + '''
    '''
    df = td_dataframe(select_db, query)
    df.to_csv(table_name, index = False)

print('All done', flush = True)

Exporting table: SELLOUT_REGION
Database selected!
09/24/2021 03:55:59 PM
Query: Execution started...finished. 0:00:02.689834
Query: Fetching data started...finished. 0:08:25.960192
Query: Creating DataFrame for started...finished. 0:00:04.762162
Dim: (1036856, 30)
All done
