In [None]:
!pip install openpyxl

In [3]:
import os
import calendar
import logging as log
import pandas as pd
from io import BytesIO
from datetime import date, datetime, timezone, timedelta
from google.cloud import bigquery as bq
from google.cloud import storage
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseUpload
from google.api_core.exceptions import Forbidden, NotFound

'''
DATETIME constants
'''
TIME_ZONE = timezone(timedelta(hours=8))
START_DATE = datetime(2025, 3, 11, tzinfo=TIME_ZONE)

'''
CREDENTIALS	
'''
JSON_KEYS_PATH = 'json-keys/gch-prod-dwh01-data-pipeline.json'
# JSON_KEYS_PATH = '/home/yanzhe/gch-prod-dwh01/json-keys/gch-prod-dwh01-data-pipeline.json'
SERVICE_ACCOUNT = f'{JSON_KEYS_PATH}'

# set up credentials for BQ and Drive to query data
credentials = service_account.Credentials.from_service_account_file(JSON_KEYS_PATH)
bq_client = bq.Client(credentials=credentials, project=credentials.project_id)
bucket_client = storage.Client(credentials=credentials, project=credentials.project_id)

'''
LOCAL FILE PATHS
'''
SQL_SCRIPTS_PATH = 'sql-scripts/sc-possalesrl'
# SQL_SCRIPTS_PATH = '/home/yanzhe/gch-prod-dwh01/sql-scripts/sc-possalesrl'

OUTFILES_DIR = '/mnt/c/Users/Asus/Desktop/cloud-space/giant/outfiles'
# OUTFILES_DIR = '/home/yanzhe/outfiles'
os.makedirs(OUTFILES_DIR, exist_ok=True)

PY_LOGS_DIR = '/mnt/c/Users/Asus/Desktop/cloud-space/giant/py_log'
# PY_LOGS_DIR = '/home/yanzhe/py_log'
os.makedirs(PY_LOGS_DIR, exist_ok=True)

'''
GOOGLE DRIVE PARAMS
'''
SCOPES = ['https://www.googleapis.com/auth/drive']

POSSALES_RL_FOLDER_ID = '1LYITa9mHJZXQyC21_75Ip8_oMwBanfcF' # use this for the actual prod
# POSSALES_RL_FOLDER_ID = '1iQDbpxsqa8zoEIREJANEWau6HEqPe7hF' # GCH Report > Supply Chain (mock drive)

SHARED_DRIVE_ID = '0AJjN4b49gRCrUk9PVA'

'''
OUTPUT FILE CONFIG
'''
SLICE_BY_ROWS = 1000000 - 1

DEPARTMENTS = {
	'1': '1 - GROCERY',
	'2': '2 - FRESH',
	'3': '3 - PERISHABLES',
	'4': '4 - NON FOODS',
	'5': '5 - HEALTH & BEAUTY',
	'6': '6 - GMS'
}

'''
Logging
'''
month = calendar.month_name[datetime.now().month]
year = datetime.now().year

# create log dir for current month/year
LOG_DIR = f'{PY_LOGS_DIR}/{year}/{month}'
os.makedirs(LOG_DIR, exist_ok=True)

# create log file name with timestamp
log_file_name = f'{datetime.now().strftime("%Y%m%d_%H%M%S")}_pylog.txt'
log_file_fullpath = f'{LOG_DIR}/{log_file_name}'

# config logging
log.basicConfig(
	filename=log_file_fullpath,
	level=log.INFO,
	format="%(asctime)s - %(levelname)s - %(message)s"
)

# log to console for debugging
console_handler = log.StreamHandler()
console_handler.setLevel(log.INFO)
log.getLogger().addHandler(console_handler)

In [None]:
expected_df = pd.read_excel('srp.xlsx', sheet_name='Sheet3', header=0)
all_dates = expected_df['date'].unique()

In [51]:
all_dates

<DatetimeArray>
['2024-08-01 00:00:00', '2024-08-02 00:00:00', '2024-08-03 00:00:00',
 '2024-08-04 00:00:00', '2024-08-05 00:00:00', '2024-08-06 00:00:00',
 '2024-09-01 00:00:00', '2024-09-02 00:00:00', '2024-09-03 00:00:00',
 '2024-09-04 00:00:00', '2024-09-05 00:00:00', '2024-09-06 00:00:00',
 '2024-09-07 00:00:00', '2024-09-08 00:00:00', '2024-09-09 00:00:00',
 '2024-09-10 00:00:00', '2024-09-11 00:00:00', '2024-09-12 00:00:00',
 '2024-09-13 00:00:00', '2024-09-14 00:00:00', '2024-09-15 00:00:00',
 '2024-09-16 00:00:00', '2024-09-17 00:00:00', '2024-09-18 00:00:00',
 '2024-09-19 00:00:00', '2024-09-20 00:00:00', '2024-10-01 00:00:00',
 '2024-10-02 00:00:00', '2024-10-03 00:00:00', '2024-10-04 00:00:00',
 '2024-10-05 00:00:00', '2024-10-06 00:00:00', '2024-10-07 00:00:00',
 '2024-10-08 00:00:00', '2024-10-09 00:00:00', '2024-10-10 00:00:00',
 '2024-10-11 00:00:00', '2024-10-12 00:00:00', '2024-10-13 00:00:00',
 '2024-10-14 00:00:00', '2024-10-15 00:00:00']
Length: 41, dtype: datetime

In [57]:
expected_df.head()

Unnamed: 0,date,total_qty_sales,total_sales,total_margin
0,2024-08-01,11721.2201,44670.89,9635.88
1,2024-08-02,9555.8705,43142.5,10286.87
2,2024-08-03,13903.5728,56188.88,11392.43
3,2024-08-04,13970.0486,64507.47,13068.66
4,2024-08-05,11566.8014,43778.06,10385.05


In [59]:
expected_df = expected_df.rename(columns={
		'total_sales_qty': 'total_qty_sales'
})

In [60]:
expected_df.head()

Unnamed: 0,date,total_qty_sales,total_sales,total_margin
0,2024-08-01,11721.2201,44670.89,9635.88
1,2024-08-02,9555.8705,43142.5,10286.87
2,2024-08-03,13903.5728,56188.88,11392.43
3,2024-08-04,13970.0486,64507.47,13068.66
4,2024-08-05,11566.8014,43778.06,10385.05


In [41]:
main_df = pd.DataFrame()

for date in all_dates:
	# Convert to date string in the format BigQuery expects
	formatted_date = date.strftime('%Y-%m-%d')
	
	query = f"""
	SELECT
		date,
		SUM(total_qty_sales) AS total_qty_sales,
		SUM(total_sales) AS total_sales,
		SUM(total_margin) AS total_margin
	FROM `gch-prod-dwh01.srp.agg_possales_copy`
	WHERE date = DATE '{formatted_date}'
	GROUP BY date
	"""

	results_df = bq_client.query(query).to_dataframe()
	main_df = pd.concat([main_df, results_df], ignore_index=True, sort=False)
	
	main_df

In [58]:
main_df.head()

Unnamed: 0,date,total_qty_sales,total_sales,total_margin
0,2024-08-01,21236.2142,126738.61,29163.94
1,2024-08-02,9555.8705,43142.5,10286.87
2,2024-08-03,13903.5728,56188.88,11392.43
3,2024-08-04,13970.0486,64507.47,13068.66
4,2024-08-05,11566.8014,43778.06,10385.05


In [61]:
comparison_df = pd.DataFrame({
    'date': main_df['date'],
    'expected_total_qty_sales': round(expected_df['total_qty_sales'], 2),
	'cur_total_qty_sales': round(main_df['total_qty_sales'], 2),
	'expected_sales': round(expected_df['total_sales'], 2),
	'cur_sales': round(main_df['total_sales'], 2),
    'expected_sales': round(expected_df['total_sales'], 2),
	'cur_sales': round(main_df['total_sales'], 2)
})

comparison_df['different'] = comparison_df['expected_sales'] != comparison_df['cur_sales']

final = comparison_df[comparison_df['different'] == True]
final

Unnamed: 0,date,expected_total_qty_sales,cur_total_qty_sales,expected_sales,cur_sales,different
0,2024-08-01,11721.22,21236.21,44670.89,126738.61,True
