In [None]:
from google.oauth2 import service_account
from googleapiclient.discovery import build

# Configuration
# Path to the service-account JSON key that get_sheet_ids() loads.
# NOTE(review): absolute, machine-specific path — consider sourcing it from an
# environment variable so the notebook can run elsewhere.
SERVICE_ACCOUNT_FILE = '/Users/deepshah/Downloads/tiffinstash-key.json'
# ID of the Drive folder whose spreadsheets get_sheet_ids() lists.
FOLDER_ID = '1t5s3Zf5nGOceskozz74tta9H5vd9mawp'
# OAuth scope requested for the Drive listing (metadata, read-only).
# NOTE(review): later cells rebind SCOPES; get_sheet_ids() reads the global at
# call time, so the scope actually used depends on which cells ran last.
SCOPES = ['https://www.googleapis.com/auth/drive.metadata.readonly']

def get_sheet_ids():
    """Return the IDs of all non-trashed Google Sheets directly inside FOLDER_ID.

    Authenticates with the service-account key at SERVICE_ACCOUNT_FILE using
    SCOPES and queries the Drive v3 ``files.list`` endpoint. That endpoint is
    paginated, so every page is followed via ``nextPageToken``; a single call
    would silently drop spreadsheets once the folder outgrows one page.

    Returns:
        list[str]: Spreadsheet file IDs (empty if the folder contains none).
    """
    creds = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE, scopes=SCOPES)

    service = build('drive', 'v3', credentials=creds)

    query = (f"'{FOLDER_ID}' in parents and "
             f"mimeType = 'application/vnd.google-apps.spreadsheet' and "
             f"trashed = false")

    file_ids = []
    page_token = None
    while True:
        # Only 'id' (plus the paging token) is requested to keep responses lean.
        response = service.files().list(
            q=query,
            fields="nextPageToken, files(id)",
            pageSize=1000,
            pageToken=page_token,
        ).execute()

        file_ids.extend(item['id'] for item in response.get('files', []))

        # A missing token means the last page has been consumed.
        page_token = response.get('nextPageToken')
        if not page_token:
            break

    return file_ids


In [None]:
import gspread
import pandas as pd
from google.oauth2.service_account import Credentials

# 1. Define the scope
# NOTE(review): this rebinds the global SCOPES to the full Drive scope even
# though the cell only reads data; functions that read SCOPES at call time
# will pick up this value once the cell has run.
SCOPES = [
    "https://www.googleapis.com/auth/drive"
]

# 2. Authenticate using the Service Account key file
# NOTE(review): absolute, machine-specific path — consider an environment
# variable or a path relative to the project.
SERVICE_ACCOUNT_FILE = '/Users/deepshah/Downloads/tiffinstash-key.json' 

creds = Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
client = gspread.authorize(creds)

# 3. Open the Google Sheet
# A spreadsheet can be opened by title, url, or key; the key is the path
# segment after /d/ in the spreadsheet URL.
# Example URL: https://docs.google.com/spreadsheets/d/1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms/edit
SHEET_ID = '1UMfiU20U3DXL5st0PTRTiq5xQ_cB10_nrDvexmrapsQ' 

try:
    # Open the spreadsheet by its key
    sh = client.open_by_key(SHEET_ID)
    
    # Select the worksheet (tab) by zero-based index: 3 is the fourth tab.
    worksheet = sh.get_worksheet(3) 
    # OR: worksheet = sh.worksheet("Sheet1")

    # 4. Get all values and convert to DataFrame
    data = worksheet.get_all_records() # Returns a list of dictionaries
    df = pd.DataFrame(data)

    print("Data extracted successfully:")
    print(df.head())

except Exception as e:
    # NOTE(review): every failure is reduced to a printed message, so on error
    # `df` is left unbound (or stale from a previous run of this cell).
    print(f"Error reading Google Sheet: {e}")


In [None]:
import os
import json
import time
import logging
import requests
import certifi
from datetime import datetime, timedelta
from typing import Optional, Dict, Any
from google.oauth2 import service_account
from google.auth import default
import json

def get_credentials(scopes=None):
    """
    Gets Google Service Account credentials from environment variable,
    mounted secret file, or local development file.

    Sources are tried in order:
      1. JSON key content in the ``tiffinstash-sa-key`` environment variable.
      2. The first existing key file among the known paths.
      3. Application Default Credentials (ADC).

    Args:
        scopes: Optional list of OAuth scope URLs to apply to the credentials.

    Returns:
        A google-auth credentials object from the first source that works.
    """
    # The original body referenced an undefined `logger`, so both the warning
    # and the fallback path raised NameError. Resolve the logger locally.
    logger = logging.getLogger(__name__)

    # 1. Check if key content is in environment variable
    key_content = os.environ.get("tiffinstash-sa-key")
    if key_content:
        try:
            info = json.loads(key_content)
            creds = service_account.Credentials.from_service_account_info(info)
            if scopes:
                creds = creds.with_scopes(scopes)
            return creds
        except Exception as e:
            # Best effort: a malformed env var falls through to the file lookup.
            logger.warning(f"Failed to parse 'tiffinstash-sa-key' env var as JSON: {e}")

    # 2. Check for mounted secret file or local development file
    possible_paths = [
        "/etc/tiffinstash-sa-key",
        "/Users/deepshah/Downloads/tiffinstash-key.json"
    ]
    for path in possible_paths:
        if os.path.exists(path):
            return service_account.Credentials.from_service_account_file(path, scopes=scopes)

    # 3. Fallback to application default credentials (ADC)
    logger.info("No service account key found, falling back to Application Default Credentials")
    # Let google.auth apply the scopes: not every ADC credential type exposes
    # with_scopes(), so calling it unconditionally can raise AttributeError.
    credentials, _ = default(scopes=scopes)
    return credentials

"""
Logic for processing seller data from Google Sheets.
"""
from datetime import datetime

# Two-letter seller codes and the full seller names they expand to.
# Insertion order matters: the substring fallback returns the first code found.
_SELLER_CODE_TO_NAME = {
    'kt': 'KHAOT', 'lk': 'LALKT', 'sw': 'TSWAD', 'tp': 'TPROS', 'mj': 'MIJOY',
    'vs': 'VISWA', 'if': 'INFLV', 'kk': 'KHAOK', 'bv': 'BHAVS', 'an': 'ANGTH',
    'sp': 'SPICE', 'ca': 'CHEFA', 'fg': 'FIERY', 'fm': 'FMONK', 'ks': 'KRISK',
    'kl': 'KERAL', 'sb': 'SPBAR', 'rd': 'RADHA', 'dn': 'DELHI', 'sc': 'SATVK',
    'rn': 'RNBIT', 'sm': 'SUBMA', 'hk': 'HEMIK', 'pr': 'PINDI', 'ms': 'MOKSH',
    'mc': 'MASCO', 'cb': 'CBAKE', 'hf': 'HOMEF', 'rv': 'RITAJ', 'mu': 'MUMKT',
    'dr': 'DSRAS', 'mz': 'MITZI', 'mn': 'AMINA'
}
_SELLER_NAMES = frozenset(_SELLER_CODE_TO_NAME.values())


def update_column_k(val: str) -> str:
    """Map seller codes to full names.

    Args:
        val: Cell value holding a seller code, possibly embedded in other text.

    Returns:
        The full seller name for the first code (in table order) found inside
        the lower-cased value; the canonical name itself when ``val`` already
        is one; otherwise ``val`` unchanged. Falsy input is returned as-is.
    """
    if not val:
        return val

    # A value that is already a full seller name must pass through untouched.
    # Without this guard the substring scan below remaps it to another seller
    # (e.g. 'LALKT' contains 'kt' and became 'KHAOT').
    canonical = str(val).strip().upper()
    if canonical in _SELLER_NAMES:
        return canonical

    lowered = str(val).lower()
    for code, name in _SELLER_CODE_TO_NAME.items():
        if code in lowered:
            return name
    return val

def update_seller_delivery(val: str) -> str:
    """Collapse the seller-delivery cell to "Yes"/"No" where it is recognised.

    Empty or blank input becomes "No". The strings "yes", "no" and
    "yes ($1.99/day)" (any casing, surrounding whitespace ignored) become
    "Yes"/"No". Anything else is handed back exactly as received.
    """
    # Non-string input: falsy values count as "No", the rest pass through.
    if not isinstance(val, str):
        return val if val else "No"

    cleaned = val.strip().lower()
    if not cleaned:
        return "No"

    recognised = {
        "no": "No",
        "yes": "Yes",
        "yes ($1.99/day)": "Yes",
    }
    return recognised.get(cleaned, val)

def apply_td_to_vd(v_val: str, l_val: str) -> str:
    """Return "VD" in place of "TD" when the slot is "MIDDAY"; otherwise keep l_val."""
    is_midday_td = (v_val, l_val) == ("MIDDAY", "TD")
    return "VD" if is_midday_td else l_val


In [None]:
# Shopify Order Field Names
# Ordered list of 24 column names for a Shopify order record.
# NOTE(review): none of the other cells visible in this notebook reference this
# list; presumably it is consumed by code defined elsewhere — confirm before
# changing the order or spelling of any entry.
SHOPIFY_ORDER_FIELDNAMES = [
    "ORDER ID",
    "DATE",
    "NAME",
    "Shipping address phone numeric",
    "phone_edit",
    "EMAIL",
    "HOUSE UNIT NO",
    "ADDRESS LINE 1",
    "Select Delivery City",
    "Shipping address city",
    "ZIP",
    "SKU",
    "Delivery Instructions (for drivers)",
    "Order Instructions (for sellers)",
    "Delivery Time",
    "Dinner Delivery",
    "Lunch Delivery",
    "Lunch Delivery Time",
    "Lunch Time",
    "Delivery between",
    "deliverytime_edit",
    "QUANTITY",
    "Select Start Date",
    "Delivery city"
]

In [None]:
# NOTE(review): despite the name, this holds spreadsheet file IDs (the return
# value of get_sheet_ids()), not URLs. Running this cell performs a Drive API call.
SHEET_URLS = get_sheet_ids()

In [None]:
import pandas as pd
import numpy as np
import gspread
import time
from datetime import datetime
from fastapi import APIRouter, HTTPException
from google.oauth2 import service_account
from tenacity import retry, wait_exponential, stop_after_attempt, retry_if_exception_type

# --- CONFIGURATION ---
# Service-account JSON key used by get_credentials() below.
# NOTE(review): absolute, machine-specific path, and a repeat of the same
# constant from earlier cells — consider one shared config cell.
SERVICE_ACCOUNT_FILE = '/Users/deepshah/Downloads/tiffinstash-key.json'
# Drive folder whose spreadsheets fetch_aggregated_seller_data() enumerates.
FOLDER_ID = '1t5s3Zf5nGOceskozz74tta9H5vd9mawp'
# Scopes requested for the credentials: Sheets access plus read-only Drive
# metadata (used for the folder listing).
SCOPES = [
    "https://www.googleapis.com/auth/spreadsheets",
    "https://www.googleapis.com/auth/drive.metadata.readonly"
]

def get_credentials(scopes=None):
    """Load service-account credentials from SERVICE_ACCOUNT_FILE.

    This definition replaces the earlier ``get_credentials(scopes=None)`` once
    the cell runs, so it keeps that optional parameter: code written against
    the earlier signature would otherwise fail with a TypeError.

    Args:
        scopes: Optional list of OAuth scope URLs. Defaults to the module-level
            SCOPES when omitted.

    Returns:
        google.oauth2.service_account.Credentials for the key file.
    """
    effective_scopes = SCOPES if scopes is None else scopes
    return service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE, scopes=effective_scopes)

# --- OPTIMIZED WORKER ---

def _extract_ongoing_rows(values):
    """Convert raw sheet values into dicts for rows flagged as ongoing.

    Args:
        values: List of rows (lists of cell values); the first row is the header.

    Returns:
        list[dict]: One dict per kept row, keyed by the header cells at
        indices 2..25. A row is kept when the cell at index 23 contains
        "ongoing" (case-insensitive) and its 2..25 slice is as long as the
        header slice. Returns [] when there is no data row.
    """
    if len(values) < 2:
        return []

    # Indices 2..25 are columns C..Z when the values start at column A.
    headers = values[0][2:26]
    rows = []
    for row in values[1:]:
        # Index 23 is presumably a status column — TODO confirm against the sheet.
        if len(row) > 23 and "ongoing" in str(row[23]).lower():
            target_values = row[2:26]
            # Skip short rows so keys and values never misalign.
            if len(target_values) == len(headers):
                rows.append(dict(zip(headers, target_values)))
    return rows


@retry(
    retry=retry_if_exception_type(gspread.exceptions.APIError),
    wait=wait_exponential(multiplier=2, min=10, max=60), # Increased wait time
    stop=stop_after_attempt(3),
    # NOTE(review): this message is printed for every APIError, not only quota errors.
    before_sleep=lambda retry_state: print(f"Quota hit. Retrying in {retry_state.next_action.sleep}s..."),
    # Surface the real APIError once attempts are exhausted instead of
    # tenacity's RetryError, whose message hides the underlying cause.
    reraise=True,
)
def fetch_single_sheet_data(client, sheet_id):
    """Worker function: Now used sequentially to prevent burst errors.

    Opens the spreadsheet ``sheet_id``, reads its "SD DATA" worksheet and
    returns the ongoing rows as dicts.

    Args:
        client: An authorized gspread client.
        sheet_id: Spreadsheet key to open.

    Returns:
        list[dict]: Ongoing rows keyed by header; [] when the worksheet is
        missing or has no data rows.

    Raises:
        gspread.exceptions.APIError: If the API still fails after 3 attempts.
    """
    # Note: open_by_key = 1 Read Request, get_all_values = 1 Read Request.
    # Total = 2 requests per sheet.
    # NOTE(review): sh.worksheet() may issue a metadata request of its own —
    # confirm the per-sheet request count used for quota planning.
    sh = client.open_by_key(sheet_id)
    try:
        worksheet = sh.worksheet("SD DATA")
    except gspread.WorksheetNotFound:
        return []

    return _extract_ongoing_rows(worksheet.get_all_values())

# --- MAIN ROUTE ---

def fetch_aggregated_seller_data():
    """Collect rows from every spreadsheet in FOLDER_ID and return them as records.

    Steps: list the spreadsheet IDs in the Drive folder, fetch each sheet
    sequentially via fetch_single_sheet_data() with a pause between sheets,
    pass the combined rows through transform_to_master_format(), and convert
    the result to JSON-friendly records.

    Returns:
        list[dict]: One dict per transformed row, with +/-inf and NaN replaced
        by None. Returns [] when the folder contains no spreadsheets.

    Raises:
        HTTPException: status 500 wrapping any failure (original error chained).
    """
    try:
        creds = get_credentials()
        client = gspread.authorize(creds)

        # 1. Fetch IDs. files.list is paginated, so follow nextPageToken;
        #    a single call would drop sheets beyond the first page.
        from googleapiclient.discovery import build
        drive_service = build('drive', 'v3', credentials=creds)
        query = f"'{FOLDER_ID}' in parents and mimeType = 'application/vnd.google-apps.spreadsheet' and trashed = false"
        sheet_ids = []
        page_token = None
        while True:
            results = drive_service.files().list(
                q=query,
                fields="nextPageToken, files(id)",
                pageSize=1000,
                pageToken=page_token,
            ).execute()
            sheet_ids.extend(f['id'] for f in results.get('files', []))
            page_token = results.get('nextPageToken')
            if not page_token:
                break

        if not sheet_ids:
            return []

        all_data = []
        total = len(sheet_ids)

        print(f"Starting controlled fetch for {total} sheets...")

        # 2. Sequential Processing with Mandatory Delay
        for idx, sid in enumerate(sheet_ids):
            print(f"[{idx+1}/{total}] Fetching sheet: {sid}")

            data = fetch_single_sheet_data(client, sid)
            if data:
                all_data.extend(data)

            # --- THE QUOTA PROTECTOR ---
            # Each sheet takes 2 requests.
            # To stay under 60 requests/min, we need to ensure we don't
            # do more than 30 sheets per minute.
            # 60 seconds / 30 sheets = 2 seconds per sheet.
            if idx < total - 1: # Don't sleep after the very last sheet
                time.sleep(2.0)

        # 3. Transform
        # NOTE(review): transform_to_master_format is not defined in the cells
        # visible here — confirm it is defined before this function is called.
        final_data = transform_to_master_format(all_data)

        df_final = pd.DataFrame(final_data)
        # Replace infinities first, then build the null mask from the replaced
        # frame: a mask taken from df_final still marks the inf cells as
        # non-null, leaving them as NaN. Casting to object lets None be stored
        # in numeric columns, which would otherwise coerce it back to NaN.
        cleaned = df_final.replace([np.inf, -np.inf], np.nan)
        return cleaned.astype(object).where(cleaned.notna(), None).to_dict(orient="records")

    except Exception as e:
        # Chain the original exception so the traceback keeps the root cause.
        raise HTTPException(status_code=500, detail=f"Aggregation failed: {str(e)}") from e

In [None]:
# NOTE(review): despite the name, `df` is not a DataFrame —
# fetch_aggregated_seller_data() returns a list (row dicts, or [] when the
# folder has no spreadsheets). This call hits the network and sleeps between sheets.
df=fetch_aggregated_seller_data()

In [None]:
# Echo the fetched result.
# NOTE(review): this prints every record; slice it (e.g. df[:5]) to keep the output small.
df

In [None]:
# Number of records in the fetched result.
len(df)