In [4]:
import pandas as pd
import numpy as np
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive

# === Step 1: Load and Prepare Data ===
df = pd.read_csv('../data/SampleSuperstore.csv')

# Add synthetic order dates: one evenly spaced timestamp per row covering
# 2022-01-01 .. 2023-12-31, shuffled so the dates are not tied to row order.
# A dedicated, seeded generator keeps the shuffle (and therefore 'Month',
# the monthly totals and every exported CSV) identical from run to run.
num_rows = len(df)
date_range = pd.date_range(start="2022-01-01", end="2023-12-31", periods=num_rows)
rng = np.random.default_rng(42)
df['Order Date'] = rng.permutation(date_range.to_numpy())

# Create 'Month' column (e.g. '2022-01') for trend analysis
df['Month'] = df['Order Date'].dt.to_period('M').astype(str)

# Perform analysis
# Total sales and profit per region.
region_summary = df.groupby('Region')[['Sales', 'Profit']].sum().reset_index()
# Total sales per calendar month.
monthly_sales = df.groupby('Month')['Sales'].sum().reset_index()
# The ten sub-categories with the highest total sales, largest first.
top_products = (
    df.groupby('Sub-Category')['Sales']
    .sum()
    .sort_values(ascending=False)
    .head(10)
    .reset_index()
)

# === Step 2: Export to CSVs ===
output_dir = 'tableau'
os.makedirs(output_dir, exist_ok=True)

# Map each output filename to the frame it is written from, then write them
# all in one pass (row index omitted from every file).
exports = {
    'Cleaned_Superstore.csv': df,
    'region_summary.csv': region_summary,
    'monthly_sales.csv': monthly_sales,
    'top_products.csv': top_products,
}
for csv_name, frame in exports.items():
    frame.to_csv(f'{output_dir}/{csv_name}', index=False)

print("✅ Data processing complete. Files saved to 'tableau/' folder.")

# === Step 3: Upload Files to Google Drive ===
# Stop early with a clear message when the OAuth client file is absent.
if not os.path.exists('client_secrets.json'):
    raise FileNotFoundError("Missing 'client_secrets.json' in the current directory.")

# Authenticate through the local-webserver OAuth flow and build the Drive client.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)

# Find or create the 'tableau' folder on Google Drive
folder_name = 'tableau'
folder_mime = 'application/vnd.google-apps.folder'
folder_query = f"title='{folder_name}' and mimeType='{folder_mime}' and trashed=false"
file_list = drive.ListFile({'q': folder_query}).GetList()

if not file_list:
    # No matching, non-trashed folder: create one and use its new id.
    folder = drive.CreateFile({'title': folder_name, 'mimeType': folder_mime})
    folder.Upload()
    folder_id = folder['id']
    print(f"📁 Created new Google Drive folder: '{folder_name}'")
else:
    # Reuse the first matching folder returned by the query.
    folder_id = file_list[0]['id']
    print(f"📁 Found existing Google Drive folder: '{folder_name}'")

# Upload each file into the 'tableau' folder.
# Drive allows several files with the same title inside one folder, so
# creating a new file on every run would accumulate duplicates. Look for a
# non-trashed file with the same title in the target folder first and
# overwrite its content; only create a new file when none exists.
csv_files = ['Cleaned_Superstore.csv', 'region_summary.csv', 'monthly_sales.csv', 'top_products.csv']
for fname in csv_files:
    path = os.path.join(output_dir, fname)
    existing = drive.ListFile({
        'q': f"'{folder_id}' in parents and title='{fname}' and trashed=false"
    }).GetList()
    if existing:
        # Reuse the first match; Upload() then updates that file in place.
        file_drive = existing[0]
    else:
        file_drive = drive.CreateFile({
            'title': fname,
            'parents': [{'id': folder_id}]
        })
    file_drive.SetContentFile(path)
    file_drive.Upload()
    print(f"✅ Uploaded '{fname}' to Google Drive folder '{folder_name}'.")

✅ Data processing complete. Files saved to 'tableau/' folder.
Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?client_id=336173916933-t42icgn100dmsv6snkle529vfc64t1t3.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8080%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&access_type=offline&response_type=code

Authentication successful.
📁 Created new Google Drive folder: 'tableau'
✅ Uploaded 'Cleaned_Superstore.csv' to Google Drive folder 'tableau'.
✅ Uploaded 'region_summary.csv' to Google Drive folder 'tableau'.
✅ Uploaded 'monthly_sales.csv' to Google Drive folder 'tableau'.
✅ Uploaded 'top_products.csv' to Google Drive folder 'tableau'.
