In [None]:
# Portfolio-safe Patch Report Automation

# Install Dataloop library
!pip install dtlpy

import dtlpy as dl
from datetime import date, timedelta
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json

# -------------------------------
# BigQuery client import (used only by the optional upload example at the end; real credentials are needed to run it)
# -------------------------------
from google.cloud import bigquery

# -------------------------------
# Functions to get date ranges
# -------------------------------
def get_yesterday_and_today():
    """Return yesterday's and today's dates as ``YYYY-MM-DD`` strings.

    "Today" is whatever ``date.today()`` reports when the function is called.

    Returns:
        tuple: ``(yesterday, today)``, both formatted as ``YYYY-MM-DD``.
    """
    current_day = date.today()
    previous_day = current_day - timedelta(days=1)
    # Format both days in one pass so they always share the same pattern.
    return tuple(
        day.strftime("%Y-%m-%d") for day in (previous_day, current_day)
    )

# -------------------------------
# Function to simulate fetching patches report
# -------------------------------
def get_patches_report(start_date, finish_date):
    """Return a fixed, mock patches report (portfolio-safe stand-in for BigQuery).

    Args:
        start_date: Start of the reporting window. Accepted for interface
            compatibility; the mock data does not depend on it.
        finish_date: End of the reporting window. Also unused by the mock.

    Returns:
        pandas.DataFrame: Three rows with the columns ``patch_id``,
        ``patch_url``, ``annotation_item_ids`` (JSON-encoded list stored as a
        string) and ``dataloop_link``.
    """
    column_names = [
        "patch_id",
        "patch_url",
        "annotation_item_ids",
        "dataloop_link",
    ]
    # One tuple per patch, in the same order as ``column_names``.
    mock_rows = [
        (
            1,
            "https://example.com/patch1",
            '["a1"]',
            "https://app.dataloop.ai/datasets/mock_dataset/items/1",
        ),
        (
            2,
            "https://example.com/patch2",
            '["a2"]',
            "https://app.dataloop.ai/datasets/mock_dataset/items/2",
        ),
        (
            3,
            "https://example.com/patch3",
            '["a3"]',
            "https://app.dataloop.ai/datasets/mock_dataset/items/3",
        ),
    ]
    return pd.DataFrame(mock_rows, columns=column_names)

# -------------------------------
# Function to build the report rows, with a placeholder image link derived from each patch URL
# -------------------------------
def get_appsheet_report(results):
    """Build the patch report frame, adding a placeholder image link per patch.

    Args:
        results: DataFrame with the columns ``patch_id``, ``patch_url``,
            ``annotation_item_ids`` and ``dataloop_link``. May be empty.

    Returns:
        pandas.DataFrame: One row per input row with the columns ``patch_id``,
        ``patch_url``, ``link``, ``tagger``, ``annotation_item_ids`` and
        ``dataloop_link``. ``link`` is ``"<patch_url>/image.jpg"`` and
        ``tagger`` is ``None`` (to be filled in by a later step). The index is
        a fresh 0..n-1 range. For empty input the result has zero rows but
        still carries all six columns.
    """
    report_columns = [
        "patch_id",
        "patch_url",
        "link",
        "tagger",
        "annotation_item_ids",
        "dataloop_link",
    ]
    report_rows = [
        {
            "patch_id": row["patch_id"],
            "patch_url": row["patch_url"],
            # Simulated image extraction: portfolio-safe placeholder.
            "link": f"{row['patch_url']}/image.jpg",
            "tagger": None,
            "annotation_item_ids": row["annotation_item_ids"],
            "dataloop_link": row["dataloop_link"],
        }
        for _, row in results.iterrows()
    ]
    # Pin the schema explicitly: without it an empty input would produce a
    # column-less frame and downstream column lookups would raise KeyError.
    return pd.DataFrame(report_rows, columns=report_columns)

# -------------------------------
# Mock function to simulate fetching tagger info from Dataloop
# -------------------------------
def get_creator_v2(item_id):
    """Return dummy tagger info for one annotation item (mock of a Dataloop lookup).

    Args:
        item_id: Identifier of the annotation item; used as the result key.

    Returns:
        dict: ``{item_id: [(tagger, timestamp)]}`` holding a single hard-coded
        entry, regardless of which item was requested.
    """
    dummy_entry = ("Tagger_1", "2025-09-07T10:00:00")
    return {item_id: [dummy_entry]}

def get_tagger(row):
    """Look up (simulated) tagger info for every annotation item of one patch.

    Args:
        row: Mapping-like row exposing ``annotation_item_ids``, a JSON-encoded
            list of item ids stored as a string (or a null value).

    Returns:
        list | None: One ``get_creator_v2`` result per decoded item id, in
        order; ``None`` when ``annotation_item_ids`` is null.
    """
    encoded_ids = row['annotation_item_ids']
    if pd.isnull(encoded_ids):
        # Nothing annotated for this patch.
        return None

    taggers = []
    for item_id in json.loads(encoded_ids):
        taggers.append(get_creator_v2(item_id))
    return taggers

# -------------------------------
# Main pipeline
# -------------------------------
# Reporting window: yesterday through today.
start_date, finish_date = get_yesterday_and_today()

# Step 1: Fetch patches report
results = get_patches_report(start_date, finish_date)

# Steps 2-4: build the report rows, attach taggers row by row, keep one row
# per patch_id, and make sure patch_id is an integer column.
patch_report_list = (
    get_appsheet_report(results)
    .assign(tagger=lambda frame: frame.apply(get_tagger, axis=1))
    .drop_duplicates(subset=['patch_id'])
    .astype({'patch_id': int})
)

# Step 5: Display final dataset
print(patch_report_list)

# -------------------------------
# Optional: Upload to BigQuery (Portfolio-safe example)
# -------------------------------
# NOTE: the upload code below is wrapped in a bare triple-quoted string, so it
# is never executed. It sketches appending `patch_report_list` to a BigQuery
# table (write disposition WRITE_APPEND) and waiting for the load job to
# finish. The project and table ids look like placeholders; confirm them
# before enabling the code.
"""
client = bigquery.Client(project='portfolio-project')
table_id = "portfolio_dataset.patch_reports"
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_APPEND")
job = client.load_table_from_dataframe(patch_report_list, table_id, job_config=job_config)
job.result()
print(f"Table {table_id} uploaded successfully.")
"""
