In [None]:
# Portfolio-safe Patch Report Automation

# Install Dataloop library
!pip install dtlpy

import dtlpy as dl
from datetime import date, timedelta
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json

# -------------------------------
# BigQuery client library (configure real credentials and project ids before running)
# -------------------------------
from google.cloud import bigquery

# -------------------------------
# Functions to get date ranges
# -------------------------------
def get_yesterday_and_today():
    """Return yesterday's and today's local dates as formatted strings.

    Returns:
        tuple[str, str]: ``(yesterday, today)``, each rendered with the
        ``"%Y-%m-%d"`` format.
    """
    date_format = "%Y-%m-%d"
    current_day = date.today()
    previous_day = current_day - timedelta(days=1)
    return tuple(day.strftime(date_format) for day in (previous_day, current_day))


# -------------------------------
# Function to get the latest date already present in the report table
# -------------------------------
def get_last_date():
    """Return the most recent ``date`` stored in the quality report table.

    Runs ``SELECT MAX(date)`` against
    ``taranis-bi.TS_AG.patches_quality_report_all`` using a BigQuery client
    for the ``taranis-bi`` project.

    Returns:
        str: ``str()`` of the single value returned by the query.

    NOTE(review): if the table is empty the aggregate is NULL, so the returned
    string is whatever pandas renders for a missing value -- confirm callers
    handle that case.
    """
    from google.cloud import bigquery

    # Plain string literal: nothing is interpolated, so no f-prefix is needed.
    query = """
    SELECT MAX(date) FROM `taranis-bi.TS_AG.patches_quality_report_all`
    """

    client = bigquery.Client(project='taranis-bi')
    results = client.query(query).to_dataframe()

    # Purely positional scalar access. ``results.iloc[0][0]`` looked up the
    # integer 0 on a label-indexed Series, which only worked through the
    # deprecated positional fallback of ``Series.__getitem__``.
    return str(results.iloc[0, 0])

# -------------------------------
# Function to fetch the patches report from BigQuery
# -------------------------------
def get_patches_report(star_date, finish_date):
    """Query BigQuery for incorrectly answered patches inside a date window.

    Args:
        star_date: Lower bound; formatted into the SQL as a quoted literal and
            compared with ``date >`` (exclusive).
        finish_date: Upper bound; formatted into the SQL as a quoted literal
            and compared with ``date <`` (exclusive).

    Returns:
        The DataFrame produced by ``to_dataframe()`` for the query, i.e. the
        rows of ``human_patches_clear_data`` where ``is_correct_answer`` is
        False and ``date`` lies strictly between the two bounds.
    """
    from google.cloud import bigquery

    # NOTE(review): the bounds are pasted straight into the SQL text, so only
    # trusted, well-formed date strings should be passed in. Both comparisons
    # are strict -- confirm that excluding the boundary values is intended.
    sql = f"""
    SELECT * FROM `human_patches_clear_data`
    WHERE is_correct_answer is False and date > '{star_date}' and date < '{finish_date}'
    and is_correct_answer is False
    """

    bq_client = bigquery.Client(project='project_name')
    query_job = bq_client.query(sql)
    return query_job.to_dataframe()


# -------------------------------
# Function to extract image links from patch URLs
# -------------------------------

def get_appsheet_report(results):
    """Attach the first image URL found on each patch page to its report row.

    For every row of ``results`` the page at ``row["patch_url"]`` is
    downloaded and parsed, and the ``src`` of the first ``<img>`` tag that has
    one is stored under ``link``. A ``tagger`` entry is added and left as
    ``None`` so a later step can fill it in.

    A row is skipped (with a printed message) when the request fails, the
    response status is not 200, or the page has no ``<img>`` tag with a
    ``src`` attribute.

    Args:
        results: DataFrame with at least a ``patch_url`` column.

    Returns:
        pandas.DataFrame: one row per successfully processed input row,
        holding the input columns plus ``link`` and ``tagger``. The columns
        are present even when no row was processed.
    """
    patch_rows = []
    for row in results.to_dict("records"):
        url = row["patch_url"]

        try:
            # A timeout keeps one unresponsive page from hanging the whole run.
            response = requests.get(url, timeout=30)
        except requests.RequestException as exc:
            print(f"Failed to retrieve images. Request error: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to retrieve images. Status code: {response.status_code}")
            continue

        soup = BeautifulSoup(response.content, "html.parser")
        # First <img> that actually carries a src attribute; None when the
        # page has no such tag (indexing an empty list used to raise here).
        image_tag = soup.find("img", src=True)
        if image_tag is None:
            print(f"Failed to retrieve images. No <img> with a src found at: {url}")
            continue

        # Exactly one output record per input row. Appending inside a per-key
        # loop would add the same record once for every column.
        patch_rows.append({**row, "link": image_tag["src"], "tagger": None})

    # Passing the columns explicitly keeps the schema stable for downstream
    # steps even when every row was skipped or ``results`` was empty.
    columns = list(results.columns)
    columns.extend(name for name in ("link", "tagger") if name not in columns)
    return pd.DataFrame(patch_rows, columns=columns)

# -------------------------------
# Mock function to simulate fetching tagger info from Dataloop
# -------------------------------
def get_creator_metadata(item_id):
    """Mock lookup of tagger information for one annotation item.

    Args:
        item_id: Value used as the key of the returned mapping.

    Returns:
        dict: ``{item_id: [(tagger_name, timestamp)]}`` containing a single
        fixed placeholder entry.
    """
    placeholder_entry = ("Tagger_1", "2025-09-07T10:00:00")
    tagger_info = {}
    tagger_info[item_id] = [placeholder_entry]
    return tagger_info


# Module-level Dataloop project handle; get_tagger() uses it to fetch datasets.
# NOTE(review): "project_id" looks like a placeholder -- replace it with a real
# project id before running. This call executes as soon as the cell/module
# runs, so it presumably needs an authenticated dtlpy session; confirm.
dataloop_project = dl.projects.get(project_id="project_id")

def get_tagger(row):
    """Look up tagger info for the Dataloop items behind one patch row.

    The dataset id is taken from the part of ``row['dataloop_link']`` between
    ``'datasets/'`` and ``'/items'``. Items of that dataset whose
    ``metadata.annotation_item_id`` is in the JSON-decoded
    ``row['annotation_item_ids']`` are fetched, and
    ``get_creator_metadata`` is called for each of them.

    Args:
        row: Mapping-like row (e.g. a DataFrame row) with ``dataloop_link``
            and ``annotation_item_ids`` entries. ``annotation_item_ids`` is
            parsed with ``json.loads``.

    Returns:
        list | None: one ``get_creator_metadata`` result per matching item,
        or ``None`` when no dataset id can be extracted from the link or
        ``annotation_item_ids`` is missing.

    Raises:
        KeyError: if ``row`` has no ``annotation_item_ids`` entry.
    """
    try:
        dataset_id = row['dataloop_link'].split('datasets/')[1].rsplit('/items')[0]
    except (KeyError, IndexError, AttributeError, TypeError):
        # Missing entry, non-string link (None/NaN) or no 'datasets/' segment.
        # Only these are swallowed so unrelated errors still surface.
        dataset_id = None
    annotation_item_ids = row['annotation_item_ids']

    # Single guard: nothing to look up without ids or a non-empty dataset id.
    if pd.isnull(annotation_item_ids) or not dataset_id:
        return None

    dataset = dataloop_project.datasets.get(dataset_id=dataset_id)
    filters = dl.Filters()
    filters.add(
        field="metadata.annotation_item_id",
        values=json.loads(annotation_item_ids),
        operator=dl.FiltersOperations.IN,
    )
    list_items = dataset.items.get_all_items(filters=filters)
    # get_creator_metadata() takes an item id and keys its result by it, so
    # pass the id rather than the whole item entity.
    taggers = [get_creator_metadata(item.id) for item in list_items]
    print(annotation_item_ids)
    return taggers

# -------------------------------
# Main pipeline
# -------------------------------
# Date window for this run: (yesterday, today) as strings.
start_date, finish_date = get_yesterday_and_today()

# Step 1: Fetch patches report
results = get_patches_report(start_date, finish_date)

# Step 2: Extract image links
patch_report_list = get_appsheet_report(results)

# Step 3: Keep one row per patch and make patch_id an integer.
# Chaining astype() avoids assigning a column on the frame returned by
# drop_duplicates(), which can raise SettingWithCopyWarning.
patch_report_list = (
    patch_report_list
    .drop_duplicates(subset=['patch_id'])
    .astype({'patch_id': int})
)

# Step 4: Keep only the columns needed for the report.
# .copy() yields an independent frame so the column added below is not
# written onto a slice of the previous one.
patch_report_list = patch_report_list[['image_id', 'annotation_item_ids', 'dataloop_link']].copy()

# Step 5: Add the tagger info to each patch.
# Taggers are resolved once, after de-duplication: get_tagger() queries
# Dataloop for every row, and a pass made before this point would be thrown
# away by the column selection above. Building the Series explicitly (instead
# of DataFrame.apply(axis=1)) also works when the frame has no rows.
patch_report_list['tagger'] = pd.Series(
    [get_tagger(row) for _, row in patch_report_list.iterrows()],
    index=patch_report_list.index,
    dtype=object,
)


# Step 6: Upload to BigQuery (Portfolio-safe example)
# The upload code below is wrapped in a string literal, so it is NOT executed.
# Remove the surrounding triple quotes to enable it: it would then load
# patch_report_list into the table named by table_id with the
# WRITE_APPEND write disposition and wait for the load job to finish.

"""
client = bigquery.Client(project='portfolio-project')
table_id = "portfolio_dataset.patch_reports"
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_APPEND")
job = client.load_table_from_dataframe(patch_report_list, table_id, job_config=job_config)
job.result()
print(f"Table {table_id} uploaded successfully.")
"""
