## **Run Delta Table Maintenance (REST API)**
**Reference:** [Background Jobs - Run On Demand Table Maintenance](https://learn.microsoft.com/en-us/rest/api/fabric/lakehouse/background-jobs/run-on-demand-table-maintenance?tabs=HTTP)

In [None]:
%run ../main.py

def trigger_table_maintenance_job(table_name: str, token: str) -> str:
    """
    Submit an on-demand TableMaintenance job for a single lakehouse table.

    The request enables V-Order optimization and asks for a vacuum with a
    retention period of "7:01:00:00". The target workspace and lakehouse come
    from the WORKSPACE_ID and LAKEHOUSE_ID names, which are not defined in
    this function and must already be in scope.

    Args:
        table_name (str): Name of the table to run maintenance on.
        token (str): Bearer token placed in the Authorization header.

    Returns:
        str: The "Location" response header when the service answers 202.
        For any other status the status code and body are printed and None
        is returned.
    """
    execution_data = {
        "tableName": table_name,
        "optimizeSettings": {"vOrder": True},
        "vacuumSettings": {"retentionPeriod": "7:01:00:00"},
    }
    job_url = f"https://api.fabric.microsoft.com/v1/workspaces/{WORKSPACE_ID}/lakehouses/{LAKEHOUSE_ID}/jobs/instances?jobType=TableMaintenance"
    request_headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}

    reply = requests.post(job_url, json={"executionData": execution_data}, headers=request_headers)

    # Anything other than "202 Accepted" is reported and treated as a failure
    if reply.status_code != 202:
        print(f"Failed to trigger table maintenance job. Status code: {reply.status_code}, Response text: {reply.text}")
        return None

    return reply.headers.get("Location")
    
def get_bearer_token() -> str:
    """
    Obtain a bearer token string for the Fabric API scope.

    Returns:
        str: The ``token`` attribute of the object returned by
        ``InteractiveBrowserCredential().get_token`` for the scope
        "https://api.fabric.microsoft.com/.default".
    """
    credential = InteractiveBrowserCredential()
    access_token = credential.get_token("https://api.fabric.microsoft.com/.default")
    return access_token.token

def get_authentication_token() -> DefaultAzureCredential:
    """
    Create a credential object for callers that need one.

    Returns:
        DefaultAzureCredential: A newly constructed credential. Despite the
        function name, no token string is requested here; callers call
        ``get_token`` on the result or pass it to a client themselves.
    """
    credential = DefaultAzureCredential()
    return credential

#### **Single Table**

In [None]:
%run ../main.py

# Acquire a bearer token for the Fabric REST API. The active line uses
# get_bearer_token(); the commented-out line is the alternative that goes
# through get_authentication_token() and requests the same scope.
token = get_bearer_token() # Interactive Browser
# token = get_authentication_token().get_token("https://api.fabric.microsoft.com/.default").token # Service Principal

# Submit a table maintenance job for the single table "dim_coa_gold".
# The call returns the job's "Location" header on success or None on failure;
# as the last expression of the cell, that value is presumably what the
# notebook shows as the cell output.
trigger_table_maintenance_job(table_name="dim_coa_gold", token=token)

#### **All Tables**

In [None]:
%run ../main.py
import time

# Authenticate: a bearer token for the REST calls and a credential-backed
# file system client for listing the lakehouse contents
token = get_bearer_token() # Interactive Browser
# token = get_authentication_token().get_token("https://api.fabric.microsoft.com/.default").token # Service Principal
file_system_client = get_file_system_client(get_authentication_token())

# Collect the table names found under the "Tables" directory
filtered_tables = list_items(file_system_client=file_system_client, target_directory_path="Tables")

# Throttling: jobs are submitted in groups, with a pause between groups
batch_size = 5
batch_delay = 60  # in seconds

# Walk the table list one batch at a time
for batch_start in range(0, len(filtered_tables), batch_size):
    batch_end = batch_start + batch_size

    for table_name in filtered_tables[batch_start:batch_end]:
        try:
            result = trigger_table_maintenance_job(table_name=table_name, token=token)
            if result is None:
                print(f"Failed to trigger table maintenance job for table: {table_name}")
            else:
                print(f"Table maintenance job triggered for table: {table_name}")
        except Exception as e:
            # One failing table must not stop the remaining submissions
            print(f"An error occurred for table {table_name}: {e}")

    # No pause is needed once the final batch has been submitted
    if batch_end >= len(filtered_tables):
        continue

    print(f"Waiting for {batch_delay} seconds before triggering the next batch...")
    time.sleep(batch_delay)

## **Run Data Pipeline**
**Reference:** [Microsoft Fabric data pipeline public REST API](https://learn.microsoft.com/en-us/fabric/data-factory/pipeline-rest-api)

In [None]:
%run get-bearer-token.py
import requests
import os
import json

def trigger_pipeline_job(workspace_id: str, pipeline_id: str, token: str) -> str:
    """
    Request an on-demand run of a Fabric data pipeline.

    Args:
        workspace_id (str): ID of the workspace that contains the pipeline.
        pipeline_id (str): Item ID of the pipeline to run.
        token (str): Bearer token placed in the Authorization header.

    Returns:
        str: The "Location" response header when the service answers 202.
        For any other status that raise_for_status() does not reject, the
        function returns None.

    Raises:
        requests.HTTPError: Raised by raise_for_status() for an error status.
    """
    request_headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
    job_url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{pipeline_id}/jobs/instances?jobType=Pipeline"

    reply = requests.post(job_url, headers=request_headers)

    if reply.status_code != 202:
        # Error statuses raise here; other non-202 statuses fall through to None
        reply.raise_for_status()
        return None

    return reply.headers.get("Location")

# Microsoft Fabric Details from Environment Variables
workspace_id = os.getenv("WORKSPACE_ID")
pipeline_id = os.getenv("PIPELINE_ID")

# Get token from JSON File. The context manager closes the file handle as
# soon as the token has been read instead of leaving it open until garbage
# collection.
with open("token_store.json", "r") as token_file:
    token = json.load(token_file)["token"]

# Trigger the pipeline run; the call returns the job's "Location" header on a 202
trigger_pipeline_job(workspace_id=workspace_id,
                     pipeline_id=pipeline_id,
                     token=token)

## **Run Spark Notebook**
**Reference:** [Manage and Execute Notebooks in Fabric with APIs](https://learn.microsoft.com/en-us/fabric/data-engineering/notebook-public-api)

In [None]:
%run get-bearer-token.py
import requests
import os
import json

def run_notebook_job(artifact_id: str, workspace_id: str, lakehouse_name: str, lakehouse_id: str, token: str) -> str:
    """
    Submit an on-demand RunNotebook job for a Fabric notebook.

    The request body sets the notebook's default lakehouse (name and ID) and
    asks for the starter pool via "useStarterPool": True.

    Args:
        artifact_id (str): Item ID of the notebook to run.
        workspace_id (str): ID of the workspace that contains the notebook.
        lakehouse_name (str): Name of the default lakehouse for the run.
        lakehouse_id (str): ID of the default lakehouse for the run.
        token (str): Bearer token placed in the Authorization header.

    Returns:
        str: The "Location" response header when the service answers 202.
        For any other status the status code and body are printed and None
        is returned.
    """
    endpoint = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{artifact_id}/jobs/instances?jobType=RunNotebook"
    payload = {
        "executionData": {
            "defaultLakehouse": {
                "name": lakehouse_name,
                "id": lakehouse_id,
            },
            "useStarterPool": True,
        }
    }
    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
    response = requests.post(endpoint, json=payload, headers=headers)
    if response.status_code == 202:
        return response.headers.get("Location")

    # The message names the notebook job; it previously said "table maintenance job"
    print(f"Failed to trigger notebook job. Status code: {response.status_code}, Response text: {response.text}")
    return None

# Microsoft Fabric Details from Environment Variables
workspace_id = os.getenv("WORKSPACE_ID")
artifact_id = os.getenv("ARTIFACT_ID")
lakehouse_id = os.getenv("LAKEHOUSE_ID")
lakehouse_name = os.getenv("LAKEHOUSE_NAME")

# Get token from JSON File. The context manager closes the file handle as
# soon as the token has been read instead of leaving it open until garbage
# collection.
with open("token_store.json", "r") as token_file:
    token = json.load(token_file)["token"]

# Trigger the notebook run; the call returns the job's "Location" header on a 202
run_notebook_job(workspace_id=workspace_id,
                 artifact_id=artifact_id,
                 lakehouse_id=lakehouse_id,
                 lakehouse_name=lakehouse_name,
                 token=token)

## **Import/Create Notebook**
**Reference:** [Manage and Execute Notebooks in Fabric with APIs](https://learn.microsoft.com/en-us/fabric/data-engineering/notebook-public-api)

In [None]:
%run main.py
%run get-bearer-token.py
import os
import json
import requests
import base64
from datetime import datetime
from dotenv import load_dotenv

# Populate the process environment via load_dotenv() (from python-dotenv,
# imported above) before anything below reads it
load_dotenv()

# Helper object used by the "lakehouse" branch of import_notebook_to_fabric.
# OneLakeRemoteOperations is not defined in this cell; presumably it comes
# from one of the %run scripts above - confirm.
onelake_ops = OneLakeRemoteOperations()

# Read the default workspace and lakehouse identifiers from the environment.
# os.getenv returns None for any variable that is not set.
workspace_id = os.getenv("WORKSPACE_ID")
lakehouse_id = os.getenv("LAKEHOUSE_ID")
lakehouse_name = os.getenv("LAKEHOUSE_NAME")

def poll_notebook_creation(location_url: str, token: str, max_retries: int = 20, retry_interval: int = 15) -> dict:
    """
    Poll the status of notebook creation.

    Args:
        location_url (str): The URL to poll for creation status.
        token (str): The authentication token.
        max_retries (int): Maximum number of polling attempts.
        retry_interval (int): Time in seconds between polling attempts.

    Returns:
        dict: A dictionary containing the success status and details of the creation process.
            On success: {"success": True, "id": <resourceId from the response>, "details": <response JSON>}.
            On a 'failed' or 'canceled' status: {"success": False, "details": <response JSON>}.
            When every attempt is used up: {"success": False, "details": "Polling exceeded maximum retries"}.
    """
    # Imported here because the surrounding cell's import list does not include
    # `time`; without it the first sleep would depend on another cell's import.
    import time

    headers = {"Authorization": f"Bearer {token}"}
    for attempt in range(max_retries):
        try:
            poll_response = requests.get(location_url, headers=headers)
            poll_response.raise_for_status()

            response_json = poll_response.json()
            print(f"Poll response: {response_json}")

            # A missing or null status is treated as "still in progress"
            status = str(response_json.get('status') or '').lower()

            if status == 'succeeded':
                return {"success": True, "id": response_json.get('resourceId'), "details": response_json}
            if status in ('failed', 'canceled'):
                return {"success": False, "details": response_json}

            print(f"Creation in progress. Status: {status}. Attempt {attempt + 1}/{max_retries}")
        except requests.RequestException as e:
            # Request errors are reported and retried on the next attempt
            print(f"Error during polling: {e}")

        # Wait between attempts, but not after the last one
        if attempt < max_retries - 1:
            time.sleep(retry_interval)

    return {"success": False, "details": "Polling exceeded maximum retries"}

def download_file_from_github(repo_url: str, file_path: str, branch: str = "main") -> dict:
    """
    Download a specific file from a GitHub repository.

    Args:
        repo_url (str): The URL of the GitHub repository. The last two
            "/"-separated segments are taken as owner and repository name;
            a trailing "/" and a ".git" suffix are ignored.
        file_path (str): The path to the file within the repository.
        branch (str): The branch to read the file from. Defaults to "main".

    Returns:
        dict: The content of the file as a JSON object.

    Raises:
        ValueError: If owner and repository cannot be extracted from repo_url.
        requests.HTTPError: If the download is answered with an error status.
    """
    # Extract owner and repo name from URL
    parts = repo_url.rstrip('/').split('/')
    if len(parts) < 2 or not parts[-2] or not parts[-1]:
        raise ValueError(f"Cannot determine owner and repository from URL: {repo_url!r}")
    owner, repo = parts[-2], parts[-1]
    if repo.endswith('.git'):
        repo = repo[:-4]

    # Construct the raw content URL; a leading "/" on the path would otherwise
    # produce a double slash after the branch segment
    raw_url = f"https://raw.githubusercontent.com/{owner}/{repo}/{branch}/{file_path.lstrip('/')}"

    # Download the file
    response = requests.get(raw_url)
    response.raise_for_status()

    # Parse the content as JSON
    return json.loads(response.text)

def import_notebook_to_fabric(upload_from: str, source_path: str,
                              default_lakehouse_workspace_id: str = None,
                              default_lakehouse_name: str = None,
                              default_lakehouse: str = None,
                              workspaceId: str = None,
                              environmentId: str = None,
                              file_path: str = None):
    """
    Import a notebook to Microsoft Fabric.

    Args:
        upload_from (str): The source of the notebook ('local', 'lakehouse', or 'github').
        source_path (str): The path to the notebook file or GitHub repository URL.
        default_lakehouse_workspace_id (str, optional): The default lakehouse workspace ID.
        default_lakehouse_name (str, optional): The default lakehouse name.
        default_lakehouse (str, optional): The default lakehouse ID.
        workspaceId (str, optional): The workspace ID written into the environment metadata.
        environmentId (str, optional): The environment ID.
        file_path (str, optional): The path to the file within the GitHub repository.

    Returns:
        None. Progress and failures are reported with print().

    Raises:
        requests.HTTPError: If the create-item request is answered with an error status.

    This function handles the entire process of importing a notebook, including:
    - Loading the notebook content
    - Updating the metadata
    - Creating the notebook in Fabric
    - Polling for the creation status

    Besides its parameters, the function reads the module-level names
    workspace_id, lakehouse_id, lakehouse_name, token and onelake_ops.
    """

    # NOTE(review): for 'github' uploads source_path is the repository URL, so
    # the display name is derived from the repository, not from file_path.
    notebook_name = f"{upload_from}_{os.path.splitext(os.path.basename(source_path))[0]}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    # Load notebook content
    if upload_from == "local":
        if os.path.exists(source_path):
            with open(source_path, "r", encoding="utf-8") as file:
                notebook_json = json.load(file)
        else:
            print(f"Failed to locate the local notebook file: {source_path}")
            return
    elif upload_from == "lakehouse":
        token_credential = onelake_ops.get_authentication_token()
        file_system_client = DataLakeServiceClient(
            "https://onelake.dfs.fabric.microsoft.com",
            credential=token_credential
        ).get_file_system_client(workspace_id)

        temp_file_path = onelake_ops.download_from_lakehouse_temp(file_system_client, source_path, lakehouse_id)
        if not temp_file_path:
            print("Failed to download the notebook file from lakehouse.")
            return
        try:
            with open(temp_file_path, "r", encoding="utf-8") as file:
                notebook_json = json.load(file)
        finally:
            # Delete the temporary file even when it cannot be parsed as JSON
            os.unlink(temp_file_path)
    elif upload_from == "github":
        if not file_path:
            print("File path is required for GitHub upload.")
            return
        try:
            notebook_json = download_file_from_github(source_path, file_path)
        except Exception as e:
            print(f"Failed to download the notebook file from GitHub: {str(e)}")
            return
    else:
        print("Invalid upload_from parameter. Use 'local', 'lakehouse', or 'github'.")
        return

    # Create new metadata structure. Explicit arguments win; otherwise the
    # module-level defaults are used.
    new_metadata = {
        "language_info": {
            "name": "python"
        },
        "trident": {
            "environment": {
                # NOTE(review): hard-coded fallback environment ID - confirm it
                # is valid for the target workspace.
                "environmentId": environmentId or "6524967a-18dc-44ae-86d1-0ec903e7ca05",
                "workspaceId": workspaceId or workspace_id
            },
            "lakehouse": {
                "default_lakehouse": default_lakehouse or lakehouse_id,
                "default_lakehouse_name": default_lakehouse_name or lakehouse_name,
                "default_lakehouse_workspace_id": default_lakehouse_workspace_id or workspace_id
            }
        }
    }

    # Create new notebook structure: only the source cells are kept, the
    # source notebook's own metadata is replaced
    new_notebook = {
        "nbformat": 4,
        "nbformat_minor": 5,
        "cells": notebook_json.get("cells", []),
        "metadata": new_metadata
    }

    print(f"Updated notebook metadata: {json.dumps(new_notebook['metadata'], indent=2)}")

    # Convert to base64
    base64_notebook_content = base64.b64encode(json.dumps(new_notebook).encode('utf-8')).decode('utf-8')

    # Create notebook
    endpoint = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items"
    payload = {
        "displayName": notebook_name,
        "type": "Notebook",
        "description": "Notebook created via API",
        "definition": {
            "format": "ipynb",
            "parts": [{"path": "artifact.content.ipynb", "payload": base64_notebook_content, "payloadType": "InlineBase64"}]
        }
    }
    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}

    response = requests.post(endpoint, json=payload, headers=headers)
    response.raise_for_status()

    if response.status_code == 201:
        print(f"Notebook created successfully. ID: {response.json()['id']}")
    elif response.status_code == 202:
        location_url = response.headers.get("Location")
        if not location_url:
            # Without a URL every poll attempt would fail, so stop here
            # instead of sleeping through all the retries.
            print("Notebook creation was accepted, but the response has no Location header to poll.")
            return
        print("Notebook creation in progress. Polling for status...")
        poll_result = poll_notebook_creation(location_url, token)
        if poll_result["success"]:
            print(f"Notebook created successfully. ID: {poll_result['id']}")
        else:
            print(f"Failed to create notebook. Details: {poll_result['details']}")
    else:
        print(f"Unexpected response. Status code: {response.status_code}, Response: {response.text}")

# Example usage: import Chapter03/ch03.ipynb from the Polars-Cookbook GitHub
# repository. With upload_from="github", source_path is the repository URL
# and file_path is the notebook's path inside that repository.
import_notebook_to_fabric(
    upload_from="github", # ('local', 'lakehouse', or 'github')
    source_path="https://github.com/renan-peres/Polars-Cookbook.git",
    file_path="Chapter03/ch03.ipynb",
    # Optional parameters (uncomment to override the values that are
    # otherwise taken from the environment-derived defaults):
    # default_lakehouse_workspace_id="custom_workspace_id",
    # default_lakehouse_name="LH_bronze",
    # default_lakehouse="46389222-328e-4e65-aa06-02a380dd60d8",
    # workspaceId="custom_workspace_id",
    # environmentId="custom_environment_id"
)