## **Install Package**

In [None]:
# Install fabric_remote_tools 0.1.1 from the source tarball hosted in the project's GitHub repository.
%pip install https://github.com/renan-peres/fabric-remote-tools/raw/main/fabric_remote_tools-0.1.1.tar.gz

## **Import Modules & Authenticate**

In [43]:
from fabric_remote_tools import FabricAuth, OneLakeUtils
import os
from dotenv import load_dotenv
load_dotenv()

# Load Fabric environment variables (.env file)
account_name = os.getenv("ACCOUNT_NAME")
workspace_id = os.getenv("WORKSPACE_ID")
lakehouse_id = os.getenv("LAKEHOUSE_ID")

# os.getenv returns None for an unset variable. Stop here with a readable
# message instead of passing None into the authentication and upload calls.
missing_env_vars = [
    env_name
    for env_name, env_value in {
        "ACCOUNT_NAME": account_name,
        "WORKSPACE_ID": workspace_id,
        "LAKEHOUSE_ID": lakehouse_id,
    }.items()
    if not env_value
]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_env_vars)
    )

# Get Authentication Token
token = FabricAuth.get_service_principal_token()

# Get File System Client (reused by the upload, list, download and delete cells)
file_system_client = FabricAuth.get_file_system_client(token, account_name, workspace_id)

## **Write to Lakehouse (Files/Tables)**

### **Local Tables (Delta)**

In [None]:
# Keyword arguments common to both local Delta-table uploads in this cell
local_table_upload = {
    "file_system_client": file_system_client,
    "lakehouse_id": lakehouse_id,
    "upload_from": "local",
}

# Single table: one local Delta folder -> one Lakehouse table
OneLakeUtils.write_to_lakehouse(
    **local_table_upload,
    source_path="../assets/data/Tables/venture_funding_deals_delta",
    target_path="Tables/local_venture_funding_deals_delta",
)

# Multiple tables: upload the local folder that contains them
OneLakeUtils.write_to_lakehouse(
    **local_table_upload,
    source_path="../assets/data/Tables",
    target_path="Tables/",
)

### **Local Files/Folders**

In [None]:
# Keyword arguments common to every local file/folder upload in this cell
local_file_upload = {
    "file_system_client": file_system_client,
    "lakehouse_id": lakehouse_id,
    "upload_from": "local",
}

# Whole folder
OneLakeUtils.write_to_lakehouse(
    **local_file_upload,
    source_path="../assets/data/Files",
    target_path="Files/",
)

# One subfolder inside that folder
OneLakeUtils.write_to_lakehouse(
    **local_file_upload,
    source_path="../assets/data/Files/Contoso",
    target_path="Files/Contoso",
)

# One specific file
OneLakeUtils.write_to_lakehouse(
    **local_file_upload,
    source_path="../assets/data/Files/Contoso/contoso_sales.csv",
    target_path="Files/Contoso/contoso_sales.csv",
)

### **GitHub (Public Repo)** 

In [None]:
# Public repository used as the source for all three uploads in this cell
public_repo_url = "https://github.com/renan-peres/Polars-Cookbook.git"

# Keyword arguments common to every public-GitHub upload below
github_public_upload = {
    "file_system_client": file_system_client,
    "lakehouse_id": lakehouse_id,
    "upload_from": "github",
    "source_path": public_repo_url,
}

# Entire repository
OneLakeUtils.write_to_lakehouse(
    **github_public_upload,
    target_path="Files/GitHub/Polars-Cookbook",
)

# A single Delta table stored inside the repository
OneLakeUtils.write_to_lakehouse(
    **github_public_upload,
    target_path="Tables/github_venture_funding_deals_delta",
    folder_path="data/venture_funding_deals_delta",
)

# Only one folder of the repository
OneLakeUtils.write_to_lakehouse(
    **github_public_upload,
    target_path="Files/GitHub/data",
    folder_path="data",
)

### **GitHub (Private Repo)** 

In [None]:
# Private-repository credentials and repository name come from the environment
github_token = os.getenv("GITHUB_PERSONAL_ACCESS_TOKEN")
github_username = os.getenv("GITHUB_USERNAME")
gh_repo_name = os.getenv("GITHUB_REPO_NAME")

# Keyword arguments common to both private-GitHub uploads below
github_private_upload = {
    "file_system_client": file_system_client,
    "lakehouse_id": lakehouse_id,
    "upload_from": "github_private",
    "github_token": github_token,
    "github_username": github_username,
    "repo_name": gh_repo_name,
}

# Entire private repository
OneLakeUtils.write_to_lakehouse(
    **github_private_upload,
    target_path=f"Files/GitHub/{gh_repo_name}",
)

# Only one folder of the private repository
OneLakeUtils.write_to_lakehouse(
    **github_private_upload,
    target_path="Files/GitHub/data",
    folder_path="data",
)

### **Azure DevOps (Private Repo)**

In [None]:
# Azure DevOps connection settings come from the environment
organization_url = os.getenv("ORGANIZATIONAL_URL")
personal_access_token = os.getenv("PERSONAL_ACCESS_TOKEN")
project_name = os.getenv("PROJECT_NAME")
repo_name = os.getenv("REPO_NAME")

# Keyword arguments common to both Azure DevOps uploads below
azure_devops_upload = {
    "file_system_client": file_system_client,
    "lakehouse_id": lakehouse_id,
    "upload_from": "azure_devops",
    "project_name": project_name,
    "repo_name": repo_name,
    "organization_url": organization_url,
    "personal_access_token": personal_access_token,
}

# Entire Azure DevOps repository
OneLakeUtils.write_to_lakehouse(
    **azure_devops_upload,
    target_path=f"Files/AzureDevOps/{repo_name}",
)

# Only one folder of the Azure DevOps repository
OneLakeUtils.write_to_lakehouse(
    **azure_devops_upload,
    target_path="Files/AzureDevOps/data",
    folder_path="/data",
)

## **List Items from Lakehouse (Files/Tables)**

In [None]:
# List All Items in Lakehouse
# Lakehouse directory to enumerate: "Tables" or "Files"
listing_root = "Tables"

# List all items found under that directory
OneLakeUtils.list_items(
    file_system_client=file_system_client,
    lakehouse_id=lakehouse_id,
    target_directory_path=listing_root,
    # print_output=True,  # Optional
)

## **Delta Table Operations**

### **Read Delta Table from Lakehouse**

In [None]:
from fabric_remote_tools import FabricAuth, OneLakeUtils
import os
from dotenv import load_dotenv
load_dotenv() # Load environment variables from .env file

# Authenticate and obtain access token
file_system_client = FabricAuth().get_client_secret_token()

# Read Table from Lakehouse into Dataframe
workspace_name = os.getenv("WORKSPACE_NAME")
lakehouse_name = os.getenv("LAKEHOUSE_NAME")
table_name = "Tables/venture_funding_deals_delta_partitioned"
table_path = f"abfss://{workspace_name}@onelake.dfs.fabric.microsoft.com/{lakehouse_name}.Lakehouse/{table_name}"

df = OneLakeUtils().read_delta_from_fabric_lakehouse(
    file_system_client=file_system_client,
    table_path=table_path,
    engine='duckdb',  # Supported options: 'duckdb', 'polars'
    version=11,  # Optional: specify the version to read
    # row_limit=10  # Optional
)

display(df)

### **Write DataFrame to Lakehouse**

In [None]:
from deltalake.writer import write_deltalake
import duckdb
import pyarrow
import polars as pl

# Write DataFrame to Lakehouse
write_deltalake(
    table_or_uri=table_path
    ,storage_options=file_system_client
    # ,data=df.to_arrow() # Polars DF
    ,data=df.arrow() # DuckDB (arrow DF)
    ,mode="append" # Supported options: 'append', 'overwrite'
    ,engine="rust"
)

### **DESCRIBE HISTORY**

In [34]:
from deltalake import DeltaTable
import pandas as pd

# Open the Delta table located at `table_path`
dt = DeltaTable(table_path)

# Fetch the table's full commit history
history = dt.history()

# Build a DataFrame from the history entries and parse the
# millisecond `timestamp` values into datetimes in the same step
history_df = pd.DataFrame(history).assign(
    timestamp=lambda frame: pd.to_datetime(frame['timestamp'], unit='ms')
)

# Show the history with the highest version first
history_newest_first = history_df.sort_values(by='version', ascending=False)
display(history_newest_first)

Unnamed: 0,timestamp,operation,operationParameters,clientVersion,version,readVersion,isolationLevel,isBlindAppend,engineInfo,operationMetrics,txnId,tags
0,2024-07-02 19:57:56.736,WRITE,"{'mode': 'Overwrite', 'partitionBy': '[""Month""]'}",delta-rs.0.18.0,12,,,,,,,
1,2024-07-02 19:55:33.151,WRITE,"{'mode': 'Append', 'partitionBy': '[""Month""]'}",delta-rs.0.18.0,11,,,,,,,
2,2024-07-02 19:53:09.165,VACUUM END,{'status': 'COMPLETED'},,10,9.0,SnapshotIsolation,True,Apache-Spark/3.4.1.5.3.20240528.1 Delta-Lake/2...,"{'numDeletedFiles': '0', 'numVacuumedDirectori...",33d20b2a-18af-4181-be73-16fa36691665,
3,2024-07-02 19:53:07.140,VACUUM START,"{'retentionCheckEnabled': True, 'defaultRetent...",,9,8.0,SnapshotIsolation,True,Apache-Spark/3.4.1.5.3.20240528.1 Delta-Lake/2...,"{'numFilesToDelete': '0', 'sizeOfDataToDelete'...",753c18cb-da83-496f-93b8-c276cf758f64,
4,2024-07-02 19:51:21.406,VACUUM END,{'status': 'COMPLETED'},,8,7.0,SnapshotIsolation,True,Apache-Spark/3.4.1.5.3.20240528.1 Delta-Lake/2...,"{'numDeletedFiles': '0', 'numVacuumedDirectori...",60e404d2-1717-4718-916e-029fd5e6a70b,
5,2024-07-02 19:51:19.532,VACUUM START,"{'retentionCheckEnabled': True, 'defaultRetent...",,7,6.0,SnapshotIsolation,True,Apache-Spark/3.4.1.5.3.20240528.1 Delta-Lake/2...,"{'numFilesToDelete': '0', 'sizeOfDataToDelete'...",106c8e3d-c77c-451c-8a33-327463028eb1,
6,2024-07-02 19:50:02.303,OPTIMIZE,"{'predicate': '[]', 'auto': False, 'zOrderBy':...",,6,5.0,SnapshotIsolation,False,Apache-Spark/3.4.1.5.3.20240528.1 Delta-Lake/2...,"{'maxFileSize': '5808', 'minFileSize': '4502',...",3a28865c-5261-49a6-8416-01ba424c97c2,{'VORDER': 'true'}
7,2024-07-02 19:49:59.598,VACUUM END,{'status': 'COMPLETED'},,5,4.0,SnapshotIsolation,True,Apache-Spark/3.4.1.5.3.20240528.1 Delta-Lake/2...,"{'numDeletedFiles': '0', 'numVacuumedDirectori...",d478b83c-ce7d-4585-b6cf-38b2e4c828b5,
8,2024-07-02 19:49:57.636,VACUUM START,"{'retentionCheckEnabled': True, 'defaultRetent...",,4,3.0,SnapshotIsolation,True,Apache-Spark/3.4.1.5.3.20240528.1 Delta-Lake/2...,"{'numFilesToDelete': '0', 'sizeOfDataToDelete'...",e1226ae9-ad65-4521-bc3e-04d7406aaa74,
9,2024-06-25 16:41:51.547,WRITE,"{'mode': 'Overwrite', 'partitionBy': '[""Month""]'}",delta-rs.0.18.0,3,,,,,,,


## **Download Items from Lakehouse (Files/Tables)**

In [None]:
# Keyword arguments common to both downloads in this cell
lakehouse_download = {
    "file_system_client": file_system_client,
    "lakehouse_id": lakehouse_id,
}

# Tables
OneLakeUtils.download_from_lakehouse(
    **lakehouse_download,
    # target_file_path="Tables/venture_funding_deals",  # Single Table
    target_file_path="Tables/",  # All Tables
)

# Files
OneLakeUtils.download_from_lakehouse(
    **lakehouse_download,
    # target_file_path="Files/Contoso/contoso_sales.csv",  # Single File
    # target_file_path="Files/Contoso/",  # Subfolder
    target_file_path="Files/",  # All Subfolders & Files
)

## **Delete Items from Lakehouse (Files/Tables)**

In [None]:
# Keyword arguments common to both deletions in this cell
lakehouse_delete = {
    "file_system_client": file_system_client,
    "lakehouse_id": lakehouse_id,
}

# Tables
OneLakeUtils.delete_file(
    **lakehouse_delete,
    # lakehouse_dir_path="Tables/venture_funding_deals_delta",  # Single Table
    lakehouse_dir_path="Tables/",  # All Tables
)

# Files
OneLakeUtils.delete_file(
    **lakehouse_delete,
    # lakehouse_dir_path="Files/Contoso/contoso_sales.csv",  # Single File
    # lakehouse_dir_path="Files/Contoso",  # Subfolder
    lakehouse_dir_path="Files/",  # All Subfolders & Files
)