## **Install Package**

In [None]:
# Install fabric-remote-tools 0.1.1 from the source tarball hosted on GitHub.
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
%pip install https://github.com/renan-peres/fabric-remote-tools/raw/main/fabric_remote_tools-0.1.1.tar.gz

## **Import Modules & Authenticate**

In [1]:
from fabric_remote_tools import FabricAuth, OneLakeUtils
import os
from dotenv import load_dotenv
load_dotenv()

# Load Fabric environment variables (.env file).
# Fail fast when one is unset or empty: os.getenv() would otherwise hand None to
# the client factory below and the misconfiguration would only surface later.
_required_env_vars = ("ACCOUNT_NAME", "WORKSPACE_ID", "LAKEHOUSE_ID")
_missing_env_vars = [name for name in _required_env_vars if not os.getenv(name)]
if _missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(_missing_env_vars)
    )

account_name = os.getenv("ACCOUNT_NAME")
workspace_id = os.getenv("WORKSPACE_ID")
lakehouse_id = os.getenv("LAKEHOUSE_ID")

# Get authentication token
token = FabricAuth.get_service_principal_token()

# Get file system client for the workspace; reused by the upload/list/download/delete cells
file_system_client = FabricAuth.get_file_system_client(token, account_name, workspace_id)

## **Write to Lakehouse (Files/Tables)**

### **Local Tables (Delta)**

In [2]:
# Keyword arguments shared by both local uploads in this cell.
local_table_args = {
    "file_system_client": file_system_client,
    "lakehouse_id": lakehouse_id,
    "upload_from": "local",
}

# Single table: upload one local Delta table folder under its own target name.
OneLakeUtils.write_to_lakehouse(
    source_path="../assets/data/Tables/venture_funding_deals_delta",
    target_path="Tables/local_venture_funding_deals_delta",
    **local_table_args,
)

# Multiple tables: point at the parent folder to upload the tables it contains.
OneLakeUtils.write_to_lakehouse(
    source_path="../assets/data/Tables",
    target_path="Tables/",
    **local_table_args,
)

Uploading local Delta Table '../assets/data/Tables/venture_funding_deals_delta' to 'Tables/local_venture_funding_deals_delta'
Successfully uploaded 2 out of 2 files to 'Tables/local_venture_funding_deals_delta'
Uploading multiple tables from '../assets/data/Tables' to 'Tables/'
Processing table: venture_funding_deals_delta
Processing table: venture_funding_deals_delta_partitioned
Successfully uploaded 2 out of 2 files to 'Tables/venture_funding_deals_delta'
Successfully uploaded 40 out of 40 files to 'Tables/venture_funding_deals_delta_partitioned'


### **Local Files/Folders**

In [None]:
# Keyword arguments shared by the three local uploads in this cell.
local_file_args = {
    "file_system_client": file_system_client,
    "lakehouse_id": lakehouse_id,
    "upload_from": "local",
}

# Whole folder
OneLakeUtils.write_to_lakehouse(
    source_path="../assets/data/Files",
    target_path="Files/",
    **local_file_args,
)

# One subfolder inside that folder
OneLakeUtils.write_to_lakehouse(
    source_path="../assets/data/Files/Contoso",
    target_path="Files/Contoso",
    **local_file_args,
)

# One specific file
OneLakeUtils.write_to_lakehouse(
    source_path="../assets/data/Files/Contoso/contoso_sales.csv",
    target_path="Files/Contoso/contoso_sales.csv",
    **local_file_args,
)

### **GitHub (Public Repo)** 

In [None]:
# Keyword arguments shared by the three uploads from the public repository.
public_github_args = {
    "file_system_client": file_system_client,
    "lakehouse_id": lakehouse_id,
    "upload_from": "github",
    "source_path": "https://github.com/renan-peres/Polars-Cookbook.git",
}

# Whole repository
OneLakeUtils.write_to_lakehouse(
    target_path="Files/GitHub/Polars-Cookbook",
    **public_github_args,
)

# Single Delta table folder from the repository, written under Tables/
OneLakeUtils.write_to_lakehouse(
    target_path="Tables/github_venture_funding_deals_delta",
    folder_path="data/venture_funding_deals_delta",
    **public_github_args,
)

# One specific folder from the repository
OneLakeUtils.write_to_lakehouse(
    target_path="Files/GitHub/data",
    folder_path="data",
    **public_github_args,
)

### **GitHub (Private Repo)** 

In [None]:
# GitHub credentials and repository name, read from the environment (.env file)
github_token = os.getenv("GH_PERSONAL_ACCESS_TOKEN")
github_username = os.getenv("GH_USERNAME")
gh_repo_name = os.getenv("GH_REPO_NAME")

# Keyword arguments shared by both uploads from the private repository.
private_github_args = {
    "file_system_client": file_system_client,
    "lakehouse_id": lakehouse_id,
    "upload_from": "github_private",
    "github_token": github_token,
    "github_username": github_username,
    "repo_name": gh_repo_name,
}

# Whole private repository, stored under a folder named after the repository
OneLakeUtils.write_to_lakehouse(
    target_path=f"Files/GitHub/{gh_repo_name}",
    **private_github_args,
)

# One specific folder from the private repository
OneLakeUtils.write_to_lakehouse(
    target_path="Files/GitHub/data",
    folder_path="data",
    **private_github_args,
)

### **Azure DevOps (Private Repo)**

In [None]:
# Azure DevOps connection details, read from the environment (.env file)
organization_url = os.getenv("ADO_ORGANIZATIONAL_URL")
personal_access_token = os.getenv("ADO_PERSONAL_ACCESS_TOKEN")
project_name = os.getenv("ADO_PROJECT_NAME")
repo_name = os.getenv("ADO_REPO_NAME")

# Keyword arguments shared by both uploads from the Azure DevOps repository.
azure_devops_args = {
    "file_system_client": file_system_client,
    "lakehouse_id": lakehouse_id,
    "upload_from": "azure_devops",
    "project_name": project_name,
    "repo_name": repo_name,
    "organization_url": organization_url,
    "personal_access_token": personal_access_token,
}

# Whole repository, stored under a folder named after the repository
OneLakeUtils.write_to_lakehouse(
    target_path=f"Files/AzureDevOps/{repo_name}",
    **azure_devops_args,
)

# One specific folder (note the leading slash in folder_path for this source)
OneLakeUtils.write_to_lakehouse(
    target_path="Files/AzureDevOps/data",
    folder_path="/data",
    **azure_devops_args,
)

## **List Items from Lakehouse (Files/Tables)**

In [None]:
# List the items under one top-level Lakehouse directory.
# Pass print_output=True as an extra keyword argument if needed (optional).
OneLakeUtils.list_items(
    file_system_client=file_system_client,
    lakehouse_id=lakehouse_id,
    target_directory_path="Tables",  # "Tables" or "Files"
)

## **DeltaLake Table Operations**

### **Read Table from Lakehouse into a DataFrame**

In [15]:
from fabric_remote_tools import FabricAuth, OneLakeUtils
import os
from dotenv import load_dotenv
load_dotenv() # Load environment variables from .env file

# Authenticate and obtain access token
# NOTE(review): this rebinds `file_system_client`, which the first cell set to the
# result of FabricAuth.get_file_system_client(...). The value assigned here is later
# passed as `storage_options` to write_deltalake, while the download/delete cells
# still pass the same name as `file_system_client` -- confirm both uses are valid
# after this cell has run, or give this value its own name.
file_system_client = FabricAuth().get_client_secret_token()

# Read Table from Lakehouse into Dataframe
# The table URI is assembled from the workspace and lakehouse names in the .env file.
workspace_name = os.getenv("WORKSPACE_NAME")
lakehouse_name = os.getenv("LAKEHOUSE_NAME")
table_name = "Tables/venture_funding_deals_delta_partitioned"
table_path = f"abfss://{workspace_name}@onelake.dfs.fabric.microsoft.com/{lakehouse_name}.Lakehouse/{table_name}"

# `df` is queried by the %%dql cells below; `table_path` is reused by the
# write and history cells.
df = OneLakeUtils().read_delta_from_fabric_lakehouse(
    file_system_client=file_system_client,
    table_path=table_path,
    engine='duckdb',  # Supported options: 'duckdb', 'polars'
    version=11,  # Optional: specify the version to read. NOTE(review): hard-coded; presumably fails on a table without a version 11 -- confirm
    # row_limit=10  # Optional
)

display(df)

┌──────────────────────┬──────────────┬──────────────────────┬───┬───────────────┬──────┬───────┬───────┐
│       Company        │    Amount    │    Lead investors    │ … │ Date reported │ Day  │ Month │ Year  │
│       varchar        │   varchar    │       varchar        │   │    varchar    │ int8 │ int8  │ int32 │
├──────────────────────┼──────────────┼──────────────────────┼───┼───────────────┼──────┼───────┼───────┤
│ Rapport Therapeutics │ $100,000,000 │ Third Rock Venture…  │ … │ 3/7/23        │    7 │     3 │  2023 │
│ Character.AI         │ $150,000,000 │ Andreessen Horowitz  │ … │ 3/21/23       │   21 │     3 │  2023 │
│ Palmetto             │ $150,000,000 │ TPG Rise Climate     │ … │ 3/6/23        │    6 │     3 │  2023 │
│ Consensus            │ $110,000,000 │ Sumeru Equity Part…  │ … │ 3/8/23        │    8 │     3 │  2023 │
│ Bicara Therapeutics  │ $108,000,000 │ Red Tree Venture C…  │ … │ 3/6/23        │    6 │     3 │  2023 │
│ CARGO Therapeutics   │ $200,000,000 │ Third 

### **Query DataFrame with DuckDB**

In [None]:
# Register the magic_duckdb extension, which provides the %%dql cell magic used below.
# If the package is missing, install it first:
# %pip install magic_duckdb --upgrade --quiet
%load_ext magic_duckdb

In [18]:
%%dql
-- Turn off the progress bar so it does not clutter the cell output.
PRAGMA disable_print_progress_bar;

-- Query the `df` object created by the read cell.
SELECT * FROM df;

Unnamed: 0,Company,Amount,Lead investors,Valuation,Industry,Date reported,Day,Month,Year
0,Rapport Therapeutics,"$100,000,000","Third Rock Ventures, Arch Venture Partners, Jo...",,Biotech,3/7/23,7,3,2023
1,Character.AI,"$150,000,000",Andreessen Horowitz,"$1,000,000,000",Artificial intelligence,3/21/23,21,3,2023
2,Palmetto,"$150,000,000",TPG Rise Climate,,Cleantech,3/6/23,6,3,2023
3,Consensus,"$110,000,000",Sumeru Equity Partners,,SaaS,3/8/23,8,3,2023
4,Bicara Therapeutics,"$108,000,000","Red Tree Venture Capital, RA Capital Management",,Biotech,3/6/23,6,3,2023
...,...,...,...,...,...,...,...,...,...
337,Inceptive,"$100,000,000","NVentures, Andreessen Horowitz",,Biotech,9/7/23,7,9,2023
338,Vesper Energy,"$100,000,000",GCM Grosvenor,,Energy,9/13/23,13,9,2023
339,Writer,"$100,000,000",Iconiq Growth,"$500,000,000",Artificial intelligence,9/18/23,18,9,2023
340,Pryon,"$100,000,000",US Innovative Technology Fund,"$625,000,000",Artificial intelligence,9/19/23,19,9,2023


In [19]:
%%dql
-- Keep 20 rows of `df` as the table `df_tranf` (read again by the write cell).
-- NOTE(review): LIMIT without ORDER BY does not guarantee which rows are kept.
CREATE OR REPLACE TABLE df_tranf AS
SELECT * FROM df LIMIT 20;

-- Preview the new table.
SELECT * FROM df_tranf;

Unnamed: 0,Company,Amount,Lead investors,Valuation,Industry,Date reported,Day,Month,Year
0,Rapport Therapeutics,"$100,000,000","Third Rock Ventures, Arch Venture Partners, Jo...",,Biotech,3/7/23,7,3,2023
1,Character.AI,"$150,000,000",Andreessen Horowitz,"$1,000,000,000",Artificial intelligence,3/21/23,21,3,2023
2,Palmetto,"$150,000,000",TPG Rise Climate,,Cleantech,3/6/23,6,3,2023
3,Consensus,"$110,000,000",Sumeru Equity Partners,,SaaS,3/8/23,8,3,2023
4,Bicara Therapeutics,"$108,000,000","Red Tree Venture Capital, RA Capital Management",,Biotech,3/6/23,6,3,2023
5,CARGO Therapeutics,"$200,000,000","Third Rock Ventures, RTW Investments, Percepti...",,Biotech,3/1/23,1,3,2023
6,Humane,"$100,000,000",Kindred Ventures,,Artifical intelligence,3/8/23,8,3,2023
7,Rippling,"$500,000,000",Greenoaks,,Human resources,3/17/23,17,3,2023
8,Amogy,"$139,000,000",SK Innovation,,Cleantech,3/22/23,22,3,2023
9,Adept AI,"$350,000,000","General Catalyst, Spark Capital","$1,000,000,000",Artificial intelligence,3/14/23,14,3,2023


### **Write DataFrame to Lakehouse as a Delta Table**

In [12]:
from deltalake.writer import write_deltalake
import duckdb
import pyarrow
import polars as pl

# Overwrite the Delta table at `table_path` with the contents of `df_tranf`.
# The DuckDB query result is converted to Arrow before being handed to the writer.
# To write a Polars DataFrame instead, pass `data=df.to_arrow()`.
write_deltalake(
    table_or_uri=table_path,
    storage_options=file_system_client,
    data=duckdb.sql("SELECT * FROM df_tranf").arrow(),
    mode="overwrite",  # e.g. 'append' or 'overwrite'
    engine="rust",
)

### **DESCRIBE HISTORY**

In [12]:
from deltalake import DeltaTable
import pandas as pd

# Open the Delta table with the same storage options that the write_deltalake
# call on this path uses, instead of relying on credentials being picked up
# implicitly from the process environment.
# NOTE(review): assumes `file_system_client` still holds the value returned by
# FabricAuth().get_client_secret_token() in the read cell -- confirm.
dt = DeltaTable(table_path, storage_options=file_system_client)

# Retrieve the full commit history of the table
history = dt.history()

# One row per commit
history_df = pd.DataFrame(history)

# Commit timestamps are parsed as epoch milliseconds
history_df['timestamp'] = pd.to_datetime(history_df['timestamp'], unit='ms')

# Show the five most recent versions, newest first
display(history_df.sort_values(by='version', ascending=False).head(5))

Unnamed: 0,timestamp,operation,operationParameters,clientVersion,version,readVersion,isolationLevel,isBlindAppend,engineInfo,txnId,operationMetrics,tags
0,2024-07-02 19:57:56.736,WRITE,"{'mode': 'Overwrite', 'partitionBy': '[""Month""]'}",delta-rs.0.18.0,12,,,,,,,
1,2024-07-02 19:55:33.151,WRITE,"{'mode': 'Append', 'partitionBy': '[""Month""]'}",delta-rs.0.18.0,11,,,,,,,
2,2024-07-02 19:53:09.165,VACUUM END,{'status': 'COMPLETED'},,10,9.0,SnapshotIsolation,True,Apache-Spark/3.4.1.5.3.20240528.1 Delta-Lake/2...,33d20b2a-18af-4181-be73-16fa36691665,"{'numDeletedFiles': '0', 'numVacuumedDirectori...",
3,2024-07-02 19:53:07.140,VACUUM START,"{'retentionCheckEnabled': True, 'defaultRetent...",,9,8.0,SnapshotIsolation,True,Apache-Spark/3.4.1.5.3.20240528.1 Delta-Lake/2...,753c18cb-da83-496f-93b8-c276cf758f64,"{'numFilesToDelete': '0', 'sizeOfDataToDelete'...",
4,2024-07-02 19:51:21.406,VACUUM END,{'status': 'COMPLETED'},,8,7.0,SnapshotIsolation,True,Apache-Spark/3.4.1.5.3.20240528.1 Delta-Lake/2...,60e404d2-1717-4718-916e-029fd5e6a70b,"{'numDeletedFiles': '0', 'numVacuumedDirectori...",


## **Download Items from Lakehouse (Files/Tables)**

In [None]:
# NOTE(review): the read cell rebinds `file_system_client` to the return value of
# get_client_secret_token(); confirm it is still a valid client when this cell runs.
download_args = {
    "file_system_client": file_system_client,
    "lakehouse_id": lakehouse_id,
}

# Tables: "Tables/" selects all tables.
# For a single table use e.g. target_file_path="Tables/venture_funding_deals".
OneLakeUtils.download_from_lakehouse(
    target_file_path="Tables/",
    **download_args,
)

# Files: "Files/" selects all subfolders and files.
# For a subfolder use "Files/Contoso/"; for a single file use
# "Files/Contoso/contoso_sales.csv".
OneLakeUtils.download_from_lakehouse(
    target_file_path="Files/",
    **download_args,
)

## **Delete Items from Lakehouse (Files/Tables)**

In [None]:
# WARNING: as written, this cell targets everything under Tables/ and Files/.
# NOTE(review): the read cell rebinds `file_system_client` to the return value of
# get_client_secret_token(); confirm it is still a valid client when this cell runs.
delete_args = {
    "file_system_client": file_system_client,
    "lakehouse_id": lakehouse_id,
}

# Tables: "Tables/" selects all tables.
# For a single table use e.g. lakehouse_dir_path="Tables/venture_funding_deals_delta".
OneLakeUtils.delete_file(
    lakehouse_dir_path="Tables/",
    **delete_args,
)

# Files: "Files/" selects all subfolders and files.
# For a subfolder use "Files/Contoso"; for a single file use
# "Files/Contoso/contoso_sales.csv".
OneLakeUtils.delete_file(
    lakehouse_dir_path="Files/",
    **delete_args,
)