# Integration of lakeFS with Prefect

## Versioning Information

In [None]:
# Name of the source branch; it is published further down as the Prefect
# variable "source_branch" for the demo workflow.
sourceBranch = "main"

## Import Python packages

In [None]:
%xmode Minimal
import requests
import lakefs_demo
import os
import lakefs

## Set environment variables

In [None]:
# Publish the lakeFS endpoint and credentials as LAKECTL_* environment
# variables (presumably what the argument-less lakefs Client() created later
# reads its configuration from -- confirm against the lakeFS SDK docs).
os.environ.update(
    {
        "LAKECTL_SERVER_ENDPOINT_URL": lakefsEndPoint,
        "LAKECTL_CREDENTIALS_ACCESS_KEY_ID": lakefsAccessKey,
        "LAKECTL_CREDENTIALS_SECRET_ACCESS_KEY": lakefsSecretKey,
    }
)

## Verify lakeFS credentials by getting lakeFS version

In [None]:
# Smoke-test the configured endpoint and credentials by asking the lakeFS
# server for its version.
print("Verifying lakeFS credentials…")
try:
    v = lakefs.client.Client().version
except Exception as e:
    # Deliberately not a bare ``except:`` so that a kernel interrupt (Ctrl-C)
    # still propagates; the underlying error is shown to make the failure
    # diagnosable (wrong endpoint vs. wrong keys, etc.).
    print(f"🛑 failed to get lakeFS version: {e}")
else:
    print(f"…✅lakeFS credentials verified\n\nℹ️lakeFS version {v}")

## Create Repository

In [None]:
# Reuse the repository named `repo_name` if it already exists; otherwise create
# it under the storage namespace "<storageNamespace>/<repo_name>".
try:
    repo = lakefs.repository(repo_name)
    # Reading repo.properties inside this try is what surfaces a missing
    # repository as NotFoundException.
    print(f"Found existing repo {repo.id} using storage namespace {repo.properties.storage_namespace}")
except lakefs.exceptions.NotFoundException:
    # The exception object is not needed here, so it is not bound to a name
    # (binding it would also delete any notebook variable of that name when
    # the handler exits).
    print(f"Repository {repo_name} does not exist, so going to try and create it now.")
    try:
        repo = lakefs.repository(repo_name).create(storage_namespace=f"{storageNamespace}/{repo_name}")
        print(f"Created new repo {repo.id} using storage namespace {repo.properties.storage_namespace}")
    except lakefs.exceptions.LakeFSException as e:
        print(f"Error creating repo {repo_name}. Error is {e}")
except lakefs.exceptions.LakeFSException as e:
    # Any other lakeFS error while looking the repository up.
    print(f"Error getting repo {repo_name}: {e}")

## S3A Gateway configuration

##### Note: lakeFS can be configured to work with Spark in two ways (this notebook uses the first one, the S3A gateway):
###### * Access lakeFS using the S3A gateway https://docs.lakefs.io/integrations/spark.html#access-lakefs-using-the-s3a-gateway.
###### * Access lakeFS using the lakeFS-specific Hadoop FileSystem https://docs.lakefs.io/integrations/spark.html#access-lakefs-using-the-lakefs-specific-hadoop-filesystem.

In [None]:
from pyspark.context import SparkContext
from pyspark.sql.session import SparkSession
# Obtain (or reuse) the Spark context and wrap it in a session.
sc = SparkContext.getOrCreate()
spark = SparkSession(sc)

# Configure the S3A filesystem with the lakeFS credentials and endpoint, and
# enable path-style access.
hadoop_conf = sc._jsc.hadoopConfiguration()
for s3a_key, s3a_value in (
    ("fs.s3a.access.key", lakefsAccessKey),
    ("fs.s3a.secret.key", lakefsSecretKey),
    ("fs.s3a.endpoint", lakefsEndPoint),
    ("fs.s3a.path.style.access", "true"),
):
    hadoop_conf.set(s3a_key, s3a_value)

## Set Prefect variables which are used by the demo workflow

In [None]:
# Prefect API address used for the requests below, and the Prefect UI address
# that is stored as a Prefect variable.
prefectAPIEndPoint = 'http://host.docker.internal:4200/api'
prefectUIEndPoint = 'http://127.0.0.1:4200'

# Upper bound (seconds) for each Prefect API call so an unresponsive server
# cannot hang the notebook cell indefinitely.
PREFECT_REQUEST_TIMEOUT_SECONDS = 30


def _reset_prefect_variable(name, value):
    """Delete the Prefect variable *name* (if any) and create it with *value*.

    The delete is best-effort: its status is not checked, because a failure
    there typically just means the variable did not exist yet. The create
    call is verified.

    Returns:
        The ``(delete_response, create_response)`` pair of ``requests``
        responses.

    Raises:
        requests.HTTPError: if the create request returns an error status.
        requests.RequestException: on connection problems or timeout.
    """
    delete_response = requests.delete(
        f"{prefectAPIEndPoint}/variables/name/{name}",
        timeout=PREFECT_REQUEST_TIMEOUT_SECONDS,
    )
    create_response = requests.post(
        f"{prefectAPIEndPoint}/variables",
        json={"name": name, "value": value},
        timeout=PREFECT_REQUEST_TIMEOUT_SECONDS,
    )
    # Fail loudly instead of reporting success for a variable that was not created.
    create_response.raise_for_status()
    return delete_response, create_response


repo_variable_deletion_request, repo_variable_creation_request = _reset_prefect_variable("repo", repo_name)

sourceBranch_variable_deletion_request, sourceBranch_variable_creation_request = _reset_prefect_variable("source_branch", sourceBranch)

newBranch_variable_deletion_request, newBranch_variable_creation_request = _reset_prefect_variable("new_branch", newBranch)

prefect_ui_endpoint_variable_deletion_request, prefect_ui_endpoint_variable_creation_request = _reset_prefect_variable("prefect_ui_endpoint", prefectUIEndPoint)

# Derive the lakeFS UI address to store: container-internal host names are
# mapped to 127.0.0.1 addresses; any other endpoint is used unchanged.
if lakefsEndPoint.startswith('http://host.docker.internal'):
    lakefsUIEndPoint = lakefsEndPoint.replace('host.docker.internal','127.0.0.1')
elif lakefsEndPoint.startswith('http://lakefs'):
    lakefsUIEndPoint = 'http://127.0.0.1:58000'
else:
    lakefsUIEndPoint = lakefsEndPoint

lakefs_ui_endpoint_variable_deletion_request, lakefs_ui_endpoint_variable_creation_request = _reset_prefect_variable("lakefs_ui_endpoint", lakefsUIEndPoint)

print("Created Prefect variables")