# Managed and External Tables in Fabric


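Reference (Microsoft Learn, "Work with Delta Lake tables in Microsoft Fabric" — Create delta tables): https://learn.microsoft.com/en-us/training/modules/work-delta-lake-tables-fabric/3-create-delta-tables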

In [1]:
# Load the geography dimension from the lakehouse Files area.
# Only the header option is set, so no schema inference is requested.
csv_reader = spark.read.format("csv").option("header", "true")
df = csv_reader.load("Files/data/DimGeography.csv")

df.printSchema()


StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 3, Finished, Available, Finished)

root
 |-- GeographyKey: string (nullable = true)
 |-- City: string (nullable = true)
 |-- StateProvinceCode: string (nullable = true)
 |-- StateProvinceName: string (nullable = true)
 |-- CountryRegionCode: string (nullable = true)
 |-- EnglishCountryRegionName: string (nullable = true)
 |-- SpanishCountryRegionName: string (nullable = true)
 |-- FrenchCountryRegionName: string (nullable = true)
 |-- PostalCode: string (nullable = true)
 |-- SalesTerritoryKey: string (nullable = true)
 |-- IpAddressLocator: string (nullable = true)



In [3]:
# Snapshot of the tables registered in the catalog at this point of the notebook.
catalog = spark.catalog
table_names = catalog.listTables()
print(table_names)

StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 5, Finished, Available, Finished)

[]


## Managed

In [4]:
## Delta managed table using the DataFrameWriter API (no path is supplied)

managed_writer = df.write.format("delta")
managed_writer.saveAsTable("DimGeography_df_managed")

# Resulting OneLake location:
# https://onelake.dfs.fabric.microsoft.com/DP700/Managed_External.Lakehouse/Tables/dimgeography_df_managed

StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 6, Finished, Available, Finished)

In [5]:
%%sql

-- Managed Delta table declared in Spark SQL: no LOCATION clause is given.
CREATE TABLE DimGeography_sql_managed (
    GeographyKey             STRING,
    City                     STRING,
    StateProvinceCode        STRING,
    StateProvinceName        STRING,
    CountryRegionCode        STRING,
    EnglishCountryRegionName STRING,
    SpanishCountryRegionName STRING,
    FrenchCountryRegionName  STRING,
    PostalCode               STRING,
    SalesTerritoryKey        STRING,
    IpAddressLocator         STRING
)
USING DELTA

StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 7, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

In [6]:
%%sql

-- Fill the SQL-created table from the table written by the DataFrame API.
INSERT INTO DimGeography_sql_managed
SELECT *
FROM dimgeography_df_managed

StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 8, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

In [7]:
%%sql

-- Confirm the inserted rows are visible through the SQL-created table.
SELECT *
FROM DimGeography_sql_managed

StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 9, Finished, Available, Finished)

<Spark SQL result set with 655 rows and 11 fields>

In [8]:
## DeltaBuilder Api

from delta.tables import DeltaTable

# Columns of the geography dimension, in source order; each one is declared as STRING.
geography_columns = [
    "GeographyKey",
    "City",
    "StateProvinceCode",
    "StateProvinceName",
    "CountryRegionCode",
    "EnglishCountryRegionName",
    "SpanishCountryRegionName",
    "FrenchCountryRegionName",
    "PostalCode",
    "SalesTerritoryKey",
    "IpAddressLocator",
]

# No location is set on the builder, only a table name.
managed_builder = DeltaTable.create(spark).tableName("dimgeography_api_managed")
for column_name in geography_columns:
    managed_builder = managed_builder.addColumn(column_name, "STRING")

# Last expression of the cell: creates the table and displays the returned DeltaTable.
managed_builder.execute()


StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 10, Finished, Available, Finished)

<delta.tables.DeltaTable at 0x779b06b66410>

In [9]:
## populate
# Append the DataFrame rows into the table created by the builder above.
api_managed_writer = df.write.format("delta").mode("append")
api_managed_writer.saveAsTable("dimgeography_api_managed")

StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 11, Finished, Available, Finished)

## External

In [10]:
## Delta external table using the DataFrameWriter API (explicit path under Files/)

external_path = "Files/dimgeography_df_external"
df.write.format("delta").saveAsTable("dimgeography_df_external", path=external_path)

StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 12, Finished, Available, Finished)

In [11]:
%%sql

-- External Delta table declared in Spark SQL: the LOCATION clause points at a folder under Files/.
CREATE TABLE DimGeography_sql_external (
    GeographyKey             STRING,
    City                     STRING,
    StateProvinceCode        STRING,
    StateProvinceName        STRING,
    CountryRegionCode        STRING,
    EnglishCountryRegionName STRING,
    SpanishCountryRegionName STRING,
    FrenchCountryRegionName  STRING,
    PostalCode               STRING,
    SalesTerritoryKey        STRING,
    IpAddressLocator         STRING
)
USING DELTA
LOCATION 'Files/dimgeography_sql_external'

StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 13, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

In [12]:
from delta.tables import DeltaTable

# Columns of the geography dimension, in source order; each one is declared as STRING.
geography_columns = [
    "GeographyKey",
    "City",
    "StateProvinceCode",
    "StateProvinceName",
    "CountryRegionCode",
    "EnglishCountryRegionName",
    "SpanishCountryRegionName",
    "FrenchCountryRegionName",
    "PostalCode",
    "SalesTerritoryKey",
    "IpAddressLocator",
]

# Table name and explicit location are set first, then the columns are added.
external_builder = (
    DeltaTable.create(spark)
    .tableName("dimgeography_api_external")
    .location('Files/dimgeography_api_external')
)
for column_name in geography_columns:
    external_builder = external_builder.addColumn(column_name, "STRING")

# Last expression of the cell: creates the table and displays the returned DeltaTable.
external_builder.execute()

StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 14, Finished, Available, Finished)

<delta.tables.DeltaTable at 0x779b06be4e10>

In [13]:
## populate
# Append the DataFrame rows into the table created by the builder above.
api_external_writer = df.write.format("delta").mode("append")
api_external_writer.saveAsTable("dimgeography_api_external")

StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 15, Finished, Available, Finished)

## Not metastore managed (Delta files written by path only)

In [14]:
# Write Delta files straight to a folder: save() is used instead of saveAsTable(),
# so no table name is involved.
delta_path = "Files/dimgeography_df_external_non_metastore"
path_only_writer = df.write.format("delta")
path_only_writer.save(delta_path)

StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 16, Finished, Available, Finished)

In [15]:
# Name the format explicitly instead of relying on the session's default data source,
# matching how the other path-based read in this notebook is written.
non_metastore_df = spark.read.format("delta").load(delta_path)

StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 17, Finished, Available, Finished)

In [16]:
# Render the DataFrame that was loaded by path rather than by table name.
display(non_metastore_df)

StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 18, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 28c63097-55d3-4b0b-a0fb-4c3df5d98863)

## Other cases

In [17]:
# csv managed

# Same managed-table call as before, but with CSV as the storage format.
csv_writer = df.write.format("csv")
csv_writer.saveAsTable("dimgeography_csv_managed")

StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 19, Finished, Available, Finished)

In [18]:
# external on the tables path?

# The explicit path here points under Tables/ instead of Files/.
tables_area_path = "Tables/dimgeography_df_external"
df.write.format("delta").saveAsTable("dimgeography_df_external_tables", path=tables_area_path)

StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 20, Finished, Available, Finished)

## Dropping

In [19]:
def droptable(table):
    """Drop ``table`` from the catalog if it exists, reporting the outcome on stdout.

    Args:
        table: Name of the table to drop, as it would appear in ``drop table <name>``.

    The function is best-effort and never raises: a missing table prints
    "table already dropped", and any other failure prints the underlying
    error instead of being reported as a missing table.
    """
    try:
        # Check existence explicitly so that only a genuinely absent table is
        # reported as "already dropped".
        if not spark.catalog.tableExists(table):
            print("table already dropped")
            return
        spark.sql(f"drop table {table}")
        print(f"table {table} dropped")
    except Exception as exc:
        # Keep the cleanup loop going, but surface what actually went wrong.
        print(f"table {table} could not be dropped: {exc}")


StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 21, Finished, Available, Finished)

In [21]:
# Drop every table created earlier in this notebook, in the same order as before.
tables_to_drop = [
    "dimgeography_api_external",
    "dimgeography_api_managed",
    "dimgeography_csv_managed",
    "dimgeography_df_external_tables",
    "dimgeography_df_external",
    "dimgeography_df_managed",
    "dimgeography_sql_external",
    "dimgeography_sql_managed",
]
for table_name in tables_to_drop:
    droptable(table_name)


StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 23, Finished, Available, Finished)

table already dropped
table already dropped
table already dropped
table already dropped
table dimgeography_df_external dropped
table already dropped
table already dropped
table already dropped


In [22]:
# The table was dropped above, so looking it up by name is expected to fail.
# Catch and display the error so the notebook still runs end to end.
try:
    old_managed_table = spark.read.format('delta').table("dimgeography_df_external")
except Exception as exc:
    print(f"{type(exc).__name__}: {exc}")

StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 24, Finished, Available, Finished)

AnalysisException: [TABLE_OR_VIEW_NOT_FOUND] The table or view `dimgeography_df_external` cannot be found. Verify the spelling and correctness of the schema and catalog.
If you did not qualify the name with a schema, verify the current_schema() output, or qualify the name with the correct schema and catalog.
To tolerate the error on drop use DROP VIEW IF EXISTS or DROP TABLE IF EXISTS.;
'UnresolvedRelation [dimgeography_df_external], [], false


In [23]:
# Read the Delta files by folder path instead of by table name.
recovered_path = "Files/dimgeography_df_external"
df_recover = spark.read.load(recovered_path, format="delta")

display(df_recover)

StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 25, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, ffe53744-041b-4207-bd52-30a954e22044)

In [25]:
def remove_folder(path: str) -> None:
    """Recursively delete ``path`` if it exists, reporting the outcome on stdout.

    Args:
        path: Folder to remove, e.g. a path under ``Files/``.

    The function is best-effort and never raises: a missing path prints
    "path already removed", and any other failure prints the underlying
    error instead of being reported as a missing path.
    """
    try:
        # Check existence explicitly so that only a genuinely absent path is
        # reported as "already removed".
        if not mssparkutils.fs.exists(path):
            print("path already removed")
            return
        mssparkutils.fs.rm(path, True)  # second argument: delete recursively
        print(f"removed folder: {path}")
    except Exception as exc:
        # Keep the cleanup going, but surface what actually went wrong.
        print(f"could not remove folder {path}: {exc}")


StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 27, Finished, Available, Finished)

In [26]:
# Remove the folders under Files/ that this notebook wrote to, in the same order as before.
folders_to_remove = [
    "Files/dimgeography_df_external",
    "Files/dimgeography_df_external_non_metastore",
    "Files/dimgeography_api_external",
    "Files/dimgeography_sql_external",
]
for folder_path in folders_to_remove:
    remove_folder(folder_path)

StatementMeta(, 0220328f-6984-4a5f-96bf-ecdd30cb86a6, 28, Finished, Available, Finished)

removed folder: Files/dimgeography_df_external
removed folder: Files/dimgeography_df_external_non_metastore
removed folder: Files/dimgeography_api_external
removed folder: Files/dimgeography_sql_external


StatementMeta(, ae400d2a-173d-4bc2-adc5-941488eb8c21, 6, Finished, Available, Finished)

In [5]:
# Re-query the catalog so this check reflects the current state rather than
# the snapshot stored in table_names before any table was created.
table_names = spark.catalog.listTables()
table_names

StatementMeta(, ae400d2a-173d-4bc2-adc5-941488eb8c21, 7, Finished, Available, Finished)

[]