In [10]:
!pip install delta-spark==3.2.0 -q
import pyspark
from delta import *
from pyspark.sql.functions import *

# Describe the session: an application name plus the two settings that plug
# Delta Lake's SQL extension and catalog implementation into Spark.
builder = (
    pyspark.sql.SparkSession.builder
    .appName("DeltaTutorial")
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config(
        "spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    )
)

# Let the delta helper finish configuring the builder, then reuse an existing
# session if there is one or start a new one otherwise.
spark = configure_spark_with_delta_pip(builder).getOrCreate()

print("Spark and Delta Lake are ready!")

Spark and Delta Lake are ready!


In [11]:
# Sample rows for the managed-table demo. A list (not a set) keeps the input
# rows in a fixed order and never silently drops a duplicate row.
data = [("amit", 28), ("priya", 32), ("rahul", 25)]
df = spark.createDataFrame(data, ["name", "age"])

# No path is given, so this is a managed table. Overwrite mode lets the cell be
# re-run without failing because the table already exists.
df.write.format("delta").mode("overwrite").saveAsTable("managed_people")

spark.sql("select * from managed_people").show()

# DESCRIBE DETAIL yields a single row; its 'location' field is the storage path.
location = spark.sql("describe detail managed_people").collect()[0]["location"]
print("managed table location: ",location)

+-----+---+
| name|age|
+-----+---+
|priya| 32|
|rahul| 25|
| amit| 28|
+-----+---+

managed table location:  file:/content/spark-warehouse/managed_people


In [12]:
# Drop the managed table: this deletes both the table data and its metadata.
# IF EXISTS keeps the cell re-runnable once the table is already gone.
spark.sql("DROP TABLE IF EXISTS managed_people")

DataFrame[]

In [13]:
#  Install dependencies
!pip install pyspark==3.5.1 delta-spark==3.2.0 -q

In [14]:
# Import libraries
import pyspark
from delta import configure_spark_with_delta_pip

In [15]:
# Attach Google Drive to the Colab filesystem so files on it can be read by path
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [16]:
# Where the suppliers CSV lives on the mounted Drive
csv_path = "/content/drive/MyDrive/Colab Notebooks/suppliers.csv"

# Configure a reader that treats the first line as column names, then load the file
csv_reader = spark.read.option("header", "true")
df = csv_reader.csv(csv_path)

print("Original CSV data:")
df.show()

Original CSV data:
+-----------+------------------+--------------------+---------+
|supplier_id|              name|        contact_info| location|
+-----------+------------------+--------------------+---------+
|          1|       ABC Traders|       abc@gmail.com|   Mumbai|
|          2|   Global Supplies| global@supplies.com|    Delhi|
|          3|Metro Distributors|metro@distributor...|  Chennai|
|          4|          FastMart|contact@fastmart.com|Hyderabad|
|          5|       QuickSupply|support@quicksupp...|Bangalore|
|          6|         LogiTrade|  info@logitrade.com|     Pune|
|          7|          CityMart|  hello@citymart.com|  Kolkata|
|          8|     Everest Goods|   reach@everest.com|Ahmedabad|
|          9|    NextGen Supply|  nextgen@supply.com|   Jaipur|
|         10| Elite Wholesalers| elite@wholesale.com|  Lucknow|
+-----------+------------------+--------------------+---------+



In [17]:
# Save the CSV rows as a managed Delta table (no path is given).
# Overwrite mode lets the cell be re-run without failing because the table
# already exists.
df.write.format("delta").mode("overwrite").saveAsTable("managed_people_from_csv")
print("\nManaged Delta table created.")

# DESCRIBE DETAIL yields a single row; its 'location' field is the storage path.
location = spark.sql("DESCRIBE DETAIL managed_people_from_csv").collect()[0]["location"]
print("Managed table location:", location)


Managed Delta table created.
Managed table location: file:/content/spark-warehouse/managed_people_from_csv


In [18]:
# Drop (delete) the managed table. IF EXISTS keeps the cell re-runnable once
# the table is already gone.
spark.sql("DROP TABLE IF EXISTS managed_people_from_csv")
print("\nManaged table deleted.")


Managed table deleted.


In [19]:
# Report whether the source CSV on Google Drive is still present after the drop
import os

csv_still_present = os.path.exists(csv_path)
print("CSV in Google Drive still exists." if csv_still_present else "CSV file was deleted.")

CSV in Google Drive still exists.
