### Spark object and properties

In [0]:
# `spark` is not created in this notebook; it is expected to be provided by the
# runtime. Leaving the bare name as the last expression makes the notebook show
# the object as the cell output.
spark

In [0]:
# Print the version reported by the runtime-provided `spark` object.
print(spark.version)

### File system operations (dbutils)

In [0]:
%fs ls dbfs:/databricks-datasets

In [0]:
%fs ls dbfs:/databricks-datasets/songs

In [0]:
# Python counterpart of the `%fs ls` cells: the value returned by
# dbutils.fs.ls is the last expression, so it is shown as the cell output.
dbutils.fs.ls("dbfs:/databricks-datasets/")

In [0]:
# Same listing as before, but handed to display() instead of relying on the
# default representation of the returned value.
display(dbutils.fs.ls("dbfs:/databricks-datasets/"))

In [0]:
# The listing is an ordinary iterable: walk over its entries and print each
# entry's `name` attribute in upper case, one per line.
files = dbutils.fs.ls("dbfs:/databricks-datasets/")
for entry in files:
    upper_name = entry.name.upper()
    print(upper_name)

In [0]:
# Show the built-in help for the dbutils object.
dbutils.help()

In [0]:
# Show the built-in help for the file-system part of dbutils (dbutils.fs).
dbutils.fs.help()

### Getting information about current catalog and schema

In [0]:
# Print the name of the database (schema) the session currently uses.
print(spark.catalog.currentDatabase())

In [0]:
# Print the name of the catalog the session currently uses.
print(spark.catalog.currentCatalog())

### Run SQL commands from Python and create volume

In [0]:
# Make `databricks_01` the current catalog and `default` the current schema so
# that unqualified object names in later SQL statements resolve there.
# The order matters: the schema is looked up inside the catalog selected first.
spark.sql("USE CATALOG databricks_01")
spark.sql("USE default")

In [0]:
# Create the volume only when it does not exist yet, so re-running this cell is
# harmless. The name is unqualified and therefore depends on the catalog and
# schema selected by the preceding USE statements.
spark.sql("CREATE VOLUME IF NOT EXISTS input_volume")

In [0]:
import os
import shutil
import urllib.request
import zipfile

# Download the pizzas sample archive, unpack it under /tmp and copy every
# extracted file into the `input_volume` volume of databricks_01.default.

# define variables
url = "https://github.com/rafalkkk/C041_Spark_PySpark/raw/main/01-datasets/pizzas.zip"
local_zip_path = "/tmp/pizzas.zip"
extract_dir = "/tmp/pizzas_unzipped"
# NOTE: despite its name this is a /Volumes/... path, not a dbfs:/ location.
dbfs_target = "/Volumes/databricks_01/default/input_volume"

# download the file
urllib.request.urlretrieve(url, local_zip_path)
print(f"File saved in {local_zip_path}")

# Start from an empty extraction directory so that files left behind by an
# earlier run of this cell are not copied to the volume again.
# ignore_errors=True keeps the first run (directory missing) from failing.
shutil.rmtree(extract_dir, ignore_errors=True)

# unzipping
with zipfile.ZipFile(local_zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)
print(f"Files unzipped in {extract_dir}")

# copy the extracted files to the volume
dbutils.fs.mkdirs(dbfs_target)

# Walk the whole extracted tree. Files are copied flat into the volume root
# (only the file name is kept), so equally named files from different
# sub-folders would overwrite each other.
for root, dirs, files in os.walk(extract_dir):
    for file in files:
        local_file_path = os.path.join(root, file)
        target_file_path = os.path.join(dbfs_target, file)
        # The "file:" prefix marks the source as a local path for dbutils.fs.cp.
        dbutils.fs.cp(f"file:{local_file_path}", target_file_path)
        print(f"Copied file {file} to {target_file_path}")


In [0]:
# Left disabled on purpose (presumably): a dbutils.fs path without a scheme may
# not point at the local /tmp used by the download cell — TODO confirm, then
# either delete this cell or use an explicit "file:/tmp" path.
# dbutils.fs.ls('/tmp')

In [0]:
%sh
# List /tmp, where the download cell wrote pizzas.zip and pizzas_unzipped.
ls -l /tmp

In [0]:
# List /tmp from Python. `os` was already imported by the download cell;
# repeating the import here is harmless.
import os
os.listdir("/tmp")

In [0]:
# List the volume's contents to check what the copy loop placed there.
display(dbutils.fs.ls("/Volumes/databricks_01/default/input_volume"))

In [0]:
# Preview the beginning of pizzas.csv as raw text before parsing it with Spark.
# Note the path uses the dbfs:/Volumes/... form, unlike the listing cell above.
dbutils.fs.head('dbfs:/Volumes/databricks_01/default/input_volume/pizzas.csv')

In [0]:
# Read pizzas.csv from the volume, treating the first line as the header and
# letting Spark infer the column types, then render the resulting DataFrame.
(
    spark.read
    .csv(
        "dbfs:/Volumes/databricks_01/default/input_volume/pizzas.csv",
        header=True,
        inferSchema=True,
    )
    .display()
)