In [0]:
%fs ls /

path,name,size,modificationTime
dbfs:/Volume/,Volume/,0,0
dbfs:/Volumes/,Volumes/,0,0
dbfs:/databricks-datasets/,databricks-datasets/,0,0
dbfs:/databricks-results/,databricks-results/,0,0
dbfs:/exam_prep/,exam_prep/,0,1720271830000
dbfs:/user/,user/,0,1720350559000
dbfs:/volume/,volume/,0,0
dbfs:/volumes/,volumes/,0,0


In [0]:
from pyspark.sql import Row

import datetime


# Sample course catalogue: one dict per course, keyed by column name.
# Each tuple below is (id, title, published (y, m, d), active flag,
# last update (y, m, d, H, M, S)); the comprehension expands it into a record.
courses = [
    dict(
        course_id=course_id,
        course_title=title,
        course_published_dt=datetime.date(*published),
        is_active=active,
        last_updated_ts=datetime.datetime(*updated),
    )
    for course_id, title, published, active, updated in [
        (1, 'Mastering Python', (2021, 1, 14), True, (2021, 2, 18, 16, 57, 25)),
        (2, 'Data Engineering Essentials', (2021, 2, 10), True, (2021, 3, 5, 12, 7, 33)),
        (3, 'Mastering Pyspark', (2021, 1, 7), True, (2021, 4, 6, 10, 5, 42)),
        (4, 'AWS Essentials', (2021, 3, 19), False, (2021, 4, 10, 2, 25, 36)),
        (5, 'Docker 101', (2021, 2, 28), True, (2021, 3, 21, 7, 18, 52)),
    ]
]

# Turn every course dict into a Row (dict keys become field names) and
# build the DataFrame from those rows.
courses_df = spark.createDataFrame(
    data=[Row(**fields) for fields in courses]
)

In [0]:
# Preview up to five rows of the freshly built DataFrame.
courses_df.show(n=5)

+---------+--------------------+-------------------+---------+-------------------+
|course_id|        course_title|course_published_dt|is_active|    last_updated_ts|
+---------+--------------------+-------------------+---------+-------------------+
|        1|    Mastering Python|         2021-01-14|     true|2021-02-18 16:57:25|
|        2|Data Engineering ...|         2021-02-10|     true|2021-03-05 12:07:33|
|        3|   Mastering Pyspark|         2021-01-07|     true|2021-04-06 10:05:42|
|        4|      AWS Essentials|         2021-03-19|    false|2021-04-10 02:25:36|
|        5|          Docker 101|         2021-02-28|     true|2021-03-21 07:18:52|
+---------+--------------------+-------------------+---------+-------------------+



In [0]:
# Sanity check: the row count should match the five entries in `courses`.
courses_df.count()

5

In [0]:
# List what is currently under /exam_prep (the directory the CSV output is written into).
dbutils.fs.ls('/exam_prep')

[FileInfo(path='dbfs:/exam_prep/retail_db/', name='retail_db/', size=0, modificationTime=1720271878000),
 FileInfo(path='dbfs:/exam_prep/retail_db_json/', name='retail_db_json/', size=0, modificationTime=1720271858000)]

In [0]:
# Write the DataFrame as CSV part files into the target folder.
# mode='overwrite' keeps this cell re-runnable: with the default save mode the
# write raises if /exam_prep/courses_db already exists, which is the case on
# every run after the first one.
courses_df.write.csv('/exam_prep/courses_db', mode='overwrite')

### The CSV files are written to the folder

* By default, more than one file can be created

In [0]:
%fs ls /exam_prep/courses_db

path,name,size,modificationTime
dbfs:/exam_prep/courses_db/_SUCCESS,_SUCCESS,0,1720501734000
dbfs:/exam_prep/courses_db/_committed_473753379059822001,_committed_473753379059822001,368,1720501734000
dbfs:/exam_prep/courses_db/_started_473753379059822001,_started_473753379059822001,0,1720501733000
dbfs:/exam_prep/courses_db/part-00000-tid-473753379059822001-f924cb7b-cb03-465e-a790-9092b88a42d0-4-1-c000.csv,part-00000-tid-473753379059822001-f924cb7b-cb03-465e-a790-9092b88a42d0-4-1-c000.csv,60,1720501734000
dbfs:/exam_prep/courses_db/part-00001-tid-473753379059822001-f924cb7b-cb03-465e-a790-9092b88a42d0-5-1-c000.csv,part-00001-tid-473753379059822001-f924cb7b-cb03-465e-a790-9092b88a42d0-5-1-c000.csv,71,1720501734000
dbfs:/exam_prep/courses_db/part-00002-tid-473753379059822001-f924cb7b-cb03-465e-a790-9092b88a42d0-6-1-c000.csv,part-00002-tid-473753379059822001-f924cb7b-cb03-465e-a790-9092b88a42d0-6-1-c000.csv,61,1720501734000
dbfs:/exam_prep/courses_db/part-00003-tid-473753379059822001-f924cb7b-cb03-465e-a790-9092b88a42d0-7-1-c000.csv,part-00003-tid-473753379059822001-f924cb7b-cb03-465e-a790-9092b88a42d0-7-1-c000.csv,113,1720501734000


### When you load the folder, Spark automatically loads all the files in it

In [0]:
# Point the reader at the folder rather than a single file; no header row is
# expected, so the columns come back with the generated names _c0, _c1, ...
df = (
    spark.read
    .format('csv')
    .option('header', False)
    .load('/exam_prep/courses_db')
)
df.show(n=5)

+---+--------------------+----------+-----+--------------------+
|_c0|                 _c1|       _c2|  _c3|                 _c4|
+---+--------------------+----------+-----+--------------------+
|  4|      AWS Essentials|2021-03-19|false|2021-04-10T02:25:...|
|  5|          Docker 101|2021-02-28| true|2021-03-21T07:18:...|
|  2|Data Engineering ...|2021-02-10| true|2021-03-05T12:07:...|
|  3|   Mastering Pyspark|2021-01-07| true|2021-04-06T10:05:...|
|  1|    Mastering Python|2021-01-14| true|2021-02-18T16:57:...|
+---+--------------------+----------+-----+--------------------+



### Another way of writing files

In [0]:
# Same write expressed through the generic writer API: choose the format and
# save mode on the writer, then save to the target folder.
courses_df.write.format('csv').mode('overwrite').save('/exam_prep/courses_db')

In [0]:
%fs ls /exam_prep/courses_db

path,name,size,modificationTime
dbfs:/exam_prep/courses_db/_SUCCESS,_SUCCESS,0,1720503794000
dbfs:/exam_prep/courses_db/_committed_473753379059822001,_committed_473753379059822001,368,1720501734000
dbfs:/exam_prep/courses_db/_committed_6996352720318803789,_committed_6996352720318803789,730,1720503794000
dbfs:/exam_prep/courses_db/_committed_vacuum17659798105944892,_committed_vacuum17659798105944892,93,1720503795000
dbfs:/exam_prep/courses_db/_started_6996352720318803789,_started_6996352720318803789,0,1720503794000
dbfs:/exam_prep/courses_db/part-00000-tid-6996352720318803789-704b30b4-b042-44b6-bc8e-285f6c188b13-49-1-c000.csv,part-00000-tid-6996352720318803789-704b30b4-b042-44b6-bc8e-285f6c188b13-49-1-c000.csv,60,1720503794000
dbfs:/exam_prep/courses_db/part-00001-tid-6996352720318803789-704b30b4-b042-44b6-bc8e-285f6c188b13-50-1-c000.csv,part-00001-tid-6996352720318803789-704b30b4-b042-44b6-bc8e-285f6c188b13-50-1-c000.csv,71,1720503794000
dbfs:/exam_prep/courses_db/part-00002-tid-6996352720318803789-704b30b4-b042-44b6-bc8e-285f6c188b13-51-1-c000.csv,part-00002-tid-6996352720318803789-704b30b4-b042-44b6-bc8e-285f6c188b13-51-1-c000.csv,61,1720503794000
dbfs:/exam_prep/courses_db/part-00003-tid-6996352720318803789-704b30b4-b042-44b6-bc8e-285f6c188b13-52-1-c000.csv,part-00003-tid-6996352720318803789-704b30b4-b042-44b6-bc8e-285f6c188b13-52-1-c000.csv,113,1720503794000


### Write as a single file (don't split the DataFrame)

In [0]:
# Collapse to a single partition before writing so only one CSV part file is
# produced, replacing whatever was written to the folder earlier.
(
    courses_df
    .coalesce(1)
    .write
    .format('csv')
    .mode('overwrite')
    .save('/exam_prep/courses_db')
)

In [0]:
%fs ls '/exam_prep/courses_db'

path,name,size,modificationTime
dbfs:/exam_prep/courses_db/_SUCCESS,_SUCCESS,0,1720503987000
dbfs:/exam_prep/courses_db/_committed_1293567585999376496,_committed_1293567585999376496,463,1720503987000
dbfs:/exam_prep/courses_db/_committed_473753379059822001,_committed_473753379059822001,368,1720501734000
dbfs:/exam_prep/courses_db/_committed_6996352720318803789,_committed_6996352720318803789,730,1720503794000
dbfs:/exam_prep/courses_db/_committed_vacuum17659798105944892,_committed_vacuum17659798105944892,93,1720503795000
dbfs:/exam_prep/courses_db/_started_1293567585999376496,_started_1293567585999376496,0,1720503987000
dbfs:/exam_prep/courses_db/_started_6996352720318803789,_started_6996352720318803789,0,1720503794000
dbfs:/exam_prep/courses_db/part-00000-tid-1293567585999376496-72f9d223-d405-4bd2-b49a-69a63432a4ac-53-1-c000.csv,part-00000-tid-1293567585999376496-72f9d223-d405-4bd2-b49a-69a63432a4ac-53-1-c000.csv,305,1720503987000
