### Specify types with schema as a string

In [0]:
import datetime

# Sample rows, one tuple per user, in the same column order as `schema` below:
# (id, first_name, last_name, is_customer, date_of_joining, last_updated).
# The second user has no joining date, so that value is None.
users = [
    (1, "Tanmay", "Kulkarni", True,
     datetime.date(2024, 1, 30), datetime.datetime(2023, 3, 10, 11, 16, 56)),
    (2, "Tyrion", "Lannister", False,
     None, datetime.datetime(2023, 3, 10, 11, 16, 56)),
]

# Column names and their types, written as one comma-separated string.
schema = """
    id INT,
    first_name STRING,
    last_name STRING,
    is_customer BOOLEAN,
    date_of_joining DATE,
    last_updated TIMESTAMP
"""

# Build the DataFrame from the rows using the string schema above.
df = spark.createDataFrame(data=users, schema=schema)

df.show()

+---+----------+---------+-----------+---------------+-------------------+
| id|first_name|last_name|is_customer|date_of_joining|       last_updated|
+---+----------+---------+-----------+---------------+-------------------+
|  1|    Tanmay| Kulkarni|       true|     2024-01-30|2023-03-10 11:16:56|
|  2|    Tyrion|Lannister|      false|           NULL|2023-03-10 11:16:56|
+---+----------+---------+-----------+---------------+-------------------+



In [0]:
# Print each column's name, data type and nullability as a tree.
df.printSchema()

root
 |-- id: integer (nullable = true)
 |-- first_name: string (nullable = true)
 |-- last_name: string (nullable = true)
 |-- is_customer: boolean (nullable = true)
 |-- date_of_joining: date (nullable = true)
 |-- last_updated: timestamp (nullable = true)



### Specify types using Spark types

In [0]:
from pyspark.sql.types import (
    StructType,
    StructField,
    IntegerType,
    StringType,
    BooleanType,
    DateType,
    TimestampType,
)

# Same columns as the string-based schema, expressed with Spark type objects.
# Each (name, type) pair becomes one StructField, in the order listed.
fields = StructType(
    [
        StructField(column_name, column_type)
        for column_name, column_type in (
            ("id", IntegerType()),
            ("first_name", StringType()),
            ("last_name", StringType()),
            ("is_customer", BooleanType()),
            ("date_of_joining", DateType()),
            ("last_updated", TimestampType()),
        )
    ]
)


# Rebuild the DataFrame from the same rows, this time with the StructType schema.
df = spark.createDataFrame(data=users, schema=fields)

df.show()

+---+----------+---------+-----------+---------------+-------------------+
| id|first_name|last_name|is_customer|date_of_joining|       last_updated|
+---+----------+---------+-----------+---------------+-------------------+
|  1|    Tanmay| Kulkarni|       true|     2024-01-30|2023-03-10 11:16:56|
|  2|    Tyrion|Lannister|      false|           NULL|2023-03-10 11:16:56|
+---+----------+---------+-----------+---------------+-------------------+



### Convert a Spark DataFrame to a pandas DataFrame

In [0]:
# Convert the Spark DataFrame into a pandas DataFrame.
# NOTE(review): per the PySpark docs, toPandas() loads every row into local
# memory; that is fine for this two-row example — confirm before using on large data.
pandas_df = df.toPandas()
# Preview the first rows of the converted frame.
pandas_df.head()

Unnamed: 0,id,first_name,last_name,is_customer,date_of_joining,last_updated
0,1,Tanmay,Kulkarni,True,2024-01-30,2023-03-10 11:16:56
1,2,Tyrion,Lannister,False,,2023-03-10 11:16:56
