In [1]:
from pyspark.sql import SparkSession, functions as fn
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [2]:
# Build (or reuse, via getOrCreate) a Spark session running on the "local"
# master, registered under the application name "cast_column_df".
spark = (
    SparkSession.builder
    .master("local")
    .appName("cast_column_df")
    .getOrCreate()
)

In [3]:
# Sample rows, one tuple per person, in the order given by `columns` below.
# jobStartDate and isGraduated are stored as strings here; note that the third
# row writes its date as "06-01-1992", a different layout from the first two.
simpleData = [
    ("James", 34, "2006-01-01", "true", "M", 3000.60),
    ("Michael", 33, "1980-01-10", "true", "F", 3300.80),
    ("Robert", 37, "06-01-1992", "false", "M", 5000.50),
]

# Column names applied to the tuples above, position by position.
columns = [
    "firstname",
    "age",
    "jobStartDate",
    "isGraduated",
    "gender",
    "salary",
]

# Source DataFrame used by every casting example that follows.
df1 = spark.createDataFrame(simpleData, columns)

In [4]:
# Cast columns with the DataFrame API:
#   age          -> string
#   isGraduated  -> boolean
#   jobStartDate -> date
# The DataFrame is kept in df2 and show() is called as its own statement:
# show() only prints the rows and returns None, so ending the assignment with
# .show() would bind df2 to None instead of the casted DataFrame.
df2 = (
    df1
    .withColumn("age", fn.col("age").cast(StringType()))
    .withColumn("isGraduated", fn.col("isGraduated").cast(BooleanType()))
    .withColumn("jobStartDate", fn.col("jobStartDate").cast(DateType()))
)

# In the recorded output, the "06-01-1992" value shows up as null after the
# date cast, while the two yyyy-MM-dd values are converted.
df2.show()

+---------+---+------------+-----------+------+------+
|firstname|age|jobStartDate|isGraduated|gender|salary|
+---------+---+------------+-----------+------+------+
|    James| 34|  2006-01-01|       true|     M|3000.6|
|  Michael| 33|  1980-01-10|       true|     F|3300.8|
|   Robert| 37|        null|      false|     M|5000.5|
+---------+---+------------+-----------+------+------+



In [7]:
# column expression
# Cast via SQL expression strings and keep only the three casted columns:
#   age -> int, jobStartDate -> string, isGraduated -> string.
# The result is stored in df3 first and displayed afterwards, because show()
# returns None and would otherwise leave df3 unusable as a DataFrame.
df3 = df1.selectExpr(
    "cast(age as int) age",
    "cast(jobStartDate as string) jobStartdate",
    "cast(isGraduated as string) isGraduated",
)
df3.show()

+---+------------+-----------+
|age|jobStartdate|isGraduated|
+---+------------+-----------+
| 34|  2006-01-01|       true|
| 33|  1980-01-10|       true|
| 37|  06-01-1992|      false|
+---+------------+-----------+



In [8]:
# spark sql
# Expose df1 to SQL under the temporary view name "castColumn".
df1.createOrReplaceTempView("castColumn")

# Cast with the SQL type-named functions string(), boolean() and date(),
# then print the resulting rows.
casted_via_sql = spark.sql(
    "select string(age) age, boolean(isGraduated) isGraduated, date(jobStartDate) jobStartDate from castColumn"
)
casted_via_sql.show()

+---+-----------+------------+
|age|isGraduated|jobStartDate|
+---+-----------+------------+
| 34|       true|  2006-01-01|
| 33|       true|  1980-01-10|
| 37|      false|        null|
+---+-----------+------------+

