In [0]:
# Create a small sample DataFrame

In [0]:
# Sample rows as (id, name, discount) tuples. The discount values are quoted,
# so Spark infers that column as a string rather than a number.
data = [
    (1, 'Ketaki', '3000'),
    (2, 'Yash', '10000'),
]
columns = ["id", "name", "discount"]

# Build the DataFrame from the rows, using `columns` as the column names,
# and print its contents.
df = spark.createDataFrame(data, columns)
df.show()

+---+------+--------+
| id|  name|discount|
+---+------+--------+
|  1|Ketaki|    3000|
|  2|  Yash|   10000|
+---+------+--------+



In [0]:
# Check the data type of each column

In [0]:
# Print the schema as a tree (column name, type, nullability).
# `discount` shows up as string because its values were given as quoted literals.
df.printSchema()

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- discount: string (nullable = true)



In [0]:
# Display the signature and docstring of DataFrame.withColumn(colName, col),
# the method used by the transformation cells that follow.
help(df.withColumn)

Help on method withColumn in module pyspark.sql.dataframe:

withColumn(colName: str, col: pyspark.sql.column.Column) -> 'DataFrame' method of pyspark.sql.dataframe.DataFrame instance
    Returns a new :class:`DataFrame` by adding a column or replacing the
    existing column that has the same name.
    
    The column expression must be an expression over this :class:`DataFrame`; attempting to add
    a column from some other :class:`DataFrame` will raise an error.
    
    .. versionadded:: 1.3.0
    
    .. versionchanged:: 3.4.0
        Support Spark Connect.
    
    Parameters
    ----------
    colName : str
        string, name of the new column.
    col : :class:`Column`
        a :class:`Column` expression for the new column.
    
    Returns
    -------
    :class:`DataFrame`
        DataFrame with new or replaced column.
    
    Notes
    -----
    This method introduces a projection internally. Therefore, calling it multiple
    times, for instance, via loops in order to a

In [0]:
# Change a column's data type using withColumn and cast

In [0]:
from pyspark.sql.functions import col, lit

# Cast the string-typed `discount` column to an integer type. Because the
# target name matches an existing column, withColumn replaces that column
# instead of appending a new one; `df` itself is not modified.
df1 = df.withColumn(
    'discount',
    col('discount').cast('Integer'),
)

# Show the rows and the schema to confirm the new column type.
df1.show()
df1.printSchema()

+---+------+--------+
| id|  name|discount|
+---+------+--------+
|  1|Ketaki|    3000|
|  2|  Yash|   10000|
+---+------+--------+

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- discount: integer (nullable = true)



In [0]:
# Scale the values of an existing column

In [0]:
# Multiply every discount by 2. Reusing the name `discount` overwrites that
# column in the returned DataFrame.
df2 = df1.withColumn('discount', col('discount') * 2)
df2.show()

+---+------+--------+
| id|  name|discount|
+---+------+--------+
|  1|Ketaki|    6000|
|  2|  Yash|   20000|
+---+------+--------+



In [0]:
# Add a new column holding a constant value

In [0]:
# Append a `country` column. lit() wraps the Python string in a Column
# expression, which is what withColumn requires for its `col` argument.
df3 = df2.withColumn(colName='country', col=lit('india'))
df3.show()

+---+------+--------+-------+
| id|  name|discount|country|
+---+------+--------+-------+
|  1|Ketaki|    6000|  india|
|  2|  Yash|   20000|  india|
+---+------+--------+-------+



In [0]:
# Copy an existing column under a new name

In [0]:
# Duplicate `discount` into a new column named `copieddiscount`; the original
# column is kept alongside the copy.
df4 = df3.withColumn(colName='copieddiscount', col=col('discount'))
df4.show()

+---+------+--------+-------+--------------+
| id|  name|discount|country|copieddiscount|
+---+------+--------+-------+--------------+
|  1|Ketaki|    6000|  india|          6000|
|  2|  Yash|   20000|  india|         20000|
+---+------+--------+-------+--------------+

