In [0]:
# Build a small four-row demo DataFrame. Every `sal` value is supplied as a
# Python string, so the schema printed by this cell reports `sal: string`.
# NOTE(review): `spark` is not defined in any visible cell; presumably it is the
# session object injected by the notebook runtime. Confirm before running elsewhere.
employee_rows = [
    (1, 'pavani', '23000'),
    (2, 'john', '26000'),
    (3, 'Beck', '20000'),
    (4, 'Leo', '33000'),
]
column_names = ('id', 'name', 'sal')

df = spark.createDataFrame(data=employee_rows, schema=column_names)

df.show()
df.printSchema()

+---+------+-----+
| id|  name|  sal|
+---+------+-----+
|  1|pavani|23000|
|  2|  john|26000|
|  3|  Beck|20000|
|  4|   Leo|33000|
+---+------+-----+

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- sal: string (nullable = true)



In [0]:
from pyspark.sql.functions import col, lit

# Replace `sal` with twice its value. `sal` is still a string column at this
# point; the multiplication relies on Spark's implicit numeric coercion, which
# is why the schema printed by this cell shows `sal: double`.
# Caution: `df` is reassigned, so re-running this cell doubles `sal` again.
doubled_sal = col('sal') * lit(2)
df = df.withColumn('sal', doubled_sal)

df.show()
df.printSchema()

+---+------+-------+
| id|  name|    sal|
+---+------+-------+
|  1|pavani|46000.0|
|  2|  john|52000.0|
|  3|  Beck|40000.0|
|  4|   Leo|66000.0|
+---+------+-------+

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- sal: double (nullable = true)



In [0]:
# Print the built-in documentation for DataFrame.withColumn (reference lookup
# only; this call does not modify `df`).
help(df.withColumn)

Help on method withColumn in module pyspark.sql.dataframe:

withColumn(colName: str, col: pyspark.sql.column.Column) -> 'DataFrame' method of pyspark.sql.dataframe.DataFrame instance
    Returns a new :class:`DataFrame` by adding a column or replacing the
    existing column that has the same name.
    
    The column expression must be an expression over this :class:`DataFrame`; attempting to add
    a column from some other :class:`DataFrame` will raise an error.
    
    .. versionadded:: 1.3.0
    
    .. versionchanged:: 3.4.0
        Support Spark Connect.
    
    Parameters
    ----------
    colName : str
        string, name of the new column.
    col : :class:`Column`
        a :class:`Column` expression for the new column.
    
    Returns
    -------
    :class:`DataFrame`
        DataFrame with new or replaced column.
    
    Notes
    -----
    This method introduces a projection internally. Therefore, calling it multiple
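    times, for instance, via loops in order to add multiple columns can generate big
    plans which can cause performance issues and even `StackOverflowException`.
    To avoid this, use :func:`select` with multiple columns at once.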

In [0]:
# Overwrite `sal` with an explicitly cast copy of itself, so the column type
# is stated by the code rather than left to implicit coercion.
sal_as_integer = col('sal').cast('Integer')
df = df.withColumn('sal', sal_as_integer)

In [0]:
# Display the rows and the schema to verify the result of the cast applied to `sal`.
df.show()
df.printSchema()

+---+------+-----+
| id|  name|  sal|
+---+------+-----+
|  1|pavani|46000|
|  2|  john|52000|
|  3|  Beck|40000|
|  4|   Leo|66000|
+---+------+-----+

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- sal: integer (nullable = true)



In [0]:
# Append a `country` column whose value is the same literal for every row.
# withColumn requires a Column expression, hence lit() around the plain string.
df = df.withColumn(
    colName='country',
    col=lit('India'),
)

In [0]:
# Display the DataFrame to confirm the `country` column was added.
df.show()

+---+------+-----+-------+
| id|  name|  sal|country|
+---+------+-----+-------+
|  1|pavani|46000|  India|
|  2|  john|52000|  India|
|  3|  Beck|40000|  India|
|  4|   Leo|66000|  India|
+---+------+-----+-------+



In [0]:
# Duplicate the existing `sal` column under the new name `copied_sal`;
# the original `sal` column is kept as-is.
df = df.withColumn('copied_sal', df['sal'])

df.show()

+---+------+-----+-------+----------+
| id|  name|  sal|country|copied_sal|
+---+------+-----+-------+----------+
|  1|pavani|46000|  India|     46000|
|  2|  john|52000|  India|     52000|
|  3|  Beck|40000|  India|     40000|
|  4|   Leo|66000|  India|     66000|
+---+------+-----+-------+----------+



In [0]:
# Rename `sal` to `Salary`. Only that column's name changes; `copied_sal`
# keeps its name and values.
df = df.withColumnRenamed(existing='sal', new='Salary')

df.show()

+---+------+------+-------+----------+
| id|  name|Salary|country|copied_sal|
+---+------+------+-------+----------+
|  1|pavani| 46000|  India|     46000|
|  2|  john| 52000|  India|     52000|
|  3|  Beck| 40000|  India|     40000|
|  4|   Leo| 66000|  India|     66000|
+---+------+------+-------+----------+

