In [0]:
from pyspark.sql.types import *
# Two sample employee rows, each laid out as (id, name, salary).
data = [
    (1, 'Tony', 3000.00),
    (2, 'Bruce', 2500.00),
]

# Explicit schema: id is an integer, name is a string, salary is a float.
schema = StructType([
    StructField('id', IntegerType()),
    StructField('name', StringType()),
    StructField('salary', FloatType()),
])

# Build the base DataFrame that the following cells derive new frames from.
df = spark.createDataFrame(data, schema)
display(df)

id,name,salary
1,Tony,3000.0
2,Bruce,2500.0


In [0]:
# Print the docstring of DataFrame.withColumn (signature, parameters, notes)
# before using it in the next cells.
help(df.withColumn)

Help on method withColumn in module pyspark.sql.dataframe:

withColumn(colName: str, col: pyspark.sql.column.Column) -> 'DataFrame' method of pyspark.sql.dataframe.DataFrame instance
    Returns a new :class:`DataFrame` by adding a column or replacing the
    existing column that has the same name.
    
    The column expression must be an expression over this :class:`DataFrame`; attempting to add
    a column from some other :class:`DataFrame` will raise an error.
    
    .. versionadded:: 1.3.0
    
    .. versionchanged:: 3.4.0
        Support Spark Connect.
    
    Parameters
    ----------
    colName : str
        string, name of the new column.
    col : :class:`Column`
        a :class:`Column` expression for the new column.
    
    Returns
    -------
    :class:`DataFrame`
        DataFrame with new or replaced column.
    
    Notes
    -----
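    This method introduces a projection internally. Therefore, calling it multiple
    times, for instance, via loops in order to add multiple columns can generate big
    plans which can cause performance issues and even `StackOverflowException`.
    To avoid this, use :func:`select` with multiple columns at once.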

In [0]:
# Add a new column 'base' computed from the existing 'salary' column.
# Dividing the float column by a Python float yields a double column,
# as the printed schema shows.
df1 = df.withColumn(colName='base', col=df['salary'] / 1.5)
df1.show()
df1.printSchema()

+---+-----+------+------------------+
| id| name|salary|              base|
+---+-----+------+------------------+
|  1| Tony|3000.0|            2000.0|
|  2|Bruce|2500.0|1666.6666666666667|
+---+-----+------+------------------+

root
 |-- id: integer (nullable = true)
 |-- name: string (nullable = true)
 |-- salary: float (nullable = true)
 |-- base: double (nullable = true)



In [0]:
from pyspark.sql.functions import col
# Reusing the existing column name replaces 'base' rather than adding a
# second column. The cast turns it from double into integer and drops the
# fractional part (1666.67 is shown as 1666).
df2 = df1.withColumn('base', col('base').cast('Integer'))
df2.show()
df2.printSchema()


+---+-----+------+----+
| id| name|salary|base|
+---+-----+------+----+
|  1| Tony|3000.0|2000|
|  2|Bruce|2500.0|1666|
+---+-----+------+----+

root
 |-- id: integer (nullable = true)
 |-- name: string (nullable = true)
 |-- salary: float (nullable = true)
 |-- base: integer (nullable = true)



In [0]:
from pyspark.sql.functions import col, lit
# Add a constant column: lit() wraps the Python string in a Column so every
# row receives the same value. The printed schema reports it as non-nullable.
df3 = df2.withColumn(colName='country', col=lit('india'))
df3.show()
df3.printSchema()


+---+-----+------+----+-------+
| id| name|salary|base|country|
+---+-----+------+----+-------+
|  1| Tony|3000.0|2000|  india|
|  2|Bruce|2500.0|1666|  india|
+---+-----+------+----+-------+

root
 |-- id: integer (nullable = true)
 |-- name: string (nullable = true)
 |-- salary: float (nullable = true)
 |-- base: integer (nullable = true)
 |-- country: string (nullable = false)



In [0]:
# Print the docstring of pyspark.sql.functions.lit, used above to build the
# constant 'country' column.
help(lit)

Help on function lit in module pyspark.sql.functions:

lit(col: Any) -> pyspark.sql.column.Column
    Creates a :class:`~pyspark.sql.Column` of literal value.
    
    .. versionadded:: 1.3.0
    
    .. versionchanged:: 3.4.0
        Support Spark Connect.
    
    Parameters
    ----------
    col : :class:`~pyspark.sql.Column` or Python primitive type.
        the value to make it as a PySpark literal. If a column is passed,
        it returns the column as is.
    
    Returns
    -------
    :class:`~pyspark.sql.Column`
        the literal instance.
    
    Examples
    --------
    >>> df = spark.range(1)
    >>> df.select(lit(5).alias('height'), df.id).show()
    +------+---+
    |height| id|
    +------+---+
    |     5|  0|
    +------+---+



In [0]:
# Print the docstring of pyspark.sql.functions.col, used above to reference
# the 'base' column by name.
help(col)

Help on function col in module pyspark.sql.functions:

col(col: str) -> pyspark.sql.column.Column
    Returns a :class:`~pyspark.sql.Column` based on the given column name.
    
    .. versionadded:: 1.3.0
    
    .. versionchanged:: 3.4.0
        Support Spark Connect.
    
    Parameters
    ----------
    col : str
        the name for the column
    
    Returns
    -------
    :class:`~pyspark.sql.Column`
        the corresponding column instance.
    
    Examples
    --------
    >>> col('x')
    Column<'x'>
    >>> column('x')
    Column<'x'>



In [0]:
# Print the docstring of DataFrame.withColumnRenamed before using it in the
# next cell.
help(df.withColumnRenamed)

Help on method withColumnRenamed in module pyspark.sql.dataframe:

withColumnRenamed(existing: str, new: str) -> 'DataFrame' method of pyspark.sql.dataframe.DataFrame instance
    Returns a new :class:`DataFrame` by renaming an existing column.
    This is a no-op if the schema doesn't contain the given column name.
    
    .. versionadded:: 1.3.0
    
    .. versionchanged:: 3.4.0
        Support Spark Connect.
    
    Parameters
    ----------
    existing : str
        string, name of the existing column to rename.
    new : str
        string, new name of the column.
    
    Returns
    -------
    :class:`DataFrame`
        DataFrame with renamed column.
    
    Examples
    --------
    >>> df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], schema=["age", "name"])
    >>> df.withColumnRenamed('age', 'age2').show()
    +----+-----+
    |age2| name|
    +----+-----+
    |   2|Alice|
    |   5|  Bob|
    +----+-----+



In [0]:
# Rename 'country' to 'emp_country'; all other columns are carried over
# unchanged into the new DataFrame.
df4 = df3.withColumnRenamed(existing='country', new='emp_country')
df4.show()

+---+-----+------+----+-----------+
| id| name|salary|base|emp_country|
+---+-----+------+----+-----------+
|  1| Tony|3000.0|2000|      india|
|  2|Bruce|2500.0|1666|      india|
+---+-----+------+----+-----------+

