In [1]:
from pyspark.sql import SparkSession, functions as fn
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [2]:
spark = SparkSession.builder \
.master("local") \
.appName("column_functions_df") \
.getOrCreate()

In [3]:
data=[("James","Bond","100",None),
      ("Ann","Varsa","200",'F'),
      ("Tom Cruise","XXX","400",''),
      ("Tom Brand",None,"400",'M')] 

columns=["fname","lname","id","gender"]

In [4]:
df1 = spark.createDataFrame(data=data, schema=columns)

In [6]:
df1.sort(df1.fname.asc()).show()

+----------+-----+---+------+
|     fname|lname| id|gender|
+----------+-----+---+------+
|       Ann|Varsa|200|     F|
|     James| Bond|100|  null|
| Tom Brand| null|400|     M|
|Tom Cruise|  XXX|400|      |
+----------+-----+---+------+



In [7]:
df1.filter(df1.id.between(100,300)).show()

+-----+-----+---+------+
|fname|lname| id|gender|
+-----+-----+---+------+
|James| Bond|100|  null|
|  Ann|Varsa|200|     F|
+-----+-----+---+------+



In [9]:
df1.filter(df1.fname.contains("Cruise")).show()

+----------+-----+---+------+
|     fname|lname| id|gender|
+----------+-----+---+------+
|Tom Cruise|  XXX|400|      |
+----------+-----+---+------+



In [10]:
df1.filter(df1.fname.startswith("T")).show()

+----------+-----+---+------+
|     fname|lname| id|gender|
+----------+-----+---+------+
|Tom Cruise|  XXX|400|      |
| Tom Brand| null|400|     M|
+----------+-----+---+------+



In [11]:
df1.filter(df1.fname.endswith('d')).show()

+---------+-----+---+------+
|    fname|lname| id|gender|
+---------+-----+---+------+
|Tom Brand| null|400|     M|
+---------+-----+---+------+



In [12]:
df1.filter(df1.lname.isNull()).show()

+---------+-----+---+------+
|    fname|lname| id|gender|
+---------+-----+---+------+
|Tom Brand| null|400|     M|
+---------+-----+---+------+



In [13]:
df1.filter(df1.lname.isNotNull()).show()

+----------+-----+---+------+
|     fname|lname| id|gender|
+----------+-----+---+------+
|     James| Bond|100|  null|
|       Ann|Varsa|200|     F|
|Tom Cruise|  XXX|400|      |
+----------+-----+---+------+



In [14]:
df1.select(df1.fname, df1.lname, df1.id).filter(df1.fname.like('%om%')).show()

+----------+-----+---+
|     fname|lname| id|
+----------+-----+---+
|Tom Cruise|  XXX|400|
| Tom Brand| null|400|
+----------+-----+---+



In [17]:
df1.select(df1.fname, df1.lname, when(df1.gender=='M','Male') \
          .when(df1.gender=='F','Female') \
          .when(df1.gender=='null','') \
          .otherwise(df1.gender).alias("new_gender")).show()

+----------+-----+----------+
|     fname|lname|new_gender|
+----------+-----+----------+
|     James| Bond|      null|
|       Ann|Varsa|    Female|
|Tom Cruise|  XXX|          |
| Tom Brand| null|      Male|
+----------+-----+----------+



In [18]:
df1.select(df1.id.isin(["100","200"])).show()

+------------------+
|(id IN (100, 200))|
+------------------+
|              true|
|              true|
|             false|
|             false|
+------------------+

