In [0]:
# Sample rows: (id, name, list of skills). The Python list becomes an array column.
data = [
    (1, 'Lahari', ['de', 'SE']),
    (2, 'JYOTHI', ['DOCTER', 'azure']),
]
schema = ('id', 'names', 'skills')

# Only column names are supplied, so the column types are inferred from the rows.
df = spark.createDataFrame(data, schema)
df.show()
df.printSchema()

+---+------+---------------+
| id| names|         skills|
+---+------+---------------+
|  1|Lahari|       [de, SE]|
|  2|JYOTHI|[DOCTER, azure]|
+---+------+---------------+

root
 |-- id: long (nullable = true)
 |-- names: string (nullable = true)
 |-- skills: array (nullable = true)
 |    |-- element: string (containsNull = true)



In [0]:
from pyspark.sql.functions import col, explode

# Display the input so it can be compared with the exploded result below.
df.show()

# explode() yields one output row per element of the 'skills' array; the
# original array column is kept and each element lands in the new 'skill' column.
df1 = df.withColumn(
    'skill',
    explode(col('skills')),
)
df1.show()
df1.printSchema()

+---+------+---------------+
| id| names|         skills|
+---+------+---------------+
|  1|Lahari|       [de, SE]|
|  2|JYOTHI|[DOCTER, azure]|
+---+------+---------------+

+---+------+---------------+------+
| id| names|         skills| skill|
+---+------+---------------+------+
|  1|Lahari|       [de, SE]|    de|
|  1|Lahari|       [de, SE]|    SE|
|  2|JYOTHI|[DOCTER, azure]|DOCTER|
|  2|JYOTHI|[DOCTER, azure]| azure|
+---+------+---------------+------+

root
 |-- id: long (nullable = true)
 |-- names: string (nullable = true)
 |-- skills: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- skill: string (nullable = true)



In [0]:
from pyspark.sql.functions import col, split

# In this frame 'skills' is a single comma-separated string, not an array.
data = [
    (1, 'lahari', 'azure,aws'),
    (2, 'shetty', 'mbbs,btech'),
]
schema = ('id', 'name', 'skills')

df = spark.createDataFrame(data, schema)
df.show()
df.printSchema()

+---+------+----------+
| id|  name|    skills|
+---+------+----------+
|  1|lahari| azure,aws|
|  2|shetty|mbbs,btech|
+---+------+----------+

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- skills: string (nullable = true)



In [0]:
# Display the input so it can be compared with the split result below.
df.show()

# split() cuts the 'skills' string at every comma and stores the pieces
# as an array of strings in the new 'splitarray' column.
df1 = df.withColumn(
    'splitarray',
    split(col('skills'), ','),
)
df1.show()
df1.printSchema()

+---+------+----------+
| id|  name|    skills|
+---+------+----------+
|  1|lahari| azure,aws|
|  2|shetty|mbbs,btech|
+---+------+----------+

+---+------+----------+-------------+
| id|  name|    skills|   splitarray|
+---+------+----------+-------------+
|  1|lahari| azure,aws| [azure, aws]|
|  2|shetty|mbbs,btech|[mbbs, btech]|
+---+------+----------+-------------+

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- skills: string (nullable = true)
 |-- splitarray: array (nullable = true)
 |    |-- element: string (containsNull = false)



In [0]:
from pyspark.sql.functions import array, col

# Each skill lives in its own scalar string column in this frame.
data = [
    (1, 'lahari', 'azure', 'aws'),
    (2, 'shetty', 'doctor', 'engineer'),
]
schema = ('id', 'name', 'primaryskill', 'secondaryskill')

df = spark.createDataFrame(data, schema)
df.show()
df.printSchema()

+---+------+------------+--------------+
| id|  name|primaryskill|secondaryskill|
+---+------+------------+--------------+
|  1|lahari|       azure|           aws|
|  2|shetty|      doctor|      engineer|
+---+------+------------+--------------+

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- primaryskill: string (nullable = true)
 |-- secondaryskill: string (nullable = true)



In [0]:
# Display the input so it can be compared with the combined result below.
df.show()

# array() packs the two skill columns into one array column per row.
# NOTE(review): the new column is called 'splitarray' although it is built
# with array(), not split() -- the name is kept so the output stays the same.
df1 = df.withColumn(
    'splitarray',
    array(
        col('primaryskill'),
        col('secondaryskill'),
    ),
)
df1.show()
df1.printSchema()

+---+------+------------+--------------+
| id|  name|primaryskill|secondaryskill|
+---+------+------------+--------------+
|  1|lahari|       azure|           aws|
|  2|shetty|      doctor|      engineer|
+---+------+------------+--------------+

+---+------+------------+--------------+------------------+
| id|  name|primaryskill|secondaryskill|        splitarray|
+---+------+------------+--------------+------------------+
|  1|lahari|       azure|           aws|      [azure, aws]|
|  2|shetty|      doctor|      engineer|[doctor, engineer]|
+---+------+------------+--------------+------------------+

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- primaryskill: string (nullable = true)
 |-- secondaryskill: string (nullable = true)
 |-- splitarray: array (nullable = false)
 |    |-- element: string (containsNull = true)



In [0]:
# Sample rows: (id, name, list of skills). The list becomes an array column.
data = [
    (1, 'lahari', ['azure', 'aws']),
    (2, 'shetty', ['mbbs', 'docter']),
]
schema = ('id', 'name', 'skills')

df = spark.createDataFrame(data, schema)
df.show()
df.printSchema()

+---+------+--------------+
| id|  name|        skills|
+---+------+--------------+
|  1|lahari|  [azure, aws]|
|  2|shetty|[mbbs, docter]|
+---+------+--------------+

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- skills: array (nullable = true)
 |    |-- element: string (containsNull = true)



In [0]:
from pyspark.sql.functions import array_contains, col

# Display the input so it can be compared with the flagged result below.
df.show()

# array_contains() adds a boolean per row: true when the 'skills' array
# holds the value 'azure', false otherwise.
df1 = df.withColumn(
    'existvalueinarray',
    array_contains(col('skills'), 'azure'),
)
df1.show()

+---+------+--------------+
| id|  name|        skills|
+---+------+--------------+
|  1|lahari|  [azure, aws]|
|  2|shetty|[mbbs, docter]|
+---+------+--------------+

+---+------+--------------+-----------------+
| id|  name|        skills|existvalueinarray|
+---+------+--------------+-----------------+
|  1|lahari|  [azure, aws]|             true|
|  2|shetty|[mbbs, docter]|            false|
+---+------+--------------+-----------------+



In [0]:
from pyspark.sql.functions import array_contains, col

# Display the input so it can be compared with the flagged result below.
df.show()

# Same membership check as before, now for 'java'; no row's 'skills' array
# contains it, so the flag is false for every row.
df1 = df.withColumn(
    'existvalueinarray',
    array_contains(col('skills'), 'java'),
)
df1.show()

+---+------+--------------+
| id|  name|        skills|
+---+------+--------------+
|  1|lahari|  [azure, aws]|
|  2|shetty|[mbbs, docter]|
+---+------+--------------+

+---+------+--------------+-----------------+
| id|  name|        skills|existvalueinarray|
+---+------+--------------+-----------------+
|  1|lahari|  [azure, aws]|            false|
|  2|shetty|[mbbs, docter]|            false|
+---+------+--------------+-----------------+

