In [0]:
data = [('hary', [2,3]), ('paul', [5,6])]
schema = ('name', 'numbers')

df = spark.createDataFrame(data, schema)

df.show()
df.printSchema()


+----+-------+
|name|numbers|
+----+-------+
|hary| [2, 3]|
|paul| [5, 6]|
+----+-------+

root
 |-- name: string (nullable = true)
 |-- numbers: array (nullable = true)
 |    |-- element: long (containsNull = true)



In [0]:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, ArrayType

data = [('hary', [2,3]), ('paul', [5,6])]
schema = StructType([\
                    StructField('name', StringType()),\
                    StructField('number', ArrayType(IntegerType()))\
                    ])

df = spark.createDataFrame(data, schema)

df.show()
df.printSchema()


+----+------+
|name|number|
+----+------+
|hary|[2, 3]|
|paul|[5, 6]|
+----+------+

root
 |-- name: string (nullable = true)
 |-- number: array (nullable = true)
 |    |-- element: integer (containsNull = true)



In [0]:
from pyspark.sql.functions import col

df.withColumn('first_number', col('number')[0]).show()

+----+------+------------+
|name|number|first_number|
+----+------+------------+
|hary|[2, 3]|           2|
|paul|[5, 6]|           5|
+----+------+------------+



In [0]:
from pyspark.sql.functions import col

df.withColumn('firstnumber', col('number')[0]).show()

+----+------+-----------+
|name|number|firstnumber|
+----+------+-----------+
|hary|[2, 3]|          2|
|paul|[5, 6]|          5|
+----+------+-----------+



In [0]:
data = [(1,2),(3,4)]
schema = ('num1', 'num2')

df = spark.createDataFrame(data, schema)
df.show()

+----+----+
|num1|num2|
+----+----+
|   1|   2|
|   3|   4|
+----+----+



In [0]:
from pyspark.sql.functions import col, array

df1 = df.withColumn('numbers', array(col('num1'), col('num2')))

df1.show()
df1.printSchema()


+----+----+-------+
|num1|num2|numbers|
+----+----+-------+
|   1|   2| [1, 2]|
|   3|   4| [3, 4]|
+----+----+-------+

root
 |-- num1: long (nullable = true)
 |-- num2: long (nullable = true)
 |-- numbers: array (nullable = false)
 |    |-- element: long (containsNull = true)



In [0]:
data = [(1,'pavani', ['aws', 'python']), (2,'john', ['azure', 'sql'])]
schema = ('id', 'name', 'skills')

df = spark.createDataFrame(data, schema)

df.show()

from pyspark.sql.functions import explode, col

df1 = df.withColumn('skill', explode(col('skills')))

df1.show()

+---+------+-------------+
| id|  name|       skills|
+---+------+-------------+
|  1|pavani|[aws, python]|
|  2|  john| [azure, sql]|
+---+------+-------------+

+---+------+-------------+------+
| id|  name|       skills| skill|
+---+------+-------------+------+
|  1|pavani|[aws, python]|   aws|
|  1|pavani|[aws, python]|python|
|  2|  john| [azure, sql]| azure|
|  2|  john| [azure, sql]|   sql|
+---+------+-------------+------+



In [0]:
from pyspark.sql.functions import split, col

data = [(1,'pavani', 'aws, python'), (2,'john', 'azure,sql')]
schema = ('id', 'name', 'skills')

df = spark.createDataFrame(data, schema)

df1 = df.withColumn('skillsArray', split(col('skills'), ','))

df1.show()

+---+------+-----------+--------------+
| id|  name|     skills|   skillsArray|
+---+------+-----------+--------------+
|  1|pavani|aws, python|[aws,  python]|
|  2|  john|  azure,sql|  [azure, sql]|
+---+------+-----------+--------------+



In [0]:
from pyspark.sql.functions import array, col

data = [(1,'pavani', 'aws', 'python'), (2,'john', 'azure', 'sql')]
schema = ('id', 'name', 'primary skill', 'secondary skill')

df = spark.createDataFrame(data, schema)
df.show()
df1 = df.withColumn('skillsArray', array(col('primary skill'), col('secondary skill')))

df1.show()

+---+------+-------------+---------------+
| id|  name|primary skill|secondary skill|
+---+------+-------------+---------------+
|  1|pavani|          aws|         python|
|  2|  john|        azure|            sql|
+---+------+-------------+---------------+

+---+------+-------------+---------------+-------------+
| id|  name|primary skill|secondary skill|  skillsArray|
+---+------+-------------+---------------+-------------+
|  1|pavani|          aws|         python|[aws, python]|
|  2|  john|        azure|            sql| [azure, sql]|
+---+------+-------------+---------------+-------------+



In [0]:
from pyspark.sql.functions import array_contains, col

data = [(1,'pavani', ['aws', 'python']), (2,'john', ['azure', 'sql'])]
schema = ('id', 'name', 'skills')

df = spark.createDataFrame(data, schema)
df.show()

df1 = df.withColumn('has_aws', array_contains(col('skills'), 'aws'))
df1.show()

+---+------+-------------+
| id|  name|       skills|
+---+------+-------------+
|  1|pavani|[aws, python]|
|  2|  john| [azure, sql]|
+---+------+-------------+

+---+------+-------------+-------+
| id|  name|       skills|has_aws|
+---+------+-------------+-------+
|  1|pavani|[aws, python]|   true|
|  2|  john| [azure, sql]|  false|
+---+------+-------------+-------+



In [0]:
data = [('maheer', {'hair': 'brown', 'eye': 'black'}), ('lokesh', {'hair': 'black', 'eye': 'brown'}) ]
schema = ('name', 'properities')

df = spark.createDataFrame(data, schema)
df.show()
df.printSchema()

+------+--------------------+
|  name|         properities|
+------+--------------------+
|maheer|{eye -> black, ha...|
|lokesh|{eye -> brown, ha...|
+------+--------------------+

root
 |-- name: string (nullable = true)
 |-- properities: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)



In [0]:
from pyspark.sql.types import StringType, StructType, StringType, MapType

data = [('maheer', {'hair': 'brown', 'eye': 'black'}), ('lokesh', {'hair': 'black', 'eye': 'brown'}) ]
schema = StructType([StructField('name',StringType()),\
                     StructField('properities',  MapType(StringType(), StringType()))])

df = spark.createDataFrame(data, schema)
df.show()
df.printSchema()

+------+--------------------+
|  name|         properities|
+------+--------------------+
|maheer|{eye -> black, ha...|
|lokesh|{eye -> brown, ha...|
+------+--------------------+

root
 |-- name: string (nullable = true)
 |-- properities: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)



In [0]:
df1 = df.withColumn('eye', df.properities['eye'])

df1.show()

+------+--------------------+-----+
|  name|         properities|  eye|
+------+--------------------+-----+
|maheer|{eye -> black, ha...|black|
|lokesh|{eye -> brown, ha...|brown|
+------+--------------------+-----+



In [0]:
df2 = df1.withColumn('hair', df.properities.getItem('hair'))

df2.show()

+------+--------------------+-----+-----+
|  name|         properities|  eye| hair|
+------+--------------------+-----+-----+
|maheer|{eye -> black, ha...|black|brown|
|lokesh|{eye -> brown, ha...|brown|black|
+------+--------------------+-----+-----+



In [0]:
from pyspark.sql.types import StringType, StructType, StringType, MapType
from pyspark.sql.functions import explode, col

data = [('maheer', {'hair': 'brown', 'eye': 'black'}), ('lokesh', {'hair': 'black', 'eye': 'brown'}) ]
schema = StructType([StructField('name',StringType()),\
                     StructField('properities',  MapType(StringType(), StringType()))])

df = spark.createDataFrame(data, schema)
df.show(truncate = False)
df.printSchema()

df1 = df.select('name', 'properities', explode(col('properities')))

df1.show()

+------+-----------------------------+
|name  |properities                  |
+------+-----------------------------+
|maheer|{eye -> black, hair -> brown}|
|lokesh|{eye -> brown, hair -> black}|
+------+-----------------------------+

root
 |-- name: string (nullable = true)
 |-- properities: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)

+------+--------------------+----+-----+
|  name|         properities| key|value|
+------+--------------------+----+-----+
|maheer|{eye -> black, ha...| eye|black|
|maheer|{eye -> black, ha...|hair|brown|
|lokesh|{eye -> brown, ha...| eye|brown|
|lokesh|{eye -> brown, ha...|hair|black|
+------+--------------------+----+-----+



In [0]:
from pyspark.sql.functions import map_keys

df1 = df.withColumn('keys', map_keys('properities'))

df1.show()

+------+--------------------+-----------+
|  name|         properities|       keys|
+------+--------------------+-----------+
|maheer|{eye -> black, ha...|[eye, hair]|
|lokesh|{eye -> brown, ha...|[eye, hair]|
+------+--------------------+-----------+



In [0]:
from pyspark.sql.functions import map_values

df1 = df.withColumn('values', map_values('properities'))

df1.show()

+------+--------------------+--------------+
|  name|         properities|        values|
+------+--------------------+--------------+
|maheer|{eye -> black, ha...|[black, brown]|
|lokesh|{eye -> brown, ha...|[brown, black]|
+------+--------------------+--------------+

