In [1]:
# Build a single-column DataFrame (column `id`) holding the integers 0 to 4.
rng = sqlContext.range(0, 5)
# Print the rows as an ASCII table.
rng.show()

+---+
| id|
+---+
|  0|
|  1|
|  2|
|  3|
|  4|
+---+



In [18]:
from datetime import datetime

# Two sample rows. Each tuple carries one value per column, in column order:
# a string, an integer, a list, a dict, a boolean and a datetime.
data = [
    (
        "Alice",
        20,
        [1, 2, 3],
        {"xz1": 1, "xz2": 2},
        True,
        datetime(2020, 2, 1, 22, 10, 12),
    ),
    (
        "Bob",
        30,
        [1, 2, 3],
        {"xz1": 1, "xz2": 2},
        False,
        datetime(2020, 2, 2, 13, 33, 44),
    ),
]

# Only column names are supplied here; no column types are declared.
df = sqlContext.createDataFrame(
    data,
    schema=["Name", "Score", "List", "Dict", "Bool", "Datetime"],
)
df.show()

+-----+-----+---------+--------------------+-----+-------------------+
| Name|Score|     List|                Dict| Bool|           Datetime|
+-----+-----+---------+--------------------+-----+-------------------+
|Alice|   20|[1, 2, 3]|[xz2 -> 2, xz1 -> 1]| true|2020-02-01 22:10:12|
|  Bob|   30|[1, 2, 3]|[xz2 -> 2, xz1 -> 1]|false|2020-02-02 13:33:44|
+-----+-----+---------+--------------------+-----+-------------------+



In [23]:
# Read Alice's score from the first row only. `first()` avoids pulling every
# row to the driver the way `collect()[0]` does, and looking the field up by
# name avoids depending on the column's position.
alice_score = df.first()["Score"]
alice_score

20

In [28]:
from pprint import pprint

# Collect the rows to the driver and take the `List` value of the first row.
lst = df.collect()[0]["List"]
# Extend the local Python list in place; this only touches the collected copy.
lst += [100]
pprint(lst)
# Show the DataFrame again to compare it with the modified local list.
df.show()

[1, 2, 3, 100]
+-----+-----+---------+--------------------+-----+-------------------+
| Name|Score|     List|                Dict| Bool|           Datetime|
+-----+-----+---------+--------------------+-----+-------------------+
|Alice|   20|[1, 2, 3]|[xz2 -> 2, xz1 -> 1]| true|2020-02-01 22:10:12|
|  Bob|   30|[1, 2, 3]|[xz2 -> 2, xz1 -> 1]|false|2020-02-02 13:33:44|
+-----+-----+---------+--------------------+-----+-------------------+



In [48]:
# Project every row to a (Name, Datetime) tuple through the RDD API.
# NOTE: after `collect()` the name `rdd` refers to a plain Python list of
# tuples, not to an RDD.
rdd = (
    df.rdd
    .map(lambda row: (row.Name, row.Datetime))
    .collect()
)

pprint(rdd)

[('Alice', datetime.datetime(2020, 2, 1, 22, 10, 12)),
 ('Bob', datetime.datetime(2020, 2, 2, 13, 33, 44))]


In [54]:
# Keep the rows with Score below 30, then project the Name and List columns.
# The filter runs before the projection because `Score` is not among the
# selected columns.
cols = (
    df
    .where(df.Score < 30)
    .select("Name", "List")
)
# `show()` only prints and returns None, so it is called separately; chaining
# it onto the assignment would leave `cols` bound to None, not the DataFrame.
cols.show()

+-----+---------+
| Name|     List|
+-----+---------+
|Alice|[1, 2, 3]|
+-----+---------+



In [66]:
# Append the " xz!" suffix to every Name via the RDD API and return the
# resulting strings as a Python list (displayed as the cell's value).
(
    df.rdd
    .map(lambda row: row.Name + " xz!")
    .collect()
)


['Alice xz!', 'Bob xz!']

In [71]:
# Keep Name and Score, then append a `New Score` column equal to Score + 100.
(
    df.select("Name", "Score")
    .withColumn("New Score", df["Score"] + 100)
    .show()
)

+-----+-----+---------+
| Name|Score|New Score|
+-----+-----+---------+
|Alice|   20|      120|
|  Bob|   30|      130|
+-----+-----+---------+



In [74]:
# Display a copy of the DataFrame whose `Name` column is called `Renamed Name`.
# The result is not assigned back, so `df` itself is left as it was.
(
    df.withColumnRenamed("Name", "Renamed Name")
    .show()
)

+------------+-----+---------+--------------------+-----+-------------------+
|Renamed Name|Score|     List|                Dict| Bool|           Datetime|
+------------+-----+---------+--------------------+-----+-------------------+
|       Alice|   20|[1, 2, 3]|[xz2 -> 2, xz1 -> 1]| true|2020-02-01 22:10:12|
|         Bob|   30|[1, 2, 3]|[xz2 -> 2, xz1 -> 1]|false|2020-02-02 13:33:44|
+------------+-----+---------+--------------------+-----+-------------------+



In [76]:
# Print `df` again; the output below still has the original `Name` column.
df.show()

+-----+-----+---------+--------------------+-----+-------------------+
| Name|Score|     List|                Dict| Bool|           Datetime|
+-----+-----+---------+--------------------+-----+-------------------+
|Alice|   20|[1, 2, 3]|[xz2 -> 2, xz1 -> 1]| true|2020-02-01 22:10:12|
|  Bob|   30|[1, 2, 3]|[xz2 -> 2, xz1 -> 1]|false|2020-02-02 13:33:44|
+-----+-----+---------+--------------------+-----+-------------------+



In [77]:
# Select only the Score column, exposing it under the name `Score_Alias`.
(
    df.select(df["Score"].alias("Score_Alias"))
    .show()
)

+-----------+
|Score_Alias|
+-----------+
|         20|
|         30|
+-----------+



In [79]:
import pandas

# Convert the Spark DataFrame into a pandas DataFrame.
# NOTE(review): presumably this materialises all rows on the driver, which is
# fine for this two-row sample -- confirm before using on large data.
pandas_df = df.toPandas()
# Bare last expression so the notebook renders the pandas table.
pandas_df

Unnamed: 0,Name,Score,List,Dict,Bool,Datetime
0,Alice,20,"[1, 2, 3]","{'xz2': 2, 'xz1': 1}",True,2020-02-01 22:10:12
1,Bob,30,"[1, 2, 3]","{'xz2': 2, 'xz1': 1}",False,2020-02-02 13:33:44


In [80]:
# Convert the pandas DataFrame back into a Spark DataFrame.
spark_df = sqlContext.createDataFrame(pandas_df)
# `show()` only prints and returns None, so it is called separately; chaining
# it onto the assignment would leave `spark_df` bound to None.
spark_df.show()

+-----+-----+---------+--------------------+-----+-------------------+
| Name|Score|     List|                Dict| Bool|           Datetime|
+-----+-----+---------+--------------------+-----+-------------------+
|Alice|   20|[1, 2, 3]|[xz2 -> 2, xz1 -> 1]| true|2020-02-01 22:10:12|
|  Bob|   30|[1, 2, 3]|[xz2 -> 2, xz1 -> 1]|false|2020-02-02 13:33:44|
+-----+-----+---------+--------------------+-----+-------------------+

