In [1]:
from pyspark.sql import SparkSession, SQLContext, DataFrame
from pyspark.sql.readwriter import DataFrameReader, DataFrameWriter

appname = "read-write-mysql"
sparkmaster = "local"
spark = SparkSession.builder.appName(appname).master(sparkmaster).getOrCreate()
sc = spark.sparkContext

sqlContext = SQLContext(sc)

# 自行建立一個表格 (DataFrame)

In [2]:
dt = sqlContext.createDataFrame(data=[(1,2),(3,4)], schema=("A","B"))
dt.show()

+---+---+
|  A|  B|
+---+---+
|  1|  2|
|  3|  4|
+---+---+



# 自行建立一個 RDD，並且轉為 dataframe

In [3]:
rdd = sc.parallelize([('a',4),('b',5)])
df = rdd.toDF(["id","name"])
df.show()

+---+----+
| id|name|
+---+----+
|  a|   4|
|  b|   5|
+---+----+



# 寫入 mysql 

In [5]:
df.write.format('jdbc').options(
    url="jdbc:mysql://172.18.0.2:3306/iii",
    driver="com.mysql.jdbc.Driver",
    dbtable="iii",
    user="root",
    password="iii"
).mode('append').save()

# 讀取 mysql 表格資訊

In [6]:
df=sqlContext.read.format("jdbc").options(url="jdbc:mysql://172.18.0.2:3306/iii",
                                       driver="com.mysql.jdbc.Driver",
                                       dbtable="(SELECT * FROM iii) tmp",user="root",
                                       password="iii").load()

In [7]:
print(df.printSchema())

root
 |-- id: string (nullable = true)
 |-- name: string (nullable = true)

None


In [8]:
print(df.show())

+---+----+
| id|name|
+---+----+
|abc| 123|
|abc| 456|
|abc| 789|
|  a|   4|
|  b|   5|
|  a|   4|
|  b|   5|
|  a|   4|
|  b|   5|
+---+----+

None


In [10]:
#儲存結果 (CSV)
df.write.format("com.databricks.spark.csv").save('/home/jovyan/dataset/iii')

In [11]:
#儲存結果 (parquet)
df.write.format("parquet").save('/home/jovyan/dataset/parquet')

In [12]:
#讀取檔案 (CSV)
df_csv =sqlContext.read.format("csv").option("header", "true").load("/home/jovyan/dataset/iii/*")
df_csv.show()

+---+---+
|abc|123|
+---+---+
|abc|456|
|abc|789|
|  a|  4|
|  b|  5|
|  a|  4|
|  b|  5|
|  a|  4|
|  b|  5|
+---+---+



In [13]:
#讀取檔案 (parquet)
df_parquet =sqlContext.read.format("parquet").option("header", "true").load("/home/jovyan/dataset/parquet/*")
df_parquet.show()

+---+----+
| id|name|
+---+----+
|abc| 123|
|abc| 456|
|abc| 789|
|  a|   4|
|  b|   5|
|  a|   4|
|  b|   5|
|  a|   4|
|  b|   5|
+---+----+

