-
Notifications
You must be signed in to change notification settings - Fork 1
/
defineSchema.scala
51 lines (39 loc) · 1.69 KB
/
defineSchema.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
package mypc.spark.codes.sql
/**
* Created by maniram on 19/1/18.
*/
import org.apache.spark.{SparkConf,SparkContext}
import org.apache.log4j.{Logger,Level}
import org.apache.spark.sql.{Row,SQLContext,SparkSession}
import org.apache.spark.sql.types.{StringType,StructType,StructField,IntegerType}
import org.apache.hadoop.io.Text
//import org.apache.spark.sql.SQLContext.implicits._
/**
 * Loads a comma-separated employee file into a DataFrame using a schema that is
 * built programmatically (every column is a nullable string), registers it as the
 * temporary view `emp`, and prints the employee names in sorted order.
 *
 * NOTE(review): the `main` wrapper is commented out in the original file, so this
 * object has no entry point of its own; the whole job runs once, as part of the
 * object's initialisation. Consider restoring
 * `def main(args: Array[String]): Unit` around the body if this is meant to be
 * launched directly.
 */
object defineSchema {
  // Only ERROR and above is logged for loggers under "org".
  Logger.getLogger("org").setLevel(Level.ERROR)

  val session = SparkSession.builder().appName("DefSchema").master("local[1]").getOrCreate()
  val context = session.sparkContext

  // One RDD element per line of the input file.
  val rdd = context.textFile("/home/maniram/data/emp")

  // Space-separated column names; each becomes a nullable StringType field.
  // To get numeric `id`/`sal` columns the schema would need IntegerType for those
  // fields and the Row values below would have to be converted accordingly.
  val schemaString = "id name sal role"
  val defSchema = new StructType(
    schemaString.split(" ").map(field => StructField(field, StringType, nullable = true))
  )

  // limit = -1 keeps trailing empty fields, so a line such as "1,bob,100," still
  // yields four elements instead of failing on p(3).
  val emp = rdd.map(_.split(",", -1)).map(p => Row(p(0), p(1), p(2), p(3)))

  val data = session.createDataFrame(emp, defSchema)

  // registerTempTable has been deprecated since Spark 2.0; this is its replacement.
  data.createOrReplaceTempView("emp")
  val d = session.sql("select name from emp order by name")

  // The actions below are what actually read the file; make sure the session is
  // released even if one of them fails.
  try {
    data.show(2)
    data.printSchema()
    d.show()
    println("Succefull")
  } finally {
    session.close()
  }
}