In [1]:
# findspark must be initialised before any pyspark import in the cells below:
# init() makes the local Spark installation's pyspark package importable.
import findspark
findspark.init()

In [3]:
from pyspark.conf import SparkConf
from pyspark.sql import SparkSession

# Driver configuration: local master and a recognisable application name.
# Additional properties can be supplied with config.set("property", "value").
config = SparkConf()
config.setMaster("local")
config.setAppName("GlobaltempSparkSession")

# SparkSession is the entry point for Spark SQL and the DataFrame API.
# One Spark driver (notebook / Spark application) may hold many SparkSessions,
# but all of them share a single SparkContext.
spark = (
    SparkSession.builder
    .config(conf=config)
    .getOrCreate()
)

# SparkContext is the handle for Spark core operations (RDDs, partitions,
# actions, ...). Queries issued through a SparkSession go through the Catalyst
# engine, which relies on this context for the low-level code execution.
sc = spark.sparkContext

In [4]:
# Databricks notebook source
# Sample data used throughout the temp-view demo.

# Each row: (product_id, product_name, brand_id)
products = [
    (1, 'iPhone', 100),
    (2, 'Galaxy', 200),
    (3, 'Redme', 300),  # brand_id 300 has no matching brand
    (4, 'Pixel', 400),
]

# Each row: (brand_id, brand_name)
brands = [
    (100, "Apple"),
    (200, "Samsung"),
    (400, "Google"),
    (500, "Sony"),  # brand without any product
]

# Each row: (store_id, store_name)
store = [
    (1000, "Poorvika"),
    (2000, "Sangeetha"),
    (4000, "Amazon"),
    (5000, "FlipKart"),
]

# Build one DataFrame per table; column names are given as the schema.
productDf = spark.createDataFrame(products, ["product_id", "product_name", "brand_id"])
brandDf = spark.createDataFrame(brands, ["brand_id", "brand_name"])
storeDf = spark.createDataFrame(store, ["store_id", "store_name"])

# Print the tables in the order: products, brands, stores.
productDf.show()
brandDf.show()
storeDf.show()

                                                                                

+----------+------------+--------+
|product_id|product_name|brand_id|
+----------+------------+--------+
|         1|      iPhone|     100|
|         2|      Galaxy|     200|
|         3|       Redme|     300|
|         4|       Pixel|     400|
+----------+------------+--------+

+--------+----------+
|brand_id|brand_name|
+--------+----------+
|     100|     Apple|
|     200|   Samsung|
|     400|    Google|
|     500|      Sony|
+--------+----------+

+--------+----------+
|store_id|store_name|
+--------+----------+
|    1000|  Poorvika|
|    2000| Sangeetha|
|    4000|    Amazon|
|    5000|  FlipKart|
+--------+----------+



In [5]:

# A Spark application has exactly ONE SparkContext,
# but any number of SparkSessions may be created on top of it.
# spark2 is a second session derived from the existing `spark` session.

spark2 = spark.newSession()

In [7]:
# Bare expression: the notebook displays the representation of the `spark` session.
spark

In [8]:
# productDf was created earlier with spark.createDataFrame.
# Register it as the temp view "products" in the `spark` session.
# A temp view is private to the session that created it, so "products"
# cannot be accessed from another session such as spark2.
productDf.createOrReplaceTempView("products")

In [9]:

# Query "products" from the session that registered it (`spark`); this works.
spark.sql("SELECT * FROM products").show()


+----------+------------+--------+
|product_id|product_name|brand_id|
+----------+------------+--------+
|         1|      iPhone|     100|
|         2|      Galaxy|     200|
|         3|       Redme|     300|
|         4|       Pixel|     400|
+----------+------------+--------+



In [10]:

# "products" is a temp view private to the `spark` session, so querying it from
# `spark2` fails with AnalysisException (table or view not found).
# That failure is what this cell demonstrates, so the exception is caught and
# printed instead of being left unhandled; an unhandled error would stop
# "Restart Kernel -> Run All" at this cell and the later cells would never run.
from pyspark.sql.utils import AnalysisException

try:
    spark2.sql("SELECT * FROM products").show()
except AnalysisException as error:
    # Expected: the view is not visible from the second session.
    print("AnalysisException:", error)


AnalysisException: Table or view not found: products; line 1 pos 14;
'Project [*]
+- 'UnresolvedRelation [products], [], false


In [11]:
# Register brandDf as the global temp view "brands". Global temp views live in
# the global_temp database and can be shared across the sessions of this notebook.

brandDf.createOrReplaceGlobalTempView ("brands")

# List the tables visible under global_temp from the `spark` session. The listing
# also includes this session's own temp view "products" (empty database column).
spark.sql("SHOW TABLES IN global_temp").show()

+-----------+---------+-----------+
|   database|tableName|isTemporary|
+-----------+---------+-----------+
|global_temp|   brands|       true|
|           | products|       true|
+-----------+---------+-----------+



In [12]:
# A global temp view MUST be qualified with the global_temp prefix when queried.
spark.sql("SELECT * FROM global_temp.brands").show()


+--------+----------+
|brand_id|brand_name|
+--------+----------+
|     100|     Apple|
|     200|   Samsung|
|     400|    Google|
|     500|      Sony|
+--------+----------+



In [13]:

# Read the same global temp view from the second session (spark2); unlike the
# session-scoped "products" view, global_temp.brands is accessible here.
spark2.sql("SELECT * FROM global_temp.brands").show()

+--------+----------+
|brand_id|brand_name|
+--------+----------+
|     100|     Apple|
|     200|   Samsung|
|     400|    Google|
|     500|      Sony|
+--------+----------+



In [None]:
# DIY: create a new session from spark/spark2 called spark3
# DIY: create a global temp view for stores (from storeDf) and list the stores from the spark, spark2 and spark3 sessions
# DIY: Try in scala
