In [0]:
spark.catalog.listCatalogs()

[CatalogMetadata(name='spark_catalog', description=None)]

In [0]:
from pyspark.sql.types import StructType,StructField,IntegerType,StringType,DoubleType
schema=StructType([\
    StructField("Product_id",IntegerType(),True),
    StructField("ProductName",StringType(),True),
    StructField("Category",StringType(),True),
    StructField("Price",DoubleType(),True),
    StructField("Stcockquantity",IntegerType(),True)])

data=[
    (1,'laptop','electronics',999.99,50),
    (2,"smartphone","electronics",699.99,100),
    (3,"headphone","electronics",49.99,200),
    (4,"book","books",19.99,300)
]
df=spark.createDataFrame(data=data,schema=schema)
df.display()

Product_id,ProductName,Category,Price,Stcockquantity
1,laptop,electronics,999.99,50
2,smartphone,electronics,699.99,100
3,headphone,electronics,49.99,200
4,book,books,19.99,300


In [0]:
import pandas as pd
df2=pd.DataFrame(data=data,columns=schema)
df2


Unnamed: 0,"StructField('Product_id', IntegerType(), True)","StructField('ProductName', StringType(), True)","StructField('Category', StringType(), True)","StructField('Price', DoubleType(), True)","StructField('Stcockquantity', IntegerType(), True)"
0,1,laptop,electronics,999.99,50
1,2,smartphone,electronics,699.99,100
2,3,headphone,electronics,49.99,200
3,4,book,books,19.99,300


In [0]:
df.createOrReplaceTempView('product')

In [0]:
%sql
select * from product

Product_id,ProductName,Category,Price,Stcockquantity
1,laptop,electronics,999.99,50
2,smartphone,electronics,699.99,100
3,headphone,electronics,49.99,200
4,book,books,19.99,300


In [0]:
sqldf=spark.sql("select * from {table}",table=df)
sqldf.display()

Product_id,ProductName,Category,Price,Stcockquantity
1,laptop,electronics,999.99,50
2,smartphone,electronics,699.99,100
3,headphone,electronics,49.99,200
4,book,books,19.99,300


In [0]:
spark.table('product').display()

Product_id,ProductName,Category,Price,Stcockquantity
1,laptop,electronics,999.99,50
2,smartphone,electronics,699.99,100
3,headphone,electronics,49.99,200
4,book,books,19.99,300


In [0]:
from pyspark.sql.functions import col
sqldf1=spark.sql("select {column1},{column2} from {table}",table=df,column1=df["Product_id"],column2=df["ProductName"])
sqldf1.display()

Product_id,ProductName
1,laptop
2,smartphone
3,headphone
4,book


In [0]:
spark.sql("select Product_id,ProductName,Price*Stcockquantity as Total_cost from {table}",table=df).display()

Product_id,ProductName,Total_cost
1,laptop,49999.5
2,smartphone,69999.0
3,headphone,9998.0
4,book,5996.999999999999


In [0]:
from pyspark.sql.functions import col,expr
py_df=df.select(col("Product_id"),col("ProductName"),(expr("Price*Stcockquantity")).alias("Total_cost") )
py_df.display()
                

Product_id,ProductName,Total_cost
1,laptop,49999.5
2,smartphone,69999.0
3,headphone,9998.0
4,book,5996.999999999999


In [0]:
from pyspark.sql.functions import col,expr
from pyspark.sql import Row
product_data=[
    Row(product_id=1,product_name="laptop",unit_price=800),
    Row(product_id=2,product_name="samrtphone",unit_price=500),
    Row(product_id=3,product_name="tablet",unit_price=300),
    Row(product_id=3,product_name="desktop",unit_price=1000),
    Row(product_id=5,product_name="printer",unit_price=200),
]

sales_data=[
    Row(sale_id=101,product_id=1,quantity=5),
    Row(sale_id=102,product_id=4,quantity=8),
    Row(sale_id=103,product_id=3,quantity=3),
    Row(sale_id=104,product_id=2,quantity=6),
    Row(sale_id=105,product_id=5,quantity=7),
    Row(sale_id=106,product_id=6,quantity=7),
]
df1=spark.createDataFrame(product_data)
df2=spark.createDataFrame(sales_data)
df1.display()
df2.display()

product_id,product_name,unit_price
1,laptop,800
2,samrtphone,500
3,tablet,300
3,desktop,1000
5,printer,200


sale_id,product_id,quantity
101,1,5
102,4,8
103,3,3
104,2,6
105,5,7
106,6,7


In [0]:
joindf=spark.sql("select * from {table1} a join {table2} b on a.{joinkey}=b.{joinkey}",table1=df1,table2=df2,joinkey=df["product_id"])
joindf.display()

product_id,product_name,unit_price,sale_id,product_id.1,quantity
1,laptop,800,101,1,5
2,samrtphone,500,104,2,6
3,tablet,300,103,3,3
3,desktop,1000,103,3,3
5,printer,200,105,5,7
