In [211]:
import findspark
from pyspark.sql import SparkSession

In [57]:
findspark.init()

In [58]:
spark=SparkSession.builder.getOrCreate()

# Primarily used for working with RDDs and performing low-level distributed data operations. It is the foundational object that connects Spark to the cluster.

In [59]:
spark

In [61]:
sc=spark.sparkContext

# The modern entry point for working with structured data (DataFrames, SQL, Datasets) and more user-friendly APIs introduced in Spark 2.0. It also internally creates a SparkContext (spark.sparkContext).

In [62]:
sc

first will work on sparkcontext transformations:(sc)

In [64]:
%%writefile example2.txt
first 
second line
the third line
then a fourth line

Overwriting example2.txt


In [68]:
text_file=sc.textFile('example2.txt')

In [69]:
text_file.collect()

['first ', 'second line', 'the third line', 'then a fourth line']

In [70]:
text_file.map(lambda line : line.split()).collect()

[['first'],
 ['second', 'line'],
 ['the', 'third', 'line'],
 ['then', 'a', 'fourth', 'line']]

In [72]:
text_file.flatMap(lambda line : line.split()).collect()

['first',
 'second',
 'line',
 'the',
 'third',
 'line',
 'then',
 'a',
 'fourth',
 'line']

In [73]:
%%writefile services.txt
#EventId    Timestamp    Customer   State    ServiceID    Amount
201       10/13/2017      100       NY       131          100.00
204       10/18/2017      700       TX       129          450.00
202       10/15/2017      203       CA       121          200.00
206       10/19/2017      202       CA       131          500.00
203       10/17/2017      101       NY       173          750.00
205       10/19/2017      202       TX       121          200.00

Writing services.txt


In [81]:
services=sc.textFile('services.txt')

In [82]:
services.collect()

['#EventId    Timestamp    Customer   State    ServiceID    Amount',
 '201       10/13/2017      100       NY       131          100.00',
 '204       10/18/2017      700       TX       129          450.00',
 '202       10/15/2017      203       CA       121          200.00',
 '206       10/19/2017      202       CA       131          500.00',
 '203       10/17/2017      101       NY       173          750.00',
 '205       10/19/2017      202       TX       121          200.00']

In [83]:
services.take(1)

['#EventId    Timestamp    Customer   State    ServiceID    Amount']

In [84]:
services.map(lambda x: x.split()).collect()

[['#EventId', 'Timestamp', 'Customer', 'State', 'ServiceID', 'Amount'],
 ['201', '10/13/2017', '100', 'NY', '131', '100.00'],
 ['204', '10/18/2017', '700', 'TX', '129', '450.00'],
 ['202', '10/15/2017', '203', 'CA', '121', '200.00'],
 ['206', '10/19/2017', '202', 'CA', '131', '500.00'],
 ['203', '10/17/2017', '101', 'NY', '173', '750.00'],
 ['205', '10/19/2017', '202', 'TX', '121', '200.00']]

In [86]:
# we need to remove the hashtag 

clean=services.map(lambda x:x[1:] if x[0]=='#' else x)

In [104]:
clean_serv=clean.map(lambda x:x.split())

In [105]:
clean_serv.collect()

[['EventId', 'Timestamp', 'Customer', 'State', 'ServiceID', 'Amount'],
 ['201', '10/13/2017', '100', 'NY', '131', '100.00'],
 ['204', '10/18/2017', '700', 'TX', '129', '450.00'],
 ['202', '10/15/2017', '203', 'CA', '121', '200.00'],
 ['206', '10/19/2017', '202', 'CA', '131', '500.00'],
 ['203', '10/17/2017', '101', 'NY', '173', '750.00'],
 ['205', '10/19/2017', '202', 'TX', '121', '200.00']]

In [108]:
#grabbing feilds
clean_serv.map(lambda x: (x[3],x[-1] )).collect()

[('State', 'Amount'),
 ('NY', '100.00'),
 ('TX', '450.00'),
 ('CA', '200.00'),
 ('CA', '500.00'),
 ('NY', '750.00'),
 ('TX', '200.00')]

In [107]:
clean.map(lambda x: (x[0],x[-1] )).collect()

[('E', 't'),
 ('2', '0'),
 ('2', '0'),
 ('2', '0'),
 ('2', '0'),
 ('2', '0'),
 ('2', '0')]

In [112]:
# Grab state and amounts
# Add them
clean_serv.map(lambda x: (x[3],x[-1]) ).reduceByKey(lambda a1,a2: float(a1) + float(a2)).collect()

[('NY', 850.0), ('CA', 700.0), ('State', 'Amount'), ('TX', 650.0)]

In [119]:
# Grab state and amounts
# Add them
# Get rid of ('State','Amount')
# Sort them by the amount value
clean_serv.map(lambda x: (x[3],x[-1]) )\
.reduceByKey(lambda a1,a2: float(a1) + float(a2))\
.filter(lambda x : not x[0]=='State')\
.sortBy(lambda amount:amount[1],ascending=False).collect()

[('NY', 850.0), ('CA', 700.0), ('TX', 650.0)]

In [128]:
clean_serv.map(lambda x:(x[0],x[1])).collect()

[('EventId', 'Timestamp'),
 ('201', '10/13/2017'),
 ('204', '10/18/2017'),
 ('202', '10/15/2017'),
 ('206', '10/19/2017'),
 ('203', '10/17/2017'),
 ('205', '10/19/2017')]

In [129]:
array=sc.parallelize([1,2,4,4,5,6,7])

In [130]:
array.collect()

[1, 2, 4, 4, 5, 6, 7]

In [132]:
array.filter(lambda x: x%2==0).collect()

[2, 4, 4, 6]

In [146]:
array.map(lambda x: x).collect()

[1, 2, 4, 4, 5, 6, 7]

In [137]:
array1=sc.parallelize([[1,2],[3,4],[5,6],[6,7]])

In [154]:
array1.flatMap(lambda x: x).collect()

[1, 2, 3, 4, 5, 6, 6, 7]

In [152]:
array1.map(lambda x: x).collect()

[[1, 2], [3, 4], [5, 6], [6, 7]]

In [156]:
string=sc.parallelize(['hello','world','pradeep','divya'])

In [157]:
string.collect()

['hello', 'world', 'pradeep', 'divya']

In [168]:
string.map(lambda x: x.upper()).collect()
string.map(lambda x: x.capitalize()).collect()

['Hello', 'World', 'Pradeep', 'Divya']

In [166]:
string.flatMap(lambda x: [word.capitalize() for word in x.split()] ).collect()

['Hello', 'World', 'Pradeep', 'Divya']

In [176]:
string.flatMap(lambda x: x.split() ).collect()

['hello', 'world', 'pradeep', 'divya']

In [179]:
array.union(string).collect()

[1, 2, 4, 4, 5, 6, 7, 'hello', 'world', 'pradeep', 'divya']

In [182]:
array.distinct().collect()

[1, 2, 4, 5, 6, 7]

In [183]:
array.collect()

[1, 2, 4, 4, 5, 6, 7]

In [184]:
array1.collect()

[[1, 2], [3, 4], [5, 6], [6, 7]]

In [190]:
array.take(3)

[1, 2, 4]

In [191]:
array.top(3)


[7, 6, 5]

# now will see the sparksession transformations and actions 


In [224]:
df=spark.read.csv("amazon.csv",header=True)
spark.conf.set('spark.sql.repl.eagerEval.enabled', True)

In [225]:
df

product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
B07JW9H4J1,Wayona Nylon Brai...,Computers&Accesso...,₹399,"₹1,099",64%,4.2,24269,High Compatibilit...,AG3D6O4STAQKAY2UV...,"Manav,Adarsh gupt...","R3HXWT0LRP0NMF,R2...","Satisfied,Chargin...",Looks durable Cha...,https://m.media-a...,https://www.amazo...
B098NS6PVG,Ambrane Unbreakab...,Computers&Accesso...,₹199,₹349,43%,4.0,43994,Compatible with a...,AECPFYFQVRUWC3KGN...,"ArdKn,Nirbhay kum...","RGIQEG07R9HS2,R1S...",A Good Braided Ca...,I ordered this ca...,https://m.media-a...,https://www.amazo...
B096MSW6CT,Sounce Fast Phone...,Computers&Accesso...,₹199,"₹1,899",90%,3.9,7928,【 Fast Charger& D...,AGU3BBQ2V2DDAMOAK...,"Kunal,Himanshu,vi...","R3J3EQQ9TZI5ZJ,R3...",Good speed for ea...,Not quite durable...,https://m.media-a...,https://www.amazo...
B08HDJ86NZ,boAt Deuce USB 30...,Computers&Accesso...,₹329,₹699,53%,4.2,94363,The boAt Deuce US...,AEWAZDZZJLQUYVOVG...,"Omkar dhale,JD,HE...","R3EEUZKKK9J36I,R3...","Good product,Good...","Good product,long...",https://m.media-a...,https://www.amazo...
B08CF3B7N1,Portronics Konnec...,Computers&Accesso...,₹154,₹399,61%,4.2,16905,[CHARGE & SYNC FU...,AE3Q6KSUK5P75D5HF...,"rahuls6099,Swasat...","R1BP4L2HH9TFUP,R1...",As good as origin...,Bought this inste...,https://m.media-a...,https://www.amazo...
B08Y1TFSP6,pTron Solero TB30...,Computers&Accesso...,₹149,"₹1,000",85%,3.9,24871,Fast Charging & D...,AEQ2YMXSZWEOHK2EH...,"Jayesh,Rajesh k.,...","R7S8ANNSDPR40,R3C...","It's pretty good,...",It's a good produ...,https://m.media-a...,https://www.amazo...
B08WRWPM22,boAt Micro USB 55...,Computers&Accesso...,₹176.63,₹499,65%,4.1,15188,It Ensures High S...,AG7C6DAADCTRQJG2B...,"Vivek kumar,Amazo...","R8E73K2KWJRDS,RSD...","Long durable.,goo...",Build quality is ...,https://m.media-a...,https://www.amazo...
B08DDRGWTJ,MI Usb Type-C Cab...,Computers&Accesso...,₹229,₹299,23%,4.3,30411,1m long Type-C US...,AHW6E5LQ2BDYOIVLA...,"Pavan A H,Jayesh ...","R2X090D1YHACKR,R3...",Worth for money -...,Worth for money -...,https://m.media-a...,https://www.amazo...
B008IFXQFU,TP-Link USB WiFi ...,Computers&Accesso...,₹499,₹999,50%,4.2,179691,USB WiFi Adapter ...,AGV3IEFANZCKECFGU...,"Azhar JuMan,Aniru...","R1LW6NWSVTVZ2H,R3...",Works on linux fo...,I use this to con...,https://m.media-a...,https://www.amazo...
B082LZGK39,Ambrane Unbreakab...,Computers&Accesso...,₹199,₹299,33%,4.0,43994,Universal Compati...,AECPFYFQVRUWC3KGN...,"ArdKn,Nirbhay kum...","RGIQEG07R9HS2,R1S...",A Good Braided Ca...,I ordered this ca...,https://m.media-a...,https://www.amazo...


In [208]:
type(df)

pyspark.sql.dataframe.DataFrame

In [214]:
from pyspark.sql import *

from datetime import datetime, date
import pandas as pd
df2 = spark.createDataFrame([
    Row(a=1, b=2., c='string1', d=date(2000, 1, 1), e=datetime(2000, 1, 1, 12, 0)),
    Row(a=2, b=3., c='string2', d=date(2000, 2, 1), e=datetime(2000, 1, 2, 12, 0)),
    Row(a=4, b=5., c='string3', d=date(2000, 3, 1), e=datetime(2000, 1, 3, 12, 0))
])

In [218]:
df2.show()

+---+---+-------+----------+-------------------+
|  a|  b|      c|         d|                  e|
+---+---+-------+----------+-------------------+
|  1|2.0|string1|2000-01-01|2000-01-01 12:00:00|
|  2|3.0|string2|2000-02-01|2000-01-02 12:00:00|
|  4|5.0|string3|2000-03-01|2000-01-03 12:00:00|
+---+---+-------+----------+-------------------+



In [222]:
df2.select(df2.a)

a
1
2
4


In [231]:
df.select(df.rating).show(100)

+------+
|rating|
+------+
|   4.2|
|   4.0|
|   3.9|
|   4.2|
|   4.2|
|   3.9|
|   4.1|
|   4.3|
|   4.2|
|   4.0|
|   4.3|
|   4.2|
|   4.4|
|   4.2|
|   4.1|
|   4.4|
|   4.2|
|   4.0|
|   4.1|
|   4.3|
|   4.5|
|   3.7|
|   4.3|
|   4.0|
|   4.3|
|   4.2|
|   4.2|
|   4.0|
|   4.4|
|   4.3|
|   4.5|
|   4.0|
|   4.3|
|   4.3|
|   3.9|
|   3.9|
|   4.4|
|   4.0|
|   4.2|
|   4.2|
|   4.5|
|   4.3|
|   4.2|
|   4.3|
|   4.0|
|   3.3|
|   4.1|
|   4.4|
|   3.6|
|   4.2|
|   4.4|
|   4.2|
|   4.3|
|   4.2|
|   4.1|
|   3.7|
|   4.2|
|   4.2|
|   4.2|
|   4.3|
|   3.7|
|   4.3|
|   4.0|
|   4.2|
|   4.2|
|   4.4|
|   4.1|
|   4.3|
|   4.2|
|   4.3|
|   4.5|
|   4.1|
|   4.2|
|   4.0|
|   4.1|
|   4.1|
|   4.0|
|   4.1|
|   3.9|
|   4.0|
|   4.2|
|   4.0|
|   3.4|
|   4.4|
|   4.2|
|   4.2|
|   4.3|
|   4.3|
|   4.5|
|   4.2|
|   3.6|
|   4.2|
|   4.2|
|   4.1|
|   4.2|
|   3.7|
|   3.8|
|   3.7|
|   4.5|
|   4.1|
+------+
only showing top 100 rows



In [241]:
df.count()

1465

In [242]:
df.describe()

summary,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
count,1465,1465,1465,1465,1465,1465,1465,1463,1465,1465,1465,1465,1465,1465,1465,1465
mean,,,,,,,4.097299218194752,345.4575,67.88,5.564705882352941,4.111111111111111,59.446666666666665,63.53333333333334,216.0,2.5,
stddev,,,,,,,0.29334696955004685,268.659731195697,142.61645066401,5.524823659682563,0.19649710204252682,196.8218805587089,125.72179483673344,357.05461767074235,,
min,B002PD61Y4,!!1000 Watt/2000-...,123 Sports Mode,100 Sports Modes,100 + Sports Modes,150+ Watch Faces,100 Sports Mode ...,100 Watch Faces-...,Heart Rate Monit...,2 Analog sticks,1-step pairing,1.7 meter USB ca...,(B) Covers Full ...,(3rd Gen); iPad ...,I would say tha...,1 Magnetic Charger
max,B0BR4F878Q,tizum HDMI to VGA...,Toys&Games|Arts&C...,₹999,₹999,"₹6,490",₹90,"₹9,999",🥇【Kindly NOTE be...,₹999,"🤘🏻🤘🏻,Kabi,Man...","₹3,999","💥,Considering th...",🔸Everything is n...,https://m.media-a...,"🌟🌟🌟🌟,Good pro..."


In [247]:
df.dropDuplicates().count()

1459

In [249]:
df=df.dropna()

In [250]:
df.count()

1463

In [258]:
df.printSchema()

root
 |-- product_id: string (nullable = true)
 |-- product_name: string (nullable = true)
 |-- category: string (nullable = true)
 |-- discounted_price: string (nullable = true)
 |-- actual_price: string (nullable = true)
 |-- discount_percentage: string (nullable = true)
 |-- rating: string (nullable = true)
 |-- rating_count: string (nullable = true)
 |-- about_product: string (nullable = true)
 |-- user_id: string (nullable = true)
 |-- user_name: string (nullable = true)
 |-- review_id: string (nullable = true)
 |-- review_title: string (nullable = true)
 |-- review_content: string (nullable = true)
 |-- img_link: string (nullable = true)
 |-- product_link: string (nullable = true)



In [259]:
df

product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
B07JW9H4J1,Wayona Nylon Brai...,Computers&Accesso...,₹399,"₹1,099",64%,4.2,24269,High Compatibilit...,AG3D6O4STAQKAY2UV...,"Manav,Adarsh gupt...","R3HXWT0LRP0NMF,R2...","Satisfied,Chargin...",Looks durable Cha...,https://m.media-a...,https://www.amazo...
B098NS6PVG,Ambrane Unbreakab...,Computers&Accesso...,₹199,₹349,43%,4.0,43994,Compatible with a...,AECPFYFQVRUWC3KGN...,"ArdKn,Nirbhay kum...","RGIQEG07R9HS2,R1S...",A Good Braided Ca...,I ordered this ca...,https://m.media-a...,https://www.amazo...
B096MSW6CT,Sounce Fast Phone...,Computers&Accesso...,₹199,"₹1,899",90%,3.9,7928,【 Fast Charger& D...,AGU3BBQ2V2DDAMOAK...,"Kunal,Himanshu,vi...","R3J3EQQ9TZI5ZJ,R3...",Good speed for ea...,Not quite durable...,https://m.media-a...,https://www.amazo...
B08HDJ86NZ,boAt Deuce USB 30...,Computers&Accesso...,₹329,₹699,53%,4.2,94363,The boAt Deuce US...,AEWAZDZZJLQUYVOVG...,"Omkar dhale,JD,HE...","R3EEUZKKK9J36I,R3...","Good product,Good...","Good product,long...",https://m.media-a...,https://www.amazo...
B08CF3B7N1,Portronics Konnec...,Computers&Accesso...,₹154,₹399,61%,4.2,16905,[CHARGE & SYNC FU...,AE3Q6KSUK5P75D5HF...,"rahuls6099,Swasat...","R1BP4L2HH9TFUP,R1...",As good as origin...,Bought this inste...,https://m.media-a...,https://www.amazo...
B08Y1TFSP6,pTron Solero TB30...,Computers&Accesso...,₹149,"₹1,000",85%,3.9,24871,Fast Charging & D...,AEQ2YMXSZWEOHK2EH...,"Jayesh,Rajesh k.,...","R7S8ANNSDPR40,R3C...","It's pretty good,...",It's a good produ...,https://m.media-a...,https://www.amazo...
B08WRWPM22,boAt Micro USB 55...,Computers&Accesso...,₹176.63,₹499,65%,4.1,15188,It Ensures High S...,AG7C6DAADCTRQJG2B...,"Vivek kumar,Amazo...","R8E73K2KWJRDS,RSD...","Long durable.,goo...",Build quality is ...,https://m.media-a...,https://www.amazo...
B08DDRGWTJ,MI Usb Type-C Cab...,Computers&Accesso...,₹229,₹299,23%,4.3,30411,1m long Type-C US...,AHW6E5LQ2BDYOIVLA...,"Pavan A H,Jayesh ...","R2X090D1YHACKR,R3...",Worth for money -...,Worth for money -...,https://m.media-a...,https://www.amazo...
B008IFXQFU,TP-Link USB WiFi ...,Computers&Accesso...,₹499,₹999,50%,4.2,179691,USB WiFi Adapter ...,AGV3IEFANZCKECFGU...,"Azhar JuMan,Aniru...","R1LW6NWSVTVZ2H,R3...",Works on linux fo...,I use this to con...,https://m.media-a...,https://www.amazo...
B082LZGK39,Ambrane Unbreakab...,Computers&Accesso...,₹199,₹299,33%,4.0,43994,Universal Compati...,AECPFYFQVRUWC3KGN...,"ArdKn,Nirbhay kum...","RGIQEG07R9HS2,R1S...",A Good Braided Ca...,I ordered this ca...,https://m.media-a...,https://www.amazo...


In [273]:
df.select('rating_count').filter('rating_count < 20000').sort('rating_count',ascending=False)

rating_count
992
989
976
974
974
974
97
97
97
959


In [274]:
# To execute SQL queries on DataFrames, Spark needs to know how to refer to the DataFrame within the SQL context.
#By using createOrReplaceTempView(), you can assign a name to the DataFrame that SQL queries can reference.
df.createOrReplaceTempView("amazon")

In [289]:
#using spark.SQl data filtering
spark.sql("select  product_id,rating_count from amazon where rating_count < 20000 order by rating_count desc").show(3)

+----------+------------+
|product_id|rating_count|
+----------+------------+
|B08L12N5H1|         992|
|B00ZRBWPA0|         989|
|B09Y5FZK9N|         976|
+----------+------------+
only showing top 3 rows



In [288]:
# using pyspark data filtering
df.select("product_id","rating_count").filter('rating_count < 20000').sort('rating_count',ascending=False).show(3)

+----------+------------+
|product_id|rating_count|
+----------+------------+
|B08L12N5H1|         992|
|B00ZRBWPA0|         989|
|B09Y5FZK9N|         976|
+----------+------------+
only showing top 3 rows



In [311]:
df.groupBy("category").count().orderBy('count',ascending =False)

category,count
Computers&Accesso...,231
Electronics|Mobil...,68
Electronics|HomeT...,63
Electronics|Headp...,52
Electronics|HomeT...,49
Electronics|Weara...,31
Home&Kitchen|Kitc...,27
Computers&Accesso...,24
Home&Kitchen|Kitc...,24
Electronics|HomeT...,24


In [312]:
spark.sql("select category,count(*) as count from amazon group by category order by count desc ")

category,count
Computers&Accesso...,231
Electronics|Mobil...,68
Electronics|HomeT...,63
Electronics|Headp...,52
Electronics|HomeT...,49
Electronics|Weara...,31
Home&Kitchen|Kitc...,27
Computers&Accesso...,24
Home&Kitchen|Kitc...,24
Electronics|HomeT...,24


In [315]:
# Rename columns to uppercase using toDF
#toDF(): This method renames all columns at once, making it more efficient than looping with withColumnRenamed().
df.toDF(*[col.upper() for col in df.columns])

PRODUCT_ID,PRODUCT_NAME,CATEGORY,DISCOUNTED_PRICE,ACTUAL_PRICE,DISCOUNT_PERCENTAGE,RATING,RATING_COUNT,ABOUT_PRODUCT,USER_ID,USER_NAME,REVIEW_ID,REVIEW_TITLE,REVIEW_CONTENT,IMG_LINK,PRODUCT_LINK
B07JW9H4J1,Wayona Nylon Brai...,Computers&Accesso...,₹399,"₹1,099",64%,4.2,24269,High Compatibilit...,AG3D6O4STAQKAY2UV...,"Manav,Adarsh gupt...","R3HXWT0LRP0NMF,R2...","Satisfied,Chargin...",Looks durable Cha...,https://m.media-a...,https://www.amazo...
B098NS6PVG,Ambrane Unbreakab...,Computers&Accesso...,₹199,₹349,43%,4.0,43994,Compatible with a...,AECPFYFQVRUWC3KGN...,"ArdKn,Nirbhay kum...","RGIQEG07R9HS2,R1S...",A Good Braided Ca...,I ordered this ca...,https://m.media-a...,https://www.amazo...
B096MSW6CT,Sounce Fast Phone...,Computers&Accesso...,₹199,"₹1,899",90%,3.9,7928,【 Fast Charger& D...,AGU3BBQ2V2DDAMOAK...,"Kunal,Himanshu,vi...","R3J3EQQ9TZI5ZJ,R3...",Good speed for ea...,Not quite durable...,https://m.media-a...,https://www.amazo...
B08HDJ86NZ,boAt Deuce USB 30...,Computers&Accesso...,₹329,₹699,53%,4.2,94363,The boAt Deuce US...,AEWAZDZZJLQUYVOVG...,"Omkar dhale,JD,HE...","R3EEUZKKK9J36I,R3...","Good product,Good...","Good product,long...",https://m.media-a...,https://www.amazo...
B08CF3B7N1,Portronics Konnec...,Computers&Accesso...,₹154,₹399,61%,4.2,16905,[CHARGE & SYNC FU...,AE3Q6KSUK5P75D5HF...,"rahuls6099,Swasat...","R1BP4L2HH9TFUP,R1...",As good as origin...,Bought this inste...,https://m.media-a...,https://www.amazo...
B08Y1TFSP6,pTron Solero TB30...,Computers&Accesso...,₹149,"₹1,000",85%,3.9,24871,Fast Charging & D...,AEQ2YMXSZWEOHK2EH...,"Jayesh,Rajesh k.,...","R7S8ANNSDPR40,R3C...","It's pretty good,...",It's a good produ...,https://m.media-a...,https://www.amazo...
B08WRWPM22,boAt Micro USB 55...,Computers&Accesso...,₹176.63,₹499,65%,4.1,15188,It Ensures High S...,AG7C6DAADCTRQJG2B...,"Vivek kumar,Amazo...","R8E73K2KWJRDS,RSD...","Long durable.,goo...",Build quality is ...,https://m.media-a...,https://www.amazo...
B08DDRGWTJ,MI Usb Type-C Cab...,Computers&Accesso...,₹229,₹299,23%,4.3,30411,1m long Type-C US...,AHW6E5LQ2BDYOIVLA...,"Pavan A H,Jayesh ...","R2X090D1YHACKR,R3...",Worth for money -...,Worth for money -...,https://m.media-a...,https://www.amazo...
B008IFXQFU,TP-Link USB WiFi ...,Computers&Accesso...,₹499,₹999,50%,4.2,179691,USB WiFi Adapter ...,AGV3IEFANZCKECFGU...,"Azhar JuMan,Aniru...","R1LW6NWSVTVZ2H,R3...",Works on linux fo...,I use this to con...,https://m.media-a...,https://www.amazo...
B082LZGK39,Ambrane Unbreakab...,Computers&Accesso...,₹199,₹299,33%,4.0,43994,Universal Compati...,AECPFYFQVRUWC3KGN...,"ArdKn,Nirbhay kum...","RGIQEG07R9HS2,R1S...",A Good Braided Ca...,I ordered this ca...,https://m.media-a...,https://www.amazo...


In [323]:
#this function is used for create a new column,or  to modify an existing column by applying a transformation to it. 
df.withColumn("literal",lit(100)) # creating new column
df.withColumn("rating",col('rating')+1) # modifying rating column by applying transformation, here we are adding 1 to all values.

product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
B07JW9H4J1,Wayona Nylon Brai...,Computers&Accesso...,₹399,"₹1,099",64%,5.2,24269,High Compatibilit...,AG3D6O4STAQKAY2UV...,"Manav,Adarsh gupt...","R3HXWT0LRP0NMF,R2...","Satisfied,Chargin...",Looks durable Cha...,https://m.media-a...,https://www.amazo...
B098NS6PVG,Ambrane Unbreakab...,Computers&Accesso...,₹199,₹349,43%,5.0,43994,Compatible with a...,AECPFYFQVRUWC3KGN...,"ArdKn,Nirbhay kum...","RGIQEG07R9HS2,R1S...",A Good Braided Ca...,I ordered this ca...,https://m.media-a...,https://www.amazo...
B096MSW6CT,Sounce Fast Phone...,Computers&Accesso...,₹199,"₹1,899",90%,4.9,7928,【 Fast Charger& D...,AGU3BBQ2V2DDAMOAK...,"Kunal,Himanshu,vi...","R3J3EQQ9TZI5ZJ,R3...",Good speed for ea...,Not quite durable...,https://m.media-a...,https://www.amazo...
B08HDJ86NZ,boAt Deuce USB 30...,Computers&Accesso...,₹329,₹699,53%,5.2,94363,The boAt Deuce US...,AEWAZDZZJLQUYVOVG...,"Omkar dhale,JD,HE...","R3EEUZKKK9J36I,R3...","Good product,Good...","Good product,long...",https://m.media-a...,https://www.amazo...
B08CF3B7N1,Portronics Konnec...,Computers&Accesso...,₹154,₹399,61%,5.2,16905,[CHARGE & SYNC FU...,AE3Q6KSUK5P75D5HF...,"rahuls6099,Swasat...","R1BP4L2HH9TFUP,R1...",As good as origin...,Bought this inste...,https://m.media-a...,https://www.amazo...
B08Y1TFSP6,pTron Solero TB30...,Computers&Accesso...,₹149,"₹1,000",85%,4.9,24871,Fast Charging & D...,AEQ2YMXSZWEOHK2EH...,"Jayesh,Rajesh k.,...","R7S8ANNSDPR40,R3C...","It's pretty good,...",It's a good produ...,https://m.media-a...,https://www.amazo...
B08WRWPM22,boAt Micro USB 55...,Computers&Accesso...,₹176.63,₹499,65%,5.1,15188,It Ensures High S...,AG7C6DAADCTRQJG2B...,"Vivek kumar,Amazo...","R8E73K2KWJRDS,RSD...","Long durable.,goo...",Build quality is ...,https://m.media-a...,https://www.amazo...
B08DDRGWTJ,MI Usb Type-C Cab...,Computers&Accesso...,₹229,₹299,23%,5.3,30411,1m long Type-C US...,AHW6E5LQ2BDYOIVLA...,"Pavan A H,Jayesh ...","R2X090D1YHACKR,R3...",Worth for money -...,Worth for money -...,https://m.media-a...,https://www.amazo...
B008IFXQFU,TP-Link USB WiFi ...,Computers&Accesso...,₹499,₹999,50%,5.2,179691,USB WiFi Adapter ...,AGV3IEFANZCKECFGU...,"Azhar JuMan,Aniru...","R1LW6NWSVTVZ2H,R3...",Works on linux fo...,I use this to con...,https://m.media-a...,https://www.amazo...
B082LZGK39,Ambrane Unbreakab...,Computers&Accesso...,₹199,₹299,33%,5.0,43994,Universal Compati...,AECPFYFQVRUWC3KGN...,"ArdKn,Nirbhay kum...","RGIQEG07R9HS2,R1S...",A Good Braided Ca...,I ordered this ca...,https://m.media-a...,https://www.amazo...


In [320]:
#this function is used for change the existing column name .
df1=df.withColumnRenamed('category','CATEGORY')

In [321]:
df1

product_id,product_name,CATEGORY,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link
B07JW9H4J1,Wayona Nylon Brai...,Computers&Accesso...,₹399,"₹1,099",64%,4.2,24269,High Compatibilit...,AG3D6O4STAQKAY2UV...,"Manav,Adarsh gupt...","R3HXWT0LRP0NMF,R2...","Satisfied,Chargin...",Looks durable Cha...,https://m.media-a...,https://www.amazo...
B098NS6PVG,Ambrane Unbreakab...,Computers&Accesso...,₹199,₹349,43%,4.0,43994,Compatible with a...,AECPFYFQVRUWC3KGN...,"ArdKn,Nirbhay kum...","RGIQEG07R9HS2,R1S...",A Good Braided Ca...,I ordered this ca...,https://m.media-a...,https://www.amazo...
B096MSW6CT,Sounce Fast Phone...,Computers&Accesso...,₹199,"₹1,899",90%,3.9,7928,【 Fast Charger& D...,AGU3BBQ2V2DDAMOAK...,"Kunal,Himanshu,vi...","R3J3EQQ9TZI5ZJ,R3...",Good speed for ea...,Not quite durable...,https://m.media-a...,https://www.amazo...
B08HDJ86NZ,boAt Deuce USB 30...,Computers&Accesso...,₹329,₹699,53%,4.2,94363,The boAt Deuce US...,AEWAZDZZJLQUYVOVG...,"Omkar dhale,JD,HE...","R3EEUZKKK9J36I,R3...","Good product,Good...","Good product,long...",https://m.media-a...,https://www.amazo...
B08CF3B7N1,Portronics Konnec...,Computers&Accesso...,₹154,₹399,61%,4.2,16905,[CHARGE & SYNC FU...,AE3Q6KSUK5P75D5HF...,"rahuls6099,Swasat...","R1BP4L2HH9TFUP,R1...",As good as origin...,Bought this inste...,https://m.media-a...,https://www.amazo...
B08Y1TFSP6,pTron Solero TB30...,Computers&Accesso...,₹149,"₹1,000",85%,3.9,24871,Fast Charging & D...,AEQ2YMXSZWEOHK2EH...,"Jayesh,Rajesh k.,...","R7S8ANNSDPR40,R3C...","It's pretty good,...",It's a good produ...,https://m.media-a...,https://www.amazo...
B08WRWPM22,boAt Micro USB 55...,Computers&Accesso...,₹176.63,₹499,65%,4.1,15188,It Ensures High S...,AG7C6DAADCTRQJG2B...,"Vivek kumar,Amazo...","R8E73K2KWJRDS,RSD...","Long durable.,goo...",Build quality is ...,https://m.media-a...,https://www.amazo...
B08DDRGWTJ,MI Usb Type-C Cab...,Computers&Accesso...,₹229,₹299,23%,4.3,30411,1m long Type-C US...,AHW6E5LQ2BDYOIVLA...,"Pavan A H,Jayesh ...","R2X090D1YHACKR,R3...",Worth for money -...,Worth for money -...,https://m.media-a...,https://www.amazo...
B008IFXQFU,TP-Link USB WiFi ...,Computers&Accesso...,₹499,₹999,50%,4.2,179691,USB WiFi Adapter ...,AGV3IEFANZCKECFGU...,"Azhar JuMan,Aniru...","R1LW6NWSVTVZ2H,R3...",Works on linux fo...,I use this to con...,https://m.media-a...,https://www.amazo...
B082LZGK39,Ambrane Unbreakab...,Computers&Accesso...,₹199,₹299,33%,4.0,43994,Universal Compati...,AECPFYFQVRUWC3KGN...,"ArdKn,Nirbhay kum...","RGIQEG07R9HS2,R1S...",A Good Braided Ca...,I ordered this ca...,https://m.media-a...,https://www.amazo...


In [324]:
spark.stop()

In [329]:
sc.stop()