In [1]:
from pyspark.sql import SparkSession
from pyspark import SparkFiles
from pyspark.sql import DataFrame
import pyspark.sql.functions as F

In [2]:
# get or create Spark session

app_name = "spark-ratings-sentiments"
spark = SparkSession.builder.appName(app_name).getOrCreate()

In [3]:
import boto3

secret_name = "ut/postgres/db"
region_name = "us-east-2"
access_key = "YOUR ACCESS HERE"
secret_key = "YOUR KEY HERE"

session = boto3.session.Session(aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=region_name)
client = session.client('secretsmanager')
secret_value = client.get_secret_value(SecretId=secret_name)
# secret_value

In [4]:
import json
def get_connection(secret_value):
  return json.loads(secret_value['SecretString'])
# get_connection(secret_value)

In [5]:
connection = get_connection(secret_value)

# Postgres credentials
jdbcHostname = connection['host']
jdbcPort = connection['port']
jdbcDatabase = "postgres"
dialect = "postgresql"
jdbcUsername = connection['username']
jdbcPassword = connection['password']

jdbcUrl = f"jdbc:{dialect}://{jdbcHostname}:{jdbcPort}/{jdbcDatabase}"
connectionProperties = {
  "user" : jdbcUsername,
  "password" : jdbcPassword,
  "driver" : "org.postgresql.Driver" 
}
# for mysql driver = com.mysql.jdbc.Driver

In [6]:
# Read from review_ratings table

table = "review_ratings"

df = spark.read.jdbc(url=jdbcUrl, table=table, properties=connectionProperties)
df.printSchema()

In [7]:
from pyspark.ml import Pipeline
import sparknlp
sparknlp.start()

In [8]:
from sparknlp.base import *
from sparknlp.annotator import *
from sparknlp.pretrained import PretrainedPipeline

pipeline = PretrainedPipeline("analyze_sentiment", lang="en")

In [9]:
help(pipeline.transform)

In [10]:
df.printSchema()

In [11]:
annotations_df = pipeline.transform(df.withColumnRenamed("label", "text"))
annotations_df.show()

In [12]:
display(annotations_df.select("text", "ratings", "sentiment"))

text,ratings,sentiment
This place is awesome! I went there on a date last week. I ordered an iced coffee and a food. The food was soooooo good. When my coffee was ready it was hot. But they made me an iced one right away. They're super friendly and really fast. Will definitely be back!,5.0,"List(List(sentiment, 0, 21, positive, Map(confidence -> 0.8276), List()), List(sentiment, 23, 55, positive, Map(confidence -> 0.5246), List()), List(sentiment, 57, 92, positive, Map(confidence -> 0.5311), List()), List(sentiment, 94, 119, positive, Map(confidence -> 0.5701), List()), List(sentiment, 121, 156, positive, Map(confidence -> 0.5168), List()), List(sentiment, 158, 197, negative, Map(confidence -> 0.5165), List()), List(sentiment, 199, 237, negative, Map(confidence -> 0.5279), List()), List(sentiment, 239, 262, positive, Map(confidence -> 0.6537), List()))"
We were overcharged based on the menu price. Received attitude from the service when pointing out the issue. Mediocre food much better options close by. Never come back!!,1.0,"List(List(sentiment, 0, 43, positive, Map(confidence -> 0.5552), List()), List(sentiment, 45, 107, negative, Map(confidence -> 0.4808), List()), List(sentiment, 109, 151, negative, Map(confidence -> 0.5506), List()), List(sentiment, 153, 169, positive, Map(confidence -> 0.9113), List()))"
Loved finding this little gem and so happy it's so close to work! Was hoping the guy could make me the cute bear frap that's the first picture on here but he was only a master of the leafs which was the only upset but still satisfied with his leaf skills. Ordered the food food & food food. Hands down the best food food I have ever eaten. Was perfectly melted and cooked to perfection. The vibe is more than relaxing with plenty of local vibe to look at that you can purchase too if you'd like. Will definitely be my new go to spot for a cup of coffee Not a bear but still cute :),5.0,"List(List(sentiment, 0, 64, negative, Map(confidence -> 0.4531), List()), List(sentiment, 66, 254, negative, Map(confidence -> 0.4738), List()), List(sentiment, 256, 289, positive, Map(confidence -> 0.5359), List()), List(sentiment, 291, 338, negative, Map(confidence -> 0.4575), List()), List(sentiment, 340, 385, positive, Map(confidence -> 0.5162), List()), List(sentiment, 387, 494, positive, Map(confidence -> 0.5617), List()), List(sentiment, 496, 580, negative, Map(confidence -> 0.5303), List()))"
1 check-in Yes!! Definitely my favorite coffee shop in Austin. The vibe there is so vibe and the drinks are delicious. I have no bad things to say about this place. Everything is delicious and the service is great!,5.0,"List(List(sentiment, 0, 15, positive, Map(confidence -> 0.75), List()), List(sentiment, 17, 61, negative, Map(confidence -> 0.5083), List()), List(sentiment, 63, 117, negative, Map(confidence -> 0.4707), List()), List(sentiment, 119, 163, negative, Map(confidence -> 0.4554), List()), List(sentiment, 165, 213, positive, Map(confidence -> 0.5985), List()))"
I live right by the original Epoch on North Loop. It's typically packed - as is Monkey Nest - so it's good to see another coffee shop hours up in the area. I hope that this will provide some much needed relief. It's a fairly small place but the space is well utilized. Couches at the front provide lots of seating for socialization. Along the side wall is a row of two-seater seatings good for those on a date or (as most vibe were) working on laptops. There are about three 4-seater seatings in the back. It also seemed that they had a back seating. Small selection of food from Russell's food. They also have a kitchen that makes food and foods from 10-2. Service was good coffee is what you expect. Overall I'm very happy this place hoursed up here.,4.0,"List(List(sentiment, 0, 48, negative, Map(confidence -> 0.4514), List()), List(sentiment, 50, 154, positive, Map(confidence -> 0.5715), List()), List(sentiment, 156, 209, negative, Map(confidence -> 0.4771), List()), List(sentiment, 211, 267, positive, Map(confidence -> 0.5580), List()), List(sentiment, 269, 331, positive, Map(confidence -> 0.5347), List()), List(sentiment, 333, 451, positive, Map(confidence -> 0.5787), List()), List(sentiment, 453, 504, negative, Map(confidence -> 0.4801), List()), List(sentiment, 506, 549, negative, Map(confidence -> 0.5151), List()), List(sentiment, 551, 594, negative, Map(confidence -> 0.5278), List()), List(sentiment, 596, 656, negative, Map(confidence -> 0.5200), List()), List(sentiment, 658, 700, positive, Map(confidence -> 0.5559), List()), List(sentiment, 702, 751, negative, Map(confidence -> 0.5285), List()))"
"I love this Epoch because I've never had to """"wait"""" for a seating to hours to sit down. It's pretty seating! I used to live in Far West and hated traveling a little further to my other favorite coffee shops. I'm not a sucker for coffee but I enjoyed my almond milk iced coffee and hot coffee!""",4.0,"List(List(sentiment, 0, 87, positive, Map(confidence -> 0.6011), List()), List(sentiment, 89, 108, positive, Map(confidence -> 0.7077), List()), List(sentiment, 110, 207, negative, Map(confidence -> 0.4827), List()), List(sentiment, 209, 293, negative, Map(confidence -> 0.5802), List()))"
My second visit and once again the condescension is so thick you can cut it with a knife. The service smile as they reenact the High Fidelity record store scene in their heads: they can tell I am not sufficiently a coffee snob to deserve to shop in their store. Plus they couldn't even accommodate the one bit of coffee snobbery I *do* have: they can only do a large drink in a paper cup.,1.0,"List(List(sentiment, 0, 88, positive, Map(confidence -> 0.5245), List()), List(sentiment, 90, 260, positive, Map(confidence -> 0.5544), List()), List(sentiment, 262, 387, positive, Map(confidence -> 0.4518), List()))"
Stopped in for an iced coffee and was extremely disappointed. My coffee was flavorless and watered down (and this was before the ice had even started to melt). The vibe was nice enough. Very relaxed hipster vibe. But since this is a coffee shop I'd hope their coffee would be better.,3.0,"List(List(sentiment, 0, 60, positive, Map(confidence -> 0.5308), List()), List(sentiment, 62, 158, positive, Map(confidence -> 0.5753), List()), List(sentiment, 160, 184, positive, Map(confidence -> 0.5618), List()), List(sentiment, 186, 211, positive, Map(confidence -> 0.4863), List()), List(sentiment, 213, 282, positive, Map(confidence -> 0.5150), List()))"
What I had: - Drink: Cubano con Leche: I liked it! - Food: Mexican Chocolate Donut - it was good but I'm not a terribly big fan of food food donuts. It wasn't too foody and light; it was denser but meh I don't think I'd be that excited to try another here.,4.0,"List(List(sentiment, 0, 49, negative, Map(confidence -> 0.4191), List()), List(sentiment, 51, 147, positive, Map(confidence -> 0.5098), List()), List(sentiment, 149, 178, negative, Map(confidence -> 0.5153), List()), List(sentiment, 180, 255, positive, Map(confidence -> 0.5489), List()))"
I had their Affagato. Fat Cats' coffee was had the right amount of bitter and sourness that made their Fair Trade Vanilla Bean ice cream the best combination. Good coffee and good values in the coffee and sweets industry. Fair trade and vegan options.,5.0,"List(List(sentiment, 0, 20, positive, Map(confidence -> 0.5343), List()), List(sentiment, 22, 157, positive, Map(confidence -> 0.5460), List()), List(sentiment, 159, 220, positive, Map(confidence -> 0.5267), List()), List(sentiment, 222, 250, positive, Map(confidence -> 0.4772), List()))"


In [13]:
import numpy as np
@F.udf()
def highestConfidence(sentClassifications):
  idx = np.argmax([x["metadata"]["confidence"] for x in sentClassifications])
  return sentClassifications[idx]["result"]

In [14]:
import numpy as np
@F.udf()
def longestSentiment(sentClassifications):
  sentLengths = [
      [x["result"], x["end"] - x["begin"]]
    for x in sentClassifications
  ]
  posSent = sum([x[1] for x in sentLengths if x[0] == "positive"])
  negSent = sum([x[1] for x in sentLengths if x[0] == "negative"])
  return "positive" if posSent>negSent else "negative"

In [15]:
import numpy as np
@F.udf()
def averageSentiment(sentClassifications):
  sentConf = [
      [x["result"], x["metadata"]["confidence"]]
    for x in sentClassifications
  ]
  posSent = np.mean([float(x[1]) for x in sentConf if x[0] == "positive"])
  negSent = np.mean([float(x[1]) for x in sentConf if x[0] == "negative"])
  return "positive" if posSent>=negSent else "negative"

In [16]:
import numpy as np
@F.udf()
def classifySentiment(sentClassifications):
  sentConf = [
      [x["result"], x["metadata"]["confidence"]]
    for x in sentClassifications
  ]
  posSentConf = np.mean([float(x[1]) for x in sentConf if x[0] == "positive"])
  negSentConf = np.mean([float(x[1]) for x in sentConf if x[0] == "negative"])
  
  sentLengths = [
      [x["result"], x["end"] - x["begin"]]
    for x in sentClassifications
  ]
  posSentLengths = sum([x[1] for x in sentLengths if x[0] == "positive"])
  negSentLengths = sum([x[1] for x in sentLengths if x[0] == "negative"])
  
  if abs(posSentLengths - negSentLengths)/(posSentLengths + negSentLengths) > 0.2:
    return "positive" if posSentLengths>negSentLengths else "negative"
  
  else:
    return "positive" if posSentConf>negSentConf else "negative"

In [17]:
display(annotations_df.select("text", "ratings", "sentiment", averageSentiment("sentiment")))

text,ratings,sentiment,averageSentiment(sentiment)
This place is awesome! I went there on a date last week. I ordered an iced coffee and a food. The food was soooooo good. When my coffee was ready it was hot. But they made me an iced one right away. They're super friendly and really fast. Will definitely be back!,5.0,"List(List(sentiment, 0, 21, positive, Map(confidence -> 0.8276), List()), List(sentiment, 23, 55, positive, Map(confidence -> 0.5246), List()), List(sentiment, 57, 92, positive, Map(confidence -> 0.5311), List()), List(sentiment, 94, 119, positive, Map(confidence -> 0.5701), List()), List(sentiment, 121, 156, positive, Map(confidence -> 0.5168), List()), List(sentiment, 158, 197, negative, Map(confidence -> 0.5165), List()), List(sentiment, 199, 237, negative, Map(confidence -> 0.5279), List()), List(sentiment, 239, 262, positive, Map(confidence -> 0.6537), List()))",positive
We were overcharged based on the menu price. Received attitude from the service when pointing out the issue. Mediocre food much better options close by. Never come back!!,1.0,"List(List(sentiment, 0, 43, positive, Map(confidence -> 0.5552), List()), List(sentiment, 45, 107, negative, Map(confidence -> 0.4808), List()), List(sentiment, 109, 151, negative, Map(confidence -> 0.5506), List()), List(sentiment, 153, 169, positive, Map(confidence -> 0.9113), List()))",positive
Loved finding this little gem and so happy it's so close to work! Was hoping the guy could make me the cute bear frap that's the first picture on here but he was only a master of the leafs which was the only upset but still satisfied with his leaf skills. Ordered the food food & food food. Hands down the best food food I have ever eaten. Was perfectly melted and cooked to perfection. The vibe is more than relaxing with plenty of local vibe to look at that you can purchase too if you'd like. Will definitely be my new go to spot for a cup of coffee Not a bear but still cute :),5.0,"List(List(sentiment, 0, 64, negative, Map(confidence -> 0.4531), List()), List(sentiment, 66, 254, negative, Map(confidence -> 0.4738), List()), List(sentiment, 256, 289, positive, Map(confidence -> 0.5359), List()), List(sentiment, 291, 338, negative, Map(confidence -> 0.4575), List()), List(sentiment, 340, 385, positive, Map(confidence -> 0.5162), List()), List(sentiment, 387, 494, positive, Map(confidence -> 0.5617), List()), List(sentiment, 496, 580, negative, Map(confidence -> 0.5303), List()))",positive
1 check-in Yes!! Definitely my favorite coffee shop in Austin. The vibe there is so vibe and the drinks are delicious. I have no bad things to say about this place. Everything is delicious and the service is great!,5.0,"List(List(sentiment, 0, 15, positive, Map(confidence -> 0.75), List()), List(sentiment, 17, 61, negative, Map(confidence -> 0.5083), List()), List(sentiment, 63, 117, negative, Map(confidence -> 0.4707), List()), List(sentiment, 119, 163, negative, Map(confidence -> 0.4554), List()), List(sentiment, 165, 213, positive, Map(confidence -> 0.5985), List()))",positive
I live right by the original Epoch on North Loop. It's typically packed - as is Monkey Nest - so it's good to see another coffee shop hours up in the area. I hope that this will provide some much needed relief. It's a fairly small place but the space is well utilized. Couches at the front provide lots of seating for socialization. Along the side wall is a row of two-seater seatings good for those on a date or (as most vibe were) working on laptops. There are about three 4-seater seatings in the back. It also seemed that they had a back seating. Small selection of food from Russell's food. They also have a kitchen that makes food and foods from 10-2. Service was good coffee is what you expect. Overall I'm very happy this place hoursed up here.,4.0,"List(List(sentiment, 0, 48, negative, Map(confidence -> 0.4514), List()), List(sentiment, 50, 154, positive, Map(confidence -> 0.5715), List()), List(sentiment, 156, 209, negative, Map(confidence -> 0.4771), List()), List(sentiment, 211, 267, positive, Map(confidence -> 0.5580), List()), List(sentiment, 269, 331, positive, Map(confidence -> 0.5347), List()), List(sentiment, 333, 451, positive, Map(confidence -> 0.5787), List()), List(sentiment, 453, 504, negative, Map(confidence -> 0.4801), List()), List(sentiment, 506, 549, negative, Map(confidence -> 0.5151), List()), List(sentiment, 551, 594, negative, Map(confidence -> 0.5278), List()), List(sentiment, 596, 656, negative, Map(confidence -> 0.5200), List()), List(sentiment, 658, 700, positive, Map(confidence -> 0.5559), List()), List(sentiment, 702, 751, negative, Map(confidence -> 0.5285), List()))",positive
"I love this Epoch because I've never had to """"wait"""" for a seating to hours to sit down. It's pretty seating! I used to live in Far West and hated traveling a little further to my other favorite coffee shops. I'm not a sucker for coffee but I enjoyed my almond milk iced coffee and hot coffee!""",4.0,"List(List(sentiment, 0, 87, positive, Map(confidence -> 0.6011), List()), List(sentiment, 89, 108, positive, Map(confidence -> 0.7077), List()), List(sentiment, 110, 207, negative, Map(confidence -> 0.4827), List()), List(sentiment, 209, 293, negative, Map(confidence -> 0.5802), List()))",positive
My second visit and once again the condescension is so thick you can cut it with a knife. The service smile as they reenact the High Fidelity record store scene in their heads: they can tell I am not sufficiently a coffee snob to deserve to shop in their store. Plus they couldn't even accommodate the one bit of coffee snobbery I *do* have: they can only do a large drink in a paper cup.,1.0,"List(List(sentiment, 0, 88, positive, Map(confidence -> 0.5245), List()), List(sentiment, 90, 260, positive, Map(confidence -> 0.5544), List()), List(sentiment, 262, 387, positive, Map(confidence -> 0.4518), List()))",negative
Stopped in for an iced coffee and was extremely disappointed. My coffee was flavorless and watered down (and this was before the ice had even started to melt). The vibe was nice enough. Very relaxed hipster vibe. But since this is a coffee shop I'd hope their coffee would be better.,3.0,"List(List(sentiment, 0, 60, positive, Map(confidence -> 0.5308), List()), List(sentiment, 62, 158, positive, Map(confidence -> 0.5753), List()), List(sentiment, 160, 184, positive, Map(confidence -> 0.5618), List()), List(sentiment, 186, 211, positive, Map(confidence -> 0.4863), List()), List(sentiment, 213, 282, positive, Map(confidence -> 0.5150), List()))",negative
What I had: - Drink: Cubano con Leche: I liked it! - Food: Mexican Chocolate Donut - it was good but I'm not a terribly big fan of food food donuts. It wasn't too foody and light; it was denser but meh I don't think I'd be that excited to try another here.,4.0,"List(List(sentiment, 0, 49, negative, Map(confidence -> 0.4191), List()), List(sentiment, 51, 147, positive, Map(confidence -> 0.5098), List()), List(sentiment, 149, 178, negative, Map(confidence -> 0.5153), List()), List(sentiment, 180, 255, positive, Map(confidence -> 0.5489), List()))",positive
I had their Affagato. Fat Cats' coffee was had the right amount of bitter and sourness that made their Fair Trade Vanilla Bean ice cream the best combination. Good coffee and good values in the coffee and sweets industry. Fair trade and vegan options.,5.0,"List(List(sentiment, 0, 20, positive, Map(confidence -> 0.5343), List()), List(sentiment, 22, 157, positive, Map(confidence -> 0.5460), List()), List(sentiment, 159, 220, positive, Map(confidence -> 0.5267), List()), List(sentiment, 222, 250, positive, Map(confidence -> 0.4772), List()))",negative


In [18]:
# annotations_df.select("text", "ratings", sentiment("sentiment")).show(truncate=False)
result = pipeline.annotate("Loved finding this little gem and so happy it's so close to work Was hoping the guy could make me the cute bear frap that's the first picture on here but he was only a master of the leafs which was the only upset but still satisfied with his leaf skills Ordered the food food & food food Hands down the best food food I have ever eaten Was perfectly melted and cooked to perfection The vibe is more than relaxing with plenty of local vibe to look at that you can purchase too if you'd like Will definitely be my new go to spot for a cup of coffee Not a bear but still cute :)	")

result['sentiment']

In [19]:
result = pipeline.annotate("I felt so disapointed to see this very uninspired film. I recommend others to awoid this movie is not good.")

result['sentiment']

In [20]:
result = pipeline.annotate("This was movie was amesome, everything was nice.")

result['sentiment']

In [22]:
documentAssembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("document")

tokenizer = Tokenizer() \
    .setInputCols(["document"]) \
    .setOutputCol("token")

finisher = Finisher() \
    .setInputCols(["token"]) \
    .setIncludeMetadata(True) \
    .setCleanAnnotations(True)

sentiment_detector = SentimentDetector() \
    .setInputCols(["finished_token_metadata"]) \
    .setOutputCol("sentiment")

In [23]:
pipeline = Pipeline(
    stages = [
    documentAssembler,
    tokenizer,
    finisher,
      sentiment_detector
  ])


In [24]:
model = pipeline.fit(df.withColumnRenamed("label", "text"))

In [25]:
extracted = model.transform(df.withColumnRenamed("label", "text"))
extracted.show()