# ChatGPT for sentiment analysis

In [2]:
import findspark
findspark.init()
from pyspark import SparkContext
from pyspark.sql import SparkSession

# Create the notebook's Spark session, or reuse one that already exists.
# 'local[*]' runs Spark on this machine rather than on a cluster.
spark = (
    SparkSession.builder
    .master('local[*]')
    .appName('Spark-Example-30-ChatGPT')
    .getOrCreate()
)

In [3]:
# Bring in deps
import os 
from pyspark.sql import SparkSession
from working.apikey import apikey
import openai
import tiktoken

# Publish the key through the OPENAI_API_KEY environment variable and set it
# on the openai module; targets are assigned left to right, so the
# environment variable is written first.
os.environ['OPENAI_API_KEY'] = openai.api_key = apikey

In [4]:
def num_tokens_from_string(string: str, encoding_model: str = "gpt-4o-mini") -> int:
    """Return the number of tokens tiktoken produces for a text string.

    Args:
        string: Text to tokenize.
        encoding_model: Model name used to look up the tokenizer through
            ``tiktoken.encoding_for_model``.

    Returns:
        The length of the token list returned by ``encoding.encode(string)``.

    Notes:
        ``tiktoken.encoding_for_model`` raises ``KeyError`` for model names it
        cannot map to a tokenizer (for example, models newer than the
        installed tiktoken release). Instead of crashing, this function then
        emits a ``UserWarning`` and counts with the "o200k_base" encoding, so
        the result is an approximation for such models.
    """
    try:
        encoding = tiktoken.encoding_for_model(encoding_model)
    except KeyError:
        # Function-scope import keeps this cell self-contained.
        import warnings

        fallback_encoding = "o200k_base"
        warnings.warn(
            f"No tokenizer mapping for model {encoding_model!r}; "
            f"falling back to {fallback_encoding!r}, token count is approximate.",
            stacklevel=2,
        )
        encoding = tiktoken.get_encoding(fallback_encoding)
    return len(encoding.encode(string))

In [8]:
# Sanity check: count the tokens of a small JSON string using the helper's
# default model; the cell's last expression is displayed as its output.
t='{"name":"John","age":30,"car":null}'
num_tokens_from_string(t)

13

In [9]:
# Look up the tokenizer tiktoken maps to "gpt-4o-mini" and keep it in a
# notebook-level name.
# NOTE(review): no visible cell reads `encoding` afterwards — confirm it is still needed.
encoding = tiktoken.encoding_for_model("gpt-4o-mini")

In [12]:
# Set the API key on the openai module so this cell can run on its own.
openai.api_key = apikey

# One model name drives both the local token count and the API request, so the
# tokenizer used for counting always matches the model being called.
model_name = "gpt-4o-mini"  # alternative: "gpt-4-turbo-preview"

# Build the OpenAI prompt: the task instruction followed by one sentence per line.
# The sentiment labels are left open here; the model chooses its own wording.
prompt_task = (
    'Detect the sentiment of the sentences and return the result in JSON without any other explanations, '
    'in the format {"text":"sentence","sentiment":"sentiment"}\n'
)

text = """
The CS 777 class is awesome!
The CS 777 class is terrible!
The CS 777 class is ok!
The CS 777 is a challenging course.
The CS 777 class is a lot of fun but a lot of work.
The CS 777 course is quite enjoyable, but it demands a lot of effort.
"""
prompt = prompt_task + text

# Rough size of the prompt: whitespace-separated words.
print("Number of words in the prompt:",len(prompt.split()))
# Size of the prompt in tokens, counted by tiktoken for the model used below.
print("Number of tokens in the prompt:",num_tokens_from_string(prompt, encoding_model=model_name))

# Send the prompt as a single user message; temperature 0.0 requests the
# least random sampling.
response = openai.chat.completions.create(
    model=model_name,
    messages=[{"role": "user", "content": prompt}],
    temperature=0.0,
)
# Text of the first returned choice.
r = response.choices[0].message.content

print("Result from OpenAI:")

print(r)


Number of words in the prompt: 73
Number of tokens in the prompt: 97
Result from OpenAI:
[
    {"text":"The CS 777 class is awesome!","sentiment":"positive"},
    {"text":"The CS 777 class is terrible!","sentiment":"negative"},
    {"text":"The CS 777 class is ok!","sentiment":"neutral"},
    {"text":"The CS 777 is a challenging course.","sentiment":"neutral"},
    {"text":"The CS 777 class is a lot of fun but a lot of work.","sentiment":"mixed"},
    {"text":"The CS 777 course is quite enjoyable, but it demands a lot of effort.","sentiment":"mixed"}
]


In [14]:
# Display the raw ChatCompletion object (choices, model name, token usage).
# NOTE(review): this cell's execution count (14) is higher than that of the
# cell that follows it (13), so the saved output presumably belongs to that
# later request — restart the kernel and run all cells in order to confirm.
response

ChatCompletion(id='chatcmpl-AGDqLKNXu9thS2nCcB5qj1N8iY92X', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='[\n    {"text":"The CS 777 class is awesome!","sentiment":"positive"},\n    {"text":"The CS 777 class is terrible!","sentiment":"negative"},\n    {"text":"The CS 777 class is ok!","sentiment":"neutral"},\n    {"text":"The CS 777 is a challenging course.","sentiment":"mixed"},\n    {"text":"The CS 777 class is a lot of fun but a lot of work.","sentiment":"mixed"},\n    {"text":"The CS 777 course is quite enjoyable, but it demands a lot of effort.","sentiment":"mixed"}\n]', role='assistant', function_call=None, tool_calls=None, refusal=None))], created=1728429825, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier=None, system_fingerprint='fp_74ba47b4ac', usage=CompletionUsage(completion_tokens=125, prompt_tokens=114, total_tokens=239, prompt_tokens_details={'cached_tokens': 0}, completion_tokens_details={'rea

In [13]:
# Set the API key on the openai module so this cell can run on its own.
openai.api_key = apikey

# One model name drives both the local token count and the API request, so the
# tokenizer used for counting always matches the model being called.
model_name = "gpt-4o-mini"  # alternative: "gpt-3.5-turbo"

# Build the OpenAI prompt: the task instruction followed by one sentence per line.
# This variant lists the allowed labels (positive, negative, mixed, neutral)
# in the instruction itself.
prompt_task = (
    'Detect the sentiment as (positive, negative, mixed, neutral) of the sentences and return the result in JSON without any other explanations, '
    'in the format {"text":"sentence","sentiment":"sentiment"}\n'
)

text = """
The CS 777 class is awesome!
The CS 777 class is terrible!
The CS 777 class is ok!
The CS 777 is a challenging course.
The CS 777 class is a lot of fun but a lot of work.
The CS 777 course is quite enjoyable, but it demands a lot of effort.
"""
prompt = prompt_task + text

# Rough size of the prompt: whitespace-separated words.
print("Number of words in the prompt:",len(prompt.split()))
# Size of the prompt in tokens, counted by tiktoken for the model used below.
print("Number of tokens in the prompt:",num_tokens_from_string(prompt, encoding_model=model_name))

# Send the prompt as a single user message; temperature 0.0 requests the
# least random sampling.
response = openai.chat.completions.create(
    model=model_name,
    messages=[{"role": "user", "content": prompt}],
    temperature=0.0,
)
# Text of the first returned choice.
r = response.choices[0].message.content

print("Result from OpenAI:")

print(r)


Number of words in the prompt: 78
Number of tokens in the prompt: 107
Result from OpenAI:
[
    {"text":"The CS 777 class is awesome!","sentiment":"positive"},
    {"text":"The CS 777 class is terrible!","sentiment":"negative"},
    {"text":"The CS 777 class is ok!","sentiment":"neutral"},
    {"text":"The CS 777 is a challenging course.","sentiment":"mixed"},
    {"text":"The CS 777 class is a lot of fun but a lot of work.","sentiment":"mixed"},
    {"text":"The CS 777 course is quite enjoyable, but it demands a lot of effort.","sentiment":"mixed"}
]
