<a href="https://colab.research.google.com/github/minamh9/Prompt-Engineering-with-GPT/blob/main/GPT_Amazon_Review.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install python-dotenv
!pip install openai
!pip install tiktoken

Collecting python-dotenv
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.0
Collecting openai
  Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m985.2 kB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai
Successfully installed openai-0.28.0
Collecting tiktoken
  Downloading tiktoken-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tiktoken
Successfully installed tiktoken-0.4.0


In [None]:
import os
from pathlib import Path
import openai
import requests
import uuid
import pandas as pd
import tiktoken
import sys
import gzip
import time
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_absolute_error


In [None]:
from config import OPENAI_KEY

In [None]:
# Report whether the config module supplied a non-empty key. This is only
# informational: execution continues either way.
key_status = "OpenAI Key loaded successfully!" if OPENAI_KEY else "Failed to load OpenAI Key!"
print(key_status)


OpenAI Key loaded successfully!


In [None]:
# Store the key on the openai module. Presumably the later
# openai.ChatCompletion.create calls authenticate with it; confirm against the
# openai 0.28 documentation.
openai.api_key = OPENAI_KEY

In [None]:
def get_completion(prompt, model="gpt-3.5-turbo", temperature=0):
    """Send a single-turn user prompt to the chat API and return the reply text.

    Args:
        prompt: Text sent as the only message, with role "user".
        model: Name of the chat model to query.
        temperature: Degree of randomness of the model's output, forwarded
            to the API. The default of 0 matches the previously hard-coded
            value, so existing callers are unaffected.

    Returns:
        The "content" field of the first choice's message.
    """
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )
    # Only the first choice is read; no `n` is passed, so a single choice is expected.
    return response.choices[0].message["content"]

In [None]:
def count_tokens(text: str, encoding_name: str) -> int:
    """Return the number of tokens `text` encodes to under the named tiktoken encoding."""
    tokenizer = tiktoken.get_encoding(encoding_name)
    return len(tokenizer.encode(text))

In [None]:
# Read the first 100,000 line-delimited JSON records from the gzipped review file
file_path = '/content/drive/MyDrive/AMAZON_FASHION.json.gz'

with gzip.open(file_path, 'rb') as review_stream:
    df = pd.read_json(review_stream, lines=True, nrows=100000)

In [None]:
# Keep only the columns needed downstream: rating, verified flag, date, and review text
df_new = df.loc[:, ['overall', 'verified', 'reviewTime', 'reviewText']]

In [None]:
# Drop the unverified rows (only about 6% of the data), then renumber the index from 0
unverified_mask = df_new['verified'] == False
df_drop = df_new.loc[~unverified_mask]
df = df_drop.reset_index(drop=True)

In [None]:
# Discard rows where the review text or the rating is missing
has_text_and_rating = df[['reviewText', 'overall']].notna().all(axis=1)
df = df[has_text_and_rating]
print(df.shape)

(93601, 4)


In [None]:
# Shift ratings down by one so they are directly comparable to the 0-4 scores
# the prompt asks the model for.
# NOTE: this overwrites df['overall'] in place, so re-running the cell shifts
# the labels a second time.
df['overall'] = df['overall'].sub(1)
# Evaluation sample: the first 1000 rows, re-indexed from 0
df_test = df.head(1000).reset_index(drop=True)
print(df_test.shape)

(1000, 4)


In [None]:
# Collect the model's score for the first 100 test reviews, pausing 10 seconds
# between requests.
responses = []
first_batch = df_test['reviewText'].head(100)
for text in first_batch:
    prompt = f"Please evaluate the following review, providing a score from 0 to 4. \
    Note that 0 denotes a poor review while 4 signifies an excellent one.\
    Your response should simply be a single integer between 0 and 4. \
    If the review references any photographs or similar elements, please disregard them. \
    Here's the review: \"{text}\""
    responses.append(get_completion(prompt))
    time.sleep(10)

len(responses)

100

In [None]:
# Score every test review that does not have a response yet. Starting at
# len(responses) rather than a fixed slice means no rows are skipped between
# the earlier batch and this one, so `responses` ends up with one entry per row
# of df_test. If the run is interrupted, re-executing this cell resumes where
# it stopped instead of re-scoring rows.
start = len(responses)
for text in df_test['reviewText'].iloc[start:]:
    prompt = f"Please evaluate the following review, providing a score from 0 to 4. \
    Note that 0 denotes a poor review while 4 signifies an excellent one.\
    Your response should simply be a single integer between 0 and 4. \
    If the review references any photographs or similar elements, please disregard them. \
    Here's the review: \"{text}\""
    response = get_completion(prompt)
    responses.append(response)

    # Pause between requests, as the original loop did
    time.sleep(5)
len(responses)

1000

In [None]:
# Attach the raw model replies to the evaluation frame and save them.
# Check the lengths explicitly instead of relying on a hard-coded row bound, so
# an incomplete scoring run is reported with a clear message.
if len(responses) != len(df_test):
    raise ValueError(
        f"Expected {len(df_test)} responses but found {len(responses)}; "
        "score the remaining reviews before saving."
    )
df_test['GPT_predictions'] = responses
df_test.to_csv('./gpt_results.csv', index=False)

In [None]:
# Display the evaluation frame, including the GPT_predictions column
df_test

Unnamed: 0,overall,verified,reviewTime,reviewText,GPT_predictions
0,4,True,"10 20, 2014",Exactly what I needed.,4
1,1,True,"09 28, 2014","I agree with the other review, the opening is ...",2
2,1,True,"08 24, 2014",too tiny an opening,2
3,4,True,"07 19, 2014",Exactly what I wanted.,4
4,3,True,"05 31, 2014",These little plastic backs work great. No mor...,4
...,...,...,...,...,...
995,0,True,"09 14, 2014","Didn't order, must have hit by mistake, had to...",2
996,2,True,"08 9, 2014",I bought these for a Halloween costume and for...,2
997,2,True,"05 14, 2014","The product is just okay for me, it would have...",2
998,4,True,"04 21, 2014",I already had a pair of these that I have owne...,4


In [None]:
# Put the ground-truth rating, the model's reply, and the review text side by side
comparison_columns = {
    'True': df_test['overall'],
    'Predicted': df_test['GPT_predictions'],
    'Review': df_test['reviewText'],
}
result_df = pd.DataFrame(comparison_columns)
result_df

Unnamed: 0,True,Predicted,Review
0,4,4,Exactly what I needed.
1,1,2,"I agree with the other review, the opening is ..."
2,1,2,too tiny an opening
3,4,4,Exactly what I wanted.
4,3,4,These little plastic backs work great. No mor...
...,...,...,...
995,0,2,"Didn't order, must have hit by mistake, had to..."
996,2,2,I bought these for a Halloween costume and for...
997,2,2,"The product is just okay for me, it would have..."
998,4,4,I already had a pair of these that I have owne...


In [None]:
# Convert the model's text replies to integer labels. The reply is free text,
# so a reply is accepted only when it contains exactly one digit, that digit is
# 0-4, and it is not preceded by a minus sign (e.g. "4", " 4 ", "4.").
# Anything else is reported instead of being cast blindly or silently kept as
# an out-of-range label.
raw_predictions = result_df['Predicted'].astype(str)
parsed_predictions = raw_predictions.str.extract(r'^\D*(?<!-)([0-4])\D*$', expand=False)
unparsed = parsed_predictions.isna()
if unparsed.any():
    raise ValueError(
        "Could not read a 0-4 score from these responses: "
        f"{raw_predictions[unparsed].unique().tolist()}"
    )
result_df['Predicted'] = parsed_predictions.astype(int)

In [None]:
# Evaluate the model's labels against the true ratings. Precision, recall and
# F1 use the 'weighted' averaging mode.
y_true = result_df['True']
y_pred = result_df['Predicted']
weighted = {'average': 'weighted'}

accuracy = accuracy_score(y_true, y_pred)
print("Accuracy:", accuracy)

precision = precision_score(y_true, y_pred, **weighted)
print("Precision:", precision)

recall = recall_score(y_true, y_pred, **weighted)
print("Recall: ", recall)

f1 = f1_score(y_true, y_pred, **weighted)
print("F1_score:" , f1)

# Mean absolute error: average distance between predicted and true score
mae = mean_absolute_error(y_true, y_pred)
print("mae" , mae)


Accuracy: 0.531
Precision: 0.593706866530396
Recall:  0.531
F1_score: 0.5365194336864444
mae 0.557


In [None]:
# Spot-check a single row by position: print its review text, then its rating
idx = 2053
spot_review = df['reviewText'].iloc[idx]
spot_rating = df['overall'].iloc[idx]
print(spot_review)
print(spot_rating)

Velcro backs are no good. They don't stay on. Look old after one use.
0
