In this notebook, you will find all the code necessary to extract information from the two LLMs, ChatGPT-3.5 and ChatGPT-4. All the required functions are defined in this notebook, along with a brief explanation of each.

Finally, we provide an example of the steps to follow to obtain this information. For the sentiment dimension, we use Apple as an example, and for the preference dimension, we use Apple and Samsung as examples.


If you have any questions, please contact us at: jaumesanchez1@hotmail.com

# Setup

In [None]:
# Placeholder for the OpenAI API key: replace the string below with your own key.
# The value is assigned to openai.api_key in the following cell.
KEY_DEF = "Insert here you API KEY for ChatGPT"

# For more information (API pricing), see https://openai.com/api/pricing/

In [None]:
!pip install openai==0.28

import os
import openai
import numpy as np
openai.api_key = KEY_DEF

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: openai
Successfully installed openai-0.28.0


# FUNCTIONS TO EXTRACT INFORMATION FROM THE LLM RESPONSES


In [None]:
import copy
import random
import re
import time

## Yes/No

In [None]:
def extract_numbers_yn(output):
    """Convert a Yes/No answer sheet returned by the LLM into a 0/1 vector.

    The text is split on newlines and every line is checked, ignoring case,
    for the whole words "yes" and "no":

    - only "yes" present -> 1
    - only "no" present  -> 0
    - both or neither    -> the line is skipped (ambiguous, blank, or not an
      answer line)

    Args:
        output (str): Raw text received from the LLM.

    Returns:
        list: One 0/1 entry per line holding an unambiguous answer, in order
        of appearance.
    """
    regex_yes = re.compile(r'\byes\b')
    regex_no = re.compile(r'\bno\b')

    vector = []
    for line in output.strip().split('\n'):
        lowered = line.lower()
        contains_yes = regex_yes.search(lowered) is not None
        contains_no = regex_no.search(lowered) is not None

        # Both words or neither word: nothing can be decided for this line.
        if contains_yes == contains_no:
            continue
        vector.append(1 if contains_yes else 0)

    return vector

In [None]:
def yes_no(M, N, selected_model, text):
    """Run a Yes/No questionnaire M times and collect the answers as 0/1.

    For every repetition the chat-completion endpoint is queried until the
    reply, parsed with ``extract_numbers_yn``, contains exactly N answers.
    Replies of any other length are counted as incorrect attempts and the
    request is sent again.

    Args:
        M (int): Number of times the same test is repeated.
        N (int): Number of questions the test has.
        selected_model (str): Model name, e.g. "gpt-3.5-turbo" or "gpt-4o".
        text (list): Chat messages with the questions in the Yes/No format.

    Returns:
        numpy.ndarray: The accepted answer vectors, M rows of N entries
        containing only 1 and 0.

    Note:
        The retry loop has no upper bound: it keeps calling the API until a
        reply of the expected length is obtained.
    """
    matrix_yn = []
    incorrect_attempts = 0  # Replies rejected because their length was not N
    start_time = time.time()

    for _ in range(M):
        while True:  # Keep trying until a valid vector is obtained
            response = openai.ChatCompletion.create(
                model=selected_model,
                messages=text,
                temperature=1,
                top_p=0.95,
                max_tokens=1000,
            )
            texto = response.choices[0].message['content']
            vect = extract_numbers_yn(texto)

            if len(vect) == N:
                matrix_yn.append(vect)
                break  # Move on to the next repetition
            incorrect_attempts += 1

    end_time = time.time()
    print("Yes/No Finished with a number of", incorrect_attempts, "incorrect attempts and it took", (end_time - start_time)/60, "minutes")
    return np.array(matrix_yn)

## A/B

In [None]:
def extract_numbers_ab(output):
    """Convert an A/B answer sheet returned by the LLM into a 0/1 vector.

    The text is split on newlines; if that yields a single piece, it is split
    on commas instead. Each piece is then classified, ignoring case:

    - contains "a)" or ends with the standalone letter "a" -> 1
    - otherwise contains the standalone letter "b"          -> 0
    - anything else is skipped

    "a" is only accepted as "a)" or at the end of the piece, so a mid-sentence
    "a" is not counted as an answer.

    Args:
        output (str): Raw text received from the LLM.

    Returns:
        list: One 0/1 entry per recognised answer, in order of appearance.
    """
    pattern_a = re.compile(r'\ba\)|\ba$')
    pattern_b = re.compile(r'\bb\b')

    # Split the output by newline characters to get individual lines
    lines = output.strip().split('\n')

    # If splitting by lines didn't work, try splitting by commas
    if len(lines) == 1:
        lines = output.strip().split(',')

    vector = []
    for element in lines:
        # Strip first: trailing spaces or a '\r' from CRLF line endings would
        # otherwise stop the end-of-piece "a" from matching, silently dropping
        # "A" answers while "B" answers were still counted.
        candidate = element.strip().lower()
        if pattern_a.search(candidate):
            vector.append(1)
        elif pattern_b.search(candidate):
            vector.append(0)

    return vector

In [None]:
def a_b(M, N, selected_model, text):
    """Run an A/B questionnaire M times and collect the answers as 0/1.

    For every repetition the chat-completion endpoint is queried until the
    reply, parsed with ``extract_numbers_ab``, contains exactly N answers.
    Replies of any other length are counted as incorrect attempts and the
    request is sent again.

    Args:
        M (int): Number of times the same test is repeated.
        N (int): Number of questions the test has.
        selected_model (str): Model name, e.g. "gpt-3.5-turbo" or "gpt-4o".
        text (list): Chat messages with the questions in the A/B format.

    Returns:
        numpy.ndarray: The accepted answer vectors, M rows of N entries
        containing only 1 and 0.

    Note:
        The retry loop has no upper bound: it keeps calling the API until a
        reply of the expected length is obtained.
    """
    matrix_ab = []
    incorrect_attempts = 0  # Replies rejected because their length was not N
    start_time = time.time()

    for _ in range(M):
        while True:  # Keep trying until a valid vector is obtained
            response = openai.ChatCompletion.create(
                model=selected_model,
                messages=text,
                temperature=1,
                top_p=0.95,
                max_tokens=1000,
            )
            texto = response.choices[0].message['content']
            vect = extract_numbers_ab(texto)

            if len(vect) == N:
                matrix_ab.append(vect)
                break  # Move on to the next repetition
            incorrect_attempts += 1

    end_time = time.time()
    print("A/B finished with a number of", incorrect_attempts, "incorrect attempts and it took", (end_time - start_time)/60, "minutes")
    return np.array(matrix_ab)

## LIKERT (4 and 5)

In [None]:
def extract_numbers_likert(output, number):
    """Extract the Likert answers from the text returned by the LLM.

    The text is split on newlines. In every line the standalone single digits
    of the scale (1-4 when ``number`` is 4, otherwise 1-5) are collected:

    - two or more digits found -> the second one is taken (the first is
      assumed to be the question number, e.g. "2. 4")
    - exactly one digit found  -> that digit is taken
    - no digit found           -> the line is skipped

    Multi-digit numbers such as "12" are never matched, so a line like
    "12. 3" yields 3. Be aware that when the answer itself is not a digit of
    the scale but the question number is (e.g. "2. 7"), the question number
    is what gets returned.

    Args:
        output (str): Raw text received from the LLM.
        number (int): 4 for a 4-point Likert scale; any other value selects
            the 5-point scale.

    Returns:
        list: The extracted answers as integers, in order of appearance.
    """
    # The pattern only matches digits inside the scale, so every match is
    # already a valid answer and always converts to int.
    pattern = re.compile(r'\b[1-4]\b') if number == 4 else re.compile(r'\b[1-5]\b')

    vector = []
    for line in output.strip().split('\n'):
        matches = pattern.findall(line)
        if not matches:
            continue
        num = matches[1] if len(matches) >= 2 else matches[0]
        vector.append(int(num))

    return vector

In [None]:
def transform_vector(vector, num):
    """
    Collapse Likert answers into a binary (0/1) vector.

    For Likert 5 (any ``num`` other than 4):
    - If the number is 1 or 2, assigns 0.
    - If the number is 4 or 5, assigns 1.
    - If the number is 3, assigns 0 or 1 with 50% probability, drawn
      independently for every occurrence.

    For Likert 4 (``num == 4``):
    - If the number is 1 or 2, assigns 0.
    - If the number is 3 or 4, assigns 1.

    Values that are not covered by the selected mapping are copied unchanged.

    Args:
    vector (list): Output from extract_numbers_likert which is a vector containing numbers from 1 to 4 or 1 to 5.
    num (int): A number determining the transformation conditions 4 for Likert 4 and 5 for Likert 5.

    Returns:
    It returns a vector with only 1 and 0 (for in-range input).
    """
    likert4 = num == 4
    if likert4:
        conditions = {1: 0, 2: 0, 3: 1, 4: 1}
    else:
        conditions = {1: 0, 2: 0, 4: 1, 5: 1}

    new_vector = []
    for number in vector:
        if not likert4 and number == 3:
            # Draw per answer. Drawing once while building the mapping would
            # give every neutral answer of the vector the same value.
            new_vector.append(random.choice([0, 1]))
        else:
            new_vector.append(conditions.get(number, number))

    return new_vector

In [None]:
def likert(M, N, number, selected_model, text):
    """Run a Likert questionnaire M times and collect raw and binary answers.

    For every repetition the chat-completion endpoint is queried until the
    reply, parsed with ``extract_numbers_likert`` and binarised with
    ``transform_vector``, contains exactly N answers. Replies of any other
    length are counted as incorrect attempts and the request is sent again.

    Args:
        M (int): Number of times the same test is repeated.
        N (int): Number of questions the test has.
        number (int): 4 if it is Likert 4 and 5 if it is Likert 5.
        selected_model (str): Model name, e.g. "gpt-3.5-turbo" or "gpt-4o".
        text (list): Chat messages with the questions in the Likert 4 or
            Likert 5 format.

    Returns:
        tuple: ``(binary, raw)`` where ``binary`` is a numpy array with the
        0/1 version of the answers and ``raw`` is a numpy array with the
        answers as given by the LLM (1 to 4 or 1 to 5), one row per
        repetition in both cases.

    Note:
        The retry loop has no upper bound: it keeps calling the API until a
        reply of the expected length is obtained.
    """
    matrix_likert = []
    matrix_likert_numbers = []
    incorrect_attempts = 0  # Replies rejected because their length was not N
    start_time = time.time()

    for _ in range(M):
        while True:  # Keep trying until a valid vector is obtained
            response = openai.ChatCompletion.create(
                model=selected_model,
                messages=text,
                temperature=1,
                top_p=0.95,
                max_tokens=1000,
            )
            texto = response.choices[0].message['content']
            vector = extract_numbers_likert(texto, number)
            transformed_vector = transform_vector(vector, number)

            if len(transformed_vector) == N:
                matrix_likert_numbers.append(vector)
                matrix_likert.append(transformed_vector)
                break  # Move on to the next repetition
            incorrect_attempts += 1

    end_time = time.time()
    print("Likert finished with a number of", incorrect_attempts, "incorrect attempts and it took", (end_time - start_time)/60, "minutes")
    return (np.array(matrix_likert), np.array(matrix_likert_numbers))

## Comparison between two brands

In [None]:
def extract_brands(output, brand1, brand2):
    """Convert a brand-vs-brand answer sheet from the LLM into a 0/1 vector.

    The text is split on newlines; if that yields a single piece, it is split
    on commas instead. Each piece is then classified, ignoring case:

    - mentions ``brand1`` as a whole word          -> 1
    - otherwise mentions ``brand2`` as a whole word -> 0
    - mentions neither                              -> skipped

    A piece mentioning both brands counts for ``brand1``, because it is
    checked first.

    Args:
        output (str): Raw text received from the LLM.
        brand1 (str): Name of the brand that is coded as 1.
        brand2 (str): Name of the brand that is coded as 0.

    Returns:
        list: One 0/1 entry per piece that mentions a brand, in order of
        appearance.
    """
    def _whole_word(brand):
        # The name is escaped so regex metacharacters in it ("C++", "Dr.")
        # are taken literally. Lookarounds replace \b because \b can never
        # match next to a name that starts or ends with punctuation
        # ("Yahoo!"); for ordinary names both forms behave the same.
        return re.compile(rf'(?<!\w){re.escape(brand)}(?!\w)', re.IGNORECASE)

    brand1_pattern = _whole_word(brand1)
    brand2_pattern = _whole_word(brand2)

    # Split the output by newline characters to get individual lines
    lines = output.strip().split('\n')
    # If splitting by lines didn't work, try splitting by commas
    if len(lines) == 1:
        lines = output.strip().split(',')

    vector = []
    for line in lines:
        if brand1_pattern.search(line):
            vector.append(1)  # First brand
        elif brand2_pattern.search(line):
            vector.append(0)  # Second brand

    return vector

In [None]:
def two_brands(M, N, first_brand, second_brand, selected_model, text):
    """Run a brand-vs-brand questionnaire M times and collect the choices.

    For every repetition the chat-completion endpoint is queried until the
    reply, parsed with ``extract_brands``, contains exactly N answers.
    Replies of any other length are counted as incorrect attempts and the
    request is sent again.

    Args:
        M (int): Number of times the same test is repeated.
        N (int): Number of questions the test has.
        first_brand (str): Brand coded as 1 in the result.
        second_brand (str): Brand coded as 0 in the result.
        selected_model (str): Model name, e.g. "gpt-3.5-turbo" or "gpt-4o".
        text (list): Chat messages with the questions asking to choose
            between the two brands.

    Returns:
        numpy.ndarray: The accepted answer vectors, M rows of N entries
        containing only 1 and 0. 1 always stands for ``first_brand``,
        regardless of the order in which the brands appear in ``text``.

    Note:
        The retry loop has no upper bound: it keeps calling the API until a
        reply of the expected length is obtained.
    """
    matrix_2b = []
    incorrect_attempts = 0  # Replies rejected because their length was not N
    start_time = time.time()

    for _ in range(M):
        while True:  # Keep trying until a valid vector is obtained
            response = openai.ChatCompletion.create(
                model=selected_model,
                messages=text,
                temperature=1,
                top_p=0.95,
                max_tokens=1000,
            )
            texto = response.choices[0].message['content']
            vect = extract_brands(texto, first_brand, second_brand)

            if len(vect) == N:
                matrix_2b.append(vect)
                break  # Move on to the next repetition
            incorrect_attempts += 1

    end_time = time.time()
    print("Process finished with a number of", incorrect_attempts, "incorrect attempts and it took", (end_time - start_time)/60, "minutes")
    return np.array(matrix_2b)

## Swapping brands and replacing brands functions

In [None]:
def swap_specific_brands_in_messages(messages, brand1, brand2):
    """Return a copy of ``messages`` with the two brand names exchanged.

    Every occurrence of ``brand1`` (matched ignoring case) in a message's
    "content" becomes ``brand2`` and vice versa. This is used both to put a
    real brand in place of a default placeholder and to reverse the order in
    which two brands appear for the preference dimension.

    The input list and its dictionaries are left untouched, so the same
    template can be reused for several brands or models. Mutating it in place
    would make a second call on the same template swap the brands back.

    Args:
        messages (list): Chat messages (dicts with a "content" string).
        brand1 (str): First brand currently present in the text.
        brand2 (str): Second brand currently present in the text.

    Returns:
        list: New message dicts with the brands in the opposite order; all
        other keys are copied as they are.
    """
    # (name to look for, name to write). The longer name comes first in the
    # alternation so that a brand which is a prefix of the other one
    # (e.g. "B1" and "B12") cannot match inside it.
    pairs = sorted(
        [(brand1, brand2), (brand2, brand1)],
        key=lambda pair: len(pair[0]),
        reverse=True,
    )
    pattern = re.compile(
        "|".join(f"({re.escape(found)})" for found, _ in pairs),
        re.IGNORECASE,
    )

    def swap_specific_brands(match):
        # Exactly one of the two groups matched; lastindex tells which.
        return pairs[match.lastindex - 1][1]

    # A single pass exchanges both names at once, so no temporary
    # placeholder text is needed.
    return [
        {**message, "content": pattern.sub(swap_specific_brands, message["content"])}
        for message in messages
    ]

In [None]:
def replace_brands_in_messages(messages, brandA, brandB, brand1, brand2):
    """Return a copy of ``messages`` with two brand names replaced.

    Every occurrence of ``brandA`` in a message's "content" becomes
    ``brand1`` and every occurrence of ``brandB`` becomes ``brand2``
    (case-sensitive).

    The input list and its dictionaries are left untouched, so the default
    templates keep their placeholders and can be reused for other brands.

    Args:
        messages (list): Chat messages (dicts with a "content" string).
        brandA (str): The first brand (first to appear) currently in the text.
        brandB (str): The second brand (second to appear) currently in the text.
        brand1 (str): The brand we want in the first place (order).
        brand2 (str): The brand we want in the second place (order).

    Returns:
        list: New message dicts with the brands replaced; all other keys are
        copied as they are.
    """
    # (name to look for, name to write). The longer name comes first in the
    # alternation so that a brand which is a prefix of the other one cannot
    # match inside it.
    pairs = sorted(
        [(brandA, brand1), (brandB, brand2)],
        key=lambda pair: len(pair[0]),
        reverse=True,
    )
    pattern = re.compile("|".join(f"({re.escape(old)})" for old, _ in pairs))

    def replace_brands(match):
        # Exactly one of the two groups matched; lastindex tells which.
        return pairs[match.lastindex - 1][1]

    # Both names are replaced in one pass, so text that has just been
    # inserted is never replaced again (chained str.replace would turn
    # "B1 vs B2" -> ("B2", "B1") into "B1 vs B1").
    return [
        {**message, "content": pattern.sub(replace_brands, message["content"])}
        for message in messages
    ]

# CODE TO EXTRACT THE DATA

We will give an example of how to extract the data for a brand, given the different sets of questions defined in the other notebook (*Questions*). Make sure to first read the corresponding messages as well as the functions:

- *swap_specific_brands_in_messages(messages, brand1, brand2)*

- *replace_brands_in_messages(messages, brandA, brandB, brand1, brand2)*


## Sentiment dimension. Example with Apple.

Note that in the notebook *Questions* all the questions are defined with a default brand "$B1$", hence we have to change each test to the brand we want to study. We will see an example on how we would do it with the brand Apple.

### ChatGPT3.5

In [None]:
M = 30
N = 100

# Every test works on a deep copy of its template, so the shared templates
# (messages_*) keep their 'B1' placeholder and this cell can be re-run for
# another brand or model.

#Yes/No
messages = swap_specific_brands_in_messages(copy.deepcopy(messages_yn), 'B1', 'Apple')
mat1 = yes_no(M,N,'gpt-3.5-turbo',messages)
np.savetxt('mat_yesno_apple_chatgpt3.csv', mat1, delimiter=',')

#A/B
messages = swap_specific_brands_in_messages(copy.deepcopy(messages_ab), 'B1', 'Apple')
mat2 = a_b(M,N,'gpt-3.5-turbo',messages)
np.savetxt('mat_ab_apple_chatgpt3.csv', mat2, delimiter=',')

#Likert 4
messages = swap_specific_brands_in_messages(copy.deepcopy(messages_likert4), 'B1', 'Apple')
mat3,mat4 = likert(M,N,4,'gpt-3.5-turbo',messages)
np.savetxt('mat_likert4_apple_chatgpt3.csv', mat3, delimiter=',')
np.savetxt('mat_likert4_numbers_apple_chatgpt3.csv', mat4, delimiter=',')

#Likert5
# File names follow the Likert 4 pattern (brand = apple, raw answers = "numbers").
messages = swap_specific_brands_in_messages(copy.deepcopy(messages_likert5), 'B1', 'Apple')
mat5,mat6 = likert(M,N,5,'gpt-3.5-turbo',messages)
np.savetxt('mat_likert5_apple_chatgpt3.csv', mat5, delimiter=',')
np.savetxt('mat_likert5_numbers_apple_chatgpt3.csv', mat6, delimiter=',')

### ChatGPT4

In [None]:
M = 30
N = 100

# Every test works on a deep copy of its template, so the shared templates
# (messages_*) keep their 'B1' placeholder and this cell can be re-run for
# another brand or model.

#Yes/No
messages = swap_specific_brands_in_messages(copy.deepcopy(messages_yn), 'B1', 'Apple')
mat1 = yes_no(M,N,'gpt-4o',messages)
np.savetxt('mat_yesno_apple_chatgpt4.csv', mat1, delimiter=',')

#A/B
messages = swap_specific_brands_in_messages(copy.deepcopy(messages_ab), 'B1', 'Apple')
mat2 = a_b(M,N,'gpt-4o',messages)
np.savetxt('mat_ab_apple_chatgpt4.csv', mat2, delimiter=',')

#Likert 4
messages = swap_specific_brands_in_messages(copy.deepcopy(messages_likert4), 'B1', 'Apple')
mat3,mat4 = likert(M,N,4,'gpt-4o',messages)
np.savetxt('mat_likert4_apple_chatgpt4.csv', mat3, delimiter=',')
np.savetxt('mat_likert4_numbers_apple_chatgpt4.csv', mat4, delimiter=',')

#Likert5
# File names follow the Likert 4 pattern (brand = apple, raw answers = "numbers").
messages = swap_specific_brands_in_messages(copy.deepcopy(messages_likert5), 'B1', 'Apple')
mat5,mat6 = likert(M,N,5,'gpt-4o',messages)
np.savetxt('mat_likert5_apple_chatgpt4.csv', mat5, delimiter=',')
np.savetxt('mat_likert5_numbers_apple_chatgpt4.csv', mat6, delimiter=',')

Thus, if we want to repeat all these experiments with another brand, make sure to first re-run the default messages (*messages_yn, messages_ab, messages_likert4, messages_likert5*) and then simply change Apple to the brand you want to study, for instance, Samsung.

## Preference Dimension. Example with Apple and Samsung

Note that in the notebook *Questions* all the questions are defined with the default brands "$B1$" and "$B2$", hence we have to change each test to the brands we want to study.

As explained in the report, we will ask half of the questions in one order (A vs B) and the other half in the opposite order (B vs A). Hence, it is important to note that for all the functions, except for the first one (*two_brands*), we will need to invert the results of one of the halves, because we want to store the preference for one brand (A) over the other (B). This inversion corresponds to the line *np.logical_not(mat1).astype(int)* (for the Likert formats it is applied later, in the results notebook).

We will see an example of how we would do it with the comparison Apple vs Samsung, in all the possible formats, that is: $B_1$ vs $B_2$, Yes/No, A/B, Likert 4 and Likert 5.

### $B_1$ vs $B_2$

In [None]:
M = 15 # we want to run this half the times first (with one order) and then the
      #other half changing the order in which the brands appear
N = 100

# Deep copies keep the default template (messages_2b) and the Apple-vs-Samsung
# version (messages) unchanged, so the cell can be re-run for other brands or models.

#Apple vs Samsung
messages = replace_brands_in_messages(copy.deepcopy(messages_2b), 'B1', 'B2', 'Apple', 'Samsung')
mat2b = two_brands(M,N, "Apple", "Samsung",'gpt-3.5-turbo',messages) #we will assign the 1's to Apple (in both comparisons)
np.savetxt('mat_Apple_vs_Samsung_chatgpt3.csv', mat2b, delimiter=',')

#Samsung vs Apple
mess_new = swap_specific_brands_in_messages(copy.deepcopy(messages), 'Apple', 'Samsung')
mat2b = two_brands(M,N, 'Apple', 'Samsung','gpt-3.5-turbo',mess_new) #we assign 1 to Apple again, even though Samsung now appears first. Thus we are calculating the preference of Apple over Samsung.
np.savetxt('mat_Samsung_vs_Apple_chatgpt3.csv', mat2b, delimiter=',')

### Yes/No

In [None]:
M = 15
N = 100

# Deep copies keep the default template (messages_yn_2) and the Apple-vs-Samsung
# version (messages) unchanged, so the cell can be re-run for other brands or models.

#Apple vs Samsung
messages = replace_brands_in_messages(copy.deepcopy(messages_yn_2), 'B1', 'B2', 'Apple', 'Samsung')
mat2b = yes_no(M,N,'gpt-3.5-turbo',messages)
np.savetxt('mat_yesno_AS_chatgpt3.csv', mat2b, delimiter=',')

#Samsung vs Apple
mess_new = swap_specific_brands_in_messages(copy.deepcopy(messages), 'Apple', 'Samsung')
mat1 = yes_no(M,N, 'gpt-3.5-turbo', mess_new)
mat2 = np.logical_not(mat1).astype(int) # With the brand order reversed, the 1's and 0's are inverted so that both halves capture the same preference.
np.savetxt('mat_yesno_SA_chatgpt3.csv', mat2, delimiter=',') #

### A/B

In [None]:
M = 15
N = 100

# Deep copies keep the default template (messages_ab_2) and the Apple-vs-Samsung
# version (messages) unchanged, so the cell can be re-run for other brands or models.

#Apple vs Samsung
messages = replace_brands_in_messages(copy.deepcopy(messages_ab_2), 'B1', 'B2', 'Apple', 'Samsung')
mat2b = a_b(M,N,'gpt-3.5-turbo',messages)
np.savetxt('mat_ab_AS_chatgpt3.csv', mat2b, delimiter=',')

#Samsung vs Apple
mess_new = swap_specific_brands_in_messages(copy.deepcopy(messages), 'Apple', 'Samsung')
mat1 = a_b(M,N, 'gpt-3.5-turbo', mess_new)
mat2 = np.logical_not(mat1).astype(int) # With the brand order reversed, the 1's and 0's are inverted so that both halves capture the same preference.
np.savetxt('mat_ab_SA_chatgpt3.csv', mat2, delimiter=',') #

### Likert 4

In [None]:
M = 15
N = 100

##### We won't switch the numbers from 0 to 1 and 1 to 0 in this case; we will do it later in the results notebook.
##### It could be done here, but the author preferred to do it later.

# Deep copies keep the default template (messages_likert4_2) and the Apple-vs-Samsung
# version (messages) unchanged, so the cell can be re-run for other brands or models.

#Apple vs Samsung
messages = replace_brands_in_messages(copy.deepcopy(messages_likert4_2), 'B1', 'B2', 'Apple', 'Samsung')
mat1,mat2 = likert(M,N,4,'gpt-3.5-turbo',messages)
np.savetxt('mat_likert4_as_chatgpt3.csv', mat1, delimiter=',')
np.savetxt('mat_likert4_as_numbers_chatgpt3.csv', mat2, delimiter=',')

#Samsung vs Apple
mess_new = swap_specific_brands_in_messages(copy.deepcopy(messages), 'Apple', 'Samsung')
mat1,mat2 = likert(M,N,4,'gpt-3.5-turbo',mess_new)
np.savetxt('mat_likert4_sa_chatgpt3.csv', mat1, delimiter=',')
np.savetxt('mat_likert4_sa_numbers_chatgpt3.csv', mat2, delimiter=',')

### Likert 5

In [None]:
M = 15
N = 100

##### We won't switch the numbers from 0 to 1 and 1 to 0 in this case; we will do it later in the results notebook.
##### It could be done here, but the author preferred to do it later.

# Deep copies keep the default template (messages_likert5_2) and the Apple-vs-Samsung
# version (messages) unchanged, so the cell can be re-run for other brands or models.

#Apple vs Samsung
messages = replace_brands_in_messages(copy.deepcopy(messages_likert5_2), 'B1', 'B2', 'Apple', 'Samsung')
mat1,mat2 = likert(M,N,5,'gpt-3.5-turbo',messages)
np.savetxt('mat_likert5_as_chatgpt3.csv', mat1, delimiter=',')
np.savetxt('mat_likert5_as_numbers_chatgpt3.csv', mat2, delimiter=',')

#Samsung vs Apple
mess_new = swap_specific_brands_in_messages(copy.deepcopy(messages), 'Apple', 'Samsung')
mat1,mat2 = likert(M,N,5,'gpt-3.5-turbo',mess_new)
np.savetxt('mat_likert5_sa_chatgpt3.csv', mat1, delimiter=',')
np.savetxt('mat_likert5_sa_numbers_chatgpt3.csv', mat2, delimiter=',')

In order to avoid repetition, we will not write the version for ChatGPT-4 here, but note that the only change that needs to be made is, when calling every function, replacing 'gpt-3.5-turbo' with 'gpt-4o', as we did before (Sentiment Dimension).