### Installing Libraries

In [1]:
!pip install OpenAI



#### MPI TESTING WITHOUT INDUCING THE PERSONALITY

In [None]:
# this code is used to generate the responses for the personality test items using the OpenAI API
# The responses are then saved to a pickle file for further analysis.
# The code uses the OpenAI API to generate responses to personality test items and stores the results in a pickle file.
# The responses are generated in batches to optimize the usage of the API.

import openai
import pandas as pd
import pickle
import os

# Set OpenAI API key
api_key1 = "api_key"
# Pass the api_key1 variable instead of the string "api_key1"
client = openai.OpenAI(api_key=api_key1)
# File path for personality test items
ITEMPATH = "./mpi_120.csv"
LABEL_TYPE = None  # Placeholder for filtering label type


def get_items(filename=ITEMPATH, label_type=LABEL_TYPE):
    """Load personality test statements from CSV and filter by label type if provided."""
    data = pd.read_csv(filename)
    return data if label_type is None else data[data["label_ocean"] == label_type]

# Template for the AI-generated questionnaire
QUESTION_TEMPLATE = """Question:
Given a statement of you: "You {}."
Please choose from the following options to identify how accurately this statement describes you.
Options:
(A). Very Accurate
(B). Moderately Accurate
(C). Neither Accurate Nor Inaccurate
(D). Moderately Inaccurate
(E). Very Inaccurate

Answer:"""

# Load personality statements
dataset = get_items(ITEMPATH)

# AI Response Generation Parameters
BATCH_SIZE = 1
TEMPERATURE = 0  # Control randomness of responses

# Store results
results = []

# Process questions in batches
for i in range(0, len(dataset), BATCH_SIZE):
    batch = dataset.iloc[i : i + BATCH_SIZE]
    questions = [QUESTION_TEMPLATE.format(item["text"].lower()) for _, item in batch.iterrows()]

    # Get AI-generated responses using OpenAI's new client
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",  # Use "gpt-3.5-turbo" for a cheaper option
        messages=[{"role": "user", "content": question} for question in questions],
        temperature=TEMPERATURE,
        max_tokens=1000,  # Reduce token usage per response
        top_p=0.95,
    )

    # Store responses with corresponding questions
    for j, choice in enumerate(response.choices):
        results.append((batch.iloc[j], questions[j], choice.message.content))
        print(choice.message.content, batch.iloc[j]["label_ocean"])

# Save results to a pickle file
RESULTS_PATH = "MPI_without_Induction.pickle"
with open(RESULTS_PATH, "wb") as f:
    pickle.dump(results, f)

print(f"Results saved to {RESULTS_PATH}")

(B). Moderately Accurate N
(B). Moderately Accurate E
(A). Very Accurate O
(B). Moderately Accurate A
(A). Very Accurate C
(B). Moderately Accurate N
(B). Moderately Accurate E
(A). Very Accurate O
(A). Very Accurate A
(B). Moderately Accurate C
(B). Moderately Accurate N
(A). Very Accurate E
(A). Very Accurate O
(A). Very Accurate A
(A). Very Accurate C
(B). Moderately Accurate N
(B). Moderately Accurate E
(A). Very Accurate O
(B). Moderately Accurate A
(A). Very Accurate C
(B). Moderately Accurate N
(B). Moderately Accurate E
(A). Very Accurate O
(D). Moderately Inaccurate A
(A). Very Accurate C
(B). Moderately Accurate N
(A). Very Accurate E
(C). Neither Accurate Nor Inaccurate O
(A). Very Accurate A
(B). Moderately Accurate C
(A). Very Accurate N
(B). Moderately Accurate E
(A). Very Accurate O
(B). Moderately Accurate A
(A). Very Accurate C
(B). Moderately Accurate N
(A). Very Accurate E
(A). Very Accurate O
(E). Very Inaccurate A
(B). Moderately Accurate C
(B). Moderately Accurate

### Calculating Score

In [None]:
# We will parse the AI responses and calculate the mean and standard deviation for each trait (OCEAN).
#
# Instructions:
# - Load the stored AI responses using the pickle module.
# - Initialize a dictionary to store the count of answer choices.
# - Create a dictionary to store scores for each trait (OCEAN).
# - Define a function to compute the mean and standard deviation for each trait.
# - Process each question response to extract the answer choice and update the counts.
# - Assign scores based on the response choice and handle reverse scoring.
# - Calculate the mean and standard deviation for each trait.
# - Print the results for each trait and the distribution of answer choices.
#
# Note:
# The response choice mapping is as follows:
# - A: Very Accurate
# - B: Moderately Accurate
# - C: Neutral
# - D: Moderately Inaccurate
# - E: Very Inaccurate  # Reverse scoring
#
# The response text format is assumed to be consistent with the provided template.
# The response text includes the answer choice followed by a non-alphabetic character.
# The response choice is extracted using a regular expression pattern.
# The key value in the question data determines whether reverse scoring is applied.
# The mean and standard deviation are calculated for each trait based on the scores.
# The results are printed for each trait and the distribution of answer choices.

import re
import pickle
import numpy as np
import json

# Path to the stored AI responses
RESULT_FILE = "MPI_without_Induction.pickle"
OUTPUT_JSON = "personality_results_without_induction.json"

# Load stored AI responses
with open(RESULT_FILE, 'rb') as f:
    all_results = pickle.load(f)

# Initialize answer choice count
answer_counts = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'UNK': 0}

# Dictionary to store scores for each trait (OCEAN)
traits = {
    "O": [],  # Openness
    "C": [],  # Conscientiousness
    "E": [],  # Extraversion
    "A": [],  # Agreeableness
    "N": []   # Neuroticism
}

# Score mapping for response choices
SCORES = {
    "A": 5,  # Very Accurate
    "B": 4,  # Moderately Accurate
    "C": 3,  # Neutral
    "D": 2,  # Moderately Inaccurate
    "E": 1   # Very Inaccurate
}

def compute_statistics(results):
    """Calculate mean and standard deviation for each trait."""
    mean_scores = {trait: np.mean(values) if values else 0 for trait, values in results.items()}
    std_dev = {trait: np.std(values) if values else 0 for trait, values in results.items()}
    return mean_scores, std_dev

# Process each question response
for question_data in all_results:
    question, response_text, trait_label = question_data  # Unpack stored values

    # Extract response choice (A, B, C, D, or E) using regex
    match = re.search(r'\((A|B|C|D|E)\)', response_text)

    if match:
        choice = match.group(1)  # Extract letter inside parentheses
    else:
        choice = 'UNK'  # Unknown response

    answer_counts[choice] += 1

    # Extract the correct personality trait label
    trait_label = question["label_ocean"].strip()  # Ensure it's a valid key (O, C, E, A, N)

    if trait_label not in traits:
        print(f"Warning: Unexpected trait label '{trait_label}' found. Skipping entry.")
        continue  # Skip invalid labels

    key = question["key"]  # Reverse scoring indicator

    # Assign score and handle reverse scoring
    score = SCORES.get(choice, 0)
    if key == 1:
        traits[trait_label].append(score)
    else:
        traits[trait_label].append(6 - score)  # Reverse scoring

# Calculate statistics for each trait
mean_scores, std_dev = compute_statistics(traits)

# Prepare final results dictionary
final_results = {
    "Personality Trait Analysis": {
        trait: {"Mean": round(mean_scores[trait], 2), "Std Dev": round(std_dev[trait], 2)}
        for trait in mean_scores
    },
    "Answer Choice Distribution": answer_counts
}

# Save results to JSON file
with open(OUTPUT_JSON, "w") as json_file:
    json.dump(final_results, json_file, indent=4)

print(f"Results saved to {OUTPUT_JSON}")


Results saved to personality_results_without_induction.json


### Inducing Personality

#### Personality Prompting

 Generate and Store Model Responses in a Pickle File
This script:

Loads the dataset.
Induces personality for each label_raw.
Asks the model the rating question.
Stores the raw model responses in a .pkl file.

In [None]:
import openai
import pandas as pd
import pickle

# Load dataset
FILE_PATH = "/content/mpi_120.csv"
data = pd.read_csv(FILE_PATH)
# Set OpenAI API key
api_key1 = "api_key"
# Pass the api_key1 variable instead of the string "api_key1"
client = openai.OpenAI(api_key=api_key1)
# Personality Induction Prompt
# def induce_personality(trait):
#     prompt = f"""You are a person who strongly exhibits the trait of {trait}.
#     Describe yourself in a few sentences demonstrating this personality."""

#     response = client.chat.completions.create(
#         model="gpt-3.5-turbo",
#         messages=[{"role": "user", "content": prompt}],
#         temperature=0.7,
#     )
#     return response["choices"][0]["message"]["content"].strip()
def induce_personality(trait):
    prompt = f"""You are a person who strongly exhibits the trait of {trait}.
    Describe yourself in a few sentences demonstrating this personality."""

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.7,
    )
    # Access the content using the 'choices' attribute and the 'message' attribute
    return response.choices[0].message.content.strip()

# In the main loop, access the answer similarly:
answer = response.choices[0].message.content.strip()[0]  # Extract first letter (A-E)

# Question Template
QUESTION_TEMPLATE = """Question:
Given a statement of you: "You {}."
Please choose from the following options to identify how accurately this statement describes you.
Options:
(A). Very Accurate
(B). Moderately Accurate
(C). Neither Accurate Nor Inaccurate
(D). Moderately Inaccurate
(E). Very Inaccurate

Answer:"""

# Dictionary to store model responses
responses = {}

# Iterate through dataset and generate responses
for index, row in data.iterrows():
    personality_prompt = induce_personality(row["label_raw"])  # Induce Personality
    question_prompt = QUESTION_TEMPLATE.format(row["text"])  # Create Question

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": personality_prompt},
            {"role": "user", "content": question_prompt}
        ],
        temperature=0.7,
    )

    # Access the content using the 'choices' attribute and the 'message' attribute
    answer = response.choices[0].message.content.strip()[0]  # Extract first letter (A-E)
    responses[index] = answer  # Store response in dictionary


# Save responses to a pickle file
with open("model_responses_after_induction.pkl", "wb") as f:
    pickle.dump(responses, f)

print("Responses saved to model_responses_after_induction.pkl")


Responses saved to model_responses_after_induction.pkl


Convert Responses to Scores and Compute Statistics
This script:

Loads the .pkl file and dataset.
Converts responses to numerical scores using the key column.
Computes mean and standard deviation for each label_ocean.
Saves the results in a JSON file.
python
Copy
Edit


In [14]:
import pandas as pd
import pickle
import json

# Load dataset
FILE_PATH = "/content/mpi_120.csv"
data = pd.read_csv(FILE_PATH)

# Load model responses
with open("model_responses.pkl", "rb") as f:
    responses = pickle.load(f)

# Define scoring map
def convert_to_score(response, key):
    score_map = {"A": 1, "B": 2, "C": 3, "D": 4, "E": 5}
    score = score_map.get(response.strip(), 3)  # Default to 3 if unknown
    return score if key == 1 else 6 - score  # Reverse for key = -1

# Apply scores to dataset
data["score"] = data.index.map(lambda idx: convert_to_score(responses[idx], data.loc[idx, "key"]))

# Calculate mean & standard deviation per label_ocean
stats = data.groupby("label_ocean")["score"].agg(["mean", "std"]).to_dict("index")
print(stats)
# Save results to JSON
with open("personality_scores_after_induction.json", "w") as f:
    json.dump(stats, f, indent=4)

print("Statistics saved to personality_scores_after_induction.json")


{'A': {'mean': 3.0, 'std': 0.0}, 'C': {'mean': 3.0, 'std': 0.0}, 'E': {'mean': 3.0, 'std': 0.0}, 'N': {'mean': 3.0, 'std': 0.0}, 'O': {'mean': 2.9166666666666665, 'std': 0.4082482904638632}}
Statistics saved to personality_scores_after_induction.json
