In [None]:
# Install necessary packages
!pip install -q openai anthropic mistralai

In [None]:
import os
from getpass import getpass

# Provider credentials are taken from the environment. Keys that are already
# set are left untouched; missing ones are requested interactively so that no
# secret has to be typed into (and saved with) the notebook itself.
# Press Enter to leave the key of a provider you do not use blank.
for _key_name in ('OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'MISTRAL_API_KEY'):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass(f'{_key_name}: ')

In [None]:
import os
import json
import pandas as pd
from datetime import datetime
from models.language_models import get_model_instance
from questionnaires import BFI44  # Or other questionnaires
from utils import save_results
from text_generation import generate_texts
from text_analysis import analyze_texts
from plotting import plot_confusion_matrices
from questionnaire_experiment import run_questionnaire_experiment
from questionnaire_visualization import visualize_questionnaire_answers
from text_similarity_analysis import analyze_text_similarity  # New import
import logging

In [None]:
# Configure logging for the notebook: records of level INFO and above go to
# the file 'experiment.log', each line carrying its timestamp and level name.
logging.basicConfig(
    format='%(asctime)s %(levelname)s:%(message)s',
    level=logging.INFO,
    filename='experiment.log',
)

# Logger used by the cells below to report saved files.
logger = logging.getLogger(__name__)

In [None]:
# Load personality trait definitions
def load_traits_definitions(path='traits_definitions.json'):
    """
    Loads personality trait definitions from a JSON file.

    Args:
        path (str): Location of the JSON file. Defaults to
            'traits_definitions.json', resolved against the current
            working directory.

    Returns:
        The parsed JSON content (expected to be a dict of personality
        trait definitions).

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Read as UTF-8 explicitly: without an encoding argument open() falls back
    # to the platform's locale encoding, which can garble non-ASCII text.
    with open(path, 'r', encoding='utf-8') as file:
        return json.load(file)

# Loaded once here; the questionnaire, text-generation, text-analysis and
# plotting cells below all pass this object on to their helpers.
traits_definitions = load_traits_definitions()

# Questionnaire Completion

In [None]:
# Questionnaire experiment configuration (edit to add models or change parameters).
#   models               - model name -> parameters; add more models here or change the temperature
#   experiment_count     - experiment count handed to run_questionnaire_experiment
#   questionnaire_module - questionnaire to use; can be replaced with another questionnaire
questionnaire_settings = dict(
    models={
        'gpt-3.5-turbo': dict(temperature=0.7),
    },
    experiment_count=1,
    questionnaire_module=BFI44,
)

In [None]:
# Execute the experiment for every configured model.
# The result of each run is kept so that the visualization cell, which checks
# for a global named `df_answers_full`, can work on in-memory data.
# NOTE(review): assumes run_questionnaire_experiment returns the answers as a
# pandas DataFrame - confirm. Any other return value is ignored, in which case
# `df_answers_full` is simply not defined, exactly as before.
questionnaire_frames = []
for model_name, model_params in questionnaire_settings['models'].items():
    model = get_model_instance(model_name)
    # Same fallback as the text-generation cell: 0.7 when no temperature is given.
    temperature = model_params.get('temperature', 0.7)
    experiment_result = run_questionnaire_experiment(
        model,
        temperature,
        questionnaire_settings['questionnaire_module'],
        questionnaire_settings['experiment_count'],
        traits_definitions
    )
    if isinstance(experiment_result, pd.DataFrame):
        questionnaire_frames.append(experiment_result)

if questionnaire_frames:
    # One combined frame covering all models.
    df_answers_full = pd.concat(questionnaire_frames, ignore_index=True)

# Visualization of Questionnaire Answers

In [1]:
# Visualization configuration (edit as needed).
#   input_files        - list of paths to CSV files with questionnaire answers;
#                        if empty, the DataFrame from the previous step is used
#   save_path          - folder to save the plots
#   trait_name_mapping - mapping of trait names
#   model_name_mapping - mapping of model names; add other models if necessary
visualization_settings = dict(
    input_files=[],
    save_path='results/questionnaire_visualization',
    trait_name_mapping=dict(
        Agreeableness='Agreeableness',
        Conscientiousness='Conscientiousness',
        Extraversion='Extraversion',
        Neuroticism='Neuroticism',
        Openness='Openness to experience',
    ),
    model_name_mapping={
        'gpt-3.5-turbo': 'GPT-3.5 Turbo',
        'gpt-4': 'GPT-4',
    },
)

In [None]:
# Choose the data source for the plots:
#   1. the in-memory DataFrame `df_answers_full`, when it exists;
#   2. otherwise the CSV files already listed in
#      visualization_settings['input_files'];
#   3. otherwise the placeholder paths below - edit them to point at real files.
# Paths configured in the settings cell take precedence over the placeholders,
# so they are never overwritten here.
if 'df_answers_full' in globals():
    visualize_questionnaire_answers(df_answers_full, visualization_settings)
else:
    if not visualization_settings['input_files']:
        # Nothing configured: fall back to manually specified file paths
        visualization_settings['input_files'] = [
            'path/to/answers_gpt-3.5-turbo_experiment_0.csv',
            'path/to/answers_gpt-4_experiment_0.csv'
        ]
    visualize_questionnaire_answers(None, visualization_settings)

# Text Generation

In [None]:
# Text-generation configuration (edit to add models or change temperatures).
#   models           - model name -> {'temperatures': [...]}; add more models
#                      and temperatures here if needed
#   experiment_count - number of generation runs per model/temperature pair
#   questions_file   - path to the CSV file with questions
text_generation_settings = dict(
    models={
        'gpt-3.5-turbo': dict(temperatures=[0.7]),
    },
    experiment_count=1,
    questions_file='questions.csv',
)

In [None]:
# Load questions from a CSV file.
# The file must contain a 'question' column. Rows whose question is missing
# (NaN) or blank are skipped so that no empty prompt is handed to the model.
# Best effort: on any failure the error is reported and `questions` is left
# empty instead of stopping the notebook.
try:
    questions_df = pd.read_csv(text_generation_settings['questions_file'])
    if 'question' not in questions_df.columns:
        raise ValueError("CSV file must contain a 'question' column.")
    questions = [
        question
        for question in questions_df['question'].dropna().tolist()
        if str(question).strip()  # drop whitespace-only entries
    ]
except Exception as e:
    print(f"Error loading questions from '{text_generation_settings['questions_file']}': {e}")
    questions = []

In [None]:
# Text generation: start with an empty list that collects the path of every
# CSV file written by the generation loop.
generated_texts_files = list()

In [None]:
# Generate texts for every model / temperature / experiment combination and
# save each result as its own CSV file.
# The timestamp is taken once per run of this cell so that all files of one
# run end up in the same folder; a per-file timestamp scatters them over
# several folders as soon as generation takes longer than a second.
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
texts_folder = f"results/texts_{timestamp}"

for model_name, model_params in text_generation_settings['models'].items():
    model = get_model_instance(model_name)
    temperatures = model_params.get('temperatures', [0.7])  # Default temperature is 0.7 if not specified
    for temperature in temperatures:
        for experiment_num in range(text_generation_settings['experiment_count']):
            df_texts = generate_texts(
                model,
                traits_definitions,
                {'temperature': temperature},
                questions,
                experiment_num
            )
            # Save generated texts; the folder is created lazily so that a run
            # without any configured model leaves no empty folder behind.
            os.makedirs(texts_folder, exist_ok=True)
            # The file name encodes model, temperature and experiment number.
            texts_filename = f"{texts_folder}/texts_{model_name}_temp_{temperature}_experiment_{experiment_num}.csv"
            df_texts.to_csv(texts_filename, index=False)
            generated_texts_files.append(texts_filename)
            logger.info(f"Texts saved: {texts_filename}")

In [None]:
# Show where the generated texts were written: a heading followed by one
# path per line.
print("\n".join(["Generated texts files:", *map(str, generated_texts_files)]))

# Text Annotation

In [None]:
# List of generated text files (User can specify their own files)
#generated_texts_files = ['results/texts_20241012_173727/texts_gpt-3.5-turbo_temp_0.7_experiment_0.csv']

In [None]:
# Classifier configuration (change the model or temperature as needed).
#   model_name  - model for analysis
#   temperature - temperature passed on to the analysis
#   input_files - CSV files to analyse; uses the files from the previous step,
#                 or specify your own: ['path/to/file1.csv', 'path/to/file2.csv']
text_analysis_settings = dict(
    model_name='gpt-4o',
    temperature=0,
    input_files=generated_texts_files,
)

In [None]:
# Execute text analysis: every input CSV is validated, analysed with the
# configured model and written to its own timestamped results folder.
analyzed_files = []

model = get_model_instance(text_analysis_settings['model_name'])
temperature = text_analysis_settings['temperature']

# Columns every input CSV must provide (the same for all files).
required_columns = {'experiment_num', 'model', 'temperature', 'trait', 'trait_score', 'question', 'answer'}

for input_file in text_analysis_settings['input_files']:
    try:
        df_texts = pd.read_csv(input_file)
        missing_cols = required_columns - set(df_texts.columns)
        if missing_cols:
            # Sorted so that the message is identical on every run
            # (iteration order of a set of strings is arbitrary).
            raise ValueError(f"CSV file is missing columns: {', '.join(sorted(missing_cols))}")
    except Exception as e:
        # Best effort: report the unusable file (on screen and in the log,
        # next to the "saved" records) and continue with the next one.
        print(f"Error loading texts from '{input_file}': {e}")
        logger.error(f"Error loading texts from '{input_file}': {e}")
        continue

    df_analysis = analyze_texts(
        model,
        df_texts,
        traits_definitions,
        {'temperature': temperature}
    )
    # Save analysis results; the folder name carries a second-resolution
    # timestamp taken after the analysis of this file has finished.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    analysis_folder = f"results/analysis_{timestamp}"
    os.makedirs(analysis_folder, exist_ok=True)
    analysis_filename = f"{analysis_folder}/analysis_{os.path.basename(input_file)}"
    df_analysis.to_csv(analysis_filename, index=False)
    analyzed_files.append(analysis_filename)
    logger.info(f"Analysis results saved: {analysis_filename}")

In [None]:
# Show where the analysis results were written: a heading followed by one
# path per line.
print("\n".join(["Analyzed files:", *map(str, analyzed_files)]))

# Text Similarity Analysis and Visualization

In [None]:
# Text-similarity configuration (specify your own files or adjust the save path).
#   input_files        - files from the text generation step, or your own:
#                        ['path/to/texts_file1.csv', 'path/to/texts_file2.csv']
#   save_path          - folder for the results of the similarity analysis
#   model_name_mapping - mapping of model names for display; add other models
#                        if necessary
text_similarity_settings = dict(
    input_files=generated_texts_files,
    save_path='results/text_similarity_analysis',
    model_name_mapping={
        'gpt-3.5-turbo': 'GPT-3.5 Turbo',
        'gpt-4': 'GPT-4',
    },
)

In [None]:
# Execute text similarity analysis.
# The files listed in text_similarity_settings['input_files'] are the primary
# data source. The global `df_texts` is a leftover loop variable of the
# generation/analysis cells and holds only the last file that was generated
# or loaded, so it serves merely as a fallback when no files are configured.
if text_similarity_settings['input_files']:
    # NOTE(review): passing None is expected to make analyze_text_similarity
    # load the data from 'input_files' - confirm against its implementation.
    analyze_text_similarity(None, text_similarity_settings)
elif 'df_texts' in globals():
    analyze_text_similarity(df_texts, text_similarity_settings)
else:
    # Neither files nor in-memory data: let the helper deal with the settings.
    analyze_text_similarity(None, text_similarity_settings)

# Plotting Confusion Matrices

In [None]:
# Plotting configuration.
#   input_files - analysis files to plot; uses the files from the previous
#                 analysis step, or specify your own:
#                 ['path/to/analysis_file1.csv', 'path/to/analysis_file2.csv']
plot_settings = dict(
    input_files=analyzed_files,
)

In [None]:
# Generate plots: every analysis CSV is validated and then handed to
# plot_confusion_matrices together with the models it contains.

# Columns every analysis CSV must provide (the same for all files).
required_columns = {'model', 'trait', 'trait_score', 'score', 'analyzed_trait'}

for analysis_file in plot_settings['input_files']:
    try:
        df_analysis = pd.read_csv(analysis_file)
        missing_cols = required_columns - set(df_analysis.columns)
        if missing_cols:
            # Sorted so that the message is identical on every run
            # (iteration order of a set of strings is arbitrary).
            raise ValueError(f"CSV file is missing columns: {', '.join(sorted(missing_cols))}")
    except Exception as e:
        # Best effort: report the unusable file (on screen and in the log)
        # and continue with the next one.
        print(f"Error loading analysis data from '{analysis_file}': {e}")
        logger.error(f"Error loading analysis data from '{analysis_file}': {e}")
        continue

    # Get list of models from analysis data (distinct values of the 'model' column)
    model_names = df_analysis['model'].unique().tolist()

    # Plot confusion matrices
    plot_confusion_matrices(df_analysis, model_names, traits_definitions, analysis_file)