In [1]:
import pandas as pd
import requests
import json
import time
from typing import List, Dict
import subprocess
import signal
import sys
import logging
from datetime import datetime
import pytz

# Configure logging
ist = pytz.timezone('Asia/Kolkata')
ist_time = datetime.now(ist)

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(f'llm_evaluation_{ist_time.strftime("%Y%m%d_%H%M%S")}.log'),
        logging.StreamHandler()
    ]
)

# Constants
OLLAMA_API_URL = "http://localhost:11434/api/generate"
MODELS = [
    # "hf.co/bartowski/Qwen2.5-7B-Instruct-GGUF:Q5_K_L",
    # "hf.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q5_K_L",
    # "hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:F16",
    # "hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF:F16",
    # "hf.co/bartowski/Qwen2.5-14B-Instruct-GGUF:Q2_K",
    # "hf.co/bartowski/Qwen2.5-3B-Instruct-GGUF:F16",
    # "hf.co/bartowski/Qwen2.5-1.5B-Instruct-GGUF:F16",
    "hf.co/bartowski/Phi-3-medium-128k-instruct-GGUF:Q3_K_S",
    "hf.co/RichardErkhov/princeton-nlp_-_gemma-2-9b-it-SimPO-gguf:Q4_K_M",
]

def load_questions() -> pd.DataFrame:
    """Load questions from CSV file."""
    try:
        df = pd.read_csv('/home/ubuntu/quantumLeap/quantumEval_claude.csv')
        logging.info(f"Successfully loaded {len(df)} questions from CSV")
        return df
    except Exception as e:
        logging.error(f"Error loading questions: {e}")
        sys.exit(1)

def start_ollama_model(model_name: str) -> subprocess.Popen:
    """Start an Ollama model."""
    try:
        process = subprocess.Popen(
            ['ollama', 'run', model_name],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )
        time.sleep(10)  # Wait for model to initialize
        logging.info(f"Started model: {model_name}")
        return process
    except Exception as e:
        logging.error(f"Error starting model {model_name}: {e}")
        return None

def stop_ollama_model(process: subprocess.Popen):
    """Stop the Ollama model process."""
    if process:
        process.send_signal(signal.SIGTERM)
        process.wait()
        logging.info("Stopped Ollama model")

def query_model(question: str, model_name: str) -> str:
    """Query the model and get response."""
    prompt = f"Please answer the following question about Clotaire Rapaille's Culture Code methodology:\n\n{question}\n\nProvide a clear and concise answer based on Rapaille's work."
    
    payload = {
        "model": model_name,
        "prompt": prompt,
        "stream": False
    }
    
    try:
        response = requests.post(OLLAMA_API_URL, json=payload)
        response.raise_for_status()
        return response.json()['response']
    except Exception as e:
        logging.error(f"Error querying model {model_name}: {e}")
        return f"ERROR: {str(e)}"

def evaluate_models(questions_df: pd.DataFrame) -> pd.DataFrame:
    """Evaluate all models on all questions."""
    results = []
    
    for model in MODELS:
        logging.info(f"Starting evaluation for model: {model}")
        
        # Start the model
        process = start_ollama_model(model)
        if not process:
            continue
            
        try:
            for _, row in questions_df.iterrows():
                logging.info(f"Processing Question {row['Question Number']}")
                
                response = query_model(row['Question Text'], model)
                
                result = {
                    'Question Number': row['Question Number'],
                    'Attribute': row['Attribute'],
                    'Expertise Level': row['Expertise Level'],
                    'Question Text': row['Question Text'],
                    'LLM Name': model,
                    'Model Response': response,
                    'Score': '',  # To be filled later
                    'Explanation': ''  # To be filled later
                }
                results.append(result)
                
                # Save intermediate results with IST timestamp
                ist_time = datetime.now(ist)
                pd.DataFrame(results).to_csv(
                    f'/home/ubuntu/quantumLeap/data/eval_for_base_model_selection/temp/results_{ist_time.strftime("%Y%m%d_%H%M%S")}.csv',
                    index=False
                )
                
                time.sleep(2)  # Brief pause between questions
                
        except Exception as e:
            logging.error(f"Error during evaluation of model {model}: {e}")
        finally:
            stop_ollama_model(process)
            time.sleep(5)  # Wait before starting next model
    
    return pd.DataFrame(results)

def main():
    """Main execution function."""
    try:
        # Load questions
        questions_df = load_questions()
        
        # Run evaluations
        results_df = evaluate_models(questions_df)
        
        # Save final results with IST timestamp
        ist_time = datetime.now(ist)
        output_file = f'/home/ubuntu/quantumLeap/data/eval_for_base_model_selection/final/final_results_{ist_time.strftime("%Y%m%d_%H%M%S")}.csv'
        results_df.to_csv(output_file, index=False)
        logging.info(f"Results saved to {output_file}")
        
    except Exception as e:
        logging.error(f"Error in main execution: {e}")
        sys.exit(1)

if __name__ == "__main__":
    main()

2024-11-06 04:59:49,772 - INFO - Successfully loaded 50 questions from CSV
2024-11-06 04:59:49,774 - INFO - Starting evaluation for model: hf.co/bartowski/Phi-3-medium-128k-instruct-GGUF:Q3_K_S
2024-11-06 04:59:59,775 - INFO - Started model: hf.co/bartowski/Phi-3-medium-128k-instruct-GGUF:Q3_K_S
2024-11-06 04:59:59,778 - INFO - Processing Question 1
2024-11-06 05:00:05,826 - INFO - Processing Question 2
2024-11-06 05:00:08,975 - INFO - Processing Question 3
2024-11-06 05:00:12,779 - INFO - Processing Question 4
2024-11-06 05:00:17,011 - INFO - Processing Question 5
2024-11-06 05:00:22,223 - INFO - Processing Question 6
2024-11-06 05:00:26,055 - INFO - Processing Question 7
2024-11-06 05:00:29,151 - INFO - Processing Question 8
2024-11-06 05:00:32,479 - INFO - Processing Question 9
2024-11-06 05:00:35,615 - INFO - Processing Question 10
2024-11-06 05:00:40,771 - INFO - Processing Question 11
2024-11-06 05:00:47,783 - INFO - Processing Question 12
2024-11-06 05:00:54,251 - INFO - Process

In [1]:
import pandas as pd

df = pd.read_csv('/home/ubuntu/quantumLeap/data/eval_for_base_model_selection/final/final_results_20241106_104834_v2.csv')
df.head()

Unnamed: 0,Question Number,Attribute,Expertise Level,LLM Name,LLM Name.1,Model Response,Score,Explanation
0,1,Accuracy,Beginner,What is the basic definition of a 'Culture Cod...,hf.co/bartowski/Phi-3-medium-128k-instruct-GGU...,"The 'Culture Code,' as defined by Clotaire Rap...",,
1,2,Accuracy,Beginner,What was Rapaille's key insight about how cult...,hf.co/bartowski/Phi-3-medium-128k-instruct-GGU...,Rapaille's key insight is that cultural imprin...,,
2,3,Relevance,Beginner,How did Rapaille use his Culture Code methodol...,hf.co/bartowski/Phi-3-medium-128k-instruct-GGU...,Clotaire Rapaille used his Culture Code method...,,
3,4,Relevance,Beginner,Why did Rapaille conduct his research sessions...,hf.co/bartowski/Phi-3-medium-128k-instruct-GGU...,Clotaire Rapaille conducted his research sessi...,,
4,5,Depth,Beginner,Explain Rapaille's concept of 'imprints' and h...,hf.co/bartowski/Phi-3-medium-128k-instruct-GGU...,"Clotaire Rapaille's concept of 'imprints', als...",,


In [2]:
df.shape

(450, 8)

In [17]:
import requests
import json

# Define the API endpoint
url = "http://localhost:11434/api/generate"

# Set the headers
headers = {
    "Content-Type": "application/json",
}

# Define the payload
payload = {
    "model": "hf.co/RichardErkhov/Qwen_-_Qwen2.5-72B-Instruct-gguf:Q3_K",  # Replace with your model name
    "prompt": "Envision a groundbreaking theory that unites Dr. Clotaire Rapaille's 'The Culture Code' with advanced artificial intelligence and machine learning technologies to predict and influence consumer behavior in the era of globalization and cultural hybridization. Provide a comprehensive explanation of your proposed theory, detailing how it integrates cultural codes with AI, the potential impact on global marketing strategies, and how it could transform consumer research methodologies. Ensure your response is accurate, relevant, deep, clear, and original.",
}

# Send the POST request
response = requests.post(url, headers=headers, data=json.dumps(payload), stream=True)

# Check if the request was successful
if response.status_code == 200:
    # Process each line in the streaming response
    for line in response.iter_lines():
        if line:
            try:
                # Decode the JSON object
                result = json.loads(line.decode('utf-8'))
                # Access the generated text
                generated_text = result.get("response", "")
                print(generated_text)
            except json.JSONDecodeError as e:
                print(f"JSON decode error: {e}")
else:
    print(f"Request failed with status code {response.status_code}: {response.text}")


###
 Theory
:
 The
 Cultural
 AI
 Nexus
 (
CAN
)


####
 Introduction


The
 Cultural
 AI
 Nexus
 (
CAN
)
 is
 a
 groundbreaking
 theory
 that
 integrates
 Dr
.
 Cl
ota
ire
 Rap
aille
's
 "
The
 Culture
 Code
"
 with
 advanced
 artificial
 intelligence
 (
AI
)
 and
 machine
 learning
 (
ML
)
 technologies
 to
 predict
 and
 influence
 consumer
 behavior
 in
 an
 era
 of
 globalization
 and
 cultural
 hybrid
ization
.
 This
 theory
 lever
ages
 the
 deep
 cultural
 insights
 provided
 by
 Rap
aille
’s
 work
 and
 combines
 them
 with
 the
 computational
 power


KeyboardInterrupt: 