In [None]:
#!/usr/bin/env python3
"""
LLM Number Selection Convergence Experiment - Jupyter Notebook Version
====================================================================
This experiment tests multiple Large Language Models to analyze
convergence patterns in pseudo-random number selection.

Research Question: Do different LLMs exhibit similar biases when asked
to select "random" numbers within given ranges?
"""

# %% [markdown]
# # LLM Number Selection Convergence Experiment
# 
# ## Obiettivo della Ricerca
# Analizzare se diversi Large Language Models mostrano pattern di convergenza simili quando viene chiesto loro di selezionare numeri "casuali" in un dato intervallo.
# 
# ## Ipotesi
# I modelli LLM, addestrati su dati simili, potrebbero mostrare bias cognitivi convergenti nella selezione di numeri pseudo-casuali.

# %% Cell 1: Import e Configurazione
import json
import math
import os
import re
import time
import warnings
from collections import Counter, defaultdict
from datetime import datetime
from typing import Dict, List, Tuple, Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
from IPython.display import display, HTML, clear_output, Markdown
from openai import OpenAI
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation and numerical warnings raised by the libraries above.
warnings.filterwarnings('ignore')

# Plot styling: seaborn dark-grid style sheet and the "husl" colour palette.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

print("✅ Librerie importate con successo!")

# %% Cell 2: Configurazione API e Modelli
class ExperimentConfig:
    """Static configuration for the number-selection convergence experiment.

    Every setting is a class attribute, so values are read directly from the
    class (for example ``ExperimentConfig.MODELS``) without instantiating it.
    """

    # OpenRouter connection settings.
    # The key comes from the OPENROUTER_API_KEY environment variable so that no
    # credential lives in the notebook; it is "" when the variable is unset.
    # NOTE(review): a key used to be hard-coded on this line - treat it as
    # leaked and revoke it with the provider.
    OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
    BASE_URL = "https://openrouter.ai/api/v1"

    # Model identifiers to query.
    MODELS = [
        "openai/gpt-4o",
        "anthropic/claude-3.5-sonnet",
        "google/gemini-2.0-flash-exp:free",
        "meta-llama/llama-3.1-70b-instruct",
        "mistralai/mistral-large"
    ]

    # Experiment settings.
    TEMPERATURE = 0.7          # sampling temperature sent with every request
    MAX_RETRIES = 3            # attempts per API call before giving up
    DELAY_BETWEEN_CALLS = 0.5  # seconds slept after each call

    # Inclusive (min, max) ranges to test.
    TEST_RANGES = [
        (1, 10),    # small range
        (1, 50),    # medium range (the original one)
        (1, 100),   # large range
        (0, 9),     # single digit
        (1, 1000)   # very large range
    ]

    # Prompt wordings; each is filled in with str.format(min=..., max=...).
    PROMPT_TEMPLATES = {
        "simple": "Pick a number between {min} and {max}",
        "random": "Pick a random number between {min} and {max}",
        "think": "Think of a number between {min} and {max}",
        "choose": "Please choose any number between {min} and {max}",
        "select": "Select a number between {min} and {max}",
        "generate": "Generate a number between {min} and {max}"
    }

    # Short labels used in tables and plots, keyed by model identifier.
    # Built once here instead of on every get_display_name() call.
    DISPLAY_NAMES = {
        "openai/gpt-4o": "GPT-4",
        "anthropic/claude-3.5-sonnet": "Claude 3.5",
        "google/gemini-2.0-flash-exp:free": "Gemini 2.0",
        "meta-llama/llama-3.1-70b-instruct": "Llama 3.1",
        "mistralai/mistral-large": "Mistral Large"
    }

    @classmethod
    def get_display_name(cls, model: str) -> str:
        """Return a readable label for a model identifier.

        Args:
            model: model identifier such as ``"openai/gpt-4o"``.

        Returns:
            The label from ``DISPLAY_NAMES`` when the identifier is listed
            there, otherwise the text after the last ``/`` of the identifier.
        """
        return cls.DISPLAY_NAMES.get(model, model.split('/')[-1])

# Print a short summary of the configuration defined above.
print(f"✅ Configurazione completata!")
print(f"📊 Modelli da testare: {len(ExperimentConfig.MODELS)}")
print(f"📏 Range di test: {len(ExperimentConfig.TEST_RANGES)}")
print(f"💬 Varianti di prompt: {len(ExperimentConfig.PROMPT_TEMPLATES)}")

# %% Cell 3: Classe Principale per l'Esperimento
class NumberConvergenceExperiment:
    """Sends number-selection prompts to the configured models and collects answers.

    Attributes:
        client: OpenAI-compatible client pointed at ``ExperimentConfig.BASE_URL``.
        results: list that the notebook cells append result records to.
        current_experiment: bookkeeping dict with ``start_time``,
            ``completed_tests`` and ``total_tests``.
        last_error: the exception from the most recent ``call_model`` that
            exhausted its retries, or ``None`` if that has not happened.
    """

    def __init__(self):
        """Create the API client and empty result storage.

        Raises:
            ValueError: if ``ExperimentConfig.OPENROUTER_API_KEY`` is empty.
        """
        api_key = ExperimentConfig.OPENROUTER_API_KEY
        if not api_key:
            # Fail here rather than letting every request fail later and be
            # turned into a silent None by call_model().
            raise ValueError(
                "ExperimentConfig.OPENROUTER_API_KEY is empty; configure an "
                "OpenRouter API key before creating the experiment."
            )
        self.client = OpenAI(
            api_key=api_key,
            base_url=ExperimentConfig.BASE_URL
        )
        self.results = []
        self.current_experiment = {
            'start_time': datetime.now(),
            'completed_tests': 0,
            'total_tests': 0
        }
        self.last_error = None

    def call_model(self, model: str, prompt: str, temperature: Optional[float] = None) -> Optional[str]:
        """Send one user prompt to a model and return the stripped reply text.

        Args:
            model: model identifier passed to the API.
            prompt: text sent as the single user message.
            temperature: sampling temperature; ``ExperimentConfig.TEMPERATURE``
                is used when ``None``.

        Returns:
            The reply text with surrounding whitespace removed, or ``None``
            when all ``ExperimentConfig.MAX_RETRIES`` attempts failed. The last
            exception is then kept in ``self.last_error``.
        """
        if temperature is None:
            temperature = ExperimentConfig.TEMPERATURE

        last_error = None
        for attempt in range(ExperimentConfig.MAX_RETRIES):
            try:
                response = self.client.chat.completions.create(
                    extra_headers={
                        "HTTP-Referer": "https://github.com/llm-convergence-experiment",
                        "X-Title": "Number Selection Convergence Study",
                    },
                    model=model,
                    messages=[{"role": "user", "content": prompt}],
                    temperature=temperature,
                    max_tokens=50
                )
                content = response.choices[0].message.content
                if content is None:
                    # Treated like any other failed attempt (retried below).
                    raise ValueError("model returned no message content")
                return content.strip()

            except Exception as exc:
                # Deliberate best effort: a failing model must not abort the
                # whole run, but keep the error so it can be inspected.
                last_error = exc
                if attempt < ExperimentConfig.MAX_RETRIES - 1:
                    time.sleep(2 ** attempt)  # exponential back-off: 1s, 2s, ...

        self.last_error = last_error
        return None

    def extract_number(self, response: str, min_val: int, max_val: int) -> Optional[int]:
        """Return the first integer in ``response`` that lies in [min_val, max_val].

        Every prompt template asks for a number "between {min} and {max}", and
        a reply may repeat that phrase. The echoed phrase is removed before
        scanning so that the echoed lower bound is not mistaken for the
        model's choice.

        Args:
            response: reply text, or ``None`` for a failed call.
            min_val: inclusive lower bound.
            max_val: inclusive upper bound.

        Returns:
            The first in-range integer, or ``None`` if there is none.
        """
        if response is None:
            return None

        echoed_range = re.compile(
            rf'between\s+{re.escape(str(min_val))}\s+and\s+{re.escape(str(max_val))}\b',
            re.IGNORECASE,
        )
        cleaned = echoed_range.sub(' ', response)

        for num_str in re.findall(r'\b\d+\b', cleaned):
            try:
                num = int(num_str)
            except ValueError:
                # int() rejects extremely long digit strings on recent Pythons.
                continue
            if min_val <= num <= max_val:
                return num

        return None

    def run_single_test(self, model: str, prompt_template: str,
                       min_val: int, max_val: int,
                       num_iterations: int = 10) -> List[int]:
        """Ask one model the same prompt repeatedly and collect the valid answers.

        Args:
            model: model identifier passed to ``call_model``.
            prompt_template: template containing ``{min}`` and ``{max}``.
            min_val: inclusive lower bound of the range.
            max_val: inclusive upper bound of the range.
            num_iterations: number of calls to make.

        Returns:
            The extracted numbers in call order. Calls that failed or produced
            no in-range number are skipped, so the list may be shorter than
            ``num_iterations``.
        """
        # The prompt is identical for every iteration, so build it once.
        prompt = prompt_template.format(min=min_val, max=max_val)
        numbers = []

        for _ in range(num_iterations):
            response = self.call_model(model, prompt)
            number = self.extract_number(response, min_val, max_val)

            if number is not None:
                numbers.append(number)

            time.sleep(ExperimentConfig.DELAY_BETWEEN_CALLS)

        return numbers

    def display_progress(self, current: int, total: int, message: str = ""):
        """Replace the cell output with an HTML progress bar.

        Args:
            current: number of completed tests.
            total: total number of tests; a value <= 0 is shown as 0% instead
                of raising ZeroDivisionError.
            message: free text shown under the bar.
        """
        progress = current / total if total > 0 else 0.0
        bar_length = 50
        # Clamp so the bar always has exactly bar_length characters.
        filled_length = min(bar_length, max(0, int(bar_length * progress)))
        bar = '█' * filled_length + '░' * (bar_length - filled_length)

        clear_output(wait=True)
        display(HTML(f"""
        <div style="font-family: monospace; padding: 10px; background-color: #f0f0f0; border-radius: 5px;">
            <h3>🔬 Esperimento in corso...</h3>
            <div style="margin: 10px 0;">
                <strong>Progresso:</strong> [{bar}] {progress*100:.1f}%
            </div>
            <div style="margin: 5px 0;">
                <strong>Test completati:</strong> {current} / {total}
            </div>
            <div style="margin: 5px 0; color: #666;">
                {message}
            </div>
        </div>
        """))

# Create the experiment object shared by all following cells
# (its constructor builds the OpenAI client from ExperimentConfig).
experiment = NumberConvergenceExperiment()
print("✅ Esperimento inizializzato!")

# %% Cell 4: Funzioni di Analisi e Visualizzazione
class AnalysisTools:
    """Stateless statistical helpers for analysing the selected numbers."""

    @staticmethod
    def calculate_entropy(numbers: List[int], min_val: int, max_val: int) -> float:
        """Return the Shannon entropy of ``numbers`` normalised to [0, 1].

        The entropy (in bits) of the empirical distribution is divided by
        ``log2(max_val - min_val + 1)``, the entropy of a uniform choice over
        the whole range.

        NOTE: a sample of k values has entropy at most ``log2(k)``, so with
        fewer samples than range values the result cannot reach 1.

        Args:
            numbers: selected numbers.
            min_val: inclusive lower bound of the range.
            max_val: inclusive upper bound of the range.

        Returns:
            The normalised entropy; 0.0 for an empty sample or a range with
            fewer than two values.
        """
        n_values = max_val - min_val + 1
        if not numbers or n_values <= 1:
            return 0.0

        total = len(numbers)
        entropy = 0.0
        for count in Counter(numbers).values():
            p = count / total
            entropy -= p * np.log2(p)

        return entropy / np.log2(n_values)

    @staticmethod
    def find_convergence_points(all_results: Dict[str, List[int]],
                                min_val: Optional[int] = None,
                                max_val: Optional[int] = None) -> Dict[int, float]:
        """Return the numbers chosen more than twice as often as chance.

        Selections of all models are pooled. A number is reported when its
        share of the pooled selections exceeds twice the uniform share
        ``1 / range_size``.

        Args:
            all_results: mapping of model name to its selected numbers.
            min_val: inclusive lower bound of the tested range. When omitted,
                the smallest observed number is used.
            max_val: inclusive upper bound of the tested range. When omitted,
                the largest observed number is used.

        Returns:
            Mapping of over-represented number to its pooled frequency, in
            order of first appearance. Empty when there are no selections.

        NOTE: without explicit bounds the range is estimated from the data.
        If every model picks the same number the estimated range has size 1
        and nothing is reported, so pass the real bounds when they are known.
        """
        all_numbers = [n for numbers in all_results.values() for n in numbers]
        if not all_numbers:
            return {}

        low = min(all_numbers) if min_val is None else min_val
        high = max(all_numbers) if max_val is None else max_val
        n_values = max(high - low + 1, 1)

        # Loop-invariant: share each number would get under a uniform choice.
        expected_frequency = 1 / n_values
        threshold = expected_frequency * 2  # cut-off: 2x the expected share

        total_selections = len(all_numbers)
        convergence_points = {}
        for number, count in Counter(all_numbers).items():
            frequency = count / total_selections
            if frequency > threshold:
                convergence_points[number] = frequency

        return convergence_points

    @staticmethod
    def chi_square_test(results_dict: Dict[str, List[int]], min_val: int, max_val: int) -> float:
        """Chi-square goodness-of-fit test of the pooled selections against uniform.

        Args:
            results_dict: mapping of model name to its selected numbers.
            min_val: inclusive lower bound of the range.
            max_val: inclusive upper bound of the range.

        Returns:
            The p-value of the test. 1.0 is returned when there are no
            selections or when the range has fewer than two values (no
            degrees of freedom, so uniformity cannot be rejected).

        NOTE: the chi-square approximation is unreliable when the expected
        count per value (``n_total / n_values``) is small, e.g. a few hundred
        selections over a range of 1000 values.
        """
        all_numbers = [n for numbers in results_dict.values() for n in numbers]
        if not all_numbers:
            return 1.0

        n_values = max_val - min_val + 1
        df = n_values - 1
        if df <= 0:
            return 1.0

        observed = Counter(all_numbers)
        expected = len(all_numbers) / n_values  # uniform expectation per value

        chi_square = 0
        for value in range(min_val, max_val + 1):
            chi_square += (observed.get(value, 0) - expected) ** 2 / expected

        # Survival function: same quantity as 1 - cdf, but keeps precision
        # when the p-value is very small.
        return float(stats.chi2.sf(chi_square, df))

# Confirmation message shown once the analysis helpers are defined.
print("✅ Strumenti di analisi pronti!")

# %% Cell 5: Esecuzione Esperimento Base (Range 1-50)
# Initial test on the original range (1-50): every model is asked the "simple"
# prompt 20 times, then the selections are plotted and summarised.
display(Markdown("## 🧪 Test 1: Range Originale (1-50)"))
display(Markdown("Riproduzione dell'esperimento originale dove tutti i modelli hanno scelto 27"))

# Display name -> list of in-range numbers returned by that model.
results_1_50 = {}
prompt = ExperimentConfig.PROMPT_TEMPLATES["simple"]
min_val, max_val = 1, 50

for i, model in enumerate(ExperimentConfig.MODELS):
    model_name = ExperimentConfig.get_display_name(model)
    # The bar is drawn before the calls, so i is the number of models already done.
    experiment.display_progress(i, len(ExperimentConfig.MODELS), 
                              f"Testing {model_name} su range {min_val}-{max_val}")
    
    numbers = experiment.run_single_test(model, prompt, min_val, max_val, num_iterations=20)
    results_1_50[model_name] = numbers
    
    # Store one record per selection.
    # NOTE(review): records are appended to experiment.results, so running
    # this cell again adds the same kind of records a second time.
    for num in numbers:
        experiment.results.append({
            'model': model_name,
            'range': f"{min_val}-{max_val}",
            'prompt_type': 'simple',
            'number': num,
            'timestamp': datetime.now()
        })

# Remove the progress bar before drawing the figures.
clear_output(wait=True)

# Show the results: two panels side by side.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# Panel 1: overlaid density histograms, one per model.
for model_name, numbers in results_1_50.items():
    if numbers:
        ax1.hist(numbers, bins=20, alpha=0.5, label=model_name, density=True)

ax1.set_xlabel('Numero Selezionato')
ax1.set_ylabel('Densità')
ax1.set_title('Distribuzione delle Selezioni per Modello (Range 1-50)')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Panel 2: heatmap of selection counts (rows = models, columns = numbers).
# The matrix has 50 columns; column num-1 holds the count for number num,
# which relies on the 1-50 range set above.
selection_matrix = np.zeros((len(results_1_50), 50))
for i, (model_name, numbers) in enumerate(results_1_50.items()):
    for num in numbers:
        selection_matrix[i, num-1] += 1

sns.heatmap(selection_matrix[:, 15:35], # columns 15..34 hold the numbers 16-35
            xticklabels=range(16, 36),
            yticklabels=list(results_1_50.keys()),
            cmap='YlOrRd',
            ax=ax2,
            cbar_kws={'label': 'Frequenza'})
ax2.set_title('Heatmap Selezioni (Focus: 16-35)')
ax2.set_xlabel('Numero')

plt.tight_layout()
plt.show()

# Statistical summary.
print("\n📊 Analisi Statistica Range 1-50:")
print("="*50)

# Numbers whose pooled frequency exceeds twice the uniform expectation,
# listed from most to least frequent.
convergence_points = AnalysisTools.find_convergence_points(results_1_50)
if convergence_points:
    print(f"\n🎯 Punti di Convergenza Identificati:")
    for num, freq in sorted(convergence_points.items(), key=lambda x: x[1], reverse=True):
        print(f"   Numero {num}: {freq:.2%} delle selezioni")
else:
    print("\n❌ Nessun punto di convergenza significativo trovato")

# Three most frequent numbers for each model.
print("\n🔢 Numeri più frequenti per modello:")
for model_name, numbers in results_1_50.items():
    if numbers:
        most_common = Counter(numbers).most_common(3)
        print(f"\n{model_name}:")
        for num, count in most_common:
            print(f"   {num}: {count} volte ({count/len(numbers)*100:.1f}%)")

# %% Cell 6: Test Multi-Range Completo
display(Markdown("## 🧪 Test 2: Analisi Multi-Range"))
display(Markdown("Test su diversi range per identificare pattern di convergenza"))

# range label -> model display name -> every number that model selected.
all_results = defaultdict(lambda: defaultdict(list))

# One "test" is a (range, prompt, model) combination.
total_tests = (
    len(ExperimentConfig.MODELS)
    * len(ExperimentConfig.TEST_RANGES)
    * len(ExperimentConfig.PROMPT_TEMPLATES)
)
current_test = 0

for min_val, max_val in ExperimentConfig.TEST_RANGES:
    range_key = f"{min_val}-{max_val}"

    for prompt_name, prompt_template in ExperimentConfig.PROMPT_TEMPLATES.items():
        for model in ExperimentConfig.MODELS:
            current_test += 1
            model_name = ExperimentConfig.get_display_name(model)

            experiment.display_progress(
                current_test,
                total_tests,
                f"Testing {model_name} | Range: {range_key} | Prompt: {prompt_name}",
            )

            numbers = experiment.run_single_test(
                model, prompt_template, min_val, max_val, num_iterations=10
            )
            all_results[range_key][model_name].extend(numbers)

            # Detailed log: one record per selection.
            experiment.results.extend(
                {
                    'model': model_name,
                    'range': range_key,
                    'prompt_type': prompt_name,
                    'number': num,
                    'timestamp': datetime.now()
                }
                for num in numbers
            )

# Remove the progress bar once every combination has run.
clear_output(wait=True)

# %% Cell 7: Visualizzazione Risultati Multi-Range
display(Markdown("## 📊 Risultati Analisi Multi-Range"))

# One row per recorded selection; later cells reuse this frame.
df_results = pd.DataFrame(experiment.results)

# Figure 1: the ten most frequently selected numbers for each range.
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.flatten()

for idx, (range_key, range_data) in enumerate(all_results.items()):
    if idx >= len(axes):
        break  # the grid only has room for six ranges

    ax = axes[idx]

    # Pool the selections of every model for this range.
    all_numbers = [n for model_numbers in range_data.values() for n in model_numbers]
    if not all_numbers:
        continue

    # Take the ten most common numbers, then order them by value so the
    # bars still read left to right from small to large.
    top_ten = sorted(Counter(all_numbers).most_common(10))
    numbers, counts = zip(*top_ten)

    # Plot against string labels (categorical axis). With raw numeric x
    # positions the default bar width of 0.8 data units turns the bars into
    # hairlines on wide ranges such as 1-1000.
    labels = [str(n) for n in numbers]
    bars = ax.bar(labels, counts, color='skyblue', edgecolor='navy', alpha=0.7)

    # Highlight the most frequent number.
    max_idx = counts.index(max(counts))
    bars[max_idx].set_color('red')
    bars[max_idx].set_alpha(1.0)

    ax.set_title(f'Range {range_key}')
    ax.set_xlabel('Numero')
    ax.set_ylabel('Frequenza')
    ax.grid(True, alpha=0.3)

# Remove the panels that have no range to show.
for idx in range(len(all_results), len(axes)):
    fig.delaxes(axes[idx])

plt.suptitle('Top 10 Numeri più Selezionati per Range', fontsize=16)
plt.tight_layout()
plt.show()

# %% Cell 8: Analisi della Convergenza Inter-Modello
display(Markdown("## 🔄 Analisi Convergenza Inter-Modello"))

# For every range, pairwise Jaccard similarity between the SETS of numbers
# each model selected: |A ∩ B| / |A ∪ B|. Only which numbers were chosen
# matters here, not how often.
similarity_matrices = {}

for range_key, range_data in all_results.items():
    models = list(range_data.keys())
    n_models = len(models)

    # Build each model's set once instead of once per pair.
    selected = [set(range_data[name]) for name in models]

    similarity_matrix = np.zeros((n_models, n_models))
    for i in range(n_models):
        if not selected[i]:
            continue  # a model without data keeps an all-zero row/column

        # A non-empty set is identical to itself, so the diagonal is 1
        # (it was previously left at 0 and shown as "0.00" in the heatmap).
        similarity_matrix[i, i] = 1.0

        # The measure is symmetric: compute each pair once, fill both cells.
        for j in range(i + 1, n_models):
            if not selected[j]:
                continue
            intersection = len(selected[i] & selected[j])
            union = len(selected[i] | selected[j])  # > 0: both sets are non-empty
            similarity = intersection / union
            similarity_matrix[i, j] = similarity
            similarity_matrix[j, i] = similarity

    similarity_matrices[range_key] = similarity_matrix

# Draw one annotated heatmap per range.
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.flatten()

for idx, (range_key, sim_matrix) in enumerate(similarity_matrices.items()):
    if idx < len(axes) and sim_matrix.shape[0] > 0:
        ax = axes[idx]
        models = list(all_results[range_key].keys())

        sns.heatmap(sim_matrix,
                    xticklabels=models,
                    yticklabels=models,
                    annot=True,
                    fmt='.2f',
                    cmap='RdYlBu_r',
                    vmin=0, vmax=1,
                    ax=ax,
                    cbar_kws={'label': 'Similarità'})
        ax.set_title(f'Similarità Inter-Modello - Range {range_key}')

# Remove the panels that have no range to show.
for idx in range(len(similarity_matrices), len(axes)):
    fig.delaxes(axes[idx])

plt.tight_layout()
plt.show()

# %% Cell 9: Analisi Statistica Avanzata
display(Markdown("## 📈 Analisi Statistica Avanzata"))

# Range labels are built as f"{min}-{max}" when the results are collected.
# Look the bounds up from the configuration instead of splitting the label on
# '-', which fails as soon as a bound is negative (e.g. "-5-5").
range_bounds = {f"{low}-{high}": (low, high) for low, high in ExperimentConfig.TEST_RANGES}

# One summary row per range.
randomness_results = []

for range_key, range_data in all_results.items():
    bounds = range_bounds.get(range_key)
    if bounds is None:
        # Label not in the current configuration: fall back to parsing it.
        bounds = tuple(map(int, range_key.split('-')))
    min_val, max_val = bounds

    # Chi-square goodness-of-fit of the pooled selections against uniform.
    p_value = AnalysisTools.chi_square_test(range_data, min_val, max_val)

    # Mean normalised entropy over the models that produced data.
    entropies = [
        AnalysisTools.calculate_entropy(numbers, min_val, max_val)
        for numbers in range_data.values()
        if numbers
    ]
    avg_entropy = np.mean(entropies) if entropies else 0

    randomness_results.append({
        'Range': range_key,
        'P-value (Chi²)': f"{p_value:.4f}",
        'Entropia Media': f"{avg_entropy:.3f}",
        # p > 0.05 only means uniformity was not rejected at the 5% level.
        'Casualità': '✅ Casuale' if p_value > 0.05 else '❌ Non casuale'
    })

# Show the summary table.
df_randomness = pd.DataFrame(randomness_results)
display(HTML(df_randomness.to_html(index=False)))

# %% Cell 10: Pattern Psicologici e Bias Cognitivi
display(Markdown("## 🧠 Analisi dei Bias Cognitivi"))

# Analyse which kinds of numbers are preferred.
def analyze_number_properties(numbers: List[int]) -> Dict[str, float]:
    """Return the share of ``numbers`` that has each tracked property.

    Args:
        numbers: selected numbers.

    Returns:
        An empty dict for empty input; otherwise a dict mapping each property
        name to the fraction of ``numbers`` satisfying it:
        'primi' (prime), 'pari' (even), 'multipli_5' (multiple of 5),
        'potenze_2' (non-zero with a single set bit), 'cifra_singola'
        (less than 10) and 'contiene_7' (decimal form contains a 7).
    """
    if not numbers:
        return {}

    total = len(numbers)

    # Property name -> predicate applied to every number.
    checks = {
        'primi': is_prime,
        'pari': lambda n: n % 2 == 0,
        'multipli_5': lambda n: n % 5 == 0,
        'potenze_2': lambda n: n & (n-1) == 0 and n != 0,
        'cifra_singola': lambda n: n < 10,
        'contiene_7': lambda n: '7' in str(n),
    }

    return {
        name: sum(1 for n in numbers if check(n)) / total
        for name, check in checks.items()
    }

def is_prime(n: int) -> bool:
    """Return True if ``n`` is a prime number.

    Uses trial division by odd candidates up to the integer square root.
    ``math.isqrt`` is exact for arbitrarily large integers, whereas the float
    expression ``n ** 0.5`` loses precision for large values and raises
    OverflowError for integers too large to convert to a float.

    Args:
        n: integer to test; values below 2 are not prime.

    Returns:
        True when ``n`` is prime, False otherwise.
    """
    if n < 2:
        return False
    if n < 4:
        return True  # 2 and 3
    if n % 2 == 0:
        return False
    # Only odd divisors remain possible.
    for divisor in range(3, math.isqrt(n) + 1, 2):
        if n % divisor == 0:
            return False
    return True

# Per-model property shares, computed over that model's selections in all ranges.
bias_analysis = defaultdict(dict)

for model in ExperimentConfig.MODELS:
    model_name = ExperimentConfig.get_display_name(model)

    # Gather this model's numbers from every range in which it appears.
    all_numbers = [
        number
        for per_range in all_results.values()
        if model_name in per_range
        for number in per_range[model_name]
    ]

    if all_numbers:
        bias_analysis[model_name] = analyze_number_properties(all_numbers)

# Grouped bar chart: one group per model, one bar per property.
if bias_analysis:
    df_bias = pd.DataFrame(bias_analysis).T

    fig, ax = plt.subplots(figsize=(10, 6))
    df_bias.plot.bar(ax=ax)
    ax.set(
        title='Bias Cognitivi nella Selezione dei Numeri per Modello',
        xlabel='Modello',
        ylabel='Proporzione',
    )
    ax.legend(title='Proprietà', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# %% Cell 11: Report Finale e Conclusioni
display(Markdown("## 📝 Report Finale"))

# Headline figures for the report.
total_selections = len(experiment.results)
models_tested = len(ExperimentConfig.MODELS)
ranges_tested = len(ExperimentConfig.TEST_RANGES)
prompts_tested = len(ExperimentConfig.PROMPT_TEMPLATES)

# Five most selected numbers over every record, whatever its range.
all_numbers_global = [record['number'] for record in experiment.results]
most_common_global = Counter(all_numbers_global).most_common(5)

# Wall-clock time since the experiment object was created.
end_time = datetime.now()
duration = end_time - experiment.current_experiment['start_time']

# The report is assembled from three pieces: header, list items, footer.
report_header = f"""
<div style="background-color: #f0f0f0; padding: 20px; border-radius: 10px; font-family: Arial, sans-serif;">
    <h2 style="color: #2c3e50;">📊 Report Esperimento Convergenza LLM</h2>
    
    <div style="background-color: white; padding: 15px; margin: 10px 0; border-radius: 5px;">
        <h3>🔍 Parametri Esperimento</h3>
        <ul>
            <li><strong>Modelli testati:</strong> {models_tested}</li>
            <li><strong>Range numerici:</strong> {ranges_tested}</li>
            <li><strong>Varianti prompt:</strong> {prompts_tested}</li>
            <li><strong>Selezioni totali:</strong> {total_selections}</li>
            <li><strong>Durata:</strong> {duration.total_seconds():.1f} secondi</li>
        </ul>
    </div>
    
    <div style="background-color: white; padding: 15px; margin: 10px 0; border-radius: 5px;">
        <h3>🎯 Risultati Chiave</h3>
        <h4>Top 5 Numeri più Selezionati (Globale):</h4>
        <ol>
"""

# One <li> per top number, with its share of all selections.
top_number_items = "".join(
    f"<li><strong>{num}</strong>: {count} volte ({(count / total_selections) * 100:.1f}%)</li>"
    for num, count in most_common_global
)

# NOTE(review): the conclusions below are fixed text; they are not derived
# from the figures computed in this notebook.
report_footer = """
        </ol>
    </div>
    
    <div style="background-color: white; padding: 15px; margin: 10px 0; border-radius: 5px;">
        <h3>💡 Conclusioni Principali</h3>
        <ul>
            <li>I modelli mostrano pattern di convergenza significativi in determinati range</li>
            <li>Esistono "numeri attrattori" che vengono selezionati con frequenza superiore alla casualità</li>
            <li>I bias cognitivi umani sembrano riflettersi nelle selezioni dei modelli</li>
            <li>La formulazione del prompt influenza marginalmente la distribuzione delle selezioni</li>
        </ul>
    </div>
</div>
"""

report_html = report_header + top_number_items + report_footer

display(HTML(report_html))

# %% Cell 12: Salvataggio Dati per Paper
# Collect everything needed for the paper into one JSON-serialisable structure.

# Run-level metadata.
run_metadata = {
    "experiment_date": datetime.now().isoformat(),
    "models_tested": ExperimentConfig.MODELS,
    "ranges_tested": ExperimentConfig.TEST_RANGES,
    "prompt_types": list(ExperimentConfig.PROMPT_TEMPLATES.keys()),
    "total_selections": total_selections,
    "duration_seconds": duration.total_seconds()
}

# Per range: the over-represented numbers and each model's full tally.
convergence_by_range = {
    range_key: {
        "convergence_points": AnalysisTools.find_convergence_points(range_data),
        "model_selections": {
            model: Counter(numbers).most_common()
            for model, numbers in range_data.items()
        }
    }
    for range_key, range_data in all_results.items()
}

output_data = {
    "metadata": run_metadata,
    "raw_results": experiment.results,
    "convergence_analysis": convergence_by_range,
    "statistical_tests": randomness_results,
    "bias_analysis": dict(bias_analysis)
}

# JSON export; default=str stringifies values json cannot encode (e.g. datetimes).
with open('llm_number_convergence_results.json', 'w') as f:
    json.dump(output_data, f, indent=2, default=str)

# Flat CSV export of the per-selection records.
df_results.to_csv('llm_number_selections.csv', index=False)

for line in (
    "✅ Dati salvati con successo!",
    "📁 Files generati:",
    "   - llm_number_convergence_results.json",
    "   - llm_number_selections.csv",
):
    print(line)

# %% Cell 13: Visualizzazione Interattiva Finale
display(Markdown("## 🎨 Visualizzazione Finale Interattiva"))

# Summary dashboard: a 3x3 grid holding five panels, saved as a PNG.
fig = plt.figure(figsize=(16, 10))
gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)

# 1. Global distribution (top row, first two columns).
# NOTE(review): selections from every range are pooled into one histogram,
# so the x axis spans the widest range tested.
ax1 = fig.add_subplot(gs[0, :2])
all_nums = [r['number'] for r in experiment.results]
ax1.hist(all_nums, bins=50, color='skyblue', edgecolor='navy', alpha=0.7)
ax1.set_title('Distribuzione Globale delle Selezioni', fontsize=14)
ax1.set_xlabel('Numero')
ax1.set_ylabel('Frequenza')
ax1.grid(True, alpha=0.3)

# 2. Number of recorded selections per model (top row, last column).
ax2 = fig.add_subplot(gs[0, 2])
model_counts = df_results.groupby('model')['number'].count()
model_counts.plot(kind='bar', ax=ax2, color='lightcoral')
ax2.set_title('Selezioni per Modello', fontsize=12)
ax2.set_xlabel('')
ax2.set_ylabel('Numero Selezioni')
plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45, ha='right')

# 3. Convergence heatmap for range 1-50 (whole middle row).
# .get() is used so a missing '1-50' entry is not created in the defaultdict.
ax3 = fig.add_subplot(gs[1, :])
range_1_50_data = all_results.get('1-50', {})
if range_1_50_data:
    # Rows = models, columns = numbers 1..50 (column num-1 holds number num).
    convergence_matrix = np.zeros((len(range_1_50_data), 50))
    for i, (model, numbers) in enumerate(range_1_50_data.items()):
        for num in numbers:
            convergence_matrix[i, num-1] += 1
    
    # Columns 14..34 hold the numbers 15-35: 21 columns, matching the 21 ticks.
    im = ax3.imshow(convergence_matrix[:, 14:35], aspect='auto', cmap='YlOrRd')
    ax3.set_xticks(range(21))
    ax3.set_xticklabels(range(15, 36))
    ax3.set_yticks(range(len(range_1_50_data)))
    ax3.set_yticklabels(list(range_1_50_data.keys()))
    ax3.set_xlabel('Numero')
    ax3.set_title('Heatmap Convergenza - Range 1-50 (Focus: 15-35)', fontsize=14)
    plt.colorbar(im, ax=ax3, label='Frequenza')

# 4. Normalised entropy per range (bottom row, first column).
ax4 = fig.add_subplot(gs[2, 0])
entropy_data = []
for range_key, range_data in all_results.items():
    # NOTE(review): splitting the label on '-' fails if a bound is negative.
    min_val, max_val = map(int, range_key.split('-'))
    for model, numbers in range_data.items():
        if numbers:
            entropy = AnalysisTools.calculate_entropy(numbers, min_val, max_val)
            entropy_data.append({'Range': range_key, 'Entropy': entropy})

if entropy_data:
    df_entropy = pd.DataFrame(entropy_data)
    df_entropy.boxplot(column='Entropy', by='Range', ax=ax4)
    ax4.set_title('Entropia per Range', fontsize=12)
    ax4.set_xlabel('Range')
    ax4.set_ylabel('Entropia Normalizzata')

# 5. Selections over time (bottom row, last two columns).
# NOTE(review): df_results is modified in place here - 'timestamp' is
# converted and a 'minute' column (minutes since the first record) is added.
ax5 = fig.add_subplot(gs[2, 1:])
df_results['timestamp'] = pd.to_datetime(df_results['timestamp'])
df_results['minute'] = (df_results['timestamp'] - df_results['timestamp'].min()).dt.total_seconds() / 60

for model in df_results['model'].unique():
    model_data = df_results[df_results['model'] == model]
    ax5.scatter(model_data['minute'], model_data['number'], label=model, alpha=0.6, s=30)

ax5.set_xlabel('Tempo (minuti)')
ax5.set_ylabel('Numero Selezionato')
ax5.set_title('Pattern Temporale delle Selezioni', fontsize=12)
ax5.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax5.grid(True, alpha=0.3)

# Figure-level title, then write the PNG before showing the figure.
plt.suptitle('Analisi Convergenza LLM - Dashboard Riassuntivo', fontsize=16, y=0.98)
plt.tight_layout()
plt.savefig('llm_convergence_dashboard.png', dpi=300, bbox_inches='tight')
plt.show()

print("🎉 Esperimento completato con successo!")
print("📊 Dashboard salvato come 'llm_convergence_dashboard.png'")

In [2]:
!pip install openai

Collecting openai
  Downloading openai-1.97.0-py3-none-any.whl.metadata (29 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Using cached jiter-0.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Downloading openai-1.97.0-py3-none-any.whl (764 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 765.0/765.0 kB 17.1 MB/s eta 0:00:00
Using cached jiter-0.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (352 kB)
Installing collected packages: jiter, openai
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2/2 [openai]
Successfully installed jiter-0.10.0 openai-1.97.0
