In [9]:
import matplotlib.pyplot as plt
import numpy as np
from tabulate import tabulate

In [10]:
class LLMComparator:
    def __init__(self):
        # Dados das tr√™s fontes diferentes
        self.data_sources = {
            'DeepSeek Source': {
                'ChatGPT (GPT-4)': {
                    'Overall Score': 85,
                    'IFEval': 85,
                    'BBH': 87,
                    'MATH': 70,
                    'GPQA': 80,
                    'MuSR': 80,
                    'MMLU-Pro': 85,
                    'CO2_Emissions': 4.3  # g por consulta
                },
                'DeepSeek-V2': {
                    'Overall Score': 78,
                    'IFEval': 80,
                    'BBH': 80,
                    'MATH': 75,
                    'GPQA': 70,
                    'MuSR': 75,
                    'MMLU-Pro': None,  # N√£o divulgado
                    'CO2_Emissions': 1.8  # g por consulta
                }
            },
            'ChatGPT Source': {
                'ChatGPT (GPT-4)': {
                    'Overall Score': 85,
                    'IFEval': 90,
                    'BBH': 87,
                    'MATH': 80,
                    'GPQA': 88,
                    'MuSR': 86,
                    'MMLU-Pro': 92,
                    'CO2_Emissions': 450  # g por resposta (0.45kg)
                },
                'DeepSeek-V2': {
                    'Overall Score': 80,
                    'IFEval': 80,
                    'BBH': 82,
                    'MATH': 75,
                    'GPQA': 85,
                    'MuSR': 78,
                    'MMLU-Pro': 85,
                    'CO2_Emissions': 600  # g por resposta (0.60kg)
                }
            },
            'Gemini Source': {
                'ChatGPT (GPT-4)': {
                    'Overall Score': 90,
                    'IFEval': 90,
                    'BBH': 85,
                    'MATH': 60,
                    'GPQA': 60,
                    'MuSR': 85,
                    'MMLU-Pro': 90,
                    'CO2_Emissions': None  # N√£o divulgado
                },
                'DeepSeek-V2': {
                    'Overall Score': 80,
                    'IFEval': 85,
                    'BBH': 75,
                    'MATH': 50,
                    'GPQA': 50,
                    'MuSR': 80,
                    'MMLU-Pro': 80,
                    'CO2_Emissions': None  # N√£o divulgado
                }
            }
        }

        self.current_source = 'DeepSeek Source'
        self.criteria = [
            'Overall Score', 'IFEval', 'BBH', 'MATH',
            'GPQA', 'MuSR', 'MMLU-Pro'
        ]

    def set_data_source(self, source_name):
        """Define a fonte de dados atual"""
        if source_name in self.data_sources:
            self.current_source = source_name
            print(f"‚úÖ Fonte alterada para: {source_name}")
        else:
            print("‚ùå Fonte n√£o encontrada. Fontes dispon√≠veis: DeepSeek Source, ChatGPT Source, Gemini Source")

    def get_current_models(self):
        """Retorna os modelos da fonte atual"""
        return self.data_sources[self.current_source]

    def display_comparison_table(self):
        """Exibe tabela comparativa formatada da fonte atual"""
        models = self.get_current_models()
        table_data = []
        headers = ['Crit√©rio', 'ChatGPT (GPT-4)', 'DeepSeek-V2', 'Diferen√ßa']

        for criterion in self.criteria:
            chatgpt_score = models['ChatGPT (GPT-4)'][criterion]
            deepseek_score = models['DeepSeek-V2'][criterion]

            if chatgpt_score is None or deepseek_score is None:
                chatgpt_display = "N/D" if chatgpt_score is None else f"{chatgpt_score}%"
                deepseek_display = "N/D" if deepseek_score is None else f"{deepseek_score}%"
                difference_display = "N/D"
            else:
                chatgpt_display = f"{chatgpt_score}%"
                deepseek_display = f"{deepseek_score}%"
                difference = chatgpt_score - deepseek_score
                difference_display = f"{difference:+.1f}%"

            table_data.append([
                criterion,
                chatgpt_display,
                deepseek_display,
                difference_display
            ])

        # Adiciona emiss√µes de CO2
        chatgpt_co2 = models['ChatGPT (GPT-4)']['CO2_Emissions']
        deepseek_co2 = models['DeepSeek-V2']['CO2_Emissions']

        if chatgpt_co2 is None or deepseek_co2 is None:
            co2_difference = "N/D"
            co2_reduction = "N/D"
        else:
            # Converter para a mesma unidade (gramas) para compara√ß√£o
            chatgpt_g = chatgpt_co2 if chatgpt_co2 < 100 else chatgpt_co2  # j√° est√° em gramas ou precisa converter?
            deepseek_g = deepseek_co2 if deepseek_co2 < 100 else deepseek_co2

            if self.current_source == 'ChatGPT Source':
                # Nesta fonte, os valores est√£o em g por resposta (450g, 600g)
                co2_difference = f"{deepseek_g - chatgpt_g:+.1f}g"
                co2_reduction = f"{(chatgpt_g - deepseek_g) / chatgpt_g * 100:.1f}%"
            else:
                # DeepSeek Source: valores em g por consulta
                co2_difference = f"{deepseek_g - chatgpt_g:+.1f}g"
                co2_reduction = f"{(chatgpt_g - deepseek_g) / chatgpt_g * 100:.1f}%"

        co2_unit = "g/consulta" if self.current_source == 'DeepSeek Source' else "g/resposta"

        table_data.append([
            f'CO2 Emissions ({co2_unit})',
            f"{chatgpt_co2 if chatgpt_co2 else 'N/D'}",
            f"{deepseek_co2 if deepseek_co2 else 'N/D'}",
            co2_reduction if co2_reduction != "N/D" else "N/D"
        ])

        print(f"üîç COMPARA√á√ÉO: {self.current_source}")
        print("=" * 70)
        print(tabulate(table_data, headers=headers, tablefmt='grid'))
        print("\n" + "=" * 70)

    def display_all_sources_comparison(self):
        """Exibe compara√ß√£o entre todas as fontes de dados"""
        print("üìä COMPARA√á√ÉO ENTRE TODAS AS FONTES DE DADOS")
        print("=" * 90)

        all_data = []
        headers = ['Crit√©rio', 'DeepSeek Source', 'ChatGPT Source', 'Gemini Source', 'Varia√ß√£o Max']

        for criterion in self.criteria + ['CO2_Emissions']:
            row = [criterion]
            values = []

            for source_name in ['DeepSeek Source', 'ChatGPT Source', 'Gemini Source']:
                models = self.data_sources[source_name]
                chatgpt_val = models['ChatGPT (GPT-4)'][criterion]
                deepseek_val = models['DeepSeek-V2'][criterion]

                if criterion == 'CO2_Emissions':
                    if chatgpt_val is None or deepseek_val is None:
                        display_val = "ChatGPT: N/D\nDeepSeek: N/D"
                    else:
                        unit = "g/consulta" if source_name == 'DeepSeek Source' else "g/resposta"
                        display_val = f"ChatGPT: {chatgpt_val}{unit}\nDeepSeek: {deepseek_val}{unit}"
                else:
                    if chatgpt_val is None or deepseek_val is None:
                        display_val = "ChatGPT: N/D\nDeepSeek: N/D"
                    else:
                        display_val = f"ChatGPT: {chatgpt_val}%\nDeepSeek: {deepseek_val}%"

                row.append(display_val)
                # Modified condition to ensure both values are not None before adding to 'values' for comparison
                if chatgpt_val is not None and deepseek_val is not None and criterion != 'CO2_Emissions':
                    values.extend([chatgpt_val, deepseek_val])

            # Calcular varia√ß√£o m√°xima
            if values and criterion != 'CO2_Emissions':
                variation = max(values) - min(values)
                row.append(f"{variation:.1f}%")
            else:
                row.append("N/D")

            all_data.append(row)

        print(tabulate(all_data, headers=headers, tablefmt='grid'))
        print("\n" + "=" * 90)

    def create_radar_chart(self):
        """Cria gr√°fico radar para compara√ß√£o visual da fonte atual"""
        models = self.get_current_models()
        # Use criteria excluding 'MMLU-Pro' as the base for the radar chart
        criteria_base = self.criteria[:-1]

        # Filter criteria where BOTH models have non-None data
        criteria_to_plot = []
        chatgpt_scores_filtered = []
        deepseek_scores_filtered = []

        for c in criteria_base:
            chatgpt_val = models['ChatGPT (GPT-4)'][c]
            deepseek_val = models['DeepSeek-V2'][c]
            if chatgpt_val is not None and deepseek_val is not None:
                criteria_to_plot.append(c)
                chatgpt_scores_filtered.append(chatgpt_val)
                deepseek_scores_filtered.append(deepseek_val)

        if not criteria_to_plot:
            print("‚ùå Dados insuficientes para criar o gr√°fico radar")
            return

        # Prepare angles and data for radar plot (closing the loop)
        num_criteria = len(criteria_to_plot)
        # Generate angles for N criteria, ending before 2*pi
        angles = np.linspace(0, 2*np.pi, num_criteria, endpoint=False).tolist()

        # Add the first point to the end of angles and scores to close the circular plot
        angles_closed = angles + [angles[0]]
        chatgpt_scores_closed = chatgpt_scores_filtered + [chatgpt_scores_filtered[0]]
        deepseek_scores_closed = deepseek_scores_filtered + [deepseek_scores_filtered[0]]

        fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(projection='polar'))

        # Plot using the closed lists
        ax.plot(angles_closed, chatgpt_scores_closed, 'o-', linewidth=2, label='ChatGPT (GPT-4)', color='#10a37f')
        ax.fill(angles_closed, chatgpt_scores_closed, alpha=0.25, color='#10a37f')

        ax.plot(angles_closed, deepseek_scores_closed, 'o-', linewidth=2, label='DeepSeek-V2', color='#ff6b35')
        ax.fill(angles_closed, deepseek_scores_closed, alpha=0.25, color='#ff6b35')

        # Use original angles and criteria for thetagrids
        ax.set_thetagrids(np.degrees(angles), criteria_to_plot)
        ax.set_ylim(0, 100)
        ax.set_yticks([20, 40, 60, 80, 100])
        ax.grid(True)

        plt.title(f'Compara√ß√£o de Desempenho: {self.current_source}\n', size=14, fontweight='bold')
        plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.0))
        plt.tight_layout()
        plt.show()

    def create_bar_chart(self):
        """Cria gr√°fico de barras para compara√ß√£o lado a lado da fonte atual"""
        models = self.get_current_models()
        criteria = [c for c in self.criteria[:-1] if models['ChatGPT (GPT-4)'][c] is not None and models['DeepSeek-V2'][c] is not None]

        if not criteria:
            print("‚ùå Dados insuficientes para criar o gr√°fico de barras")
            return

        chatgpt_scores = [models['ChatGPT (GPT-4)'][c] for c in criteria]
        deepseek_scores = [models['DeepSeek-V2'][c] for c in criteria]

        x = np.arange(len(criteria))
        width = 0.35

        fig, ax = plt.subplots(figsize=(12, 6))

        bars1 = ax.bar(x - width/2, chatgpt_scores, width, label='ChatGPT (GPT-4)', color='#10a37f', alpha=0.8)
        bars2 = ax.bar(x + width/2, deepseek_scores, width, label='DeepSeek-V2', color='#ff6b35', alpha=0.8)

        ax.set_xlabel('Crit√©rios de Avalia√ß√£o')
        ax.set_ylabel('Pontua√ß√£o (%)')
        ax.set_title(f'Compara√ß√£o Detalhada por Crit√©rio - {self.current_source}')
        ax.set_xticks(x)
        ax.set_xticklabels(criteria, rotation=45, ha='right')
        ax.legend()
        ax.set_ylim(0, 100)

        # Adicionar valores nas barras
        for bar in bars1:
            height = bar.get_height()
            ax.annotate(f'{height}%',
                       xy=(bar.get_x() + bar.get_width() / 2, height),
                       xytext=(0, 3),
                       textcoords="offset points",
                       ha='center', va='bottom')

        for bar in bars2:
            height = bar.get_height()
            ax.annotate(f'{height}%',
                       xy=(bar.get_x() + bar.get_width() / 2, height),
                       xytext=(0, 3),
                       textcoords="offset points",
                       ha='center', va='bottom')

        plt.tight_layout()
        plt.show()

    def environmental_impact_comparison(self):
        """Mostra compara√ß√£o de impacto ambiental da fonte atual"""
        models = self.get_current_models()
        chatgpt_co2 = models['ChatGPT (GPT-4)']['CO2_Emissions']
        deepseek_co2 = models['DeepSeek-V2']['CO2_Emissions']

        if chatgpt_co2 is None or deepseek_co2 is None:
            print("üå± DADOS DE EMISS√ïES N√ÉO DISPON√çVEIS PARA ESTA FONTE")
            return

        unit = "g por consulta" if self.current_source == 'DeepSeek Source' else "g por resposta"

        reduction = ((chatgpt_co2 - deepseek_co2) / chatgpt_co2) * 100
        efficiency_ratio = chatgpt_co2 / deepseek_co2

        print("üå± AN√ÅLISE DE IMPACTO AMBIENTAL")
        print("=" * 50)
        print(f"ChatGPT (GPT-4): {chatgpt_co2}{unit}")
        print(f"DeepSeek-V2: {deepseek_co2}{unit}")
        print(f"Redu√ß√£o: {reduction:.1f}% menos emiss√µes")
        print(f"Efici√™ncia: DeepSeek √© {efficiency_ratio:.1f}x mais eficiente")
        print("\nüí° Interpreta√ß√£o:")
        print("- DeepSeek emite aproximadamente 58% menos CO‚ÇÇ que ChatGPT")
        print("- Arquitetura Mixture of Experts (MoE) √© mais sustent√°vel")
        print("- Impacto significativo quando escalado para milh√µes de usu√°rios")

    def recommendation_engine(self, user_priority):
        """
        Sistema de recomenda√ß√£o baseado na prioridade do usu√°rio
        Considera dados consolidados de todas as fontes
        """
        # Calcular m√©dias de todas as fontes dispon√≠veis
        chatgpt_scores = []
        deepseek_scores = []

        for source_name, models in self.data_sources.items():
            for criterion in self.criteria:
                chatgpt_val = models['ChatGPT (GPT-4)'][criterion]
                deepseek_val = models['DeepSeek-V2'][criterion]

                if chatgpt_val is not None:
                    chatgpt_scores.append(chatgpt_val)
                if deepseek_val is not None:
                    deepseek_scores.append(deepseek_val)

        avg_chatgpt = np.mean(chatgpt_scores) if chatgpt_scores else 0
        avg_deepseek = np.mean(deepseek_scores) if deepseek_scores else 0

        # Updated priorities based on user request
        priorities = {
            'logica': ['DeepSeek-V2', 'Superior em tarefas matem√°ticas e l√≥gicas'],
            'texto': ['ChatGPT (GPT-4)', 'Melhor para gera√ß√£o e compreens√£o de texto']
        }

        if user_priority in priorities:
            recommendation, reason = priorities[user_priority]
            print(f"\nüéØ RECOMENDA√á√ÉO PARA PRIORIDADE: {user_priority.upper()}")
            print("=" * 60)
            print(f"Modelo Recomendado: {recommendation}")
            print(f"Motivo: {reason}")

            if user_priority == 'logica':
                print(f"\nüìä Performance M√©dia: DeepSeek {avg_deepseek:.1f}% vs ChatGPT {avg_chatgpt:.1f}%")
                print("‚úÖ Vantagem: Sustent√°vel e econ√¥mico, com desempenho competitivo")
            elif user_priority == 'texto':
                print(f"\nüìä Performance M√©dia: ChatGPT {avg_chatgpt:.1f}% vs DeepSeek {avg_deepseek:.1f}%")
                print("‚úÖ Vantagem: Flu√™ncia textual superior e coer√™ncia contextual")
        else:
            print("Prioridade n√£o reconhecida. Use: logica, texto")

In [11]:
def main():
    """Run the interactive text menu of the comparator until the user exits.

    Options 1-5 call the matching ``LLMComparator`` view and 6 exits.
    Option 7 asks for a source name and passes it to ``set_data_source``;
    option 8 draws the radar chart. A closed or interrupted input stream
    ends the loop instead of raising.
    """
    comparator = LLMComparator()

    def ask_recommendation():
        print("\nPrioridades dispon√≠veis: logica, texto")
        priority = input("Digite sua prioridade principal: ").strip().lower()
        comparator.recommendation_engine(priority)

    def ask_data_source():
        print("\nFontes dispon√≠veis: " + ", ".join(comparator.data_sources))
        # set_data_source validates the name and reports the outcome itself.
        comparator.set_data_source(input("Digite o nome da fonte: ").strip())

    # Menu key -> action. '6' (exit) is handled in the loop because it breaks.
    actions = {
        '1': comparator.display_comparison_table,
        '2': comparator.display_all_sources_comparison,
        '3': comparator.create_bar_chart,
        '4': comparator.environmental_impact_comparison,
        '5': ask_recommendation,
        '7': ask_data_source,
        '8': comparator.create_radar_chart,
    }

    while True:
        print("\n" + "="*70)
        print("ü§ñ COMPARADOR AVAN√áADO DE LLMs: M√∫ltiplas Fontes de Dados")
        print("="*70)
        print("1. üìä Ver Tabela Comparativa (Fonte Atual)")
        print("2. üåê Compara√ß√£o entre Todas as Fontes")
        print("3. üìä Gr√°fico de Barras")
        print("4. üå± An√°lise de Impacto Ambiental")
        print("5. üí° Sistema de Recomenda√ß√£o")
        print("6. üö™ Sair")
        print("7. üåê Alterar Fonte de Dados")
        print("8. üìä Gr√°fico Radar")
        print("="*70)
        print(f"Fonte atual: {comparator.current_source}")

        try:
            choice = input("\nEscolha uma op√ß√£o (1-8): ").strip()

            if choice == '6':
                print("Obrigado por usar o comparador avan√ßado! üëã")
                break

            action = actions.get(choice)
            if action is None:
                print("‚ùå Op√ß√£o inv√°lida! Tente novamente.")
            else:
                action()

            input("\nPressione Enter para continuar...")
        except (EOFError, KeyboardInterrupt):
            # No more input (closed stdin) or the user interrupted: leave
            # the menu cleanly rather than surfacing a traceback.
            print("\nObrigado por usar o comparador avan√ßado! üëã")
            break

In [12]:
if __name__ == "__main__":
    # Required dependencies (install them from a terminal beforehand):
    # pip install matplotlib numpy tabulate

    # Start the interactive menu when this code runs as the main module.
    main()


🤖 COMPARADOR AVANÇADO DE LLMs: Múltiplas Fontes de Dados
1. 📊 Ver Tabela Comparativa (Fonte Atual)
2. 🌐 Comparação entre Todas as Fontes
3. 📊 Gráfico de Barras
4. 🌱 Análise de Impacto Ambiental
5. 💡 Sistema de Recomendação
6. 🚪 Sair
Fonte atual: DeepSeek Source

Escolha uma opção (1-6): 2
📊 COMPARAÇÃO ENTRE TODAS AS FONTES DE DADOS
+---------------+-------------------------+-------------------------+-----------------+----------------+
| Critério      | DeepSeek Source         | ChatGPT Source          | Gemini Source   | Variação Max   |
| Overall Score | ChatGPT: 85%            | ChatGPT: 85%            | ChatGPT: 90%    | 12.0%          |
|               | DeepSeek: 78%           | DeepSeek: 80%           | DeepSeek: 80%   |                |
+---------------+-------------------------+-------------------------+-----------------+----------------+
| IFEval        | ChatGPT: 85%            | ChatGPT: 90%            | ChatGPT: 90%    | 10.0%        