In [0]:
%pip install pandas==1.5.3
%pip install openai
%pip install transformers torch sklearn
%pip install seaborn==0.11.2 matplotlib==3.4.3

In [0]:
# Declare a free-text notebook widget named "tweet" with an empty default value.
dbutils.widgets.text("tweet", "")


# Read the widget's current value.
# NOTE(review): `new_tweet` is not referenced by any other cell visible in this notebook.
new_tweet = dbutils.widgets.get("tweet")


In [0]:
# DATA LOADING

import pandas as pd

# Read the labelled speeches CSV into a pandas DataFrame.
df_base = pd.read_csv(
    '/Workspace/Users/flavio.assis@vale.com/TCC/left_right_speeches/speeches.csv',
    sep=','
)
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly instead of being wrapped in display() (which would only show "None").
df_base.info()

In [0]:
from transformers import AutoTokenizer

# Load the tokenizer of the Portuguese BERT model
tokenizer = AutoTokenizer.from_pretrained("neuralmind/bert-base-portuguese-cased")

# Count the tokens of each text
df_base["num_tokens"] = df_base["texto"].apply(lambda x: len(tokenizer.tokenize(x)))

# Keep only speeches with between 20 and 256 tokens (inclusive).
# Author's note (translated): model 2, changed from 128 = [12390, 7552] to 20 = [16260, 11162] = 27422
# (presumably the per-class row counts for each lower bound -- confirm).
# .copy() makes `df` an independent frame, so adding columns to it later does not
# write into a slice of df_base (which triggers pandas' SettingWithCopyWarning).
df = df_base[(df_base["num_tokens"] <= 256) & (df_base["num_tokens"] >= 20)].copy()


In [0]:
# Number of rows per label value in the token-filtered DataFrame.
df['label'].value_counts()

In [0]:



# Give every filtered speech a sequential numeric id starting at 29940.
# assign() returns a new frame, so this never writes into a slice of df_base
# (the original in-place column assignment triggered SettingWithCopyWarning).
df = df.assign(id=range(29940, 29940 + len(df)))

# Draw the training sample per class with a fixed seed.
# Author's note (translated): the 'Direita' sample size was reduced.
df_0 = df[df["label"] == 'Direita'].sample(n=4000, random_state=42)
df_1 = df[df["label"] == 'Esquerda'].sample(n=6000, random_state=42)

# Concatenate the two samples
df_train = pd.concat([df_0, df_1])

# Every row whose index was not sampled for training becomes test data.
df_test = df[~df.index.isin(df_train.index)]

# DataFrame.info() prints its own report and returns None; printing its return
# value only appended a stray "None", so the heading and the report are separate.
print('train:')
df_train.info()
print('test:')
df_test.info()

In [0]:
# Render the test split with the notebook's rich table display.
display(df_test)

In [0]:
# Number of rows per label value in the test split.
df_test['label'].value_counts()

In [0]:
# Convert both pandas splits to Spark DataFrames (test first, then train).
df_test_spark, df_train_spark = (
    spark.createDataFrame(split_pdf) for split_pdf in (df_test, df_train)
)

# Persist each split as a table, replacing whatever was stored under that name.
for split_sdf, table_name in (
    (df_test_spark, "db.analises_speeches_test_m2"),
    (df_train_spark, "db.analises_speeches_train_m2"),
):
    split_sdf.write.mode("overwrite").saveAsTable(table_name)

In [0]:
%sql
-- Rebuild db.analises_speeches_results from its own current contents, adding
-- texto_length (character count of texto) and texto_length_group (a bucket label).
-- NOTE(review): the statement selects * from the table it replaces; if texto_length /
-- texto_length_group already exist (e.g. on a second run) they would presumably be
-- selected twice -- confirm before re-running.
-- NOTE(review): these bucket bounds (438-1336) differ from the 50-1500 buckets that a
-- later UPDATE cell in this notebook writes for model '2.0' rows.
CREATE OR REPLACE TABLE db.analises_speeches_results AS
SELECT 
    *, 
    length(texto) as texto_length,
    CASE 
        WHEN length(texto) BETWEEN 438 AND 650 THEN '438-650'
        WHEN length(texto) BETWEEN 651 AND 900 THEN '651-900'
        WHEN length(texto) BETWEEN 901 AND 1150 THEN '901-1150'
        WHEN length(texto) BETWEEN 1151 AND 1336 THEN '1151-1336'
        -- lengths outside 438..1336 fall through to this label
        ELSE 'Out of range'
    END AS texto_length_group
FROM db.analises_speeches_results

In [0]:
# Read the persisted test and train tables back into pandas (test first, then train).
df_test, df_train = (
    spark.table(f"db.analises_speeches_{split_name}_m2").toPandas()
    for split_name in ("test", "train")
)

In [0]:
# DataFrame.info() prints its report and returns None; wrapping it in print()
# only added a stray "None" line after each report.
df_test.info()
df_train.info()

In [0]:
import os
import numpy as np
import pickle
import pandas as pd
from transformers import AutoTokenizer, AutoModel
import torch
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans
from datetime import datetime
from openai import OpenAI

class SpeechLabelClassifier:
    """Few-shot classifier that labels political speeches as 'Esquerda' or 'Direita'.

    Texts are embedded by mean-pooling the last hidden state of a Hugging Face
    encoder. A small set of representative training speeches per label is chosen
    with KMeans; for a new speech the most cosine-similar representatives are put
    into a prompt and a chat-completion LLM returns the label.
    """

    def __init__(self, model_name='neuralmind/bert-base-portuguese-cased', 
                 examples_per_category=5, 
                 llm_model="databricks-meta-llama-3-3-70b-instruct",
                 save_path="/Workspace/Users/flavio.assis@vale.com/TCC/left_right_speeches/"):
        """Load the encoder, create the LLM client and make sure ``save_path`` exists.

        Args:
            model_name: Hugging Face model id used for both tokenizer and encoder.
            examples_per_category: number of representative examples kept per label.
            llm_model: model name sent to the chat-completions endpoint.
            save_path: directory where the embeddings pickle is written and read.
        """
        # Model configuration
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        self.examples_per_category = examples_per_category
        self.embeddings_cache = {}
        self.reference_examples = None
        self.reference_embeddings = None
        self.save_path = save_path
        # label -> list of keywords, read by find_matching_keywords(). It was never
        # initialised before, so that method raised AttributeError. Empty by default;
        # callers may fill it in.
        self.label_keywords = {}

        # LLM configuration
        # NOTE(review): api_key / base_url look like redacted placeholders; supply the
        # real values from a secret store rather than committing them to the notebook.
        self.llm_client = OpenAI(
            api_key="SECRET_DB_KEY",
            base_url="URL_DB"
        )
        self.llm_model = llm_model

        # The pickle is written and read with open() on `save_path` itself, so the
        # directory is created on that same path (previously a separate "dbfs:"
        # location was created, which open() does not use).
        os.makedirs(save_path, exist_ok=True)

        print(f"Embeddings will be saved to: {self.save_path}")

    def get_embedding(self, text):
        """Return the mean-pooled last-hidden-state embedding of ``text`` as a 1-D array.

        Non-string input is converted with ``str()`` after printing a warning.
        The tokenizer is called with truncation enabled. Any exception is logged
        together with the offending text and re-raised to the caller.
        """
        try:
            if not isinstance(text, str):
                print(f"Warning: Received non-string input: {type(text)}")
                text = str(text)

            inputs = self.tokenizer(text, return_tensors='pt', padding=True, truncation=True)

            # Inference only: no gradients are needed.
            with torch.no_grad():
                outputs = self.model(**inputs)
                # Average over the token dimension, then take the single batch item.
                embedding = outputs.last_hidden_state.mean(dim=1).numpy()[0]
                return embedding

        except Exception as e:
            print(f"Error in get_embedding: {str(e)}")
            print(f"Failed text: {text}")
            raise  # Re-raise so prepare_reference_data can skip the row

    def save_embeddings(self, filename='speechs_embeddings.pkl'):
        """Pickle the reference examples and embeddings to ``save_path/filename``.

        Saving is best-effort: failures are printed, not raised.

        Returns:
            True when the file was written, False when there was nothing to save or
            the write failed (previously None in every case).
        """
        try:
            if self.reference_examples is None or self.reference_embeddings is None:
                print("No embeddings to save - reference data is None")
                return False

            full_path = os.path.join(self.save_path, filename)

            data = {
                'reference_examples': self.reference_examples,
                'reference_embeddings': self.reference_embeddings
            }

            with open(full_path, 'wb') as f:
                pickle.dump(data, f)
            print(f"Embeddings saved successfully to {full_path}")
            return True

        except Exception as e:
            print(f'Error saving embeddings: {str(e)}')
            return False

    def load_embeddings(self, filename='speechs_embeddings.pkl'):
        """Load reference examples and embeddings from ``save_path/filename``.

        Raises whatever ``open`` / ``pickle.load`` raise (e.g. FileNotFoundError).
        """
        full_path = os.path.join(self.save_path, filename)
        # SECURITY: pickle.load can execute arbitrary code; only load files that
        # this class wrote itself.
        with open(full_path, 'rb') as f:
            data = pickle.load(f)
        self.reference_examples = data['reference_examples']
        self.reference_embeddings = data['reference_embeddings']

    def prepare_reference_data(self, df, force_compute=True, filename='speechs_embeddings.pkl'):
        """Select representative examples per label and store them with their embeddings.

        Args:
            df: DataFrame with at least the columns 'texto' and 'label'. May be None
                only when a saved file is going to be loaded instead.
            force_compute: when False and the pickle already exists, load it and
                return without computing anything.
            filename: name of the pickle inside ``save_path``.

        Returns:
            DataFrame of the selected reference examples.

        Raises:
            ValueError: if ``df`` is None/empty when computing, or if no
                representative example could be produced. Other errors are logged
                with a traceback and re-raised.
        """
        try:
            print("\nStarting prepare_reference_data...")
            # Only describe df when there is one; the cached path does not need it.
            if df is not None:
                print(f"Input DataFrame shape: {df.shape}")
                print(f"Columns available: {df.columns.tolist()}")

            full_path = os.path.join(self.save_path, filename)
            print(f"Working with path: {full_path}")

            if not force_compute and os.path.exists(full_path):
                print("Loading saved embeddings...")
                self.load_embeddings(filename)
                return self.reference_examples

            print("\nComputing new embeddings...")
            if df is None or len(df) == 0:
                raise ValueError("Input DataFrame is empty or None")

            representative_examples = []
            # Embeddings of the chosen examples are kept as they are selected, so
            # they do not have to be recomputed with the encoder afterwards.
            representative_embeddings = []

            unique_label = df['label'].unique()
            print(f"Unique label values found: {unique_label}")

            for label in unique_label:
                print(f"\nProcessing label: {label}")
                label_df = df[df['label'] == label]
                print(f"Number of examples for label {label}: {len(label_df)}")

                embeddings = []
                successful_indices = []

                for idx, row in label_df.iterrows():
                    try:
                        embeddings.append(self.get_embedding(row['texto']))
                        successful_indices.append(idx)
                    except Exception as e:
                        # One bad row must not abort the whole label.
                        print(f"Failed to process row {idx}: {str(e)}")
                        continue

                if not embeddings:
                    print(f"Warning: No successful embeddings for label {label}")
                    continue

                embeddings = np.array(embeddings)
                print(f"Generated {len(embeddings)} embeddings for label {label}")

                if len(embeddings) <= self.examples_per_category:
                    # Too few rows to cluster: keep all of them.
                    chosen_positions = range(len(successful_indices))
                else:
                    kmeans = KMeans(n_clusters=self.examples_per_category, random_state=42)
                    kmeans.fit(embeddings)
                    # For every centroid keep the real example closest to it.
                    chosen_positions = [
                        int(np.linalg.norm(embeddings - centroid, axis=1).argmin())
                        for centroid in kmeans.cluster_centers_
                    ]

                for position in chosen_positions:
                    row_index = successful_indices[position]
                    representative_examples.append(label_df.loc[row_index].to_dict())
                    representative_embeddings.append(embeddings[position])

            print(f"\nTotal representative examples gathered: {len(representative_examples)}")

            if not representative_examples:
                raise ValueError("No representative examples were generated")

            self.reference_examples = pd.DataFrame(representative_examples)
            print(f"Created reference examples DataFrame with shape: {self.reference_examples.shape}")

            self.reference_embeddings = np.array(representative_embeddings)
            print(f"Created reference embeddings array with shape: {self.reference_embeddings.shape}")

            # Saving is best-effort; only claim success when the file was written.
            if self.save_embeddings(filename):
                print(f"Embeddings computation completed and saved to {full_path}")
            else:
                print(f"Embeddings computation completed but NOT saved to {full_path}")

        except Exception as e:
            print(f'Error in prepare_reference_data: {str(e)}')
            import traceback
            print(traceback.format_exc())
            raise

        return self.reference_examples


    #################################################################################

    def find_matching_keywords(self, text):
        """Return ``{label: [keywords found]}`` using case-insensitive substring matching.

        Labels without any matching keyword are omitted. An instance without a
        ``label_keywords`` mapping yields an empty dict.
        """
        keywords_by_label = getattr(self, "label_keywords", None) or {}
        matches = {}
        text_lower = text.lower()

        for label, keywords in keywords_by_label.items():
            label_matches = [keyword for keyword in keywords if keyword.lower() in text_lower]
            if label_matches:
                matches[label] = label_matches

        return matches

    def find_similar_examples(self, new_speech, top_k=5):
        """Return the ``top_k`` reference examples most cosine-similar to ``new_speech``.

        Each item is a dict with keys 'speech', 'label' and 'similarity', ordered
        from most to least similar.

        Raises:
            ValueError: if no reference data has been prepared or loaded yet.
        """
        if self.reference_examples is None or self.reference_embeddings is None:
            raise ValueError(
                "Reference data is not available; call prepare_reference_data() "
                "or load_embeddings() first"
            )

        new_embedding = self.get_embedding(new_speech)
        similarities = cosine_similarity([new_embedding], self.reference_embeddings)[0]
        # argsort is ascending: take the last top_k and reverse for best-first order.
        top_indices = similarities.argsort()[-top_k:][::-1]

        similar_examples = []
        for idx in top_indices:
            example = self.reference_examples.iloc[idx]
            similar_examples.append({
                'speech': example['texto'],
                'label': example['label'],
                'similarity': similarities[idx]
            })

        return similar_examples

    def create_classification_prompt(self, new_speech, similar_examples):
        """Build the Portuguese user prompt: task, similar labelled examples, new speech, instructions."""
        prompt = "Você é um especialista em análise de discursos em português de políticos, para definir se a orientação política deles é de Esquerda ou Direita.\n\n"
        prompt += "Categorias disponíveis:\n"
        prompt += "Esquerda ou Direita\n\n"

        prompt += "Exemplos similares ao novo discurso:\n\n"
        for ex in similar_examples:
            prompt += f"Discurso: {ex['speech']}\n"
            prompt += f"Orientação política: {ex['label']}\n"
            prompt += f"Similaridade: {ex['similarity']:.2f}\n\n"

        prompt += f"Novo discurso para classificar:\n{new_speech}\n\n"
        prompt += "Instruções específicas:\n"
        prompt += "1. Analise o contexto geral do discurso\n"
        prompt += "2. Compare com os exemplos similares fornecidos\n"
        prompt += "3. Considere o que você entende de políticas de Direita e Esquerda\n\n"
        # NOTE(review): the two closing sentences are joined without a separator.
        # Left byte-identical so predictions stay comparable with rows already stored.
        prompt += "Classifique o discurso com Direita ou Esquerda."
        prompt += "Responda apenas com categorias Direita ou Esquerda."

        return prompt

    def classify_speech(self, new_speech):
        """Classify one speech with the LLM, using its 5 most similar reference examples.

        Returns:
            dict with 'predict_label' (the stripped LLM answer, not validated against
            the two class names) and 'similar_examples' (the examples put in the prompt).
        """
        similar_examples = self.find_similar_examples(new_speech, top_k=5)

        prompt = self.create_classification_prompt(new_speech, similar_examples)

        response = self.llm_client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": "Você é um especialista em análise de discursos em português de políticos, para definir se a orientação política deles é de Esquerda ou Direita"
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            model=self.llm_model,
            max_tokens=500,
            temperature=0.3
        )

        classification = response.choices[0].message.content.strip()
        return {
            'predict_label': classification,
            'similar_examples': similar_examples
        }

In [0]:
import time
import pandas as pd

def run_speech_analysis(df_test, classifier, max_retries=6, sleep_time=600):
    """Classify every row of ``df_test`` and append each result to the results table.

    Each row is written to the Delta table ``db.analises_speeches_results`` as soon
    as it is classified, so an interrupted run keeps the rows already processed.

    Args:
        df_test: pandas DataFrame with the columns 'id', 'texto' and 'label'.
        classifier: object whose ``classify_speech(text)`` returns a dict holding
            'predict_label'.
        max_retries: maximum number of attempts per row before it is skipped.
        sleep_time: seconds to wait between two attempts for the same row.

    Returns:
        None.
    """
    processed = 0

    for _, row in df_test.iterrows():
        # Kept across attempts so a failed table write does not re-query the LLM.
        result = None
        for attempt in range(1, max_retries + 1):
            try:
                if result is None:
                    result = classifier.classify_speech(row['texto'])

                new_row = pd.DataFrame(
                    {
                        "id": [str(row['id'])],
                        "texto": [str(row['texto'])],
                        "label": [str(row['label'])],
                        "predict_label": [str(result['predict_label'])],
                        "model": ["2.0"],
                    }
                )

                # Append this single row to the Delta table.
                spark.createDataFrame(new_row).write.format("delta").mode("append").saveAsTable(
                    "db.analises_speeches_results"
                )

                processed += 1
                print(f"Processed {str(row['id'])} , {processed}")
                break  # Exit retry loop on success
            except Exception as e:
                print(f"Error: {e}. Retrying {attempt}/{max_retries}...")
                print(str(row['id']))
                # Wait only when another attempt follows; sleeping after the final
                # failure just delayed the next row by `sleep_time` seconds.
                if attempt < max_retries:
                    time.sleep(sleep_time)
        else:
            # Reached only when no attempt succeeded.
            print(f"Skipping tweet ID {row['id']} after {max_retries} failed attempts.")

    return




In [0]:
# Ids that already have a stored model-2.0 prediction.
classified_ids_query = "SELECT id FROM db.analises_speeches_results WHERE model = '2.0'"
df_results = spark.sql(classified_ids_query).toPandas()

# Stored ids are cast to int before matching against the test split's ids;
# test rows without a stored prediction still have to be classified.
already_classified = df_test['id'].isin(df_results['id'].astype(int))
df_missing_ids = df_test[~already_classified]


In [0]:
# Row counts. pandas' DataFrame.count() returns a per-column Series of non-null
# counts, so len() is used to print one number per frame.
print('test', len(df_test))
print('missing:', len(df_missing_ids))
print('results:', len(df_results))

In [0]:
import warnings
# Hide FutureWarning messages for the rest of the session.
warnings.simplefilter(action="ignore", category=FutureWarning)
# Initialize classifier (loads the encoder and creates the LLM client)
classifier = SpeechLabelClassifier()

# First, prepare the reference data using training data.
# force_compute defaults to True, so embeddings are recomputed on every run.
classifier.prepare_reference_data(df_train)  # Using your training dataset


# Classify only the test rows that do not have a stored prediction yet
run_speech_analysis(df_missing_ids, classifier)



In [0]:
%sql
-- Every stored prediction, plus a boolean flag telling whether it equals the true label.
SELECT
    *,
    predict_label = label AS match
FROM db.analises_speeches_results

In [0]:
%sql
-- Re-bucket texto_length_group by character length for model '2.0' rows whose
-- prediction is one of the two valid class names.
-- The model filter uses a single-quoted literal, like the notebook's other queries:
-- a double-quoted "2.0" is parsed as an identifier when ANSI double-quoted
-- identifiers are enabled.
UPDATE db.analises_speeches_results
SET texto_length_group = 
    CASE 
        WHEN length(texto) BETWEEN 50 AND 250 THEN '50-250'
        WHEN length(texto) BETWEEN 251 AND 500 THEN '251-500'
        WHEN length(texto) BETWEEN 501 AND 750 THEN '501-750'
        WHEN length(texto) BETWEEN 751 AND 1000 THEN '751-1000'
        WHEN length(texto) BETWEEN 1001 AND 1250 THEN '1001-1250'
        WHEN length(texto) BETWEEN 1251 AND 1500 THEN '1251-1500'
        ELSE 'out_of_range'
    END
WHERE model = '2.0' AND predict_label IN ('Direita', 'Esquerda');

In [0]:
%sql
-- Accuracy per text-length bucket for model '2.0', overall and split by true label.
-- Only rows whose prediction is a valid class name are counted. NULLIF guards the
-- per-label ratios against division by zero when a bucket has no rows of that label.
-- The model filter uses a single-quoted literal, like the notebook's other queries
-- (a double-quoted "2.0" is an identifier under ANSI double-quoted identifiers).
SELECT 
    texto_length_group,
    (SUM(CASE WHEN predict_label = label THEN 1 ELSE 0 END) / COUNT(*)) * 100 AS percent_true,
    COUNT(*) AS count_total,
    COUNT(CASE WHEN predict_label = label THEN 1 END) as count_true,
    (SUM(CASE WHEN predict_label = label AND label = 'Direita' THEN 1 ELSE 0 END) /
    NULLIF(COUNT(CASE WHEN label = 'Direita' THEN 1 END), 0)) * 100 AS percent_true_direita,
    COUNT(CASE WHEN label = 'Direita' THEN 1 END) AS count_direita,
    COUNT(CASE WHEN predict_label = label AND label = 'Direita' THEN 1 END) as count_direita_true,
    (SUM(CASE WHEN predict_label = label AND label = 'Esquerda' THEN 1 ELSE 0 END) /
    NULLIF(COUNT(CASE WHEN label = 'Esquerda' THEN 1 END), 0)) * 100 AS percent_true_esquerda,
    COUNT(CASE WHEN label = 'Esquerda' THEN 1 END) AS count_esquerda,
    COUNT(CASE WHEN predict_label = label AND label = 'Esquerda' THEN 1 END) as count_esquerda_true
FROM db.analises_speeches_results
WHERE model = '2.0' AND predict_label IN ('Direita', 'Esquerda')
GROUP BY texto_length_group;

In [0]:
# Accuracy per text-length bucket for model '2.0', overall and split by true label.
query = """
SELECT 
    texto_length_group,
    (SUM(CASE WHEN predict_label = label THEN 1 ELSE 0 END) / COUNT(*)) * 100 AS percent_true,
    COUNT(*) AS count_total,
    COUNT(CASE WHEN predict_label = label THEN 1 END) as count_true,
    (SUM(CASE WHEN predict_label = label AND label = 'Direita' THEN 1 ELSE 0 END) /
    NULLIF(COUNT(CASE WHEN label = 'Direita' THEN 1 END), 0)) * 100 AS percent_true_direita,
    COUNT(CASE WHEN label = 'Direita' THEN 1 END) AS count_direita,
    COUNT(CASE WHEN predict_label = label AND label = 'Direita' THEN 1 END) as count_direita_true,
    (SUM(CASE WHEN predict_label = label AND label = 'Esquerda' THEN 1 ELSE 0 END) /
    NULLIF(COUNT(CASE WHEN label = 'Esquerda' THEN 1 END), 0)) * 100 AS percent_true_esquerda,
    COUNT(CASE WHEN label = 'Esquerda' THEN 1 END) AS count_esquerda,
    COUNT(CASE WHEN predict_label = label AND label = 'Esquerda' THEN 1 END) as count_esquerda_true
FROM db.analises_speeches_results
WHERE model = '2.0' AND predict_label IN ('Direita', 'Esquerda')
GROUP BY texto_length_group
"""

# Execute the query and show the result.
# Stored under its own name: `df_results` already holds the pandas frame of
# classified ids built in an earlier cell, and rebinding it to a Spark DataFrame
# would break that cell's dependants when cells are re-run.
df_group_metrics = spark.sql(query)
display(df_group_metrics)

# Counts per (true label, predicted label) pair, i.e. the confusion matrix in long form
confusion_matrix_query = """
SELECT 
    label,
    predict_label,
    COUNT(*) AS count
FROM db.analises_speeches_results
WHERE model = '2.0' AND predict_label IN ('Direita', 'Esquerda')
GROUP BY label, predict_label
"""

# Execute the query and show the confusion matrix
df_confusion_matrix = spark.sql(confusion_matrix_query)
display(df_confusion_matrix)

In [0]:
%sql
-- Overall accuracy for model '2.0' and accuracy split by true label, counting only
-- rows whose prediction is a valid class name. NULLIF guards the per-label ratios
-- against division by zero.
-- The model filter uses a single-quoted literal, like the notebook's other queries
-- (a double-quoted "2.0" is an identifier under ANSI double-quoted identifiers).
SELECT 
    (SUM(CASE WHEN predict_label = label THEN 1 ELSE 0 END) / COUNT(*)) * 100 AS percent_true,
    COUNT(*) AS count_total,
    COUNT(CASE WHEN predict_label = label THEN 1 END) as count_true,
    (SUM(CASE WHEN predict_label = label AND label = 'Direita' THEN 1 ELSE 0 END) /
    NULLIF(COUNT(CASE WHEN label = 'Direita' THEN 1 END), 0)) * 100 AS percent_true_direita,
    COUNT(CASE WHEN label = 'Direita' THEN 1 END) AS count_direita,
    COUNT(CASE WHEN predict_label = label AND label = 'Direita' THEN 1 END) as count_direita_true,
    (SUM(CASE WHEN predict_label = label AND label = 'Esquerda' THEN 1 ELSE 0 END) /
    NULLIF(COUNT(CASE WHEN label = 'Esquerda' THEN 1 END), 0)) * 100 AS percent_true_esquerda,
    COUNT(CASE WHEN label = 'Esquerda' THEN 1 END) AS count_esquerda,
    COUNT(CASE WHEN predict_label = label AND label = 'Esquerda' THEN 1 END) as count_esquerda_true
FROM db.analises_speeches_results
WHERE model = '2.0' AND predict_label IN ('Direita', 'Esquerda');


In [0]:
# Fetch every model-2.0 prediction whose answer is one of the two valid class names.
results = spark.sql("SELECT * FROM db.analises_speeches_results where model = '2.0' and predict_label in ('Direita','Esquerda')").toPandas()
display(results)
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Compute the metrics from one shared pair of true / predicted columns.
y_true, y_pred = results['label'], results['predict_label']
accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1 = (
    scorer(y_true, y_pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

# Show the results
for metric_name, metric_value in (
    ("Acurácia", accuracy),
    ("Precisão", precision),
    ("Recall", recall),
    ("F1-Score", f1),
):
    print(f"{metric_name}: {metric_value:.4f}")

In [0]:
results_direita = spark.sql("SELECT * FROM db.analises_speeches_results where label =  'Direita' and model = '2.0'").toPandas()
# Precision for a class needs the rows of the OTHER class too (its false positives),
# so the class-level scores are computed over all model-2.0 rows.
results_model_2 = spark.sql("SELECT label, predict_label FROM db.analises_speeches_results where model = '2.0'").toPandas()

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Compute metrics for the 'Direita' class.
# Accuracy is the share of true-'Direita' rows predicted correctly.
accuracy = accuracy_score(results_direita['label'], results_direita['predict_label'])
# Previously precision/recall/F1 were weighted averages over a subset containing only
# true-'Direita' rows, where no false positive can exist, so precision was degenerate.
# labels=['Direita'] restricts the score to that single class.
precision = precision_score(results_model_2['label'], results_model_2['predict_label'], labels=['Direita'], average='macro', zero_division=0)
recall = recall_score(results_model_2['label'], results_model_2['predict_label'], labels=['Direita'], average='macro', zero_division=0)
f1 = f1_score(results_model_2['label'], results_model_2['predict_label'], labels=['Direita'], average='macro', zero_division=0)

# Show the results
print(f"Acurácia: {accuracy:.4f}")
print(f"Precisão: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

In [0]:
results_esquerda = spark.sql("SELECT * FROM db.analises_speeches_results where label =  'Esquerda' and model = '2.0'").toPandas()
# Precision for a class needs the rows of the OTHER class too (its false positives),
# so the class-level scores are computed over all model-2.0 rows.
results_model_2 = spark.sql("SELECT label, predict_label FROM db.analises_speeches_results where model = '2.0'").toPandas()

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Compute metrics for the 'Esquerda' class.
# Accuracy is the share of true-'Esquerda' rows predicted correctly.
accuracy = accuracy_score(results_esquerda['label'], results_esquerda['predict_label'])
# Previously precision/recall/F1 were weighted averages over a subset containing only
# true-'Esquerda' rows, where no false positive can exist, so precision was degenerate.
# labels=['Esquerda'] restricts the score to that single class.
precision = precision_score(results_model_2['label'], results_model_2['predict_label'], labels=['Esquerda'], average='macro', zero_division=0)
recall = recall_score(results_model_2['label'], results_model_2['predict_label'], labels=['Esquerda'], average='macro', zero_division=0)
f1 = f1_score(results_model_2['label'], results_model_2['predict_label'], labels=['Esquerda'], average='macro', zero_division=0)

# Show the results
print(f"Acurácia: {accuracy:.4f}")
print(f"Precisão: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

In [0]:
# Character count of each speech, stored as a new column on `results` (modifies the frame in place).
results['texto_length'] = results['texto'].apply(len)

def length_group(length):
    """Return the bucket label for a text length.

    Buckets are inclusive on both ends: 50-250, 251-500, 501-750, 751-1000,
    1001-1250 and 1251-1500. Any other value yields 'out_of_range'.
    """
    bucket_bounds = (
        (50, 250),
        (251, 500),
        (501, 750),
        (751, 1000),
        (1001, 1250),
        (1251, 1500),
    )
    for lower, upper in bucket_bounds:
        if lower <= length <= upper:
            # The label is the two bounds joined by a dash, e.g. '50-250'.
            return f"{lower}-{upper}"
    return 'out_of_range'

# Bucket every speech by its character count, overwriting any texto_length_group
# column that `results` already had.
results['texto_length_group'] = results['texto_length'].apply(length_group)

display(results)

In [0]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Fix the class order explicitly so the matrix rows/columns always line up with the
# tick labels, instead of depending on which labels happen to occur in the data.
class_names = ['Direita', 'Esquerda']
cm = confusion_matrix(results['label'], results['predict_label'], labels=class_names)
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.xlabel("Predito")
plt.ylabel("Verdadeiro")
plt.title("Matriz de Confusão")
plt.show()

In [0]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Create one confusion-matrix figure per texto_length_group.
# The class order is passed explicitly: a group containing a single class would
# otherwise produce a 1x1 matrix drawn under two tick labels.
class_names = ['Direita', 'Esquerda']
for group in results['texto_length_group'].unique():
    group_results = results[results['texto_length_group'] == group]
    cm = confusion_matrix(group_results['label'], group_results['predict_label'], labels=class_names)
    
    plt.figure(figsize=(10, 7))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
    plt.xlabel("Predito")
    plt.ylabel("Verdadeiro")
    plt.title(f"Matriz de Confusão - Grupo de Comprimento de Texto: {group}")
    plt.show()

In [0]:
import pandas as pd
import matplotlib.pyplot as plt

# Percentage (0-100) of correct predictions for each (texto_length_group, label) pair.
# The values were previously fractions in 0-1 although the column and the axis are
# labelled as percentages (and the SQL metrics use a 0-100 scale). The mean of a
# 0/100 indicator replaces the row-wise groupby.apply.
percentages = (
    results
    .assign(percent_true=(results['predict_label'] == results['label']) * 100.0)
    .groupby(['texto_length_group', 'label'])['percent_true']
    .mean()
    .reset_index()
)

# Pivot the data for easier plotting: one row per length group, one column per label
percentages_pivot = percentages.pivot(index='texto_length_group', columns='label', values='percent_true')

# Define the order of the x-axis; groups not listed here (e.g. 'out_of_range') are dropped
order = ['50-250', '251-500', '501-750', '751-1000', '1001-1250', '1251-1500']
percentages_pivot = percentages_pivot.reindex(order)

# Plot one line per label
plt.figure(figsize=(12, 8))
for label in percentages_pivot.columns:
    plt.plot(percentages_pivot.index, percentages_pivot[label], marker='o', label=label)

plt.xlabel('Texto Length Group')
plt.ylabel('Percent of True Predictions')
plt.title('Percent of True Predictions by Texto Length Group and Label')
plt.legend(title='Label')
plt.grid(True)
plt.show()

In [0]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Create a single figure holding one confusion matrix per texto_length_group.
class_names = ['Direita', 'Esquerda']
length_groups = results['texto_length_group'].unique()

# squeeze=False always returns a 2-D array of Axes; without it a single group yields
# one bare Axes object, which cannot be iterated by zip().
fig, axes = plt.subplots(nrows=1, ncols=len(length_groups), figsize=(20, 7), squeeze=False)

for ax, group in zip(axes.ravel(), length_groups):
    group_results = results[results['texto_length_group'] == group]
    # Explicit class order keeps every matrix 2x2 and aligned with the tick labels,
    # even for a group that contains a single class.
    cm = confusion_matrix(group_results['label'], group_results['predict_label'], labels=class_names)
    
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names, ax=ax)
    ax.set_xlabel("Predito")
    ax.set_ylabel("Verdadeiro")
    ax.set_title(f"Grupo: {group}")

plt.tight_layout()
plt.show()