# Calculate Embeddings without hashtags

Ajuda a calcular, de maneira fácil e prática, os embeddings dos posts, desconsiderando os posts compostos apenas por hashtags.

In [25]:
import pandas as pd
import re
from sentence_transformers import SentenceTransformer

In [26]:
# 1. Load the sentence-embedding model under test.
# NOTE(review): the column names suggest Portuguese posts; confirm this
# model is appropriate for that language.
type_model = 'sentence-transformers/all-MiniLM-L6-v2'
model = SentenceTransformer(type_model)

# 2. Spreadsheet holding the sentences (here, the filtered posts) and the
#    spreadsheet that will receive the computed features.
file_path, file_path_features = (
    'Post-filtrado.xlsx',
    "Embeddings_Feature_without_Hashtags_allMini.xlsx",
)

# Names of the spreadsheet columns used by the cells below.
column_text, column_id, column_author, column_likes = (
    "Texto",
    "ID",
    "Autor",
    "Curtidas",
)

In [27]:
# 3. Load the spreadsheet and discard rows whose text cell is missing (NaN).
rf = pd.read_excel(file_path).dropna(subset=[column_text])


In [28]:
# 4. Helper that tells whether a text consists solely of hashtags
def contains_only_hashtags(text):
    """Return True when every whitespace-separated token of ``text`` is a hashtag.

    A hashtag is a token that starts with ``#`` and is followed by at least
    one more character. A text with no tokens at all (empty or
    whitespace-only) also yields True, since it carries nothing besides
    hashtags.

    Args:
        text: The post text to inspect (a ``str``).

    Returns:
        bool: True if the text holds nothing but hashtags, False otherwise.
    """
    # Validate each token on its own. Counting '#\S+' matches over the whole
    # string and comparing that with the token count would wrongly accept
    # tokens that merely contain a '#', such as "a#b".
    return all(
        token.startswith('#') and len(token) > 1
        for token in text.split()
    )

# Keep only the posts whose text is not made up solely of hashtags.
filtered_rf = rf.loc[~rf[column_text].apply(contains_only_hashtags)]

# Extract the columns of interest from the surviving rows as plain lists.
ids, authors, sentences, likes = (
    filtered_rf[column_name].tolist()
    for column_name in (column_id, column_author, column_text, column_likes)
)

In [29]:
# Encode every sentence with the model and tabulate the vectors, labelling
# the embedding dimensions x1, x2, ...
embeddings = model.encode(sentences)
df_embeddings = pd.DataFrame(embeddings).rename(columns=lambda i: f'x{i+1}')

# Put the post metadata side by side with its embedding; both frames carry
# the same default 0..n-1 index, so rows line up positionally.
df_final = pd.concat(
    [
        pd.DataFrame({
            column_id: ids,
            "Candidato": authors,
            column_likes: likes,
        }),
        df_embeddings,
    ],
    axis=1,
)



In [30]:
# Write the metadata + embeddings table to the features spreadsheet,
# leaving the DataFrame index out of the file.
df_final.to_excel(excel_writer=file_path_features, index=False)

In [31]:
# Preview the first five rows of the final table.
df_final.head(n=5)

Unnamed: 0,ID,Candidato,Curtidas,x1,x2,x3,x4,x5,x6,x7,...,x375,x376,x377,x378,x379,x380,x381,x382,x383,x384
0,7115033431473474822,Lula,11700,-0.009553,0.061238,0.019681,-0.049951,-0.063027,-0.004563,0.065209,...,0.014746,-0.040006,0.092884,0.073248,0.002842,-0.00905,0.043108,0.078581,0.029602,-0.012377
1,7115174031162215686,Lula,33600,0.01339,0.045046,-0.025437,0.024324,-0.013387,0.017399,0.036831,...,-0.019052,-0.002531,0.070738,-0.007486,-0.018966,-0.006195,0.07821,0.030236,-0.024722,0.033967
2,7115357413712153861,Lula,34600,-0.029225,0.039265,0.029184,-0.049127,-0.011882,0.010364,0.069706,...,-0.010907,-0.075849,0.112,0.053613,0.059229,-0.013182,0.07248,0.07246,0.005212,-0.002076
3,7115560675824422149,Lula,47500,-0.030557,0.024923,-0.03213,0.041768,-0.030083,0.014276,0.03692,...,-0.015045,-0.050335,0.0402,0.057963,-0.002836,0.001662,0.035748,0.058508,-0.031289,-0.032341
4,7115738690105756933,Jair Bolsonaro,8068,-0.02016,0.058555,-0.038323,0.017131,-0.025525,-0.005567,0.100013,...,-0.027758,0.011872,0.126488,0.015329,-0.012235,0.018455,0.076294,0.005112,0.087423,-0.074225
