GENERAL ANALYSIS

In [None]:
pip install pysentimiento


In [None]:
import pandas as pd
import os
import csv
from pysentimiento import create_analyzer
from google.colab import drive
from collections import defaultdict

In [None]:
# Mount Google Drive at /content/drive (Colab only; `drive` comes from google.colab).
drive.mount('/content/drive')

In [None]:
# Build the two Spanish-language (lang="es") pysentimiento analyzers used by the cells below:
# one for the "sentiment" task and one for the "hate_speech" task.
sentiment_analyzer = create_analyzer(task="sentiment", lang="es")
hate_analyzer = create_analyzer(task="hate_speech", lang="es")

In [None]:
def analyze_sentiment_text(text_series, analyzer):
    """Run ``analyzer`` on every text and collect the predicted labels and probabilities.

    Args:
        text_series: Iterable of texts (a pandas Series, a list, ...). Each element is
            passed unchanged to ``analyzer.predict``.
        analyzer: Object whose ``predict(text)`` returns a result exposing ``output``
            (the predicted label) and ``probas`` (the per-label probabilities).

    Returns:
        A ``(labels, scores)`` pair of tuples, in input order: ``labels`` holds each
        result's ``output`` and ``scores`` each result's ``probas``. Both tuples are
        empty when there are no texts.
    """
    predictions = [analyzer.predict(text) for text in text_series]

    # zip(*[]) yields nothing, so unpacking it into two names would raise ValueError.
    if not predictions:
        return (), ()

    labels, scores = zip(*((p.output, p.probas) for p in predictions))
    return labels, scores

def analyze_hate_probabilities(text_series, analyzer):
    """Average the probability of each hate label over all texts.

    Args:
        text_series: Iterable of texts, each passed to ``analyzer.predict``.
        analyzer: Object whose ``predict(text)`` returns a result with a ``probas``
            mapping of label -> probability.

    Returns:
        A dict mapping each label to the sum of its probabilities divided by the
        number of texts, or ``None`` when there are no texts.
    """
    totals = defaultdict(float)
    n_texts = 0

    for n_texts, text in enumerate(text_series, start=1):
        probas = analyzer.predict(text).probas
        for label in probas:
            totals[label] += probas[label]

    if not n_texts:
        return None

    return {label: total / n_texts for label, total in totals.items()}


def process_folder_sentiment(party_path, analyzer=None):
    """Compute the average sentiment score over every text in a party folder.

    Every ``.csv`` file in ``party_path`` is read as tab-separated data. Files that are
    empty, or that lack a ``text`` or an ``id`` column, are skipped. Each non-empty text
    is classified and scored as POS = 1, NEU = 0, NEG = -1.

    Args:
        party_path: Directory containing the party's CSV files.
        analyzer: Sentiment analyzer to use. Defaults to the notebook-level
            ``sentiment_analyzer`` when omitted.

    Returns:
        The mean score as a float, or ``None`` when no text could be scored.
        The result is also printed.

    Raises:
        KeyError: If the analyzer returns a label other than POS, NEU or NEG.
    """
    if analyzer is None:
        analyzer = sentiment_analyzer  # notebook-level default

    sentiment_mapping = {'POS': 1, 'NEU': 0, 'NEG': -1}
    sentiment_scores = []

    for file_name in os.listdir(party_path):
        if not file_name.endswith(".csv"):
            continue
        file_path = os.path.join(party_path, file_name)
        try:
            # Skip malformed rows instead of aborting the whole folder,
            # matching how process_party_hate_avg reads the same files.
            df = pd.read_csv(file_path, delimiter='\t', on_bad_lines='skip')
        except pd.errors.EmptyDataError:
            continue

        if 'text' not in df.columns or 'id' not in df.columns:
            continue

        # Work on a standalone Series of strings: no columns are written back into a
        # filtered slice of df (which triggers SettingWithCopyWarning), and non-string
        # cells are never handed to the analyzer.
        texts = df['text'].dropna().astype(str)
        texts = texts[texts != '']
        if texts.empty:
            continue

        labels, _ = analyze_sentiment_text(texts, analyzer)
        sentiment_scores.extend(sentiment_mapping[label] for label in labels)

    if len(sentiment_scores) == 0:
        print("No sentiment scores computed for this party.")
        return None

    avg_score = sum(sentiment_scores) / len(sentiment_scores)
    print(f"Average sentiment score for party at {party_path}: {avg_score:.3f}")
    return avg_score


def process_party_hate_avg(party_path, analyzer):
    """Average hate-speech label probabilities over every text in a party folder.

    Reads each ``.csv`` file in ``party_path`` as tab-separated data (malformed rows
    are skipped), gathers the non-null, non-empty values of its ``text`` column as
    strings, and passes the pooled texts to ``analyze_hate_probabilities``.

    Args:
        party_path: Directory containing the party's CSV files.
        analyzer: Analyzer forwarded to ``analyze_hate_probabilities``.

    Returns:
        Whatever ``analyze_hate_probabilities`` returns for the pooled texts, or
        ``None`` when no text was found.
    """
    pooled_texts = []
    csv_names = (name for name in os.listdir(party_path) if name.endswith(".csv"))

    for csv_name in csv_names:
        csv_path = os.path.join(party_path, csv_name)
        try:
            frame = pd.read_csv(csv_path, delimiter="\t", on_bad_lines='skip')
        except pd.errors.EmptyDataError:
            # A file with no data contributes nothing.
            continue

        if 'text' in frame.columns:
            cleaned = frame['text'].dropna().astype(str)
            pooled_texts.extend(cleaned[cleaned != ''].tolist())

    return analyze_hate_probabilities(pooled_texts, analyzer) if pooled_texts else None



In [None]:
# Folder whose CSV files are analysed below.
# NOTE(review): 'PARTY_FOLDER' looks like a placeholder — set it to the real folder path before running.
party_path = 'PARTY_FOLDER'

In [None]:
# Average sentiment score (POS = 1, NEU = 0, NEG = -1) over the texts found in the party folder.
process_folder_sentiment(party_path)

In [None]:
# Mean probability of each hate-speech label over the texts found in the party folder.
process_party_hate_avg(party_path, hate_analyzer)

CASE STUDY ANALYSIS

In [None]:
# Join all comments: load every "comments_*.csv" file of the party folder into one DataFrame.
all_comments = []

# sorted() makes the row order reproducible; os.listdir() returns names in arbitrary order.
for file in sorted(os.listdir(party_path)):
    if file.startswith("comments_") and file.endswith(".csv"):
        file_path = os.path.join(party_path, file)
        df = pd.read_csv(file_path, delimiter='\t', encoding='utf-8')
        df['file'] = file  # remember which file each comment came from

        all_comments.append(df)

# pd.concat([]) fails with an unhelpful "No objects to concatenate"; report the real problem.
if not all_comments:
    raise FileNotFoundError(f"No comments_*.csv files found in {party_path!r}")

full_comments = pd.concat(all_comments, ignore_index=True)
full_comments.head()

In [None]:
# Order the comments from most to least liked.
full_comments_sorted = full_comments.sort_values(['like_count'], ascending = False)

In [None]:
# Drop comments without text. .copy() makes the filtered frame independent of the one it
# was sliced from, so the column assignments below do not raise SettingWithCopyWarning.
full_comments_sorted = full_comments_sorted[full_comments_sorted['text'].notna()].copy()

# Sentiment analyser: predicted label per comment, then its numeric score.
sentiment_labels, _ = analyze_sentiment_text(full_comments_sorted['text'], sentiment_analyzer)
full_comments_sorted['sentiment'] = sentiment_labels
sentiment_mapping = {'POS': 1, 'NEU': 0, 'NEG': -1}
full_comments_sorted['sentiment_score'] = full_comments_sorted['sentiment'].map(sentiment_mapping)

In [None]:
# Hate analyser: run the hate-speech analyzer on every comment text.
# Despite its name, the 'hate_label' column stores each prediction's full `probas`
# object (the per-label probabilities), not a single label.
full_comments_sorted['hate_label'] = full_comments_sorted['text'].map(lambda x: hate_analyzer.predict(x).probas)

In [None]:
# Top 30 comments: the first 30 rows of the frame sorted by like_count (descending),
# showing only the comment text, its video id and its sentiment label.
full_comments_sorted[['text', 'video_id', 'sentiment']].head(30)

FIND VIDEO WITH MOST LIKED COMMENT

In [None]:
def find_video_file(video_id, voice_path, party):
    """Find the CSV file of a party folder whose ``id`` column contains ``video_id``.

    Args:
        video_id: Id to look for. It is compared as text, so both ``123`` and
            ``"123"`` match a file that lists 123.
        voice_path: Directory that contains the party folders.
        party: Name of the party folder inside ``voice_path``.

    Returns:
        The name (not the full path) of the first matching ``.csv`` file, taken in
        alphabetical order, or ``None`` when no file lists the id. Knowing the file
        makes it easy to locate the video and get its content.

    Files that cannot be read are reported with ``print`` and skipped.
    """
    party_folder = os.path.join(voice_path, party)
    target_id = str(video_id)  # ids are compared as strings, so accept ints as well

    # sorted() makes "first match" deterministic; os.listdir() order is arbitrary.
    for csv_file in sorted(os.listdir(party_folder)):
        if not csv_file.endswith('.csv'):
            continue

        file_path = os.path.join(party_folder, csv_file)
        try:
            # Read ids as text: numeric parsing turns the column into floats as soon as
            # one value is missing, which corrupts large ids and appends ".0".
            df = pd.read_csv(file_path, delimiter="\t", dtype={'id': str})
        except Exception as e:
            print(f"Error reading {file_path}: {e}")
            continue

        if 'id' in df.columns and (df['id'].astype(str) == target_id).any():
            return csv_file

    return None

In [None]:
# Inputs for the lookup. NOTE(review): all three values look like placeholders — fill them in before running.
voice_path = "PARTY_PATH"  # directory that contains the party folders
video_id = 'VIDEO_ID'  # id to search for in the 'id' column of each CSV
party = 'PARTY NAME FILE'  # name of the party folder inside voice_path
find_video_file(video_id, voice_path, party)

In [None]:
# Analyse a single text with the sentiment analyzer and print the raw prediction.
# NOTE(review): the text reads like a speech transcript rather than a comment — presumably
# the content of the video found above; confirm.
text = 'es que yo creo que Alberto Núñez fejo sigue cometiendo el mismo error qué error el error es eh primero no comprender el Marco constitucional ni la democracia primero Alberto Núñez fejo sigue um sin comprender que hay 1 artículo 99 en la Constitución española que dice que el modelo constitucional es 1 1 modelo parlamentario en el que 1 tiene que buscar apoyos 2 tiene disfunciones con la democracia habla de partido de estado el partido que está bloqueando 1 de las instituciones fundamentales del estado se llama consejo general del poder judicial 1 partido que instrumentaliza todas y cada 1 de las instituciones que toca si se aplica el término de estado entiendo que lo dice de manera institucional que cumpla con el mandato y renueve los órganos constitucionales pendientes sería bueno que lo haga así y que sea 1 demócrata y 1 demócrata sabe que vamos a IR a 1 investidura en la que él no tiene aliados y no tiene votos es 1 investidura absolutamente fracasada ya sé que el corsé de la democracia y de la institucionalidad a Alberto 1 espejo le queda grande para muestra 1 botón el consejo general del poder judicial que no es pequeña cosa'
sentiment_result = sentiment_analyzer.predict(text)
print(sentiment_result)


LDA