In [15]:
import ast
import io
import tokenize
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from transformers import pipeline

# Silence every warning so the cell outputs below stay readable.
warnings.filterwarnings(action='ignore')

# Default look for all figures: seaborn "whitegrid" theme on a 10x6 canvas.
sns.set_theme(style="whitegrid")
plt.rcParams.update({'figure.figsize': [10, 6]})

print("✅ Libraries imported successfully.")

✅ Libraries imported successfully.


In [16]:
class AdvancedMessageMetrics:
    """Scores posts on comment quality (UCQS) and engagement efficiency.

    Construction loads two Hugging Face pipelines: a multilingual sentiment
    classifier (``self.sentiment_pipe``) and a multilingual toxicity
    classifier (``self.toxic_pipe``). Both run on the first CUDA device when
    one is available and on CPU otherwise.
    """

    # Score returned whenever no usable comment text is available.
    NEUTRAL_UCQS = 50.0
    # Character cap applied to every comment before inference (speed).
    MAX_COMMENT_CHARS = 512

    def __init__(self):
        # Pick the device once so both pipelines are guaranteed to agree.
        device = 0 if torch.cuda.is_available() else -1

        print("⏳ Loading Sentiment Model (XLM-RoBERTa)...")
        self.sentiment_pipe = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-xlm-roberta-base-sentiment",
            tokenizer="cardiffnlp/twitter-xlm-roberta-base-sentiment",
            top_k=None,  # scores for every label; the sentiment formula needs two of them
            truncation=True,
            max_length=512,
            device=device,
        )

        print("⏳ Loading Toxicity Model (Multilingual Toxic XLM-R)...")
        self.toxic_pipe = pipeline(
            "text-classification",
            model="unitary/multilingual-toxic-xlm-roberta",
            tokenizer="unitary/multilingual-toxic-xlm-roberta",
            top_k=None,
            truncation=True,
            max_length=512,
            device=device,
        )
        print("✅ Models loaded!")

    @staticmethod
    def _label_scores(pipe, text):
        """Runs ``pipe`` on a single text and returns ``{lower-cased label: score}``.

        Accepts both a flat list of ``{'label', 'score'}`` dicts and the same
        list wrapped in an outer one-element list.
        """
        results = pipe(text)
        if results and isinstance(results[0], list):
            results = results[0]
        return {item['label'].lower(): item['score'] for item in results}

    @staticmethod
    def _parse_comment_list(raw):
        """Parses the string form of a comment list into Python objects.

        Handles regular list literals (``"['a', 'b']"``) as well as
        NumPy-style reprs without commas (``"['a'\\n 'b']"``). For the latter,
        ``ast.literal_eval`` alone would silently merge all comments into one
        string, because adjacent string literals are concatenated by the
        parser; the individual comments are recovered from the tokenizer's
        STRING tokens instead.

        Raises:
            ValueError, SyntaxError, TypeError, MemoryError, RecursionError:
                propagated from ``ast.literal_eval`` when ``raw`` is not a
                valid literal.
        """
        parsed = ast.literal_eval(raw)

        # Only a non-empty, all-string list can be the product of implicit
        # concatenation; anything else is returned exactly as parsed.
        if not (isinstance(parsed, list) and parsed
                and all(isinstance(entry, str) for entry in parsed)):
            return parsed

        try:
            pieces = [
                ast.literal_eval(token.string)
                for token in tokenize.generate_tokens(io.StringIO(raw).readline)
                if token.type == tokenize.STRING
            ]
        except (tokenize.TokenError, SyntaxError, ValueError):
            return parsed

        # More string tokens than list elements means literals were merged.
        return pieces if len(pieces) > len(parsed) else parsed

    def _get_sentiment_score(self, text):
        """Converts model output into a number from -1.0 (Negative) to 1.0 (Positive).

        Returns 0.0 (neutral) and prints the error if inference fails, so one
        bad comment does not abort a whole run.
        """
        try:
            scores = self._label_scores(self.sentiment_pipe, text)

            # Some checkpoints report generic label_0/1/2 names, which are
            # read here as negative/neutral/positive.
            positive = scores.get('positive', scores.get('label_2', 0.0))
            negative = scores.get('negative', scores.get('label_0', 0.0))

            # Formula: Positive minus Negative
            return positive - negative
        except Exception as e:
            print(f"Error in sentiment: {e}")
            return 0.0

    def _get_toxicity_score(self, text):
        """Returns toxicity probability (0.0 - 1.0).

        Returns 0.0 and prints the error if inference fails.
        """
        try:
            scores = self._label_scores(self.toxic_pipe, text)
            # We take the general 'toxic' score (label matched case-insensitively)
            return scores.get('toxic', 0.0)
        except Exception as e:
            print(f"Error in toxicity: {e}")
            return 0.0

    def calculate_ucqs(self, comments_content):
        """
        Calculates Unified Comment Quality Score (UCQS).

        Input: List of comment texts, or the string representation of such a
            list (comma-separated literal or NumPy-style repr).
        Output: Float 0-100. ``NEUTRAL_UCQS`` (50.0) is returned when the
            input cannot be parsed, is not a list, or holds no non-blank text.
        """
        # 1. Validation and parsing of input data
        if isinstance(comments_content, str):
            try:
                comments_content = self._parse_comment_list(comments_content)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                return self.NEUTRAL_UCQS  # Neutral baseline on parse error

        if not isinstance(comments_content, list) or not comments_content:
            return self.NEUTRAL_UCQS

        sentiments = []
        toxicity_scores = []

        # 2. Analyze each comment
        for text in comments_content:
            if not isinstance(text, str) or not text.strip():
                continue

            # Truncate long comments for speed
            short_text = text[:self.MAX_COMMENT_CHARS]

            sentiments.append(self._get_sentiment_score(short_text))
            toxicity_scores.append(self._get_toxicity_score(short_text))

        if not sentiments:
            return self.NEUTRAL_UCQS

        # 3. Aggregating statistics
        s_avg = np.mean(sentiments)       # Average sentiment (-1..1)
        s_std = np.std(sentiments)        # Sentiment spread (standard deviation, 0..1)
        t_avg = np.mean(toxicity_scores)  # Average toxicity (0..1)

        # 4. Final UCQS Formula
        # Base 50 + (Sentiment * 40) - (Spread * 20) - (Toxicity * 30)
        ucqs = 50 + (s_avg * 40) - (s_std * 20) - (t_avg * 30)

        return float(np.clip(ucqs, 0, 100))

    def calculate_efficiency(self, score, num_comments, upvote_ratio):
        """Calculates Adjusted Efficiency Score (Virality).

        Args:
            score: Post score; null is treated as 0.
            num_comments: Number of comments; null is treated as 0.
            upvote_ratio: Upvote ratio; null is treated as 0.5.

        Returns:
            ``log1p(score + 2 * num_comments) * upvote_ratio`` rounded to two
            decimals, with the engagement term floored at 0.
        """
        # Handle null values
        score = score if pd.notnull(score) else 0
        comments = num_comments if pd.notnull(num_comments) else 0
        ratio = upvote_ratio if pd.notnull(upvote_ratio) else 0.5

        # Comments are weighted higher (x2) as they require more effort.
        # Floor at 0: log1p is undefined at or below -1, so a net-negative
        # score would otherwise produce nan/-inf.
        raw_engagement = max(score + (comments * 2.0), 0.0)

        # Logarithm smooths out outliers (million-view posts)
        log_engagement = np.log1p(raw_engagement)

        return round(float(log_engagement * ratio), 2)

In [17]:
# Read the Reddit dataset from CSV; the bare `df` on the last line previews it.
df = pd.read_csv(filepath_or_buffer="/content/reddit.csv")
df

Unnamed: 0,post_id,post_author,post_created_at,subreddit,title,selftext,post_score,upvote_ratio,num_comments,url,domain,comments_content,comments_scores
0,1lnl1el,humanlikecorvus,2025-06-29 20:57:14+03:00,UkrainianConflict,JOINT SUBREDDIT FUNDRAISER FOR UKRAINE X UNITED24,"Hello users of r/UkrainianConflict ,\n\nFor th...",104,0.98,6,https://i.redd.it/ik3j60vfmw9f1.png,i.redd.it,['[removed]'\n 'Hi! United24s initial goal of ...,[1 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
1,1met4oq,Distinct-Quit-3223,2025-08-01 13:42:32+03:00,CombatFootage,FPV's of the 14th Operational Assignment Briga...,,349,0.96,9,https://v.redd.it/om1h95v40egf1,/r/CombatFootage/comments/1met4oq/fpvs_of_the_...,['Please keep the [community guidelines](https...,[ 1 1 1 11 29 9 10 1 1 1 1 1 11 29 9 ...
2,1metez4,Mil_in_ua,2025-08-01 13:59:17+03:00,UkrainianConflict,Germany to Supply Two Patriot Systems to Ukrai...,,90,0.98,1,https://militarnyi.com/en/news/germany-to-supp...,militarnyi.com,"[""Please take the time to read [the rules](/r/...",[1 1]
3,1mevbmj,UNITED24Media,2025-08-01 15:35:50+03:00,UkrainianConflict,"Ukraine Pushes East in Donetsk Region, Forcing...",,145,0.99,3,https://united24media.com/latest-news/ukraine-...,united24media.com,"[""Please take the time to read [the rules](/r/...",[1 2 3 1 2 3]
4,1mevf5c,Pristine_Squirrel_27,2025-08-01 15:40:20+03:00,UkrainianConflict,Germany to deliver two Patriot systems to Ukra...,,206,1.00,2,https://www.reuters.com/world/europe/germany-d...,reuters.com,"[""Please take the time to read [the rules](/r/...",[ 1 10 1 10]
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3697,1lfftln,Econ_Orc,2025-06-19 19:55:02+03:00,europe,Denmark to push for Ukraines EU membership dur...,,1221,0.97,61,https://www.reuters.com/sustainability/climate...,reuters.com,,
3698,1lfqv6t,SpecialK_Anon,2025-06-20 03:45:54+03:00,ukraine,I wrote this letter to my representatives in c...,**Find your representatives here:** [**https:/...,96,1.00,1,https://www.reddit.com/r/ukraine/comments/1lfq...,self.ukraine,,
3699,1lgwjya,HellYeahDamnWrite,2025-06-21 16:27:17+03:00,UkrainianConflict,Putin declares ‘all of Ukraine is ours’ in lat...,,253,0.95,41,https://www.independent.co.uk/news/world/europ...,independent.co.uk,,
3700,1lj5jqe,BezugssystemCH1903,2025-06-24 11:20:01+03:00,de,Sichere ukrainische Gebiete - Schweiz schränkt...,,27,0.89,0,https://www.srf.ch/news/schweiz/sichere-ukrain...,srf.ch,,


In [18]:

# Build the scoring engine (its constructor loads both transformer pipelines).
metrics_engine = AdvancedMessageMetrics()

# 1. Calculate Efficiency for every row from score, comment count and upvote ratio
df['Efficiency'] = df.apply(
    lambda row: metrics_engine.calculate_efficiency(
        score=row['post_score'],
        num_comments=row['num_comments'],
        upvote_ratio=row['upvote_ratio'],
    ),
    axis=1,
)

# 2. Calculate UCQS (this may take time depending on the number of comments)
df['UCQS'] = df['comments_content'].map(metrics_engine.calculate_ucqs)


⏳ Loading Sentiment Model (XLM-RoBERTa)...


Device set to use cpu


⏳ Loading Toxicity Model (Multilingual Toxic XLM-R)...


Device set to use cpu


✅ Models loaded!


In [21]:
# Persist the enriched frame. index=False keeps the row index out of the file:
# the frame already carries an 'Unnamed: 0' column from an earlier
# index-writing save, and writing the index again would add a second one.
df.to_csv("/content/reddit_metrics.csv", index=False)