# Load, Sample and Add english translation of messaged

In [7]:
import pandas as pd
import numpy as np
import json
from deep_translator import GoogleTranslator

SAMPLE_SIZE = 100

def translate(x):
    try:
        tranlsation = GoogleTranslator(source='auto', target='en').translate(x)
    except:
        tranlsation = ""
    return tranlsation

def total_interactions(x):
    reactions_total = 0
    for _, value in json.loads(x.replace("'", '"')).items():
        reactions_total += value
    return reactions_total

def weighted_popularity_score(row):
    return np.mean([row.views, row.forwards, row.reaction_nb])

def min_max_normalization(col, df):
    return (df[col]-df[col].min())/(df[col].max()-df[col].min())


df = pd.read_csv('../../data/telegram/telegram.csv')
df['reaction_nb'] = df.reactions.apply(lambda x: total_interactions(x))

df["views_norm"] = min_max_normalization("views", df)
df["forwards_norm"] = min_max_normalization("forwards", df)
df["reaction_nb_norm"] = min_max_normalization("reaction_nb", df)

df['popularity_score'] = df.apply(lambda x: weighted_popularity_score(x), axis=1)
df = df.sort_values('popularity_score', ascending=False)

sampled = df.iloc[:SAMPLE_SIZE]
sampled['englishMessageText'] = sampled.messageText.apply(lambda x: translate(x))
sampled.to_csv('../../data/telegram/sample_with_translations.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sampled['englishMessageText'] = sampled.messageText.apply(lambda x: translate(x))


# Request propaganda detection for sample messages

In [8]:
import asyncio
import websockets
import json
import time

async def connect_to_websocket(dataframe, client_id, message):
    async with websockets.connect("ws://13.48.71.178:8000/ws/analyze_propaganda", ping_interval=None) as websocket:
        # Define the request data
        request_data = {
            "model_name": "gpt-4o",  # Example model
            "text": message,
            "contextualize": "True"
        }

        # Send the request data
        await websocket.send(json.dumps(request_data))
        print(f"Client {client_id}: Request sent.")

        response = None

        # Keep receiving responses indefinitely
        while True:
            try:
                response = await websocket.recv()
                print(f"Client {client_id}: Streamed response received:\n{response}")
                dataframe.at[client_id, "response"] = response
            except websockets.ConnectionClosedOK:
                print(f"Client {client_id}: Connection closed OK.")
                break
            except websockets.ConnectionClosedError:
                print(f"Client {client_id}: Connection closed.")
                break
            except Exception as e:
                print(f"Client {client_id}: An error occurred: {e}")
                break

async def simulate_multiple_clients(dataframe):
    tasks = []
    counter = 0
    for i, row in dataframe.iterrows():  # Simulate 5 clients
        tasks.append(connect_to_websocket(dataframe, i, row.englishMessageText))
        counter += 1
        if counter % 10 == 0:
            await asyncio.gather(*tasks)
            time.sleep(30)
            tasks = []

    

await simulate_multiple_clients(sampled)
sampled.to_csv('../../data/telegram/best_100_detection.csv', index=False)

Client 376484: Request sent.
Client 16242: Request sent.
Client 31552: Request sent.
Client 414570: Request sent.
Client 409734: Request sent.
Client 27778: Request sent.
Client 415855: Request sent.
Client 420006: Request sent.
Client 410078: Request sent.
Client 16582: Request sent.
Client 376484: Streamed response received:
{"user_id": "186646b2-f153-4eee-b9f5-5c530010810e", "type": "propaganda_detection", "status": "success", "data": {}}
Client 376484: Streamed response received:
{"user_id": "186646b2-f153-4eee-b9f5-5c530010810e", "type": "contextualization", "status": "success", "data": {}}
Client 376484: Connection closed OK.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataframe.at[client_id, "response"] = response


Client 27778: Streamed response received:
{"user_id": "47f23402-b41d-425a-a712-625c9a8164b9", "type": "propaganda_detection", "status": "success", "data": {"Flag-Waving": [{"explanation": "The phrase 'Appeal to the citizens of Russia' suggests a call to action based on national identity, which is a common technique to evoke strong national feelings.", "location": "Appeal to the citizens of Russia"}]}}
Client 16242: Streamed response received:
{"user_id": "a61a3edd-866a-41fd-873a-7e9db731a848", "type": "propaganda_detection", "status": "success", "data": {"Loaded_Language": [{"explanation": "The use of words like 'bastards' and 'degenerates' carries a strong emotional impact and is intended to provoke a negative emotional response from the audience.", "location": "I answer \u2013 I hate them. They are bastards and degenerates."}], "Name_Calling, Labeling": [{"explanation": "The terms 'bastards' and 'degenerates' are labels given to the opponents, which are intended to dehumanize and vil

In [11]:
def unfold_results(x):
    obj = json.loads(x['response'])
    for key, value in obj['data'].items():
        x[key] = 1

    return x

new = sampled.apply(lambda x: unfold_results(x), axis=1)
new.fillna(0, inplace=True)
new['is_propaganda'] = new[['Appeal_to_Authority', 'Appeal_to_fear-prejudice',
       'Bandwagon, Reductio_ad_hitlerum', 'Black-and-White_Fallacy',
       'Causal_Oversimplification', 'Doubt', 'Exaggeration, Minimization',
       'Flag-Waving', 'Loaded_Language', 'Name_Calling, Labeling',
       'Repetition', 'Slogans', 'Whataboutism, Straw_Men, Red_Herring']].sum(axis=1)
new['is_propaganda'] = new['is_propaganda']>0
print(new['is_propaganda'].value_counts())
new.to_csv('../../data/telegram/unfolded.csv')

is_propaganda
True     87
False    13
Name: count, dtype: int64