In [1]:
from groq import Groq
import os
from pathlib import Path
from dotenv import load_dotenv
import pandas as pd
import google.generativeai as genai
import time
import random

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_csv("./twitter_processed.csv")

In [3]:
env_path = Path("../.env")
load_dotenv(dotenv_path=env_path)  

True

In [4]:
client = Groq(api_key=os.environ["GROQ_API_KEY"])
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

In [5]:
model = genai.GenerativeModel('gemini-1.5-flash')

In [6]:
def transform_tweet_with_groq(tweet):
    prompt = f"""
    Transform the abusive tweet into non abusive one i.e. into a more polite and respectful review.
    Only return the transformed tweet. Do not write anything apart from the transformed tweet.
    Below is the tweet:

    "{tweet}"
    """
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model="llama3-8b-8192",
    )
    response_text = chat_completion.choices[0].message.content.strip("```")
    return response_text

In [13]:
from google.api_core.retry import Retry 

retry_policy = Retry(
    initial=1.0,  
    maximum=10.0, 
    multiplier=2.0, 
    deadline=30.0,  
)

In [14]:
from google.generativeai.types import HarmCategory, HarmBlockThreshold, RequestOptions
def transform_review(review):
    prompt = f"""
    Transform the following text by replacing offensive or harmful language with neutral terms while maintaining the overall meaning and context as much as possible:

    {review}
    """
    try:
        response = retry_policy(model.generate_content)(
            prompt,
            safety_settings={
                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
            },
        )
        return response.text.strip()
    except Exception as e:
        print(f"Error occurred: {e}")
        return None  

In [8]:
sampled = (
    df.groupby('labels', group_keys=False)  
    .apply(lambda x: x.sample(n=100, random_state=42))  
)

sampled = sampled[['tweets', 'cleaned', 'cleaned_transform', 'labels']]

sampled = sampled.reset_index(drop=True)
sampled['batch'] = sampled.index // 20  

  .apply(lambda x: x.sample(n=100, random_state=42))


In [9]:
# groq transformation
sampled['transformed_tweets_groq'] = sampled['cleaned_transform'].apply(lambda x: transform_tweet_with_groq(x))

In [10]:
sampled

Unnamed: 0,tweets,cleaned,cleaned_transform,labels,batch,transformed_tweets_groq
0,b'RT @ASJBaloch: Here you go watch brave Irani...,go watch brave iranian womens driving without ...,here you go watch brave iranian womens driving...,0,0,"""I'm impressed by the bravery of Iranian women..."
1,b'RT @ASJBaloch: Mehr News agency chief of cal...,mehr news agency chief call virtue body geared...,mehr news agency chief of call to virtue a bod...,0,0,"""It would be helpful if the news agency's Chie..."
2,b'Sorry Britain but Worcester is a Muslim town...,sorry britain worcester muslim town,sorry britain but worcester is a muslim town now,0,0,"""I'm excited to see the diversity thriving in ..."
3,@khamenei_ir Yup with American ppl armed with ...,yup american ppl armed gun difficult bring cal...,yup with american ppl armed with guns its so d...,0,0,"""I'm concerned about the complexities of imple..."
4,b'If you bother to do the maths then in actual...,bother math actual fact socalled reformer isla...,if you bother to do the maths then in actual f...,0,0,"""I'd love to see a breakdown of the math and h..."
...,...,...,...,...,...,...
495,b'RT @JackBMontgomery: \xe2\x80\x9cIt\xe2\x80\...,time face truth never able stop migration say ...,time to face the truth we cannot and will neve...,4,24,"""It's essential to acknowledge that effective ..."
496,\u201cThe laws are very clear and stipulated i...,law clear stipulated law sharia islamic law wo...,laws are very clear and stipulated in the laws...,4,24,"""I appreciate the culturally significant guide..."
497,b'#Melbourne - Cope advise Locking doors agai...,melbourne cope advise locking door terrorism r...,melbourne cope advise locking doors against te...,4,24,"""Melbourne's recent security measures are a co..."
498,b'@zzr40092 @Nintendrew_ next project? \xf0\x9...,next project,next project,4,24,"""I'm excited to move on to the next project! L..."


In [11]:
sampled['transformed_tweets_gemini'] = None

In [15]:
# gemini transformation
index_to_transform = sampled[sampled['transformed_tweets_gemini'].isna()].index
for counter, idx in enumerate(index_to_transform):
    clean_review = sampled.loc[idx, 'cleaned_transform']
    improved_text = transform_review(clean_review)
    sampled.at[idx, 'transformed_tweets_gemini'] = improved_text
    time.sleep(2)
    print(f"{idx + 1}/{len(index_to_transform)}")


17/484
18/484
19/484
20/484
21/484
22/484
23/484
24/484
25/484
26/484
27/484
28/484
29/484
30/484
31/484
32/484
33/484
34/484
35/484
36/484
37/484
38/484
39/484
40/484
41/484
42/484
43/484
44/484
45/484
46/484
47/484
48/484
49/484
50/484
51/484
52/484
53/484
54/484
55/484
56/484
57/484
58/484
59/484
60/484
61/484
62/484
63/484
64/484
65/484
66/484
67/484
68/484
69/484
70/484
71/484
72/484
73/484
74/484
75/484
76/484
77/484
78/484
79/484
80/484
81/484
82/484
83/484
84/484
85/484
86/484
87/484
88/484
89/484
90/484
91/484
92/484
93/484
94/484
95/484
96/484
97/484
98/484
99/484
100/484
101/484
102/484
103/484
104/484
105/484
106/484
107/484
108/484
109/484
110/484
111/484
112/484
113/484
114/484
115/484
116/484
117/484
118/484
119/484
120/484
121/484
122/484
Error occurred: Invalid operation: The `response.parts` quick accessor requires a single candidate, but but `response.candidates` is empty.
This appears to be caused by a blocked prompt, see `response.prompt_feedback`: block_reason: PR

In [16]:
sampled

Unnamed: 0,tweets,cleaned,cleaned_transform,labels,batch,transformed_tweets_groq,transformed_tweets_gemini
0,b'RT @ASJBaloch: Here you go watch brave Irani...,go watch brave iranian womens driving without ...,here you go watch brave iranian womens driving...,0,0,"""I'm impressed by the bravery of Iranian women...",Here's a video showing Iranian women driving w...
1,b'RT @ASJBaloch: Mehr News agency chief of cal...,mehr news agency chief call virtue body geared...,mehr news agency chief of call to virtue a bod...,0,0,"""It would be helpful if the news agency's Chie...",A news agency head called for a group promotin...
2,b'Sorry Britain but Worcester is a Muslim town...,sorry britain worcester muslim town,sorry britain but worcester is a muslim town now,0,0,"""I'm excited to see the diversity thriving in ...",Worcester's population has seen a significant ...
3,@khamenei_ir Yup with American ppl armed with ...,yup american ppl armed gun difficult bring cal...,yup with american ppl armed with guns its so d...,0,0,"""I'm concerned about the complexities of imple...",Establishing a theocratic state in America fac...
4,b'If you bother to do the maths then in actual...,bother math actual fact socalled reformer isla...,if you bother to do the maths then in actual f...,0,0,"""I'd love to see a breakdown of the math and h...","If you calculate the figures, you'll find that..."
...,...,...,...,...,...,...,...
495,b'RT @JackBMontgomery: \xe2\x80\x9cIt\xe2\x80\...,time face truth never able stop migration say ...,time to face the truth we cannot and will neve...,4,24,"""It's essential to acknowledge that effective ...",The EU migration commissioner stated that halt...
496,\u201cThe laws are very clear and stipulated i...,law clear stipulated law sharia islamic law wo...,laws are very clear and stipulated in the laws...,4,24,"""I appreciate the culturally significant guide...",Islamic law contains clear stipulations regard...
497,b'#Melbourne - Cope advise Locking doors agai...,melbourne cope advise locking door terrorism r...,melbourne cope advise locking doors against te...,4,24,"""Melbourne's recent security measures are a co...",Melbourne authorities advise residents to lock...
498,b'@zzr40092 @Nintendrew_ next project? \xf0\x9...,next project,next project,4,24,"""I'm excited to move on to the next project! L...","The provided text ""next project"" contains no o..."


In [17]:
sampled.to_csv("transformed_tweets.csv")