In [25]:
from groq import Groq
import os
from pathlib import Path
from dotenv import load_dotenv
import pandas as pd
import google.generativeai as genai
import time
import random

In [26]:
df = pd.read_csv("./twitter_processed.csv")

In [27]:
env_path = Path("../.env")
load_dotenv(dotenv_path=env_path)  

True

In [28]:
client = Groq(api_key=os.environ["GROQ_API_KEY"])
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

In [29]:
model = genai.GenerativeModel('gemini-1.5-flash')

In [30]:
def transform_tweet_with_groq(tweet):
    prompt = f"""
    Transform the abusive tweet into non abusive one i.e. into a more polite and respectful review.
    Only return the transformed tweet. Do not write anything apart from the transformed tweet.
    Below is the tweet:

    "{tweet}"
    """
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model="llama3-8b-8192",
    )
    response_text = chat_completion.choices[0].message.content.strip("```")
    return response_text

In [31]:
from google.generativeai.types import HarmCategory, HarmBlockThreshold, RequestOptions
from google.api_core.retry import Retry 
def transform_review(review):
    prompt = f"""
    Transform the following text by replacing offensive or harmful language with neutral terms while maintaining the overall meaning and context as much as possible:

    {review}
    """
    response = model.generate_content(prompt, safety_settings={
        HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
    })
    return response.text.strip()

In [32]:
sampled = (
    df.groupby('labels', group_keys=False)  
    .apply(lambda x: x.sample(n=100, random_state=42))  
)

sampled = sampled[['tweets', 'cleaned', 'cleaned_transform', 'labels']]

sampled = sampled.reset_index(drop=True)
sampled['batch'] = sampled.index // 20  

  .apply(lambda x: x.sample(n=100, random_state=42))


In [33]:
# groq transformation
sampled['transformed_tweets_groq'] = sampled['cleaned_transform'].apply(lambda x: transform_tweet_with_groq(x))

In [34]:
sampled

Unnamed: 0,tweets,cleaned,cleaned_transform,labels,batch,transformed_tweets_groq
0,b'RT @ASJBaloch: Here you go watch brave Irani...,go watch brave iranian womens driving without ...,here you go watch brave iranian womens driving...,0,0,"""I'm glad to see Iranian women exercising thei..."
1,b'RT @ASJBaloch: Mehr News agency chief of cal...,mehr news agency chief call virtue body geared...,mehr news agency chief of call to virtue a bod...,0,0,"""Hoping for more nuanced and thoughtful discus..."
2,b'Sorry Britain but Worcester is a Muslim town...,sorry britain worcester muslim town,sorry britain but worcester is a muslim town now,0,0,"""Worcester is a beautiful city with a rich cul..."
3,@khamenei_ir Yup with American ppl armed with ...,yup american ppl armed gun difficult bring cal...,yup with american ppl armed with guns its so d...,0,0,"""Interesting perspective on governance. I'd lo..."
4,b'If you bother to do the maths then in actual...,bother math actual fact socalled reformer isla...,if you bother to do the maths then in actual f...,0,0,"""It's interesting to note that after calculati..."
...,...,...,...,...,...,...
495,b'RT @JackBMontgomery: \xe2\x80\x9cIt\xe2\x80\...,time face truth never able stop migration say ...,time to face the truth we cannot and will neve...,4,24,"""It's time for an honest conversation about mi..."
496,\u201cThe laws are very clear and stipulated i...,law clear stipulated law sharia islamic law wo...,laws are very clear and stipulated in the laws...,4,24,"""It's worth noting that Islamic law and Sharia..."
497,b'#Melbourne - Cope advise Locking doors agai...,melbourne cope advise locking door terrorism r...,melbourne cope advise locking doors against te...,4,24,"""I think it's important for Melbourne resident..."
498,b'@zzr40092 @Nintendrew_ next project? \xf0\x9...,next project,next project,4,24,"""Excited to move on to the next project and br..."


In [35]:
sampled['transformed_tweets_gemini'] = None

In [37]:
# gemini transformation
index_to_transform = sampled[sampled['transformed_tweets_gemini'].isna()].index
for counter, idx in enumerate(index_to_transform):
    clean_review = sampled.loc[idx, 'cleaned_transform']
    improved_text = transform_review(clean_review)
    sampled.at[idx, 'transformed_tweets_gemini'] = improved_text
    time.sleep(2)
    print(f"{idx + 1}/{len(index_to_transform)}")


17/484
18/484
19/484
20/484
21/484
22/484
23/484
24/484
25/484
26/484
27/484
28/484
29/484
30/484
31/484
32/484


ResourceExhausted: 429 Resource has been exhausted (e.g. check quota).

In [30]:
sampled

Unnamed: 0,tweets,cleaned,cleaned_transform,labels,batch,transformed_tweets_groq,transformed_tweets_gemini
0,b'And people want to return to these glory day...,people want return glory day islam,and people want to return to these glory days ...,0,0,"""I'm fascinated by the rich history of Islamic...",and people wish to return to what they see as ...
1,"b""There's no nicer feeling than freeing a woma...",there nicer feeling freeing woman muslim husband,theres no nicer feeling than freeing a woman o...,0,0,I cannot write a tweet that condones or promot...,There is no feeling quite like assisting someo...
2,"b'Sorry guys, Allah is menstruating today. Ple...",sorry guy allah menstruating today please keep...,sorry guys allah is menstruating today please ...,0,0,"""Respectful reminder, please keep noise levels...","I'm sorry, but I need some quiet today. Pleas..."
3,b'Never pray to Allah.',never pray allah,never pray to allah,0,0,"""I respectfully disagree with all religious be...",This statement expresses a rejection of the Is...
4,RT @SplinterPc: But Sharia May Pm said Russia ...,sharia may pm said russia believe like believe...,but sharia may pm said russia did it believe h...,0,0,"""I'd like to express my concern about the rece...",This statement expresses strong distrust of bo...
...,...,...,...,...,...,...,...
495,b'@VernonFGunn @TRobinsonNewEra @PeterSweden7 ...,keep good work,keep up the good work,4,24,"""I'm really enjoying your work, keep up the ex...",Keep up the good work. (This phrase is alread...
496,@Lor_blueeyes @PoliticallyRYT Race does not de...,race define character got right race,race does not define character you got that ri...,4,24,"""Awesome point that race doesn't define a pers...",A person's background doesn't determine their ...
497,b'She sounds like she has a scrumptious person...,sound like scrumptious personality,she sounds like she has a scrumptious personality,4,24,"""She seems to have a warm and delightful perso...",She sounds like a delightful person.
498,b'Did this little Globalist snot tell Canadian...,little globalist snot tell canadian elected ma4t,did this little globalist snot tell canadians ...,4,24,"""It's important for elected officials to be tr...",Did this politician inform Canadians of this b...


In [31]:
sampled.to_csv("transformed_tweets.csv")