In [1]:
import logging
import os

import pandas as pd
import requests
from tenacity import retry, stop_after_attempt, wait_exponential
from tenacity import retry_if_exception

def _is_retryable_error(exc):
    """Tell tenacity whether a failed call is worth repeating.

    Only transient failures qualify: HTTP 429 (rate limit), HTTP 5xx, and
    network-level connection or timeout errors. Anything else (for example a
    4xx rejection of the request, or a response missing the expected keys)
    would fail the same way on every attempt, so it is raised immediately.
    """
    if isinstance(exc, requests.exceptions.HTTPError):
        response = exc.response
        if response is None:
            return False
        return response.status_code == 429 or response.status_code >= 500
    return isinstance(
        exc, (requests.exceptions.ConnectionError, requests.exceptions.Timeout)
    )


# Retry transient failures only, with exponential backoff: at most 5 attempts,
# waiting between 4 and 10 seconds between them. reraise=True makes the last
# underlying exception propagate instead of tenacity's RetryError wrapper, so
# callers log the real cause.
@retry(
    retry=retry_if_exception(_is_retryable_error),
    stop=stop_after_attempt(5),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    reraise=True,
)
def analyze_perspective(text, api_key, timeout=30):
    """Score one piece of text with the Perspective comment-analyzer endpoint.

    Args:
        text: The comment text placed in the request body.
        api_key: API key sent as the ``key`` query parameter.
        timeout: Seconds passed to ``requests.post`` as its timeout. Without
            one, a stalled connection would block the caller indefinitely.

    Returns:
        A dict mapping each attribute name present in the response's
        ``attributeScores`` (e.g. ``'TOXICITY'``) to its ``summaryScore`` value.

    Raises:
        requests.exceptions.HTTPError: The API answered with an error status
            (after retries are exhausted for 429/5xx, immediately otherwise).
        requests.exceptions.RequestException: Network-level failure.
        KeyError: The response body lacks the expected score structure.
    """
    url = "https://commentanalyzer.googleapis.com/v1alpha1/comments:analyze"
    data = {
        'comment': {'text': text},
        'languages': ['en'],
        'requestedAttributes': {
            'TOXICITY': {}, 'SEVERE_TOXICITY': {}, 'INSULT': {}, 'PROFANITY': {},
            'THREAT': {}, 'IDENTITY_ATTACK': {}
        }
    }
    try:
        response = requests.post(
            url, params={'key': api_key}, json=data, timeout=timeout
        )

        # Turn 4xx/5xx answers into HTTPError so they are handled below.
        response.raise_for_status()

        result = response.json()
        return {
            attr: details['summaryScore']['value']
            for attr, details in result['attributeScores'].items()
        }

    except requests.exceptions.HTTPError as e:
        # Read the status from the exception itself rather than relying on the
        # local `response` variable being bound.
        status = e.response.status_code if e.response is not None else None
        if status == 429:  # Rate limit exceeded; the decorator retries this
            logging.warning(f"Rate limit exceeded. Retrying... Error: {str(e)}")
        else:
            logging.error(f"HTTP error: {str(e)}")
        raise
    except Exception as e:
        logging.error(f"Error calling Perspective API: {str(e)}")
        raise

def check_toxicity(data, api_key, phrase = 'final_phrase', backup_every=500,
                   backup_path='backup.pq', final_path='final_backup.pq'):
    """Score every row of ``data`` and store the results in new columns.

    The frame is modified in place: six score columns are created (filled with
    ``None``) when absent, then each row's text is sent to
    ``analyze_perspective`` and the returned scores are written back. A row
    whose call fails is logged and left unchanged; processing continues.

    Args:
        data: DataFrame holding the text to score. Rows are written back by
            index label, so labels are assumed to be unique.
        api_key: API key forwarded to ``analyze_perspective``.
        phrase: Name of the column containing the text.
        backup_every: Write an intermediate parquet backup after this many
            rows. ``None`` or ``0`` disables intermediate backups.
        backup_path: Destination of the intermediate backups.
        final_path: Destination of the parquet file written after the loop.

    Returns:
        None. Results are stored in ``data`` and in the parquet files.
    """
    # Output column -> attribute key in the dict returned by analyze_perspective.
    score_columns = {
        'toxicity': 'TOXICITY',
        'severe_toxicity': 'SEVERE_TOXICITY',
        'insult': 'INSULT',
        'profanity': 'PROFANITY',
        'threat': 'THREAT',
        'identity_attack': 'IDENTITY_ATTACK',
    }

    for col in score_columns:
        if col not in data.columns:
            data[col] = None

    # Count rows by position: index labels may be strings or non-contiguous,
    # so they cannot serve as a progress counter.
    for position, (index, row) in enumerate(data.iterrows()):
        if backup_every and position and position % backup_every == 0:
            data.to_parquet(backup_path)
            logging.info(f"Processed {position} rows. Backup saved.")

        text = row[phrase]

        # Missing or blank text cannot be scored; skip it instead of spending
        # an API call (and its retries) on a request that is bound to fail.
        if not isinstance(text, str) or not text.strip():
            logging.warning(f"Skipping row {index}: no text to analyze.")
            continue

        try:
            results = analyze_perspective(text, api_key)
            if results:
                for col, attribute in score_columns.items():
                    data.at[index, col] = results.get(attribute)

        except Exception as e:
            logging.error(f"Error processing row {index}: {str(e)}")
            continue

    data.to_parquet(final_path)
    logging.info("Processing completed. Final backup saved.")

In [4]:
# Example usage
# Read the key from the environment so a real credential is never saved in the
# notebook; "xxx" remains as a placeholder when the variable is not set.
api_key = os.environ.get("PERSPECTIVE_API_KEY", "xxx")

data = pd.DataFrame({'final_phrase': ["Your text to analyze here", "Another example text", "I hate you"]})
check_toxicity(data, api_key)

In [5]:
# Print the frame; check_toxicity added the score columns to it in place.
print(data)

                final_phrase  toxicity severe_toxicity    insult profanity  \
0  Your text to analyze here  0.016462        0.001044  0.008976  0.013644   
1       Another example text  0.009676        0.000782  0.007475  0.012175   
2                 I hate you  0.682712         0.03949  0.437884  0.335177   

     threat identity_attack  
0  0.007405        0.003589  
1  0.006745        0.002442  
2  0.043956        0.288834  
