In [1]:
import pandas as pd

df = pd.read_csv('Data/trump_tweets_reasons.csv')

In [2]:
df.head(20)

Unnamed: 0,text_id,code_id,code_name,text,original_code,replicated_code,model_code,reason
0,1,1,HSTG,A great guy (with great ratings)! https://t.co...,No,No,No,"There are no hashtags used in this tweet, only..."
1,1,2,ATSN,A great guy (with great ratings)! https://t.co...,No,No,No,There are no at signs (@) present in this twee...
2,1,3,CRIT,A great guy (with great ratings)! https://t.co...,No,No,No,The tweet does not criticize another person or...
3,1,4,MEDI,A great guy (with great ratings)! https://t.co...,No,No,No,This tweet does not contain any derogatory or ...
4,1,5,FAMY,A great guy (with great ratings)! https://t.co...,No,No,No,The tweet does not reference any members of Do...
5,1,6,PLCE,A great guy (with great ratings)! https://t.co...,No,No,No,The tweet does not reference the police or ind...
6,1,7,MAGA,A great guy (with great ratings)! https://t.co...,No,No,No,The tweet does not reference Donald Trump's 20...
7,1,8,CAPT,A great guy (with great ratings)! https://t.co...,No,No,No,The tweet does not contain any words in all ca...
8,1,9,INDV,A great guy (with great ratings)! https://t.co...,Yes,Yes,Yes,"The tweet references an individual person, eve..."
9,1,10,MARG,A great guy (with great ratings)! https://t.co...,No,No,No,The tweet does not reference any marginalized ...


In [26]:
import pandas as pd
import numpy as np
import requests
import os
import re
from tqdm import tqdm
import time
from dotenv import load_dotenv
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from queue import Queue
from threading import Lock

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Load environment variables
load_dotenv()

# Together API endpoint and key
endpoint = 'https://api.together.xyz/inference'
TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')

def prepare_prompt(text, code):
    """Prepare prompt for a specific code and text using Llama 3.3 instruct format."""
    codebook = {
        'HSTG': "Are hashtags used in this tweet? Exclude hyperlinks from this decision.",
        'ATSN': "Are at signs (\"@\") used in this tweet? Include \"@\" that are present in retweets. Exclude hyperlinks from this decision.",
        'CRIT': "Does Donald Trump criticize another person or idea in this tweet? If the author suggests at any point in the tweet that some person or entity did something wrong, code 'Yes'.",
        'MEDI': "Does Donald Trump make derogatory or condescending statements about the news media in this tweet?",
        'FAMY': "Does Donald Trump reference members of his immediate family in this tweet? This includes mentions of his wife, children, grandchildren, parents, or siblings.",
        'PLCE': "Does this tweet reference the police? This includes any mention of law enforcement, police departments, officers, or related terms.",
        'MAGA': "Does this tweet reference Donald Trump's 2016 campaign slogan 'Make America Great Again' or its abbreviation 'MAGA'?",
        'CAPT': "Are there words that contain only capital letters in this tweet? Words must be at least 2 letters long and exclude proper nouns.",
        'INDV': "Does this tweet reference an individual person? Exclude self-references to Donald Trump. Include references to specific people by name, title, or clear contextual reference.",
        'MARG': "Does the tweet explicitly reference a marginalized group or groups? This includes references to racial, ethnic, religious, gender, sexual orientation, disability, or other historically marginalized communities.",
        'INTN': "Does this tweet reference international topics outside of USA? This includes foreign countries, international relations, global events, or foreign leaders.",
        'PRTY': "Does this tweet reference US political parties? This includes mentions of Democrats, Republicans, or other political parties, as well as clear references to party affiliations.",
        'IMMG': "Does this tweet reference immigration in the US? This includes any mention of immigration policy, border control, refugees, asylum seekers, or related topics."
    }
    
    return f"""[INST] Please analyze this tweet for code {code}:

TWEET TEXT:
{text}

DEFINITION:
{codebook[code]}

Respond with these exact sections:

Key Elements:
[List relevant elements from the tweet]

Analysis:
[Explain how elements relate to definition]

Decision:
Only write "Yes" or "No" - nothing else

Reason:
Write exactly one clear sentence explaining the decision, ending with a period.

Important: Keep all responses simple and direct. No step-by-step format, no extra formatting.
[/INST]"""

def clean_response_text(text):
    """Clean the response text by removing unwanted artifacts and formatting while preserving section headers."""
    # Remove common LLM artifacts
    artifacts = [
        r'\[/?INST\]',          # [INST] tags
        r'\$\\boxed{.*?}\$',    # LaTeX boxed answers
        r'Step \d+:.*?(?=\n|$)', # Step-by-step instructions
        r'The final answer is:.*?(?=\n|$)',
        r'<rewritten_response>.*?(?=\n|$)',
        r'becomes.*?(?=\n|$)'
    ]
    
    cleaned = text
    for pattern in artifacts:
        cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE | re.MULTILINE)
    
    # Split into lines and clean each line
    cleaned_lines = []
    for line in cleaned.split('\n'):
        line = line.strip()
        if not line:
            continue
        # Remove headers and formatting
        line = line.replace('##', '').replace('#', '')
        line = line.strip('*_ ')
        if line:
            cleaned_lines.append(line)
            
    return '\n'.join(cleaned_lines)

def extract_decision_and_reason(response_text):
    """Extract decision and reason from the structured response with improved reliability and robust error handling."""
    logging.debug(f"Processing response text: {response_text}")
    try:
        if not response_text:
            return "Error", "Empty response received"
            
        # Split into sections while preserving multi-line content
        sections = {}
        current_section = None
        current_content = []
        
        for line in response_text.split('\n'):
            line = line.strip()
            if not line:
                continue
                
            if line.startswith(('Key Elements:', 'Analysis:', 'Decision:', 'Reason:')):
                if current_section and current_content:
                    sections[current_section] = ' '.join(current_content).strip()
                current_section = line.split(':')[0].strip()
                current_content = [line.split(':', 1)[1].strip() if ':' in line else '']
            else:
                current_content.append(line)
        
                    # Add the last section and log for debugging
        if current_section and current_content:
            sections[current_section] = ' '.join(current_content).strip()
            logging.debug(f"Parsed section {current_section}: {sections[current_section]}")
        
        # Extract decision with more robust parsing
        decision = None
        if 'Decision' in sections:
            decision_text = sections['Decision'].lower().strip()
            # Clean the decision text more aggressively
            decision_text = re.sub(r'[^a-z]', '', decision_text)
            if decision_text == 'yes':
                decision = 'Yes'
            elif decision_text == 'no':
                decision = 'No'
            
            # If still no decision, try looking for yes/no anywhere in the text
            if not decision:
                if any(word in decision_text for word in ['yes', 'yep', 'yeah']):
                    decision = 'Yes'
                elif any(word in decision_text for word in ['no', 'nope', 'nah']):
                    decision = 'No'
        
        # Get reason from the Reason section
        reason = sections.get('Reason', '')
        
        # If reason is missing or incomplete, try Analysis section
        if not reason or len(reason) < 20:
            analysis = sections.get('Analysis', '')
            if analysis:
                sentences = [s.strip() for s in analysis.split('.') if s.strip()]
                if sentences:
                    # Find a good sentence with explanatory words
                    for sentence in sentences:
                        if len(sentence) >= 20 and any(word in sentence.lower() for word in ['because', 'since', 'as', 'therefore', 'hence', 'indicates', 'shows', 'contains', 'lacks', 'does not']):
                            reason = sentence
                            break
                    # If no good sentence found, take the first complete one
                    if not reason:
                        reason = next((s for s in sentences if len(s) >= 20), sentences[0])
        
        # If still no good reason, fallback to Key Elements
        if not reason or len(reason) < 20:
            elements = sections.get('Key Elements', '')
            reason = f"Based on the analysis of {elements}, this decision was reached"
        
        # Final validation
        if not decision:
            return "Error", "Could not find clear Yes/No decision"
            
        # Ensure reason ends with proper punctuation
        if not reason.endswith(('.', '!', '?')):
            reason += '.'
            
        return decision.capitalize(), reason
        
    except Exception as e:
        logging.error(f"Error parsing response: {str(e)}")
        return "Error", str(e)

def classify_text(text, code, retry_count=3):
    """Classify text with retries."""
    for attempt in range(retry_count):
        try:
            response = requests.post(
                endpoint,
                headers={
                    "Authorization": f"Bearer {TOGETHER_API_KEY}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
                    "prompt": prepare_prompt(text, code),
                    "max_tokens": 400,
                    "temperature": 0.1,
                    "top_p": 0.7,
                    "top_k": 40,
                    "repetition_penalty": 1.1
                }
            )
            response.raise_for_status()
            
            response_json = response.json()
            response_text = (
                response_json.get('output', {}).get('choices', [{}])[0].get('text', '')
                if isinstance(response_json.get('output'), dict)
                else response_json.get('output', '')
            )
            
            return extract_decision_and_reason(clean_response_text(response_text))
            
        except Exception as e:
            if attempt == retry_count - 1:
                return "Error", str(e)
            time.sleep(2 ** attempt)
    
    return "Error", "Maximum retries exceeded"

def process_single_tweet(args):
    """Process a single tweet for parallel execution."""
    idx, row = args
    try:
        decision, reason = classify_text(row['text'], row['code_name'])
        return {
            'index': idx,
            'prediction': decision,
            'reasoning': reason,
            'success': decision not in ["Error", None]
        }
    except Exception as e:
        logging.error(f"Error processing row {idx}: {str(e)}")
        return {
            'index': idx,
            'prediction': "Error",
            'reasoning': str(e),
            'success': False
        }

def process_tweets(df, max_workers=10):
    """Process tweets in parallel with true concurrency."""
    df_processed = df.copy()
    df_processed['model_prediction'] = None
    df_processed['model_reasoning'] = None
    
    success_count = 0
    error_count = 0
    
    # Create list of (index, row) tuples for parallel processing
    tasks = list(df_processed.iterrows())
    
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process_single_tweet, task) for task in tasks]
        
        with tqdm(total=len(df_processed), desc="Processing tweets") as pbar:
            for future in as_completed(futures):
                result = future.result()
                df_processed.at[result['index'], 'model_prediction'] = result['prediction']
                df_processed.at[result['index'], 'model_reasoning'] = result['reasoning']
                
                if result['success']:
                    success_count += 1
                else:
                    error_count += 1
                    
                pbar.update(1)
    
    logging.info(f"\nProcessing complete!")
    logging.info(f"Successfully processed: {success_count} rows")
    logging.info(f"Errors: {error_count} rows")
    
    return df_processed

# Example usage with true parallel processing
df_results = process_tweets(df, max_workers=5)

Processing tweets: 100%|██████████| 1300/1300 [37:27<00:00,  1.73s/it] 
2024-12-23 13:48:13,904 - INFO - 
Processing complete!
2024-12-23 13:48:13,905 - INFO - Successfully processed: 1289 rows
2024-12-23 13:48:13,906 - INFO - Errors: 11 rows


In [27]:
df_results

Unnamed: 0,text_id,code_id,code_name,text,original_code,replicated_code,model_code,reason,model_prediction,model_reasoning
0,1,1,HSTG,A great guy (with great ratings)! https://t.co/S0S6xqrjSC,No,No,No,"There are no hashtags used in this tweet, only a hyperlink. The codebook specifically states to exclude hyperlinks from the decision of whether hashtags are used. Therefore, the most applicable code is ""No.""",No,"The tweet does not include any hashtags because it lacks the """" symbol preceding any word or phrase."
1,1,2,ATSN,A great guy (with great ratings)! https://t.co/S0S6xqrjSC,No,No,No,"There are no at signs (@) present in this tweet, only a hyperlink. Therefore, the code for at signs is not applicable.",No,"The ""@"" symbol in the tweet is only found within a hyperlink, thus it doesn't meet the criteria defined for."
2,1,3,CRIT,A great guy (with great ratings)! https://t.co/S0S6xqrjSC,No,No,No,"The tweet does not criticize another person or idea, it simply praises someone. Therefore, the code 'No' is the most applicable.",No,"The tweet does not contain any criticism of another person or idea, but rather expresses praise for someone."
3,1,4,MEDI,A great guy (with great ratings)! https://t.co/S0S6xqrjSC,No,No,No,"This tweet does not contain any derogatory or condescending statements about the news media or individual journalists. It is simply a positive comment about someone with good ratings. Therefore, the code is ""No"".",No,"The tone of the tweet appears to be positive and supportive, indicating that Donald Trump is not making a derogatory or condescending statement about the news media in this instance."
4,1,5,FAMY,A great guy (with great ratings)! https://t.co/S0S6xqrjSC,No,No,No,The tweet does not reference any members of Donald Trump's immediate family.,No,The tweet does not contain any references to Donald Trump's immediate family members.
...,...,...,...,...,...,...,...,...,...,...
1295,100,9,INDV,RT @realDonaldTrump: National Pearl Harbor Remembrance Day - “A day that will live in infamy!” December 7 1941,No,No,No,"The tweet does not reference any individual person, but rather a historical event. Therefore, the code for referencing an individual person does not apply.",Yes,"The tweet contains a quote from President Franklin D. Roosevelt, clearly referencing an individual person through his famous speech."
1296,100,10,MARG,RT @realDonaldTrump: National Pearl Harbor Remembrance Day - “A day that will live in infamy!” December 7 1941,No,No,No,The tweet does not reference any marginalized group or groups. It is a remembrance tweet for National Pearl Harbor Remembrance Day.,No,The tweet does not contain an explicit reference to any marginalized group.
1297,100,11,INTN,RT @realDonaldTrump: National Pearl Harbor Remembrance Day - “A day that will live in infamy!” December 7 1941,No,No,No,"The tweet does not reference any international topics outside of the USA, other countries, or international leaders. It only references a historical event that occurred within the USA. Therefore, the code ""No"" is the most applicable.",Yes,"This tweet indirectly references an international topic because Pearl Harbor was attacked by Japan, making it an event related to international relations."
1298,100,12,PRTY,RT @realDonaldTrump: National Pearl Harbor Remembrance Day - “A day that will live in infamy!” December 7 1941,No,No,No,This tweet does not reference any US political parties or ideologies. It is a commemoration of National Pearl Harbor Remembrance Day.,No,"There is no direct reference to US political parties such as Democrats, Republicans, or any other party affiliation in the tweet."


In [8]:
pd.set_option('display.max_colwidth', None)
print(df_results)

    text_id  code_id code_name  \
0         1        1      HSTG   
1         1        2      ATSN   
2         1        3      CRIT   
3         1        4      MEDI   
4         1        5      FAMY   
5         1        6      PLCE   
6         1        7      MAGA   
7         1        8      CAPT   
8         1        9      INDV   
9         1       10      MARG   
10        1       11      INTN   
11        1       12      PRTY   
12        1       13      IMMG   
13        2        1      HSTG   
14        2        2      ATSN   
15        2        3      CRIT   
16        2        4      MEDI   
17        2        5      FAMY   
18        2        6      PLCE   
19        2        7      MAGA   

                                                                                                                            text  \
0                                                                      A great guy (with great ratings)! https://t.co/S0S6xqrjSC   
1                    

In [31]:
df_results.to_csv('results_csvs/tweet_analysis_llama_33.csv', index=False)

In [2]:
df_results=pd.read_csv('results_csvs/tweet_analysis_results.csv')

In [30]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, cohen_kappa_score, confusion_matrix

# First, clean the data by filling NaN values
df_results = df_results.fillna('No')  # or whatever default value makes sense

# Make sure all values are strings
columns_to_convert = ['original_code', 'replicated_code', 'model_code', 'model_prediction']
for col in columns_to_convert:
    df_results[col] = df_results[col].astype(str)

def perform_similarity_analysis(df):
    # Rest of the code remains the same
    comparisons = [
        ('model_code', 'model_prediction', 'Original Model vs New Model'),
        ('original_code', 'model_code', 'Original Code vs Original Model'),
        ('original_code', 'model_prediction', 'Original Code vs New Model'),
        ('replicated_code', 'model_code', 'Replicated Code vs Original Model'),
        ('replicated_code', 'model_prediction', 'Replicated Code vs New Model')
    ]
    
    results = []
    
    for col1, col2, name in comparisons:
        accuracy = accuracy_score(df[col1], df[col2])
        kappa = cohen_kappa_score(df[col1], df[col2])
        matching_cases = (df[col1] == df[col2]).sum()
        total_cases = len(df)
        match_percentage = (matching_cases / total_cases) * 100
        
        results.append({
            'Comparison': name,
            'Accuracy': accuracy,
            'Kappa Score': kappa,
            'Matching Cases': matching_cases,
            'Total Cases': total_cases,
            'Match Percentage': match_percentage
        })
    
    results_df = pd.DataFrame(results)
    
    print("Similarity Analysis Summary:")
    print("=" * 100)
    print(results_df.to_string(index=False))
    
    print("\nDetailed Analysis:")
    print("=" * 100)
    
    for col1, col2, name in comparisons:
        matrix = confusion_matrix(df[col1], df[col2])
        print(f"\nConfusion Matrix for {name}:")
        print(f"Categories: {sorted(df[col1].unique().tolist())}")
        print(matrix)

# Run the analysis
perform_similarity_analysis(df_results)

Similarity Analysis Summary:
                       Comparison  Accuracy  Kappa Score  Matching Cases  Total Cases  Match Percentage
      Original Model vs New Model  0.833846     0.550007            1084         1300         83.384615
  Original Code vs Original Model  0.853846     0.546323            1110         1300         85.384615
       Original Code vs New Model  0.901538     0.700487            1172         1300         90.153846
Replicated Code vs Original Model  0.863846     0.572559            1123         1300         86.384615
     Replicated Code vs New Model  0.893846     0.673539            1162         1300         89.384615

Detailed Analysis:

Confusion Matrix for Original Model vs New Model:
Categories: ['No', 'Yes']
[[  0   0   0]
 [  5 880 105]
 [  6 100 204]]

Confusion Matrix for Original Code vs Original Model:
Categories: ['No', 'Yes']
[[946 146]
 [ 44 164]]

Confusion Matrix for Original Code vs New Model:
Categories: ['No', 'Yes']
[[  0   0   0]
 [  7 974