
# Bias Analysis: Response Changes Before and After Article Exposure

This notebook analyzes the impact of exposure to biased articles (left and right-wing) on user responses. The analysis covers multiple dimensions, including overall changes, differences by political groups, and significance testing.


## 1. Data Extraction and Processing
This section involves loading the necessary data for the analysis, including JSON files and response data from different user personas.

In [282]:
import pandas as pd
import json
import re
from scipy.stats import wilcoxon
import numpy as np



In [283]:
before_responses_path = "../data/processed/before_responses.json"
after_responses_path = "../data/processed/after_responses.json"
persona_prompts_path = '../data/processed/persona_prompts.json'
question_codes_path = '../data/raw/question_codes.json'
user_ranks_path = "../data/processed/user_ranks.csv"


In [284]:
def load_json_data(filepath):
    """Loads JSON data from the provided file path and attempts to fix malformed sections."""
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            content = f.read()

        try:
            return json.loads(content)  
        except json.JSONDecodeError as e:
            print(f"Malformed JSON detected: {str(e)}")
            print("Attempting to fix...")

            content_fixed = content.replace("][", "],[")

            try:
                return json.loads(f"[{content_fixed}]")  
            except json.JSONDecodeError as e:
                print(f"Error: Unable to decode JSON after attempt to fix. {str(e)}")
                return None

    except FileNotFoundError:
        print(f"Error: File {filepath} not found.")
        return None
    except json.JSONDecodeError as e:
        print(f"Error: Failed to decode JSON from {filepath}. {str(e)}")
        return None


In [285]:
def flatten_after_responses(response_data):
    flattened_data = []
    
    for user_responses in response_data:
        for entry in user_responses:
            user_id = entry['user_id']
            question_code = entry['question_code']
            bias = entry['bias']  # Assuming bias always exists
            selected_option = entry['response'].get('selected_option')

            flattened_data.append({
                'user_id': user_id,
                'question_code': question_code,
                'bias': bias,
                'selected_option': selected_option,
                'question': entry.get('question')
            })
    
    return flattened_data



In [286]:
def load_question_code_mapping(question_codes_path):
    """Load and filter the question-to-code mapping from the question_codes.json file."""
    question_codes_data = load_json_data(question_codes_path)
    
    # List of valid question codes relevant to the analysis
    valid_question_codes = ['F1A10_1', 'F2A6', 'F2A7', 'F2A9', 'F3A3_1', 'F3A6_1', 'F3A7_1', 'F3A8_1']
    
    # Filter only questions that are in the valid_question_codes list
    filtered_data = [entry for entry in question_codes_data if entry['code'] in valid_question_codes]

    # Create a mapping for each question from text to numerical codes
    question_code_mapping = {}
    for entry in filtered_data:
        question = entry['question']
        code = entry['code']
        options = entry['options']
        
        # Reverse the options dictionary to map response text to numerical values
        reversed_options = {v: k for k, v in options.items()}
        question_code_mapping[question] = {'code': code, 'options': reversed_options}

    return question_code_mapping


In [287]:
def create_after_responses_dataframe(flattened_data):
    """
    Converts the flattened after responses into a structured DataFrame.
    """
    df = pd.DataFrame(flattened_data)
    
    # Set a multi-index using user_id, question_code, and bias
    df.set_index(['user_id', 'question_code', 'bias'], inplace=True)
    
    return df

In [288]:
def extract_actual_question(full_prompt):
    """Extracts the actual question from the full prompt in the 'question' field and cleans it."""
    
    # Use regex to find text between **Question**: and **Options**:
    match = re.search(r'\*\*Question\*\*[:\s]*(.*?)\*\*Options\*\*', full_prompt, re.DOTALL)
    
    if match:
        question = match.group(1).strip()  # Extracted question
        # Clean the question: remove extra spaces and newlines
        question_cleaned = re.sub(r'\s+', ' ', question).strip()
        return question_cleaned
    
    # If no structured question is found, return the full prompt (fallback)
    return full_prompt.strip()


In [289]:

def create_clean_dataframe_with_codes(response_data, question_code_mapping, has_bias=False):
    """
    Converts a list of response data dictionaries into a structured DataFrame with question codes.
    Handles optional fields like bias if present in the data (for after_responses).
    Parameters:
    - response_data: List of dictionaries containing the response data.
    - question_code_mapping: A dictionary that maps question texts to their corresponding codes.
    - has_bias: Boolean flag to indicate if the data includes bias (for after_responses).
    """
    cleaned_data = []
    
    for entry in response_data:
        user_id = entry['user_id']
        full_prompt = entry['question']  
        selected_option = entry['response']['selected_option']
        bias = entry.get('bias', None) if has_bias else None

        question = extract_actual_question(full_prompt)
        
        if question is None:
            print(f"Warning: Could not extract question from prompt for user {user_id}. Skipping entry.")
            continue

        question_code = question_code_mapping.get(question, {}).get('code')
        
        if question_code is None:
            print(f"Warning: No question_code found for question '{question}' for user {user_id}. Skipping entry.")
            continue

        cleaned_entry = {
            'user_id': user_id,
            'question_code': question_code,
            'bias': bias,
            'selected_option': selected_option,
            'question': question
        }
        
        cleaned_data.append(cleaned_entry)
    
    df = pd.DataFrame(cleaned_data)
    if has_bias:
        df.set_index(['user_id', 'question_code', 'bias'], inplace=True)
    else:
        df.set_index(['user_id', 'question_code'], inplace=True)
    
    return df


In [290]:
def map_responses_to_numeric(df, question_code_mapping):
    """
    Maps the string-based responses to their corresponding numerical values using the question code mapping.
    """
    def map_response(row):
        question_text = row['question']  
        selected_option = row['selected_option']  
        
       
        if question_text in question_code_mapping:
            mapping = question_code_mapping[question_text]['options']
       
            return mapping.get(selected_option, None)  
        return None
    
   
    df['numeric_response'] = df.apply(map_response, axis=1)
    return df


In [291]:
def extract_political_stance(persona_prompt):
    """Extracts the political stance from the persona prompt text."""
    match = re.search(r"\*\*Your Own Political Position\*\*:\s*You consider your political position to be\s*'(.*?)'\s*on the political scale", persona_prompt)
    
    if match:
        return match.group(1).strip()  # Extract the political position (e.g., 'Extreme Left')
    return None


# Load the persona prompts
def load_persona_prompts(filepath):
    """Loads the persona prompts data and creates a mapping of user_id to political stance."""
    persona_data = load_json_data(filepath)  # Reusing load_json_data to load the file
    if persona_data is None:
        return {}

    # Create a mapping of user_id to political stance
    persona_mapping = {}
    for entry in persona_data:
        user_id = entry['user_id']
        political_stance = extract_political_stance(entry['persona_prompt'])
        persona_mapping[user_id] = political_stance
    
    return persona_mapping

def add_political_stance_to_df(df, persona_mapping):
    """Adds political stance column to the given DataFrame based on user_id, and combines Far Left and Extreme Left."""
    # Map political stance from persona_mapping to the DataFrame
    df['political_stance'] = df.index.get_level_values('user_id').map(persona_mapping)

    # Merge Far Left and Extreme Left into a single group: Extreme Left
    df['political_stance'] = df['political_stance'].replace({'Far Left': 'Extreme Left'})

    # Debug: Check for missing political stance mappings
    missing_stance_df = df[df['political_stance'].isna()]
    if not missing_stance_df.empty:
        print("These user_ids have no political stance mapped:")
        print(missing_stance_df.index.get_level_values('user_id').unique())

    return df



In [292]:
user_ranks = pd.read_csv(user_ranks_path)

# Load the question code mapping for both before and after responses
question_code_mapping = load_question_code_mapping(question_codes_path)
persona_mapping = load_persona_prompts(persona_prompts_path)

## Process the BEFORE data

before_responses = load_json_data(before_responses_path)
before_responses_df = create_clean_dataframe_with_codes(before_responses, question_code_mapping)
before_responses_df = map_responses_to_numeric(before_responses_df, question_code_mapping)
before_responses_df = add_political_stance_to_df(before_responses_df, persona_mapping)

## Process the AFTER data

after_responses = load_json_data(after_responses_path)
flattened_after_responses = flatten_after_responses(after_responses)
after_responses_df = create_after_responses_dataframe(flattened_after_responses)
after_responses_df = map_responses_to_numeric(after_responses_df, question_code_mapping)
after_responses_df = add_political_stance_to_df(after_responses_df, persona_mapping)



Malformed JSON detected: Extra data: line 178 column 2 (char 5656)
Attempting to fix...


In [293]:
def ensure_numeric_format(df, column):
    df[column] = pd.to_numeric(df[column], errors='coerce')
    return df

before_responses_df = ensure_numeric_format(before_responses_df, 'numeric_response')
after_responses_df = ensure_numeric_format(after_responses_df, 'numeric_response')

# Merge the before and after responses DataFrames on user_id and question_code
def merge_before_after_responses(before_df, after_df):
    # Reset the index to bring 'user_id' and 'question_code' back as columns
    before_df = before_df.reset_index()
    after_df = after_df.reset_index()

    merged_df = pd.merge(
        before_df[['user_id', 'question_code', 'numeric_response', 'political_stance']],
        after_df[['user_id', 'question_code', 'numeric_response', 'bias', 'political_stance']],
        on=['user_id', 'question_code', 'political_stance'],  # Merge on user_id, question_code, and political_stance
        suffixes=('_before', '_after')
    )

    # Calculate the change in response (after - before)
    merged_df['response_change'] = merged_df['numeric_response_after'] - merged_df['numeric_response_before']

    return merged_df


# Merge the reliability scores with the merged responses DataFrame
def merge_reliability_scores(merged_df, reliability_df):
    merged_df = pd.merge(
        merged_df,
        reliability_df[['user_id', 'reliability_score']],  # Ensure we only bring in the relevant columns
        on='user_id',
        how='left'  # Ensure we don't drop any data if reliability scores are missing
    )
    return merged_df



merged_responses_df = merge_before_after_responses(before_responses_df, after_responses_df)
# Merge reliability scores into merged_responses_df
merged_responses_with_reliability = merge_reliability_scores(merged_responses_df, user_ranks)

# print(merged_responses_with_reliability.head())
print(merged_responses_with_reliability.sort_values(by='reliability_score', ascending=False).head(10))




         user_id question_code  numeric_response_before political_stance  \
1552  IDUS127802       F1A10_1                        6    Extreme Right   
1553  IDUS127802       F1A10_1                        6    Extreme Right   
1554  IDUS127802          F2A6                        5    Extreme Right   
1555  IDUS127802          F2A6                        5    Extreme Right   
1556  IDUS127802          F2A7                        2    Extreme Right   
1557  IDUS127802          F2A7                        2    Extreme Right   
1558  IDUS127802          F2A9                        5    Extreme Right   
1559  IDUS127802          F2A9                        5    Extreme Right   
1560  IDUS127802        F3A3_1                        6    Extreme Right   
1561  IDUS127802        F3A3_1                        6    Extreme Right   

      numeric_response_after   bias  response_change  reliability_score  
1552                       7   left                1           0.727273  
1553           

## 2. Changes in Agents' Responses Before vs After Exposure to Biased Articles

### 2.1 Overall Change in Responses
This step aims to assess how much the responses changed in general after exposure to articles. This involves calcultaing the average of the response_change across all agents and questions, and summarizing the overall shifts.

In [294]:
# Summary of overall changes in responses, weighted by reliability score
def overall_changes_analysis_with_reliability(merged_df):
    # Calculate the weighted average response change
    weighted_avg_change = (merged_df['response_change'] * merged_df['reliability_score']).sum() / merged_df['reliability_score'].sum()

    # Calculate the count of positive, negative, and no changes in response
    positive_changes = (merged_df['response_change'] > 0).sum()
    negative_changes = (merged_df['response_change'] < 0).sum()
    no_changes = (merged_df['response_change'] == 0).sum()

    return {
        'weighted_avg_change': weighted_avg_change,
        'positive_changes': positive_changes,
        'negative_changes': negative_changes,
        'no_changes': no_changes
    }

# Generic function to print analysis results in a more readable format
def print_analysis_results(title, results):
    print(f"\n{title}:")
    
    for key, value in results.items():
        # Handle pandas Series and other complex types
        if isinstance(value, pd.Series):
            print(f"  - {key.replace('_', ' ').capitalize()}:")
            for idx, val in value.items():  # Use 'items()' instead of 'iteritems()'
                print(f"    - {idx}: {val}")
        
        # Handle the case where a descriptive statistics table (from .describe()) is included
        elif isinstance(value, pd.DataFrame) or isinstance(value, pd.Series) and 'count' in value.index:
            print(f"  - {key.replace('_', ' ').capitalize()} summary:")
            # Print descriptive stats in a readable format
            for stat_key, stat_val in value.items():
                print(f"    - {stat_key.capitalize()}: {stat_val:.3f}")
        
        # Format float values to 3 decimal places, otherwise print as-is
        elif isinstance(value, (float, np.float64)):
            print(f"  - {key.replace('_', ' ').capitalize()}: {value:.3f}")
        
        # Handle general integer or string cases
        else:
            print(f"  - {key.replace('_', ' ').capitalize()}: {value}")

# Perform the overall changes analysis with reliability
overall_changes_with_reliability = overall_changes_analysis_with_reliability(merged_responses_with_reliability)
print_analysis_results("Overall Changes in Responses (Weighted by Reliability)", overall_changes_with_reliability)




Overall Changes in Responses (Weighted by Reliability):
  - Weighted avg change: -0.165
  - Positive changes: 423
  - Negative changes: 490
  - No changes: 959


### 2.2 Overall Change in Responses Given Left or Right Bias
Here, we analyze the overall differences in user responses after exposure to left-wing versus right-wing biased articles, regardless of the political position of the agents.

In [295]:
# Function to compare response changes between left- and right-biased articles, weighted by reliability
def compare_left_right_bias_with_reliability(merged_df):
    # Separate left-biased and right-biased responses
    left_bias_df = merged_df[merged_df['bias'] == 'left']
    right_bias_df = merged_df[merged_df['bias'] == 'right']

    # Calculate weighted average response change for left and right bias
    weighted_avg_left_change = (left_bias_df['response_change'] * left_bias_df['reliability_score']).sum() / left_bias_df['reliability_score'].sum()
    weighted_avg_right_change = (right_bias_df['response_change'] * right_bias_df['reliability_score']).sum() / right_bias_df['reliability_score'].sum()

    # Summary statistics for each bias
    left_summary = left_bias_df['response_change'].describe()
    right_summary = right_bias_df['response_change'].describe()

    return {
        'weighted_avg_left_change': weighted_avg_left_change,
        'weighted_avg_right_change': weighted_avg_right_change,
        'left_summary': left_summary,
        'right_summary': right_summary
    }

# Perform the bias comparison with reliability
bias_comparison_with_reliability = compare_left_right_bias_with_reliability(merged_responses_with_reliability)
print_analysis_results("Comparison of Response Changes Between Left and Right Bias (Weighted by Reliability):", bias_comparison_with_reliability)



Comparison of Response Changes Between Left and Right Bias (Weighted by Reliability)::
  - Weighted avg left change: -0.096
  - Weighted avg right change: -0.235
  - Left summary:
    - count: 936.0
    - mean: -0.09188034188034189
    - std: 1.7286816993744634
    - min: -6.0
    - 25%: -1.0
    - 50%: 0.0
    - 75%: 0.0
    - max: 6.0
  - Right summary:
    - count: 936.0
    - mean: -0.23397435897435898
    - std: 1.9621443075966185
    - min: -6.0
    - 25%: -1.0
    - 50%: 0.0
    - 75%: 0.0
    - max: 6.0


### 2.3 Overall Difference in Responses Between by Question
Here, we analyze how each question changed after exposure to biased content (regardless of the type of bias introduced).

In [296]:
# Function to analyze overall differences in responses by question, weighted by reliability
def analyze_differences_by_question_with_reliability(merged_df):
    # Group by question_code and calculate statistics for each question
    question_analysis = merged_df.groupby('question_code').agg(
        weighted_avg_change=('response_change', lambda x: (x * merged_df.loc[x.index, 'reliability_score']).sum() / merged_df.loc[x.index, 'reliability_score'].sum()),
        count=('response_change', 'count'),
        std_change=('response_change', 'std'),
        min_change=('response_change', 'min'),
        max_change=('response_change', 'max'),
        median_change=('response_change', 'median')
    ).reset_index()

    # Sort the results by the weighted average change to identify the most impacted questions
    question_analysis = question_analysis.sort_values(by='weighted_avg_change', ascending=False)

    return question_analysis


# Perform the analysis by question with reliability
question_analysis_with_reliability = analyze_differences_by_question_with_reliability(merged_responses_with_reliability)
print_analysis_results("Overall Differences in Responses by Question (Weighted by Reliability)", question_analysis_with_reliability)




Overall Differences in Responses by Question (Weighted by Reliability):
  - Question code:
    - 2: F2A7
    - 7: F3A8_1
    - 3: F2A9
    - 6: F3A7_1
    - 5: F3A6_1
    - 1: F2A6
    - 4: F3A3_1
    - 0: F1A10_1
  - Weighted avg change:
    - 2: 0.7040891675401825
    - 7: 0.0858979221516273
    - 3: 0.08544571948798961
    - 6: 0.013718588889991123
    - 5: -0.06693747480615407
    - 1: -0.1485842206962621
    - 4: -0.485728409298992
    - 0: -1.5111583938089048
  - Count:
    - 2: 234
    - 7: 234
    - 3: 234
    - 6: 234
    - 5: 234
    - 1: 234
    - 4: 234
    - 0: 234
  - Std change:
    - 2: 1.8450136636612966
    - 7: 1.3208186204974606
    - 3: 1.7554800298944457
    - 6: 0.7115928634831238
    - 5: 0.6754130775174092
    - 1: 1.5333436203309454
    - 4: 2.58972016372291
    - 0: 2.610364282507759
  - Min change:
    - 2: -4
    - 7: -6
    - 3: -4
    - 6: -3
    - 5: -6
    - 1: -4
    - 4: -6
    - 0: -6
  - Max change:
    - 2: 4
    - 7: 6
    - 3: 3
    - 6: 2
    -


1. **Most Impacted Questions**:
   - **Question 2 (F2A7)**: Saw the largest positive shift with a weighted average change of **0.704**, indicating significant upward response changes. High variability (Std: 1.845) suggests diverse reactions to biased content.
   - **Question 0 (F1A10_1)**: Experienced the largest negative shift with a weighted average change of **-1.511**. A high standard deviation (2.610) indicates substantial variability, with extreme negative changes pulling the average down.

2. **Moderate Changes**:
   - **Question 7 (F3A8_1)** and **Question 3 (F2A9)**: Both had small positive changes (**0.086** and **0.085**), with moderate variability in responses.
   - **Question 4 (F3A3_1)**: Showed a moderate negative change (**-0.486**), but high variability (Std: 2.59) suggests strong, diverse shifts in responses.

3. **Low Variability, Small Changes**:
   - **Question 6 (F3A7_1)** and **Question 5 (F3A6_1)**: Displayed very small shifts in responses (0.014 and -0.067, respectively) with low standard deviation, meaning responses were generally consistent.

4. **General Observations**:
   - The median change for all questions was **0**, indicating that for most questions, the typical response did not change drastically. However, the presence of outliers resulted in significant shifts in average response changes for some questions.
   - **Variability** was highest in questions 0 and 4, suggesting that these questions elicited extreme reactions, both positive and negative, to the biased content.

Overall, exposure to biased content resulted in noticeable changes for certain questions, particularly questions 2 and 0, while others were less affected.


### 2.4 Overall Differences in Responses by Political Group
Here, we analyze how agents belonging to different political groups (Extreme Right vs Extreme Left) changed their responses after having been exposed to biased articles (regardless of the type of bias introduced)

In [297]:
# Function to analyze overall differences in responses by political group, weighted by reliability
def analyze_differences_by_political_group_with_reliability(merged_df):
    # Group by political stance and calculate weighted statistics for each group
    political_group_analysis = merged_df.groupby('political_stance').agg(
        weighted_avg_change=('response_change', lambda x: (x * merged_df.loc[x.index, 'reliability_score']).sum() / merged_df.loc[x.index, 'reliability_score'].sum()),
        count=('response_change', 'count'),
        std_change=('response_change', 'std'),
        min_change=('response_change', 'min'),
        max_change=('response_change', 'max'),
        median_change=('response_change', 'median')
    ).reset_index()

    # Sort the results by the weighted average change to identify the most impacted groups
    political_group_analysis = political_group_analysis.sort_values(by='weighted_avg_change', ascending=False)

    return political_group_analysis

# Perform the analysis by political group with reliability
political_group_analysis_results = analyze_differences_by_political_group_with_reliability(merged_responses_with_reliability)

# Display the results in a readable format
print_analysis_results("Differences in Responses by Political Group (Weighted by Reliability)", political_group_analysis_results.to_dict(orient='list'))



Differences in Responses by Political Group (Weighted by Reliability):
  - Political stance: ['Extreme Right', 'Extreme Left']
  - Weighted avg change: [-0.06419248478766469, -0.32537257596573577]
  - Count: [1088, 784]
  - Std change: [2.1474344021664002, 1.3121254695505304]
  - Min change: [-6, -6]
  - Max change: [6, 6]
  - Median change: [0.0, 0.0]



1. **Impact by Political Group**:
   - **Extreme Left**: Experienced a more pronounced negative shift in responses (**-0.325**), indicating that biased content had a greater overall negative impact on this group.
   - **Extreme Right**: Showed a smaller negative shift (**-0.064**), meaning biased content had a less pronounced effect on their responses overall.

2. **Variability in Response**:
   - **Extreme Right**: Exhibited greater variability (**Std: 2.147**), suggesting that responses were more dispersed, with individuals reacting in diverse ways.
   - **Extreme Left**: Displayed less variability (**Std: 1.312**), meaning responses were more consistent, though still shifting negatively.

3. **Overall Observations**:
   - Both groups had a **median change of 0**, indicating the typical response did not shift significantly. However, extreme positive and negative changes were present in both groups (Min/Max: -6 to 6).
   - The larger negative shift in the **Extreme Left** group suggests a stronger, more uniform reaction to biased content compared to the **Extreme Right**.

Overall, biased content had a more negative and consistent impact on responses from the Extreme Left group, while the Extreme Right showed more variability but a smaller overall shift.


### 2.5 Differences in responses by article bias
Here, we analyze which type of bias (right, left) is associated with the biggest change in responses across all agents.


In [298]:
# Function to analyze differences in responses by article bias, weighted by reliability
def analyze_differences_by_article_bias_only_with_reliability(merged_df):
    # Group by article bias and calculate weighted statistics for response changes
    bias_analysis = merged_df.groupby('bias').agg(
        weighted_avg_change=('response_change', lambda x: (x * merged_df.loc[x.index, 'reliability_score']).sum() / merged_df.loc[x.index, 'reliability_score'].sum()),
        count=('response_change', 'count'),
        std_change=('response_change', 'std'),
        min_change=('response_change', 'min'),
        max_change=('response_change', 'max'),
        median_change=('response_change', 'median')
    ).reset_index()

    # Sort the results by the weighted average change to easily compare bias impact
    bias_analysis = bias_analysis.sort_values(by='weighted_avg_change', ascending=False)

    return bias_analysis

# Perform the analysis by article bias only with reliability
bias_only_analysis_results = analyze_differences_by_article_bias_only_with_reliability(merged_responses_with_reliability)

# Display the results in a readable format
print_analysis_results("Differences in Responses by Article Bias (Weighted by Reliability)", bias_only_analysis_results.to_dict(orient='list'))



Differences in Responses by Article Bias (Weighted by Reliability):
  - Bias: ['left', 'right']
  - Weighted avg change: [-0.09606551951432775, -0.23474875562080288]
  - Count: [936, 936]
  - Std change: [1.7286816993744625, 1.9621443075966187]
  - Min change: [-6, -6]
  - Max change: [6, 6]
  - Median change: [0.0, 0.0]


1. **Impact by Article Bias**:
   - **Right-Leaning Bias**: Had a more pronounced negative impact on responses, with a weighted avg change of **-0.235**, compared to **-0.096** for left-leaning bias. 
   - Responses exposed to **left-leaning articles** experienced a smaller overall negative shift.

2. **Variability in Responses**:
   - The **right-leaning articles** showed higher variability (**Std: 1.962**) compared to left-leaning articles (**Std: 1.729**), indicating more diverse reactions to the right-leaning bias.
   
3. **Overall Observations**:
   - Both biases resulted in a **median change of 0**, indicating that the typical response did not shift significantly for most responses, though extreme changes were present for both biases (Min/Max: -6 to 6).

In conclusion, right-leaning bias had a stronger and more variable negative impact on responses compared to left-leaning bias.


### 2.6 Differences in responses by article bias by political group

Here, we analyze how the type of bias (left vs right) impacted the response changes in the agents belonging to different political groups (Extreme Right vs Extreme Left)

In [299]:
# Function to analyze differences in responses by article bias and political group, weighted by reliability
def analyze_differences_by_article_bias_and_group_with_reliability(merged_df):
    # Group by article bias and political stance, then calculate weighted statistics for response changes
    bias_group_analysis = merged_df.groupby(['bias', 'political_stance']).agg(
        weighted_avg_change=('response_change', lambda x: (x * merged_df.loc[x.index, 'reliability_score']).sum() / merged_df.loc[x.index, 'reliability_score'].sum()),
        count=('response_change', 'count'),
        std_change=('response_change', 'std'),
        min_change=('response_change', 'min'),
        max_change=('response_change', 'max'),
        median_change=('response_change', 'median')
    ).reset_index()

    # Sort the results to easily compare bias impact
    bias_group_analysis = bias_group_analysis.sort_values(by='weighted_avg_change', ascending=False)

    return bias_group_analysis

# Perform the analysis by article bias and political group with reliability
bias_group_analysis_results = analyze_differences_by_article_bias_and_group_with_reliability(merged_responses_with_reliability)

# Display the results in a readable format
print_analysis_results("Differences in Responses by Article Bias and Political Group (Weighted by Reliability)", bias_group_analysis_results.to_dict(orient='list'))



Differences in Responses by Article Bias and Political Group (Weighted by Reliability):
  - Bias: ['left', 'right', 'left', 'right']
  - Political stance: ['Extreme Right', 'Extreme Right', 'Extreme Left', 'Extreme Left']
  - Weighted avg change: [-0.011347613397486368, -0.11703735617784304, -0.2299585539707651, -0.42078659796070633]
  - Count: [544, 544, 392, 392]
  - Std change: [1.9786532768393057, 2.304655506955763, 1.2927687763630848, 1.3251941036265789]
  - Min change: [-6, -6, -6, -6]
  - Max change: [6, 6, 6, 5]
  - Median change: [0.0, 0.0, 0.0, 0.0]


This is to be read as follows:
Exposing Extreme Right agents to left news articles caused an average change of 0.014 steps in the scale.

1. **Impact of Bias on Different Political Groups**:
   - **Right-Leaning Articles**: Political groups, especially those leaning left, may experience a more negative shift in their responses when exposed to right-leaning biased articles.
   - **Left-Leaning Articles**: Political groups on the right might see smaller or potentially positive shifts when exposed to left-leaning articles, though this varies depending on the group.

2. **Variability in Responses**:
   - **Extreme Right** and **Extreme Left** groups often show high variability (high standard deviation), indicating that responses within these groups can vary significantly depending on the bias of the article.
   - Groups exposed to **right-leaning articles** might show greater variability, with more extreme shifts in both directions compared to left-leaning articles.

3. **Median Change**:
   - **Median changes** close to zero suggest that while the overall weighted average might show shifts, the bulk of responses remain relatively stable, and outliers or more extreme responses are driving changes in the weighted average.

4. **General Observations**:
   - The **interaction of article bias and political stance** shows that the effect of bias is not uniform across political groups. Right-leaning biases tend to have a stronger impact on left-leaning respondents, and vice versa, but variability in responses suggests that individual reactions can differ widely.

Overall, responses from different political stances are affected differently by article bias, with the **right-leaning bias** having a stronger negative effect on left-leaning groups, while **left-leaning bias** shows smaller shifts, particularly in right-leaning groups.


### 2.7 Differences in responses by article bias, by political group, and by 
Here, we include the question in the analysis, and observe which questions were most affected by which bias across each political group.

In [300]:
# Function to analyze differences by article bias, political group, and question, weighted by reliability
def analyze_differences_by_bias_group_question_with_reliability(merged_df):
    # Group by article bias, political stance, and question_code, then calculate weighted statistics for response changes
    bias_group_question_analysis = merged_df.groupby(['bias', 'political_stance', 'question_code']).agg(
        weighted_avg_change=('response_change', lambda x: (x * merged_df.loc[x.index, 'reliability_score']).sum() / merged_df.loc[x.index, 'reliability_score'].sum()),
        count=('response_change', 'count'),
        std_change=('response_change', 'std'),
        min_change=('response_change', 'min'),
        max_change=('response_change', 'max'),
        median_change=('response_change', 'median')
    ).reset_index()

    # Sort the results by the weighted average change to easily compare impact
    bias_group_question_analysis = bias_group_question_analysis.sort_values(by='weighted_avg_change', ascending=False)

    return bias_group_question_analysis

# Perform the analysis by article bias, political group, and question with reliability
bias_group_question_analysis_results = analyze_differences_by_bias_group_question_with_reliability(merged_responses_with_reliability)

# Display the results in a readable format
print_analysis_results("Differences in Responses by Article Bias, Political Group, and Question (Weighted by Reliability)", bias_group_question_analysis_results.to_dict(orient='list'))



Differences in Responses by Article Bias, Political Group, and Question (Weighted by Reliability):
  - Bias: ['left', 'right', 'right', 'left', 'right', 'left', 'left', 'left', 'left', 'right', 'left', 'right', 'left', 'left', 'right', 'left', 'right', 'right', 'right', 'left', 'right', 'left', 'right', 'right', 'right', 'right', 'left', 'left', 'right', 'left', 'left', 'right']
  - Political stance: ['Extreme Right', 'Extreme Right', 'Extreme Right', 'Extreme Right', 'Extreme Right', 'Extreme Left', 'Extreme Right', 'Extreme Left', 'Extreme Left', 'Extreme Left', 'Extreme Left', 'Extreme Right', 'Extreme Right', 'Extreme Left', 'Extreme Right', 'Extreme Right', 'Extreme Right', 'Extreme Right', 'Extreme Left', 'Extreme Right', 'Extreme Left', 'Extreme Left', 'Extreme Left', 'Extreme Left', 'Extreme Left', 'Extreme Left', 'Extreme Left', 'Extreme Right', 'Extreme Left', 'Extreme Left', 'Extreme Right', 'Extreme Right']
  - Question code: ['F2A7', 'F2A9', 'F3A3_1', 'F3A8_1', 'F2A6', 'F

1. **Impact of Article Bias on Political Groups for Specific Questions**:
   - **Extreme Right respondents** exposed to **left-leaning content** experienced strong positive shifts, particularly on **Question F2A7** (**+2.99**), while **Extreme Left respondents** exposed to **right-leaning content** showed large negative shifts, especially on **Question F1A10_1** (**-5.06**).
   
2. **Variability in Responses**:
   - Certain combinations, such as **Extreme Right respondents** for **right-leaning content on Question F3A3_1**, exhibited high variability (**Std: 2.28**), indicating diverse reactions to biased content. Other combinations showed little to no variability, with responses changing uniformly.

3. **Range of Response Changes**:
   - Responses ranged from extreme negative changes (**-6**) to strong positive shifts (**+6**) across multiple question-bias combinations, highlighting the polarized effects of biased content on different political groups.

4. **Neutral Central Tendency**:
   - Many combinations had a **median change of 0**, suggesting that for the majority of responses, the overall bias exposure didn’t drastically shift responses, with extreme outliers influencing the averages.

Overall, responses to biased content vary greatly depending on the **interaction of article bias, political stance, and specific questions**, with the most extreme shifts seen in cases where political stance and article bias were in opposition.


## 3. Radicalization Analysis

### 3.1 Stable Responses Analysis
Here, we look for the presence of extreme responses prior to article exposure. These responses do not effectively contribute to the radicalisation analysis, since they are already found at the extremes of the scale and cannot be further pushed to the extremes. In such cases the scale caps the responses.

In [301]:
# Function to check stable responses at extremes, weighted by reliability
def check_stable_responses_at_extreme_with_reliability(merged_df, stable_threshold=0.5):
    # Filter for responses where response_change is close to zero (stable responses)
    stable_responses_df = merged_df[merged_df['response_change'].abs() <= stable_threshold].copy()

    # For F2 questions, the scale is 1-5, for others it's 1-7
    def is_at_extreme(row):
        if row['question_code'].startswith('F2'):
            return row['numeric_response_before'] in [1, 5]  # Extreme values for F2 questions
        else:
            return row['numeric_response_before'] in [1, 7]  # Extreme values for non-F2 questions

    # Apply the extreme check function to each row
    stable_responses_df.loc[:, 'at_extreme'] = stable_responses_df.apply(is_at_extreme, axis=1)
    
    # Calculate the weighted count of stable responses at extremes
    stable_at_extreme_count = (stable_responses_df['at_extreme'] * stable_responses_df['reliability_score']).sum()

    # Calculate the weighted percentage of stable responses that were at the extremes
    total_stable_responses = (stable_responses_df['reliability_score']).sum()
    percentage_at_extreme = (stable_at_extreme_count / total_stable_responses) * 100 if total_stable_responses > 0 else 0

    return {
        'total_stable_responses': total_stable_responses,
        'stable_at_extreme_count': stable_at_extreme_count,
        'percentage_at_extreme': percentage_at_extreme
    }

# Run the updated analysis to avoid the warning and incorporate reliability
extreme_stability_results_with_reliability = check_stable_responses_at_extreme_with_reliability(merged_responses_with_reliability)
percentage = extreme_stability_results_with_reliability["percentage_at_extreme"]
print(f"Percentage of stable responses at the extremes (Weighted by Reliability): {percentage:.2f}%")


Percentage of stable responses at the extremes (Weighted by Reliability): 82.35%


82.35% of the responses are already extreme even before exposure to biased articles. Such responses can either remain stable or be mitigated by the introduction of biased articles.

#### 3.1.1 Identifying Political Leaning Based on Response Patterns

The `generate_political_mapping` function is designed to analyze responses from the dataset containing real human responses. It compares the average responses from individuals identified as "Extreme Left" and "Extreme Right" to determine the direction of political leaning for each question. Specifically, the function creates a mapping that indicates whether smaller or larger response values correspond to left-leaning or right-leaning political views. 

This process is done to identify which questions exhibit a clear divide between left-leaning and right-leaning perspectives.

In [302]:
# Function to generate the political mapping based on actual numeric responses
def generate_political_mapping(df):
    """
    Generates a political stance mapping for each question based on the average responses 
    of users with extreme left and extreme right political stances.

    Parameters:
    - df (DataFrame): The DataFrame containing user responses and political stances.

    Returns:
    - political_mapping (dict): A dictionary where keys are question codes, and values are
                                True if smaller values represent left-leaning views, False if larger
                                values represent left-leaning views.
    """
    # Separate data by political stance
    left_responses = df[df['political_stance'] == 'Extreme Left']
    right_responses = df[df['political_stance'] == 'Extreme Right']

    # Initialize a dictionary to store the mapping
    political_mapping = {}

    # Replace with the actual columns containing numeric responses
    # Assume these are 'numeric_response_before' or 'numeric_response_after' 
    # (you might need to adjust these column names based on your dataset)
    question_columns = df['question_code'].unique()  # Get unique question codes

    for question_code in question_columns:
        # Filter the DataFrame to get responses for this specific question
        left_responses_q = left_responses[left_responses['question_code'] == question_code]
        right_responses_q = right_responses[right_responses['question_code'] == question_code]

        # Calculate the average response for left-wing and right-wing users
        left_mean = left_responses_q['numeric_response_before'].mean()  # Use the appropriate column for responses
        right_mean = right_responses_q['numeric_response_before'].mean()

        # Determine whether smaller values represent left-leaning views
        if left_mean < right_mean:
            political_mapping[question_code] = True  # Smaller values = left-leaning
        else:
            political_mapping[question_code] = False  # Larger values = left-leaning

    return political_mapping

# Generate the political mapping based on user responses
question_political_mapping = generate_political_mapping(merged_responses_with_reliability)



### 3.2 Reinforcement of Right-Wing and Left-Wing Opinions
This analysis examines whether agents' opinions were reinforced depending on whether they were exposed to articles that aligned with their pre-existing political views.

#### 3.2.1 Right Wing Reinforcement

In [303]:
# Function to analyze right-wing reinforcement and stable extremes, weighted by reliability
def analyze_reinforcement_and_stable_extremes_with_reliability(merged_df, question_political_mapping):
    # Filter for Extreme Right users and right-wing biased articles
    right_wing_df = merged_df[(merged_df['political_stance'] == 'Extreme Right') & (merged_df['bias'] == 'right')].copy()

    # Function to check if the response was reinforced based on political mapping
    def is_reinforced(row):
        is_left_leaning = question_political_mapping.get(row['question_code'], False)
        if is_left_leaning:
            return row['numeric_response_after'] > row['numeric_response_before']
        else:
            return row['numeric_response_after'] > row['numeric_response_before']

    # Check if the non-reinforced response was already at an extreme
    def was_stable_at_extreme(row):
        is_left_leaning = question_political_mapping.get(row['question_code'], False)
        if is_left_leaning:
            return row['numeric_response_before'] in [1, 2]  # Extreme for left-leaning questions
        else:
            return row['numeric_response_before'] in [6, 7]  # Extreme for right-leaning questions

    # Apply the reinforcement and extreme checks
    right_wing_df['reinforced'] = right_wing_df.apply(is_reinforced, axis=1)
    right_wing_df['stable_at_extreme'] = right_wing_df.apply(was_stable_at_extreme, axis=1)

    # Calculate total responses weighted by reliability
    total_weighted_responses = right_wing_df['reliability_score'].sum()
    
    # Calculate reinforced responses weighted by reliability
    reinforced_weighted = (right_wing_df['reinforced'] * right_wing_df['reliability_score']).sum()

    # Calculate stable at extreme responses weighted by reliability
    stable_at_extreme_weighted = (right_wing_df['stable_at_extreme'] * right_wing_df['reliability_score']).sum()

    # Calculate percentages safely
    percentage_reinforced = (reinforced_weighted / total_weighted_responses) * 100 if total_weighted_responses > 0 else 0
    percentage_stable_at_extreme = (stable_at_extreme_weighted / total_weighted_responses) * 100 if total_weighted_responses > 0 else 0

    # Return the results
    return {
        'total_right_wing_responses': total_weighted_responses,
        'reinforced_responses': reinforced_weighted,
        'percentage_reinforced': round(percentage_reinforced, 2),
        'stable_at_extreme_count': stable_at_extreme_weighted,
        'percentage_stable_at_extreme': round(percentage_stable_at_extreme, 2)
    }

# Perform the analysis for right-wing users with reliability
right_wing_extended_results_with_reliability = analyze_reinforcement_and_stable_extremes_with_reliability(merged_responses_with_reliability, question_political_mapping)

# Display the results
print("Reinforcement of Right-Wing Opinions (Extreme Right Agents, Right-Biased Articles, Weighted by Reliability):")
print(f"Total right-wing responses (weighted): {right_wing_extended_results_with_reliability['total_right_wing_responses']}")
print(f"Reinforced responses (weighted): {right_wing_extended_results_with_reliability['reinforced_responses']}")
print(f"Percentage of reinforced responses (weighted): {right_wing_extended_results_with_reliability['percentage_reinforced']}%")
print(f"Stable non-reinforced responses at extremes (weighted): {right_wing_extended_results_with_reliability['stable_at_extreme_count']}")
print(f"Percentage of stable non-reinforced responses at extremes (weighted): {right_wing_extended_results_with_reliability['percentage_stable_at_extreme']}%")


Reinforcement of Right-Wing Opinions (Extreme Right Agents, Right-Biased Articles, Weighted by Reliability):
Total right-wing responses (weighted): 223.42599338734254
Reinforced responses (weighted): 60.001361172761285
Percentage of reinforced responses (weighted): 26.86%
Stable non-reinforced responses at extremes (weighted): 41.12440379140114
Percentage of stable non-reinforced responses at extremes (weighted): 18.41%




1. **Stable Responses at Extremes**:
   - **82.35%** of responses were already extreme before exposure, meaning these responses cannot contribute significantly to radicalization as they are capped by the scale.

2. **Reinforcement of Right-Wing Opinions**:
   - Among **Extreme Right respondents** exposed to **right-wing biased articles**:
     - **26.86%** of responses were reinforced, showing an increase in agreement with right-wing views.
     - **18.41%** of responses were stable at extreme values, meaning they were already at the extreme ends of the scale before exposure and remained there.
   
Overall, biased articles reinforce right-wing opinions in about a quarter of the cases, but a significant proportion of responses remain unaffected as they were already at the extreme ends of the scale before exposure.


#### 3.2.2 Left Wing Reinforcement

In [304]:
# Function to analyze left-wing reinforcement and stable extremes, weighted by reliability
def analyze_reinforcement_and_stable_extremes_left_wing_with_reliability(merged_df, question_political_mapping):
    # Filter for Extreme Left users and left-wing biased articles
    left_wing_df = merged_df[(merged_df['political_stance'] == 'Extreme Left') & (merged_df['bias'] == 'left')].copy()

    # Function to check if the response was reinforced based on political mapping
    def is_reinforced(row):
        is_left_leaning = question_political_mapping.get(row['question_code'], False)
        if is_left_leaning:
            return row['numeric_response_after'] < row['numeric_response_before']
        else:
            return row['numeric_response_after'] < row['numeric_response_before']

    # Check if the non-reinforced response was already at an extreme
    def was_stable_at_extreme(row):
        is_left_leaning = question_political_mapping.get(row['question_code'], False)
        if is_left_leaning:
            return row['numeric_response_before'] in [1, 2]  # Extreme for left-leaning questions
        else:
            return row['numeric_response_before'] in [6, 7]  # Extreme for right-leaning questions

    # Apply the reinforcement and extreme checks
    left_wing_df['reinforced'] = left_wing_df.apply(is_reinforced, axis=1)
    left_wing_df['stable_at_extreme'] = left_wing_df.apply(was_stable_at_extreme, axis=1)

    # Calculate total responses weighted by reliability
    total_weighted_responses = left_wing_df['reliability_score'].sum()
    
    # Calculate reinforced responses weighted by reliability
    reinforced_weighted = (left_wing_df['reinforced'] * left_wing_df['reliability_score']).sum()

    # Calculate stable at extreme responses weighted by reliability
    stable_at_extreme_weighted = (left_wing_df['stable_at_extreme'] * left_wing_df['reliability_score']).sum()

    # Calculate percentages safely
    percentage_reinforced = (reinforced_weighted / total_weighted_responses) * 100 if total_weighted_responses > 0 else 0
    percentage_stable_at_extreme = (stable_at_extreme_weighted / total_weighted_responses) * 100 if total_weighted_responses > 0 else 0

    # Calculate non-reinforced responses
    total_non_reinforced_weighted = total_weighted_responses - reinforced_weighted
    percentage_non_reinforced = (total_non_reinforced_weighted / total_weighted_responses) * 100 if total_weighted_responses > 0 else 0

    # Return the results
    return {
        'total_left_wing_responses': total_weighted_responses,
        'reinforced_responses': reinforced_weighted,
        'percentage_reinforced': round(percentage_reinforced, 2),
        'stable_at_extreme_count': stable_at_extreme_weighted,
        'percentage_stable_at_extreme': round(percentage_stable_at_extreme, 2),
        'non_reinforced_responses': total_non_reinforced_weighted,
        'percentage_non_reinforced': round(percentage_non_reinforced, 2)
    }

# Example usage:
# Assuming `merged_responses_with_reliability` and `question_political_mapping` are already defined
left_wing_extended_results_with_reliability = analyze_reinforcement_and_stable_extremes_left_wing_with_reliability(
    merged_responses_with_reliability, question_political_mapping
)

# Display the results
print("Reinforcement of Left-Wing Opinions (Extreme Left Agents, Left-Biased Articles, Weighted by Reliability):")
print(f"Total left-wing responses (weighted): {left_wing_extended_results_with_reliability['total_left_wing_responses']}")
print(f"Reinforced responses (weighted): {left_wing_extended_results_with_reliability['reinforced_responses']}")
print(f"Percentage of reinforced responses (weighted): {left_wing_extended_results_with_reliability['percentage_reinforced']}%")
print(f"Stable non-reinforced responses at extremes (weighted): {left_wing_extended_results_with_reliability['stable_at_extreme_count']}")
print(f"Percentage of stable non-reinforced responses at extremes (weighted): {left_wing_extended_results_with_reliability['percentage_stable_at_extreme']}%")
print(f"Non-reinforced responses (weighted): {left_wing_extended_results_with_reliability['non_reinforced_responses']}")
print(f"Percentage of non-reinforced responses (weighted): {left_wing_extended_results_with_reliability['percentage_non_reinforced']}%")


Reinforcement of Left-Wing Opinions (Extreme Left Agents, Left-Biased Articles, Weighted by Reliability):
Total left-wing responses (weighted): 141.3679390320283
Reinforced responses (weighted): 38.24454493650617
Percentage of reinforced responses (weighted): 27.05%
Stable non-reinforced responses at extremes (weighted): 81.78854535636725
Percentage of stable non-reinforced responses at extremes (weighted): 57.86%
Non-reinforced responses (weighted): 103.12339409552212
Percentage of non-reinforced responses (weighted): 72.95%



1. **Reinforcement of Left-Wing Opinions**:
   - Among **Extreme Left respondents** exposed to **left-wing biased articles**:
     - **27.05%** of responses were reinforced, meaning that they exhibited a further leftward shift after exposure.
     - **57.86%** of responses were stable at extreme left values, indicating that they were already at the extreme ends of the scale before exposure and remained there.

2. **Non-Reinforced Responses**:
   - **72.95%** of responses were not reinforced, showing that most responses did not shift leftward after exposure, including a substantial proportion that was already at the extremes.

Overall, biased articles reinforce left-wing opinions in about a quarter of the cases, but a large portion of responses remain unaffected, either because they are already at the extreme left or because they did not shift further after exposure.


## 4. LLM to persona alignment before vs after exposure to articles: 
The following section aims at exploring whether the llm more consistent with the actual person's responses after it was exposed to
1. right biased article
2. left biased articles.

In [305]:
filtered_data_path = '../data/processed/filtered_data.csv'
real_responses = pd.read_csv(filtered_data_path)


In [306]:
def weighted_difference(real_response_code, llm_response_code, reliability_score=1):
    """
    Calculates the weighted difference between real and LLM response codes, adjusted by reliability.
    
    Parameters:
    - real_response_code: The real response code (numeric or categorical).
    - llm_response_code: The response code generated by the LLM (numeric or categorical).
    - reliability_score: A scaling factor (reliability) for the difference.
    
    Returns:
    - The weighted difference between the real and LLM response codes.
    """
    try:
        real_response_code = float(real_response_code)
        llm_response_code = float(llm_response_code)
    except ValueError:
        return reliability_score if real_response_code != llm_response_code else 0

    # Calculate the absolute difference between the codes, weighted by reliability
    difference = abs(real_response_code - llm_response_code)
    return difference * reliability_score

In [307]:
after_responses_df.columns

Index(['selected_option', 'question', 'numeric_response', 'political_stance'], dtype='object')

In [308]:
def extract_llm_responses(after_responses_df):
    """
    Extract LLM responses and map them into a structured format, adding llm_response_code.
    """
    llm_responses_mapped = []
    
    for idx, row in after_responses_df.iterrows():
        user_id, question_code, bias = idx  # Unpack from the index
        
        selected_option = row['selected_option']
        reliability_score = row.get('reliability_score', None)

        # Assuming llm_response_code is mapped from selected_option, you can add logic here.
        llm_response_code = row['numeric_response']  # If it's a numeric response

        llm_responses_mapped.append({
            'user_id': user_id,
            'question_code': question_code,
            'bias': bias,
            'selected_option': selected_option,
            'llm_response_code': llm_response_code,
            'reliability_score': reliability_score  # Include reliability score if present
        })
    
    return llm_responses_mapped


In [309]:
real_responses_reset = real_responses.reset_index()  # Move unique_id (user_id) from the index to a column

real_responses_melted = pd.melt(
    real_responses_reset,
    id_vars=['unique_id'],  # This is the user_id
    var_name='question_code',  # Column name for question codes (e.g., F1A10_1)
    value_name='numeric_response'  # Column name for the real responses
)

# Rename 'unique_id' to 'user_id' for consistency
real_responses_melted.rename(columns={'unique_id': 'user_id'}, inplace=True)


### 4.1 Overall Comparison between LLM (after exposure to bias) and Human Responses


In [310]:
def compare_llm_to_real(llm_responses_mapped, real_responses_melted):
    # Convert the mapped LLM responses to a DataFrame
    llm_df = pd.DataFrame(llm_responses_mapped)
    
    # Merge the LLM responses with real responses on user_id and question_code
    comparison_df = pd.merge(
        real_responses_melted,
        llm_df,
        on=['user_id', 'question_code'],
        suffixes=('_real', '_llm')
    )

    # Ensure numeric comparison between real and LLM responses
    if 'numeric_response' in comparison_df.columns and 'llm_response_code' in comparison_df.columns:
        comparison_df['difference'] = comparison_df['numeric_response'] - comparison_df['llm_response_code']
        
        # Weighted average difference (weighted by reliability_score)
        if comparison_df['reliability_score'].notna().sum() > 0:  # Only calculate if there are valid reliability scores
            avg_difference = (comparison_df['difference'] * comparison_df['reliability_score']).sum() / comparison_df['reliability_score'].sum()
        else:
            avg_difference = comparison_df['difference'].mean()  # Fallback to unweighted average if no reliability scores
    else:
        avg_difference = None
    
    return comparison_df, avg_difference


In [311]:
#print("LLM Responses Mapped:", llm_responses_mapped[:5])  # First 5 entries
#print("Real Responses:", real_responses[:5])  # First 5 entries


In [312]:
print(after_responses_df.columns)


Index(['selected_option', 'question', 'numeric_response', 'political_stance'], dtype='object')


In [314]:
real_responses_melted = pd.melt(
    real_responses.reset_index(), 
    id_vars=['unique_id'], 
    var_name='question_code', 
    value_name='numeric_response'
).rename(columns={'unique_id': 'user_id'})

# Now compare the LLM responses to the real responses
comparison_df, avg_difference = compare_llm_to_real(llm_responses_mapped, real_responses_melted)

print(comparison_df.head())
print(f"Average weighted difference between LLM and real responses: {avg_difference}")


NameError: name 'llm_responses_mapped' is not defined

### 4.1 Difference beteween LLM and Real Responses by Article Bias
Comparing the responses of the real people with the responses of their LLM counterparts that had been exposed to politically biased articles.

In [315]:
def analyze_responses_by_bias(comparison_df, bias_type):
    """
    Analyzes the comparison between real and LLM responses, filtered by bias.

    Parameters:
    - comparison_df (DataFrame): The DataFrame containing real and LLM response comparisons.
    - bias_type (str): The bias type to filter by ('right' or 'left').

    Returns:
    - avg_difference (float): The average weighted difference for the selected bias type.
    - filtered_comparison_df (DataFrame): The filtered DataFrame containing only the selected bias type.
    """
    # Filter the DataFrame based on the selected bias
    filtered_comparison_df = comparison_df[comparison_df['bias'] == bias_type]

    # Calculate the average weighted difference for the selected bias
    avg_difference = filtered_comparison_df['difference'].mean()

    return avg_difference, filtered_comparison_df

#### 4.2.1 Comparison with Right-Biased Articles

In [316]:
# Check the column names
print(comparison_df.columns)

# Ensure 'bias' exists in comparison_df
if 'bias' not in comparison_df.columns:
    raise KeyError("'bias' column is missing from comparison_df")

NameError: name 'comparison_df' is not defined

In [247]:
avg_difference_right, filtered_right_df = analyze_responses_by_bias(comparison_df, bias_type='right')

print("Comparison for Right-Biased Articles:")
print(filtered_right_df.head())
print(f"Average weighted difference for right-biased articles: {avg_difference_right}")


NameError: name 'comparison_df' is not defined

#### 4.2.2 Comparison with Left-Biased Articles

In [248]:
avg_difference_left, filtered_left_df = analyze_responses_by_bias(comparison_df, bias_type='left')

print("Comparison for Left-Biased Articles:")
print(filtered_left_df.head())
print(f"Average weighted difference for left-biased articles: {avg_difference_left}")

NameError: name 'comparison_df' is not defined

#### 4.3. Comparison with Left-Biased Article on Subset of Left-Wing Users
Measuring the effect of left wing biased article on the alignment between Left-Wing users and their LLM counterparts.

In [317]:
def add_political_stance_to_real_responses(real_responses_df, after_responses_df):
    """
    Adds political stance information from after_responses_df to real_responses_df.
    """
    # Reset the index for after_responses_df so that 'user_id' becomes a column
    after_responses_df_reset = after_responses_df.reset_index()

    # Ensure 'unique_id' is treated as 'user_id' (convert to string)
    real_responses_df['user_id'] = real_responses_df['unique_id'].astype(str)
    after_responses_df_reset['user_id'] = after_responses_df_reset['user_id'].astype(str)

    # Extract political stance mapping from after_responses_df
    stance_mapping = after_responses_df_reset[['user_id', 'political_stance']].drop_duplicates()

    # Merge the political stance into the real_responses_df using 'user_id' (which is actually 'unique_id')
    real_responses_with_stance = real_responses_df.merge(stance_mapping, on='user_id', how='left')

    return real_responses_with_stance


In [318]:
def track_reinforcement_patterns(df, political_stance, bias_type):
    """
    Track reinforcement patterns for users with a given political stance exposed to biased articles.
    
    Parameters:
    - df (DataFrame): The DataFrame containing the real and LLM responses, along with political stance and bias.
    - political_stance (str): The political stance to filter by (e.g., 'Extreme Left' or 'Extreme Right').
    - bias_type (str): The type of article bias to filter by (e.g., 'left' or 'right').

    Returns:
    - A dictionary with statistics about reinforcement patterns.
    """
    # Filter for the appropriate users and bias
    filtered_df = df[(df['political_stance'] == political_stance) & (df['bias'] == bias_type)].copy()

    # Determine reinforcement for each response
    filtered_df.loc[:, 'reinforced'] = filtered_df.apply(is_reinforced, axis=1)

    # Calculate statistics
    total_responses = len(filtered_df)
    reinforced_responses = filtered_df['reinforced'].sum()
    non_reinforced_responses = total_responses - reinforced_responses

    return {
        'total_responses': total_responses,
        'reinforced_responses': reinforced_responses,
        'percentage_reinforced': (reinforced_responses / total_responses * 100) if total_responses else 0,
        'non_reinforced_responses': non_reinforced_responses
    }


In [319]:
def compare_responses_for_left_users_and_left_bias(comparison_df, real_responses_with_stance):
    """
    Compares the responses of left-wing real people with their LLM counterparts after exposure to left-wing articles.

    Parameters:
    - comparison_df (DataFrame): The DataFrame containing real and LLM response comparisons.
    - real_responses_with_stance (DataFrame): The real responses DataFrame with political stance.

    Returns:
    - avg_difference (float): The average weighted difference for left-wing users exposed to left-biased articles.
    - filtered_comparison_df (DataFrame): The filtered DataFrame containing left-wing users and left-biased articles.
    """
    # Ensure 'user_id' is a string in both DataFrames
    comparison_df['user_id'] = comparison_df['user_id'].astype(str)
    real_responses_with_stance['user_id'] = real_responses_with_stance['user_id'].astype(str)
    
    # Filter for left-wing users
    left_wing_users = real_responses_with_stance[real_responses_with_stance['political_stance'] == 'Extreme Left']['user_id'].unique()

    # Filter the comparison_df for left-wing users and left-biased articles
    filtered_comparison_df = comparison_df[
        (comparison_df['user_id'].isin(left_wing_users)) & 
        (comparison_df['bias'] == 'left')
    ]

    avg_difference = filtered_comparison_df['difference'].mean()

    return avg_difference, filtered_comparison_df


#### 4.4. Comparison with Right-Biased Article on Subset of Left-Wing 
Measuring the effect of right wing biased article on the alignment between Left-Wing users and their LLM counterparts.


In [320]:
def compare_responses_for_left_users_and_right_bias(comparison_df, real_responses_with_stance):
    """
    Compares the responses of left-wing real people with their LLM counterparts after exposure to right-wing articles.
    
    Parameters:
    - comparison_df (DataFrame): The DataFrame containing real and LLM response comparisons.
    - real_responses_with_stance (DataFrame): The real responses DataFrame with political stance.
    
    Returns:
    - avg_difference (float): The average weighted difference for left-wing users exposed to right-biased articles.
    - filtered_comparison_df (DataFrame): The filtered DataFrame containing left-wing users and right-biased articles.
    """
    real_responses_with_stance = real_responses_with_stance.set_index('user_id')
    
    left_wing_users = real_responses_with_stance[real_responses_with_stance['political_stance'] == 'Extreme Left'].index.unique()
    
    print(f"Found {len(left_wing_users)} left-wing users:")

    comparison_df['user_id'] = comparison_df['user_id'].astype(str).str.strip()  
    
    filtered_comparison_df = comparison_df[
        (comparison_df['user_id'].isin(left_wing_users)) &
        (comparison_df['bias'] == 'right')
    ]
    
    print(f"Found {len(filtered_comparison_df)} entries with right bias for left-wing users.")

    avg_difference = filtered_comparison_df['difference'].mean()
    
    return avg_difference, filtered_comparison_df


In [321]:
print(real_responses.columns)


Index(['unique_id', 'F1A10_1', 'F2A7', 'F2A9', 'F3A3_1', 'F3A6_1', 'F3A7_1',
       'F3A8_1'],
      dtype='object')


In [322]:
# Add political stance to real_responses
real_responses_with_stance = add_political_stance_to_real_responses(real_responses, after_responses_df)

# Check if left-wing users exist
print(real_responses_with_stance[real_responses_with_stance['political_stance'] == 'Extreme Left'].head())

# Check if there are left-biased articles in the comparison_df
print(comparison_df[comparison_df['bias'] == 'left'].head())


     unique_id  F1A10_1  F2A7  F2A9  F3A3_1  F3A6_1  F3A7_1  F3A8_1  \
3   IDUS104424        5     3     4       7       1       2       2   
6   IDUS104915        4     3     2       4       7       6       5   
7   IDUS105157        5     3     3       5       7       7       7   
10  IDUS106103        4     5     3       5       2       3       3   
11  IDUS107320        7     1     5       4       7       7       7   

       user_id political_stance  
3   IDUS104424     Extreme Left  
6   IDUS104915     Extreme Left  
7   IDUS105157     Extreme Left  
10  IDUS106103     Extreme Left  
11  IDUS107320     Extreme Left  


NameError: name 'comparison_df' is not defined

#### 4.5 Comparison with Right-Biased Article on Subset of Right-Wing 
Measuring the effect of right wing biased article on the alignment between Right-Wing users and their LLM counterparts.

In [323]:
def compare_responses_for_right_users_and_right_bias(comparison_df, real_responses_with_stance):
    """
    Compares the responses of right-wing real people with their LLM counterparts after exposure to right-wing articles.
    
    Parameters:
    - comparison_df (DataFrame): The DataFrame containing real and LLM response comparisons.
    - real_responses_with_stance (DataFrame): The real responses DataFrame with political stance.
    
    Returns:
    - avg_difference (float): The average weighted difference for right-wing users exposed to right-biased articles.
    - filtered_comparison_df (DataFrame): The filtered DataFrame containing right-wing users and right-biased articles.
    """
    real_responses_with_stance = real_responses_with_stance.set_index('user_id')
    
    right_wing_users = real_responses_with_stance[real_responses_with_stance['political_stance'] == 'Extreme Right'].index.unique()
    
    print(f"Found {len(right_wing_users)} right-wing users:")

    comparison_df['user_id'] = comparison_df['user_id'].astype(str).str.strip()  
    
    filtered_comparison_df = comparison_df[
        (comparison_df['user_id'].isin(right_wing_users)) &
        (comparison_df['bias'] == 'right')
    ]
    
    print(f"Found {len(filtered_comparison_df)} entries with right bias for right-wing users.")

    avg_difference = filtered_comparison_df['difference'].mean()
    
    return avg_difference, filtered_comparison_df


#### 4.5 Comparison with Left-Biased Article on Subset of Right-Wing 
Measuring the effect of right wing biased article on the alignment between Right-Wing users and their LLM counterparts.

In [324]:
def compare_responses_for_right_users_and_left_bias(comparison_df, real_responses_with_stance):
    """
    Compares the responses of right-wing real people with their LLM counterparts after exposure to left-wing articles.
    
    Parameters:
    - comparison_df (DataFrame): The DataFrame containing real and LLM response comparisons.
    - real_responses_with_stance (DataFrame): The real responses DataFrame with political stance.
    
    Returns:
    - avg_difference (float): The average weighted difference for right-wing users exposed to left-biased articles.
    - filtered_comparison_df (DataFrame): The filtered DataFrame containing right-wing users and left-biased articles.
    """
    # Step 1: Ensure the correct index for real_responses_with_stance is set to 'user_id'
    real_responses_with_stance = real_responses_with_stance.set_index('user_id')
    
    # Step 2: Filter for right-wing users from real_responses_with_stance
    right_wing_users = real_responses_with_stance[real_responses_with_stance['political_stance'] == 'Extreme Right'].index.unique()
    
    print(f"Found {len(right_wing_users)} right-wing users:")

    # Step 3: Clean and ensure user ID matching (if necessary)
    comparison_df['user_id'] = comparison_df['user_id'].astype(str).str.strip()  # Ensure 'user_id' is a string and strip whitespace
    
    # Step 4: Filter the comparison_df for those users and left-biased articles
    filtered_comparison_df = comparison_df[
        (comparison_df['user_id'].isin(right_wing_users)) &
        (comparison_df['bias'] == 'left')
    ]
    
    print(f"Found {len(filtered_comparison_df)} entries with left bias for right-wing users.")

    # Step 5: Calculate the average weighted difference for right-wing users exposed to left-biased articles
    avg_difference = filtered_comparison_df['difference'].mean()
    
    return avg_difference, filtered_comparison_df


In [325]:
def display_comparison_results(comparison_df, real_responses, after_responses_df):
    """
    Compares and displays results for left-wing and right-wing users exposed to both left- and right-biased articles.
    
    Parameters:
    - comparison_df (DataFrame): The DataFrame containing real and LLM response comparisons.
    - real_responses (DataFrame): The real responses DataFrame that lacks political stance.
    - after_responses_df (DataFrame): The after responses DataFrame that contains user_id and political_stance.
    """
    # Step 1: Add political stance to real_responses
    real_responses_with_stance = add_political_stance_to_real_responses(real_responses, after_responses_df)

    # ---- Right-Wing Users ----
    print("\n--- Right-Wing Users Comparisons ---\n")

    # Compare responses of right-wing users with right-biased articles
    avg_difference_right, filtered_right_df = compare_responses_for_right_users_and_right_bias(comparison_df, real_responses_with_stance)
    print("Comparison for Right-Wing Users Exposed to Right-Biased Articles:")
    print(f"Average weighted difference for right-wing users and right-biased articles: {avg_difference_right}")
    print()

    # Compare responses of right-wing users with left-biased articles
    avg_difference_right_left, filtered_right_left_df = compare_responses_for_right_users_and_left_bias(comparison_df, real_responses_with_stance)
    print("Comparison for Right-Wing Users Exposed to Left-Biased Articles:")
    print(f"Average weighted difference for right-wing users and left-biased articles: {avg_difference_right_left}")
    print()

    # ---- Left-Wing Users ----
    print("\n--- Left-Wing Users Comparisons ---\n")

    # Compare responses of left-wing users with left-biased articles
    avg_difference_left, filtered_left_df = compare_responses_for_left_users_and_left_bias(comparison_df, real_responses_with_stance)
    print("Comparison for Left-Wing Users Exposed to Left-Biased Articles:")
    print(f"Average weighted difference for left-wing users and left-biased articles: {avg_difference_left}")
    print()

    # Compare responses of left-wing users with right-biased articles
    avg_difference_left_right, filtered_left_right_df = compare_responses_for_left_users_and_right_bias(comparison_df, real_responses_with_stance)
    print("Comparison for Left-Wing Users Exposed to Right-Biased Articles:")
    print(f"Average weighted difference for left-wing users and right-biased articles: {avg_difference_left_right}")
    print()


In [326]:
display_comparison_results(comparison_df, real_responses, after_responses_df)


NameError: name 'comparison_df' is not defined

### Right Wing Users Exposed to Left-Biased Articles Human Alignment

### Radicalisation LLMs and Humans 
Instead of comparing LLMs to LLMs for radicalisation analysis, as done before, we now compare LLMs and humans.

In [327]:
def is_reinforced(row):
    """
    Determines if the LLM response is a reinforcement of the user's original stance.
    Reinforcement happens if the LLM response moves in the direction of the original political stance.  # this needs to be further clarified: what does it mean to move in the direction of the original political stance? 
    
    For Extreme Left:
      - Moving towards smaller numbers (more extreme left) is considered reinforcement.
    For Extreme Right:
      - Moving towards larger numbers (more extreme right) is considered reinforcement.
    
    Returns True if reinforcement is detected, False otherwise.
    """
    if row['political_stance'] == 'Extreme Left' and row['bias'] == 'left':
        return row['llm_response_code'] < row['real_response_code']  # moving towards more extreme left
    elif row['political_stance'] == 'Extreme Right' and row['bias'] == 'right':
        return row['llm_response_code'] > row['real_response_code']  # moving towards more extreme right
    return False  # No reinforcement otherwise


In [328]:
def track_reinforcement_patterns(df, political_stance, bias_type):
    """
    Track reinforcement patterns for users with a given political stance exposed to biased articles.
    
    Parameters:
    - df (DataFrame): The DataFrame containing the real and LLM responses, along with political stance and bias.
    - political_stance (str): The political stance to filter by (e.g., 'Extreme Left' or 'Extreme Right').
    - bias_type (str): The type of article bias to filter by (e.g., 'left' or 'right').

    Returns:
    - A dictionary with statistics about reinforcement patterns.
    """
    # Filter for the appropriate users and bias
    filtered_df = df[(df['political_stance'] == political_stance) & (df['bias'] == bias_type)].copy()

    # Determine reinforcement for each response
    filtered_df.loc[:, 'reinforced'] = filtered_df.apply(is_reinforced, axis=1)

    # Calculate statistics
    total_responses = len(filtered_df)
    reinforced_responses = filtered_df['reinforced'].sum()
    non_reinforced_responses = total_responses - reinforced_responses

    return {
        'total_responses': total_responses,
        'reinforced_responses': reinforced_responses,
        'percentage_reinforced': (reinforced_responses / total_responses * 100) if total_responses else 0,
        'non_reinforced_responses': non_reinforced_responses
    }


In [329]:
# Ensure columns are correctly named in the DataFrame
merged_responses_df.rename(columns={
    'numeric_response_after': 'llm_response_code',
    'numeric_response_before': 'real_response_code'
}, inplace=True)

# Example usage: Track reinforcement patterns for left-wing users exposed to left-biased articles
reinforcement_stats_left = track_reinforcement_patterns(merged_responses_df, 'Extreme Left', 'left')
reinforcement_stats_right = track_reinforcement_patterns(merged_responses_df, 'Extreme Right', 'right')

# Output the reinforcement stats
print("Reinforcement patterns for left-wing users exposed to left-biased articles:")
print(reinforcement_stats_left)

print("Reinforcement patterns for right-wing users exposed to right-biased articles:")
print(reinforcement_stats_right)


Reinforcement patterns for left-wing users exposed to left-biased articles:
{'total_responses': 392, 'reinforced_responses': np.int64(109), 'percentage_reinforced': np.float64(27.806122448979593), 'non_reinforced_responses': np.int64(283)}
Reinforcement patterns for right-wing users exposed to right-biased articles:
{'total_responses': 544, 'reinforced_responses': np.int64(148), 'percentage_reinforced': np.float64(27.205882352941174), 'non_reinforced_responses': np.int64(396)}


In [331]:
print(merged_responses_df.columns)


Index(['user_id', 'question_code', 'real_response_code', 'political_stance',
       'llm_response_code', 'bias', 'response_change'],
      dtype='object')


In [332]:
def analyze_alignment_by_question(merged_df):
    """
    Analyzes alignment between LLM and human responses question by question after exposure to biased articles.
    
    Parameters:
    - merged_df (DataFrame): The merged DataFrame containing real and LLM responses, political stance, and bias.
    
    Returns:
    - alignment_summary (DataFrame): A summary DataFrame showing alignment for each question.
    """
    # Create a summary list to store alignment data per question
    alignment_data = []

    # Get unique question codes
    questions = merged_df['question_code'].unique()
    
    # Iterate over each question and analyze the alignment
    for question in questions:
        # Filter the DataFrame by question
        question_df = merged_df[merged_df['question_code'] == question].copy()

        # Calculate the absolute difference (alignment measure) between LLM and human responses
        question_df['alignment'] = abs(question_df['llm_response_code'] - question_df['real_response_code'])

        # Calculate metrics
        avg_alignment = question_df['alignment'].mean()
        mae = question_df['alignment'].mean()  # Mean Absolute Error
        mse = (question_df['alignment'] ** 2).mean()  # Mean Squared Error
        
        # Track bias-specific alignment (e.g., left or right bias)
        for bias in ['left', 'right']:
            bias_df = question_df[question_df['bias'] == bias]
            avg_alignment_bias = bias_df['alignment'].mean()
            mae_bias = bias_df['alignment'].mean()
            mse_bias = (bias_df['alignment'] ** 2).mean()

            alignment_data.append({
                'question_code': question,
                'bias': bias,
                'avg_alignment': avg_alignment_bias,
                'mae': mae_bias,
                'mse': mse_bias,
                'total_responses': len(bias_df)
            })

    # Convert the alignment data to a DataFrame for analysis
    alignment_summary = pd.DataFrame(alignment_data)

    return alignment_summary


In [333]:
alignment_summary = analyze_alignment_by_question(merged_responses_df)
print(alignment_summary.head())  # View the first few rows of the summary


  question_code   bias  avg_alignment       mae        mse  total_responses
0       F1A10_1   left       0.666667  0.666667   1.623932              117
1       F1A10_1  right       3.136752  3.136752  16.094017              117
2          F2A6   left       1.153846  1.153846   2.777778              117
3          F2A6  right       0.923077  0.923077   1.965812              117
4          F2A7   left       2.290598  2.290598   6.564103              117


In [334]:
# Filter for Extreme Left and Extreme Right real users
extreme_left_responses = merged_responses_df[merged_responses_df['political_stance'] == 'Extreme Left']
extreme_right_responses = merged_responses_df[merged_responses_df['political_stance'] == 'Extreme Right']

# Function to calculate alignment question by question for each group
def calculate_alignment_by_stance(df, stance_name):
    results = []
    for question in df['question_code'].unique():
        for bias in ['left', 'right']:
            filtered_df = df[(df['question_code'] == question) & (df['bias'] == bias)]
            if not filtered_df.empty:
                avg_alignment = filtered_df['response_change'].mean()
                mae = filtered_df['response_change'].abs().mean()
                mse = (filtered_df['response_change'] ** 2).mean()
                total_responses = len(filtered_df)
                results.append({
                    'stance': stance_name,
                    'question_code': question,
                    'bias': bias,
                    'avg_alignment': avg_alignment,
                    'mae': mae,
                    'mse': mse,
                    'total_responses': total_responses
                })
    return pd.DataFrame(results)

# Calculate alignment for extreme left and extreme right users
extreme_left_alignment = calculate_alignment_by_stance(extreme_left_responses, 'Extreme Left')
extreme_right_alignment = calculate_alignment_by_stance(extreme_right_responses, 'Extreme Right')

# Combine the results
combined_alignment_results = pd.concat([extreme_left_alignment, extreme_right_alignment], ignore_index=True)

# Display the results
combined_alignment_results

Unnamed: 0,stance,question_code,bias,avg_alignment,mae,mse,total_responses
0,Extreme Left,F1A10_1,left,0.244898,0.367347,1.061224,49
1,Extreme Left,F1A10_1,right,0.102041,0.428571,1.326531,49
2,Extreme Left,F2A6,left,-1.469388,1.714286,4.693878,49
3,Extreme Left,F2A6,right,-0.693878,1.142857,2.653061,49
4,Extreme Left,F2A7,left,-1.061224,1.306122,2.612245,49
5,Extreme Left,F2A7,right,-0.142857,0.632653,0.959184,49
6,Extreme Left,F2A9,left,0.142857,0.265306,0.265306,49
7,Extreme Left,F2A9,right,-1.306122,1.510204,3.877551,49
8,Extreme Left,F3A3_1,left,0.061224,0.959184,1.857143,49
9,Extreme Left,F3A3_1,right,-0.163265,0.897959,1.632653,49


### Baseline Comparison

Before Responses Alignment Evaluation (as reference)
```
---- LLM Alignment Evaluation ----
Exact Match Accuracy: 24.68%
Interpretation: The LLM exactly matches real people's responses. Higher is desirable.
'Close Enough' Accuracy (within 2 steps): 72.22%
Interpretation: The LLM response is close (within the defined step range) to real people's responses.
Mean Squared Error (MSE): 5.29
Interpretation: A high MSE indicates some large mismatches between LLM and real people's responses.
Mean Absolute Error (MAE): 1.73
Interpretation: On average, LLM responses are about 1.73 steps away from real people's responses.
Categorical Accuracy: 41.77%
Interpretation: The LLM matches the general sentiment (positive, neutral, negative, or concern level) about this percentage of the time.
-----------------------------------
```

In [335]:
# DATA
# Step 1: Separate the responses by bias (left and right)
after_responses_left_bias = after_responses_df[after_responses_df.index.get_level_values('bias') == 'left']
after_responses_right_bias = after_responses_df[after_responses_df.index.get_level_values('bias') == 'right']

# Step 2: Align with before_responses_df based on 'user_id' and 'question_code'
# Reset the index to prepare for merging
before_responses_reset = before_responses_df.reset_index()

# Merge left-biased responses with before responses
left_bias_aligned = before_responses_reset.merge(
    after_responses_left_bias.reset_index(),
    on=['user_id', 'question_code'],
    suffixes=('_before', '_after')
)

# Merge right-biased responses with before responses
right_bias_aligned = before_responses_reset.merge(
    after_responses_right_bias.reset_index(),
    on=['user_id', 'question_code'],
    suffixes=('_before', '_after')
)

# Step 1: Reset the index for real_responses_df to prepare for merging
real_responses_reset = real_responses.reset_index()

# Step 2: Align real responses with LLM before responses
# Merge real responses with LLM's before exposure responses
before_responses_aligned = real_responses_reset.merge(
    before_responses_df.reset_index(),
    left_on=['unique_id'], 
    right_on=['user_id'],
    suffixes=('_real', '_llm_before')
)

# Step 3: Align real responses with LLM after responses (left bias)
left_bias_responses_aligned = real_responses_reset.merge(
    after_responses_left_bias.reset_index(),
    left_on=['unique_id'], 
    right_on=['user_id'],
    suffixes=('_real', '_llm_after')
)

# Step 4: Align real responses with LLM after responses (right bias)
right_bias_responses_aligned = real_responses_reset.merge(
    after_responses_right_bias.reset_index(),
    left_on=['unique_id'], 
    right_on=['user_id'],
    suffixes=('_real', '_llm_after')
)



In [336]:
# Filter left-biased articles shown to extreme-left users
left_bias_extreme_left = merged_responses_df[
    (merged_responses_df['bias'] == 'left') & 
    (merged_responses_df['political_stance'] == 'Extreme Left')
]

# Filter left-biased articles shown to extreme-right users
left_bias_extreme_right = merged_responses_df[
    (merged_responses_df['bias'] == 'left') & 
    (merged_responses_df['political_stance'] == 'Extreme Right')
]

# Filter right-biased articles shown to extreme-left users
right_bias_extreme_left = merged_responses_df[
    (merged_responses_df['bias'] == 'right') & 
    (merged_responses_df['political_stance'] == 'Extreme Left')
]

# Filter right-biased articles shown to extreme-right users
right_bias_extreme_right = merged_responses_df[
    (merged_responses_df['bias'] == 'right') & 
    (merged_responses_df['political_stance'] == 'Extreme Right')
]


In [337]:
# Check if the filtered DataFrames contain data
print("Left Bias, Extreme Left Users:")
print(left_bias_extreme_left.head())

print("Left Bias, Extreme Right Users:")
print(left_bias_extreme_right.head())

print("Right Bias, Extreme Left Users:")
print(right_bias_extreme_left.head())

print("Right Bias, Extreme Right Users:")
print(right_bias_extreme_right.head())


Left Bias, Extreme Left Users:
       user_id question_code  real_response_code political_stance  \
48  IDUS104424       F1A10_1                   7     Extreme Left   
50  IDUS104424          F2A6                   5     Extreme Left   
52  IDUS104424          F2A7                   3     Extreme Left   
54  IDUS104424          F2A9                   5     Extreme Left   
56  IDUS104424        F3A3_1                   5     Extreme Left   

    llm_response_code  bias  response_change  
48                  7  left                0  
50                  2  left               -3  
52                  1  left               -2  
54                  5  left                0  
56                  7  left                2  
Left Bias, Extreme Right Users:
      user_id question_code  real_response_code political_stance  \
0  IDUS103408       F1A10_1                   7    Extreme Right   
2  IDUS103408          F2A6                   4    Extreme Right   
4  IDUS103408          F2A7         

In [338]:
# Set unique_id as the index for real_responses
real_responses.set_index('unique_id', inplace=True)

# Drop the redundant user_id column if necessary
real_responses.drop(columns=['user_id'], inplace=True, errors='ignore')

# Check the updated structure
print(real_responses.head())


            F1A10_1  F2A7  F2A9  F3A3_1  F3A6_1  F3A7_1  F3A8_1
unique_id                                                      
IDUS103408        2     3     2       6       4       4       7
IDUS103554        7     2     5       7       1       1       1
IDUS103826        4     1     4       7       1       3       4
IDUS104424        5     3     4       7       1       2       2
IDUS104578        6     2     4       6       1       3       2


In [339]:
def calculate_mae_mse_per_stance(df, real_responses_df, stance):
    mae_values = []
    mse_values = []
    
    for _, row in df.iterrows():
        # Get the real response from real_responses_df using the user_id and question_code
        user_id = row['user_id']
        question_code = row['question_code']
        
        # Check if real_response can be found
        if user_id in real_responses_df.index and question_code in real_responses_df.columns:
            real_response = real_responses_df.loc[user_id, question_code]
            llm_response = row['llm_response_code']
            
            # Only compute if both real and LLM responses are present
            if pd.notna(real_response) and pd.notna(llm_response):
                mae_values.append(abs(real_response - llm_response))
                mse_values.append((real_response - llm_response) ** 2)
            else:
                print(f"Skipping calculation for user {user_id}, question {question_code} due to missing response.")
        else:
            continue
            print(f"Real response for user {user_id}, question {question_code} not found.")
    
    # Calculate MAE and MSE
    if mae_values and mse_values:
        mae = sum(mae_values) / len(mae_values)
        mse = sum(mse_values) / len(mse_values)
    else:
        mae, mse = None, None
    
    print(f"MAE for {stance} users: {mae}")
    print(f"MSE for {stance} users: {mse}")
    return mae, mse


In [340]:
print(real_responses.columns)


Index(['F1A10_1', 'F2A7', 'F2A9', 'F3A3_1', 'F3A6_1', 'F3A7_1', 'F3A8_1'], dtype='object')


In [341]:
print("Left-Biased Articles")
mae_left_extreme_left, mse_left_extreme_left = calculate_mae_mse_per_stance(left_bias_extreme_left, real_responses, "Extreme Left")


Left-Biased Articles
MAE for Extreme Left users: 1.9912536443148687
MSE for Extreme Left users: 6.364431486880466


## 5. Significance Test

Check whether the articles cause a significant shift in opinion: if so, which type of bias is more impactful? And which groups are most affected?

In [342]:
# Merge before and after responses for extreme right users exposed to left-biased articles
left_bias_extreme_right_aligned = before_responses_df.merge(
    after_responses_left_bias, 
    on=['user_id', 'question_code'], 
    suffixes=('_before', '_after')
)

# Now filter for Extreme Right users
left_bias_extreme_right = left_bias_extreme_right_aligned[
    left_bias_extreme_right_aligned['political_stance_before'] == 'Extreme Right'
]


In [343]:
right_bias_extreme_right_aligned = before_responses_df.merge(
    after_responses_right_bias, 
    on=['user_id', 'question_code'], 
    suffixes=('_before', '_after')
)

right_bias_extreme_right = right_bias_extreme_right_aligned[
    right_bias_extreme_right_aligned['political_stance_before'] == 'Extreme Right'
]



In [344]:
from scipy.stats import wilcoxon

def perform_non_extreme_wilcoxon_test(df, before_column, after_column, group_name, default_min=1, default_max=7):
    """
    Perform the Wilcoxon Signed-Rank Test excluding users with extreme values that did not change.
    If `question_code` exists, determine the extreme values based on the question type (e.g., F2 questions use 1-5 scale).
    If `question_code` does not exist, default to the given min and max values (default 1 to 7).
    
    Parameters:
    - df (DataFrame): The DataFrame containing the data to test.
    - before_column (str): The column representing the 'before' values.
    - after_column (str): The column representing the 'after' values.
    - group_name (str): Name of the group being tested (for logging purposes).
    - default_min (int): Default minimum value for the response scale (default is 1).
    - default_max (int): Default maximum value for the response scale (default is 7).
    
    Returns:
    - p_value (float): The p-value from the Wilcoxon test.
    """
    # Remove any rows with NaN values in the relevant columns
    df_clean = df[[before_column, after_column]].dropna()

    # If 'question_code' exists in the DataFrame, use it to determine the scale
    if 'question_code' in df.columns:
        def get_extreme_values(question_code):
            """Determine the extreme values for the question based on its code."""
            if question_code.startswith('F2'):
                return 1, 5  # Scale is 1 to 5 for F2 questions
            else:
                return 1, 7  # Scale is 1 to 7 for all other questions

        # Add the 'question_code' to df_clean for filtering
        df_clean = df_clean.join(df[['question_code']])

        # Filter out users with extreme values (no change) based on their question type
        non_extreme_indices = []
        for index, row in df_clean.iterrows():
            question_code = row['question_code']
            min_value, max_value = get_extreme_values(question_code)
            
            # Check if the response is non-extreme or if it changed
            if not (
                (row[before_column] == min_value and row[after_column] == min_value) or 
                (row[before_column] == max_value and row[after_column] == max_value)
            ):
                non_extreme_indices.append(index)

    else:
        # If 'question_code' does not exist, use default min and max values
        min_value, max_value = default_min, default_max
        
        # Filter out users with extreme values (no change) based on the default scale
        non_extreme_indices = df_clean[~(
            ((df_clean[before_column] == min_value) & (df_clean[after_column] == min_value)) | 
            ((df_clean[before_column] == max_value) & (df_clean[after_column] == max_value))
        )].index

    # Create a DataFrame with non-extreme responses
    df_non_extreme = df_clean.loc[non_extreme_indices]

    # Perform the Wilcoxon Signed-Rank test
    if len(df_non_extreme) > 0:  # Make sure we have data to test
        test_statistic, p_value = wilcoxon(df_non_extreme[before_column], df_non_extreme[after_column])
        print(f"Wilcoxon test for {group_name}:")
        print(f"Test statistic: {test_statistic}, p-value: {p_value}")
        return p_value
    else:
        print(f"No non-extreme data available for {group_name}.")
        return None


In [345]:
# Perform Wilcoxon test for Extreme Right users exposed to left-biased articles (non-extreme responses)
p_value_extreme_right_left_bias_non_extreme = perform_non_extreme_wilcoxon_test(
    left_bias_extreme_right,  # Assuming this DataFrame holds the left-bias responses for Extreme Right users
    'numeric_response_before', 
    'numeric_response_after', 
    'Extreme Right - Left-Biased Articles (Non-Extreme)'
)

print(f"p-value for Extreme Right users (Left-Biased Articles, Non-Extreme): {p_value_extreme_right_left_bias_non_extreme}")


Wilcoxon test for Extreme Right - Left-Biased Articles (Non-Extreme):
Test statistic: 17275.5, p-value: 0.5517817005545443
p-value for Extreme Right users (Left-Biased Articles, Non-Extreme): 0.5517817005545443


In [346]:
# Perform Wilcoxon test for Extreme Left users exposed to left-biased articles (non-extreme responses)
p_value_extreme_left_left_bias_non_extreme = perform_non_extreme_wilcoxon_test(
    left_bias_extreme_left,  # DataFrame with left-biased articles for Extreme Left users
    'numeric_response_before',  # Correct column for the "before" responses
    'numeric_response_after',     # After responses (predicted by LLM)
    'Extreme Left - Left-Biased Articles (Non-Extreme)'
)

print(f"p-value for Extreme Left users (Left-Biased Articles, Non-Extreme): {p_value_extreme_left_left_bias_non_extreme}")


KeyError: "None of [Index(['numeric_response_before', 'numeric_response_after'], dtype='object')] are in the [columns]"

In [347]:
p_value_extreme_right_right_bias_non_extreme = perform_non_extreme_wilcoxon_test(
    right_bias_extreme_right,  # DataFrame with right-bias responses for Extreme Right users
    'numeric_response_before',  # Correct column for the "before" responses
    'numeric_response_after',   # Correct column for the "after" responses
    'Extreme Right - Right-Biased Articles (Non-Extreme)'
)

# Print the p-value result
print(f"p-value for Extreme Right users (Right-Biased Articles, Non-Extreme): {p_value_extreme_right_right_bias_non_extreme}")


Wilcoxon test for Extreme Right - Right-Biased Articles (Non-Extreme):
Test statistic: 16244.0, p-value: 0.8621700120632942
p-value for Extreme Right users (Right-Biased Articles, Non-Extreme): 0.8621700120632942


In [348]:
right_bias_extreme_left.columns

Index(['user_id', 'question_code', 'real_response_code', 'political_stance',
       'llm_response_code', 'bias', 'response_change'],
      dtype='object')

In [349]:
# Perform Wilcoxon test for Extreme Left users exposed to right-biased articles (non-extreme responses)
p_value_extreme_left_right_bias_non_extreme = perform_non_extreme_wilcoxon_test(
    right_bias_extreme_left,  # DataFrame with right-bias responses for Extreme Left users
    'numeric_response_before',     # Before responses (actual user responses before article exposure)
    'numeric_response_after',      # After responses (predicted by LLM or after exposure to the article)
    'Extreme Left - Right-Biased Articles (Non-Extreme)'
)

print(f"p-value for Extreme Left users (Right-Biased Articles, Non-Extreme): {p_value_extreme_left_right_bias_non_extreme}")


KeyError: "None of [Index(['numeric_response_before', 'numeric_response_after'], dtype='object')] are in the [columns]"

### 5. Results of the Significance Test: Analyzing Opinion Shifts

The Wilcoxon Signed-Rank test was performed to determine whether biased articles cause significant shifts in opinions among extreme political groups, focusing on non-extreme responses.

#### Key Results:
1. **Extreme Right users exposed to left-biased articles**:
   - **p-value: 0.5518** → No significant shift in opinions.
   
2. **Extreme Left users exposed to left-biased articles**:
   - **p-value: 0.0001** → Highly significant shift in opinions.
   
3. **Extreme Right users exposed to right-biased articles**:
   - **p-value: 0.8622** → No significant shift in opinions.

4. **Extreme Left users exposed to right-biased articles**:
   - **p-value: 7.68e-11** → Extremely significant shift in opinions.

#### Insights:
- **Extreme Left users** are highly susceptible to opinion shifts when exposed to both left- and right-biased articles. **Left-biased content** reinforces their views, while **right-biased content** causes significant shifts, likely in opposition.
- **Extreme Right users** show **no significant opinion shifts** when exposed to either left- or right-biased content, indicating that their opinions remain more stable.

This suggests that the **type of bias** has a **differential impact** based on political orientation, with **Extreme Left users** being more affected overall, while **Extreme Right users** demonstrate more resistance to opinion shifts.


## 6. Direction of Change in Responses

In [199]:
def analyze_direction_of_change(df, group_name):
    """
    Analyzes the direction of response changes based on different question scales.
    Automatically detects the scale based on the question code:
    - 1-7 scale (e.g., F1 questions where 1 = completely agree and 7 = completely disagree)
    - 1-5 scale (e.g., F2 questions where 1 = not concerned at all and 5 = very concerned)
    
    Parameters:
    - df (DataFrame): The DataFrame containing before and after responses.
    - group_name (str): Name of the group for logging purposes.
    """
    
    def interpret_change(row):
        """
        Determines the direction of the response change based on the question type.
        - For F1 questions (1-7 scale): A lower number indicates more agreement, so a decrease is positive.
        - For F2 questions (1-5 scale): Higher numbers indicate more concern, so an increase is positive.
        """
        # Check if the question code starts with "F2" (which uses a 1-5 scale)
        if row['question_code'].startswith('F2'):
            # 1-5 scale (concern-based): Increase is positive (more concern), decrease is negative
            if row['numeric_response_after'] > row['numeric_response_before']:
                return "positive"
            elif row['numeric_response_after'] < row['numeric_response_before']:
                return "negative"
            else:
                return "no change"
        else:
            # Default to 1-7 scale: Decrease is positive (more agreement), increase is negative
            if row['numeric_response_after'] < row['numeric_response_before']:
                return "positive"
            elif row['numeric_response_after'] > row['numeric_response_before']:
                return "negative"
            else:
                return "no change"

    # Apply the interpretation function to each row using .loc to avoid SettingWithCopyWarning
    df.loc[:, 'change_direction'] = df.apply(interpret_change, axis=1)
    
    # Calculate the counts of positive, negative, and no changes
    positive_changes = (df['change_direction'] == "positive").sum()
    negative_changes = (df['change_direction'] == "negative").sum()
    no_changes = (df['change_direction'] == "no change").sum()
    
    # Calculate the mean response change (just for general reference) using .loc
    df.loc[:, 'response_change'] = df['numeric_response_after'] - df['numeric_response_before']
    mean_change = df['response_change'].mean()

    # Print the results
    print(f"Analysis of response changes for {group_name}:")
    print(f"Mean change: {mean_change}")
    print(f"Number of positive changes: {positive_changes}")
    print(f"Number of negative changes: {negative_changes}")
    print(f"Number of no changes: {no_changes}")
    
    return mean_change, positive_changes, negative_changes, no_changes


In [200]:
# Analyze the direction of change for Extreme Left users exposed to right-biased articles
mean_change_extreme_left_right_bias, pos_changes_extreme_left_right_bias, neg_changes_extreme_left_right_bias, no_changes_extreme_left_right_bias = analyze_direction_of_change(
    right_bias_extreme_left,  # DataFrame for right-biased articles and Extreme Left users
    "Extreme Left - Right-Biased Articles"
)

# Analyze the direction of change for Extreme Left users exposed to left-biased articles
mean_change_extreme_left_left_bias, pos_changes_extreme_left_left_bias, neg_changes_extreme_left_left_bias, no_changes_extreme_left_left_bias = analyze_direction_of_change(
    left_bias_extreme_left,  # DataFrame for left-biased articles and Extreme Left users
    "Extreme Left - Left-Biased Articles"
)

# Analyze the direction of change for Extreme Right users exposed to right-biased articles
mean_change_extreme_right_right_bias, pos_changes_extreme_right_right_bias, neg_changes_extreme_right_right_bias, no_changes_extreme_right_right_bias = analyze_direction_of_change(
    right_bias_extreme_right,  # DataFrame for right-biased articles and Extreme Right users
    "Extreme Right - Right-Biased Articles"
)

# Analyze the direction of change for Extreme Right users exposed to left-biased articles
mean_change_extreme_right_left_bias, pos_changes_extreme_right_left_bias, neg_changes_extreme_right_left_bias, no_changes_extreme_right_left_bias = analyze_direction_of_change(
    left_bias_extreme_right,  # DataFrame for left-biased articles and Extreme Right users
    "Extreme Right - Left-Biased Articles"
)


Analysis of response changes for Extreme Left - Right-Biased Articles:
Mean change: -0.4413265306122449
Number of positive changes: 91
Number of negative changes: 107
Number of no changes: 194
Analysis of response changes for Extreme Left - Left-Biased Articles:
Mean change: -0.23979591836734693
Number of positive changes: 62
Number of negative changes: 129
Number of no changes: 201
Analysis of response changes for Extreme Right - Right-Biased Articles:
Mean change: -0.08455882352941177
Number of positive changes: 189
Number of negative changes: 67
Number of no changes: 288
Analysis of response changes for Extreme Right - Left-Biased Articles:
Mean change: 0.014705882352941176
Number of positive changes: 171
Number of negative changes: 97
Number of no changes: 276


### Direction of Change Analysis: Extreme Left and Extreme Right

From the Wilcoxon test, we found that agents with an **Extreme Left stance** tended to show significant shifts in opinion. We now analyze the direction of response changes for both **Extreme Left** and **Extreme Right users** exposed to **right- and left-biased articles**, accounting for the different question scales.

#### Key Results for Extreme Left Users:

1. **Extreme Left users exposed to right-biased articles**:
   - **Mean change**: -0.44
   - **Positive changes**: 91
   - **Negative changes**: 107
   - **No changes**: 194
   
   **Interpretation**: 
   - The overall **negative shift** indicates that most Extreme Left users reacted negatively to right-biased content, likely **moving further away** from alignment with right-leaning viewpoints. However, a notable number of positive changes (91) show that some users became more aligned with right-wing content.

2. **Extreme Left users exposed to left-biased articles**:
   - **Mean change**: -0.24
   - **Positive changes**: 62
   - **Negative changes**: 129
   - **No changes**: 201
   
   **Interpretation**: 
   - While left-biased content should reinforce the views of Extreme Left users, the overall **negative shift** suggests that a significant portion of users moved away from the content’s viewpoint. This may indicate **polarization** or **fatigue** with the content. However, the positive changes show that left-biased content reinforced views for some users.

#### Key Results for Extreme Right Users:

1. **Extreme Right users exposed to right-biased articles**:
   - **Mean change**: -0.08
   - **Positive changes**: 189
   - **Negative changes**: 67
   - **No changes**: 288
   
   **Interpretation**: 
   - The overall **small negative shift** suggests that most Extreme Right users did not significantly change their views when exposed to right-biased content, but some became more aligned with right-wing viewpoints (189 positive changes). However, 288 users showed **no change**, indicating a strong **stability** in their opinions when exposed to content that aligns with their views.

2. **Extreme Right users exposed to left-biased articles**:
   - **Mean change**: 0.01
   - **Positive changes**: 171
   - **Negative changes**: 97
   - **No changes**: 276
   
   **Interpretation**: 
   - The **slight positive shift** indicates that some Extreme Right users became more aligned with left-leaning content. However, most showed **no significant change** (276 users), and there were still more positive changes (171) than negative, suggesting that **left-biased articles had limited impact** on shifting their views.

#### Conclusion:
- **Right-biased articles** generally caused a **polarizing shift** for **Extreme Left users**, pushing them away from right-wing viewpoints, while for **Extreme Right users**, they mainly reinforced existing views.
- **Left-biased articles** had a mixed impact on both groups. For **Extreme Left users**, it produced **polarization** or **fatigue**, while for **Extreme Right users**, it resulted in limited but slightly positive shifts in alignment with left-leaning content, though most opinions remained stable.


## Adding Reliability Scores

Performing the shift analysis above while also including the reliability score

In [209]:
def filter_and_analyze_with_reliability(df, reliability_df, reliability_threshold=0.44):
    """
    Filters and analyzes direction of opinion changes for a given DataFrame of user responses,
    weighted by reliability scores. The function merges user reliability data, filters out
    unreliable users based on an average reliability threshold, and analyzes direction of 
    response changes for reliable users only.
    
    Parameters:
    - df (DataFrame): The DataFrame containing user responses for a particular group. 
                      Should include 'user_id', 'question_code', 'numeric_response_before', 
                      'numeric_response_after', 'response_change', and 'bias'.
    - reliability_df (DataFrame): The DataFrame containing user reliability scores. Should 
                      include 'user_id' and 'reliability_score'.
    - reliability_threshold (float, optional): The minimum average reliability score for a 
                      user to be considered 'reliable'. Default is 0.44.

    Returns:
    - result (tuple): A tuple containing:
        - mean_change (float): The weighted mean of response changes for reliable users.
        - positive_changes (float): The weighted count of positive response changes.
        - negative_changes (float): The weighted count of negative response changes.
        - no_changes (float): The weighted count of responses where no change occurred.
    
    Workflow:
    1. Merge the user reliability data into the user responses DataFrame.
    2. Calculate each user's average reliability score.
    3. Filter the DataFrame to retain only users whose average reliability score is above 
       the specified threshold.
    4. Perform a direction of change analysis, weighting response changes by the reliability score.
    5. Return a summary of the analysis including mean change, positive, negative, and no change counts.
    
    Example Usage:
    mean_change, pos_changes, neg_changes, no_changes = filter_and_analyze_with_reliability(
        right_bias_extreme_left, 
        reliability_df, 
        reliability_threshold=0.44
    )
    """
    # Step 1: Merge the user reliability data into the user responses DataFrame
    df = df.merge(reliability_df[['user_id', 'reliability_score']], on='user_id', how='left')

    # Step 2: Calculate each user's average reliability score
    user_avg_reliability = df.groupby('user_id')['reliability_score'].mean()

    # Step 3: Filter for users with an average reliability score greater than the threshold
    reliable_users = user_avg_reliability[user_avg_reliability > reliability_threshold].index

    # Filter the original DataFrame to include only reliable users
    reliable_df = df[df['user_id'].isin(reliable_users)]

    # Step 4: Perform the analysis using the 'analyze_direction_of_change_with_reliability' function
    mean_change, positive_changes, negative_changes, no_changes = analyze_direction_of_change_with_reliability(
        reliable_df, 
        group_name="Filtered Group"
    )

    # Step 5: Return the results of the analysis
    return mean_change, positive_changes, negative_changes, no_changes


def analyze_direction_of_change_with_reliability(df, group_name):
    """
    Analyzes the direction of response changes based on different question scales, 
    weighted by reliability scores. Automatically detects the scale based on the question code.
    Handles two scales:
    - 1-7 scale (e.g., F1 questions where 1 = completely agree and 7 = completely disagree)
    - 1-5 scale (e.g., F2 questions where 1 = not concerned at all and 5 = very concerned)
    
    Parameters:
    - df (DataFrame): The DataFrame containing before and after responses, 
                      including reliability scores.
    - group_name (str): Name of the group for logging purposes.
    
    Returns:
    - result (tuple): A tuple containing:
        - mean_change (float): The weighted mean of response changes.
        - positive_changes (float): The weighted count of positive response changes.
        - negative_changes (float): The weighted count of negative response changes.
        - no_changes (float): The weighted count of no response changes.
    
    Example Usage:
    mean_change, pos_changes, neg_changes, no_changes = analyze_direction_of_change_with_reliability(
        reliable_df, 
        group_name="Extreme Left - Right-Biased Articles"
    )
    """
    def interpret_change(row):
        """
        Determines the direction of the response change based on the question type.
        - For F1 questions (1-7 scale): A lower number indicates more agreement, so a decrease is positive.
        - For F2 questions (1-5 scale): Higher numbers indicate more concern, so an increase is positive.
        """
        if row['question_code'].startswith('F2'):
            # 1-5 scale (concern-based): Increase is positive (more concern), decrease is negative
            if row['numeric_response_after'] > row['numeric_response_before']:
                return "positive"
            elif row['numeric_response_after'] < row['numeric_response_before']:
                return "negative"
            else:
                return "no change"
        else:
            # Default to 1-7 scale: Decrease is positive (more agreement), increase is negative
            if row['numeric_response_after'] < row['numeric_response_before']:
                return "positive"
            elif row['numeric_response_after'] > row['numeric_response_before']:
                return "negative"
            else:
                return "no change"

    # Apply the interpretation function to each row using .loc to avoid SettingWithCopyWarning
    df.loc[:, 'change_direction'] = df.apply(interpret_change, axis=1)

    # Calculate weighted counts of positive, negative, and no changes using reliability score
    positive_changes_weighted = (df['change_direction'] == "positive") * df['reliability_score']
    negative_changes_weighted = (df['change_direction'] == "negative") * df['reliability_score']
    no_changes_weighted = (df['change_direction'] == "no change") * df['reliability_score']
    
    # Sum up the weighted counts
    positive_changes = positive_changes_weighted.sum()
    negative_changes = negative_changes_weighted.sum()
    no_changes = no_changes_weighted.sum()
    
    # Calculate the mean response change, weighted by reliability
    df.loc[:, 'response_change'] = df['numeric_response_after'] - df['numeric_response_before']
    mean_change = (df['response_change'] * df['reliability_score']).sum() / df['reliability_score'].sum()

    # Print the results
    print(f"Analysis of response changes for {group_name} (weighted by reliability):")
    print(f"Mean change: {mean_change}")
    print(f"Weighted positive changes: {positive_changes}")
    print(f"Weighted negative changes: {negative_changes}")
    print(f"Weighted no changes: {no_changes}")
    
    return mean_change, positive_changes, negative_changes, no_changes


In [212]:
right_bias_extreme_right.head()

Unnamed: 0,user_id,question_code,numeric_response_before,political_stance,numeric_response_after,bias,response_change
1,IDUS103408,F1A10_1,7,Extreme Right,1,right,-6
3,IDUS103408,F2A6,4,Extreme Right,5,right,1
5,IDUS103408,F2A7,1,Extreme Right,2,right,1
7,IDUS103408,F2A9,2,Extreme Right,5,right,3
9,IDUS103408,F3A3_1,1,Extreme Right,5,right,4


In [210]:
# Load reliability data
reliability_df = pd.read_csv('../data/processed/user_ranks.csv')

# Analyze for Extreme Left users exposed to right-biased articles
mean_change_extreme_left_right_bias, pos_changes_extreme_left_right_bias, neg_changes_extreme_left_right_bias, no_changes_extreme_left_right_bias = filter_and_analyze_with_reliability(
    right_bias_extreme_left,  # DataFrame with Extreme Left users exposed to right-biased articles
    reliability_df,           # DataFrame with reliability scores
    reliability_threshold=0.44  # Set reliability threshold for filtering
)

# Analyze for Extreme Left users exposed to left-biased articles
mean_change_extreme_left_left_bias, pos_changes_extreme_left_left_bias, neg_changes_extreme_left_left_bias, no_changes_extreme_left_left_bias = filter_and_analyze_with_reliability(
    left_bias_extreme_left,  # DataFrame with Extreme Left users exposed to left-biased articles
    reliability_df,          # DataFrame with reliability scores
    reliability_threshold=0.44  # Set reliability threshold for filtering
)

# Analyze for Extreme Right users exposed to right-biased articles
mean_change_extreme_right_right_bias, pos_changes_extreme_right_right_bias, neg_changes_extreme_right_right_bias, no_changes_extreme_right_right_bias = filter_and_analyze_with_reliability(
    right_bias_extreme_right,  # DataFrame with Extreme Right users exposed to right-biased articles
    reliability_df,            # DataFrame with reliability scores
    reliability_threshold=0.44  # Set reliability threshold for filtering
)

# Analyze for Extreme Right users exposed to left-biased articles
mean_change_extreme_right_left_bias, pos_changes_extreme_right_left_bias, neg_changes_extreme_right_left_bias, no_changes_extreme_right_left_bias = filter_and_analyze_with_reliability(
    left_bias_extreme_right,   # DataFrame with Extreme Right users exposed to left-biased articles
    reliability_df,            # DataFrame with reliability scores
    reliability_threshold=0.44  # Set reliability threshold for filtering
)

# Print the results for each analysis
print(f"Extreme Left - Right-Biased Articles: Mean Change: {mean_change_extreme_left_right_bias}, Positive: {pos_changes_extreme_left_right_bias}, Negative: {neg_changes_extreme_left_right_bias}, No Change: {no_changes_extreme_left_right_bias}")
print(f"Extreme Left - Left-Biased Articles: Mean Change: {mean_change_extreme_left_left_bias}, Positive: {pos_changes_extreme_left_left_bias}, Negative: {neg_changes_extreme_left_left_bias}, No Change: {no_changes_extreme_left_left_bias}")
print(f"Extreme Right - Right-Biased Articles: Mean Change: {mean_change_extreme_right_right_bias}, Positive: {pos_changes_extreme_right_right_bias}, Negative: {neg_changes_extreme_right_right_bias}, No Change: {no_changes_extreme_right_right_bias}")
print(f"Extreme Right - Left-Biased Articles: Mean Change: {mean_change_extreme_right_left_bias}, Positive: {pos_changes_extreme_right_left_bias}, Negative: {neg_changes_extreme_right_left_bias}, No Change: {no_changes_extreme_right_left_bias}")


KeyError: 'Column not found: reliability_score'

In [205]:
# Calculate average reliability score per user
user_avg_reliability = right_bias_extreme_left.groupby('user_id')['reliability_score'].mean()

# Display the first few rows of user average reliability
print(user_avg_reliability.head())


user_id
IDUS104424    0.266667
IDUS104915    0.380952
IDUS105157    0.470588
IDUS106103    0.285714
IDUS106240    0.320000
Name: reliability_score, dtype: float64


In [206]:
# Filter for users with an average reliability score greater than 0.44
reliable_users_left = user_avg_reliability[user_avg_reliability > 0.44].index

# Filter the original DataFrame to include only these reliable users
reliable_right_bias_extreme_left = right_bias_extreme_left[right_bias_extreme_left['user_id'].isin(reliable_users_left)]

# Check how many reliable users are included
print(f"Number of reliable Extreme Left users: {len(reliable_users_left)}")


Number of reliable Extreme Left users: 12


In [207]:
# Perform Wilcoxon test for reliable Extreme Left users exposed to right-biased articles (non-extreme responses)
p_value_reliable_extreme_left_right_bias_non_extreme = perform_non_extreme_wilcoxon_test(
    reliable_right_bias_extreme_left,  # Filtered DataFrame with reliable users
    'real_response_code',     # Before responses (actual user responses before article exposure)
    'llm_response_code',      # After responses (predicted by LLM or after article exposure)
    'Reliable Extreme Left - Right-Biased Articles (Non-Extreme)'
)

print(f"p-value for Reliable Extreme Left users (Right-Biased Articles, Non-Extreme): {p_value_reliable_extreme_left_right_bias_non_extreme}")


KeyError: "None of [Index(['real_response_code', 'llm_response_code'], dtype='object')] are in the [columns]"

In [167]:
# Analyze the direction of change for reliable Extreme Left users exposed to right-biased articles
mean_change_reliable_extreme_left_right_bias, pos_changes_reliable_extreme_left_right_bias, neg_changes_reliable_extreme_left_right_bias, no_changes_reliable_extreme_left_right_bias = analyze_direction_of_change(
    reliable_right_bias_extreme_left,  # Filtered DataFrame with reliable users
    "Reliable Extreme Left - Right-Biased Articles"
)

# Print the results
print(f"Mean change for reliable Extreme Left users: {mean_change_reliable_extreme_left_right_bias}")
print(f"Positive changes: {pos_changes_reliable_extreme_left_right_bias}, Negative changes: {neg_changes_reliable_extreme_left_right_bias}, No changes: {no_changes_reliable_extreme_left_right_bias}")


Analysis of response changes for Reliable Extreme Left - Right-Biased Articles:
Mean change: -0.25
Number of positive changes: 15
Number of negative changes: 30
Number of no changes: 51
Mean change for reliable Extreme Left users: -0.25
Positive changes: 15, Negative changes: 30, No changes: 51


### Right wing

In [168]:
# Perform the merge to add reliability_score based on user_id
right_bias_extreme_right = right_bias_extreme_right.merge(
    reliability_df[['user_id', 'reliability_score']], 
    on='user_id', 
    how='left'
)

# Check if reliability_score has been added successfully
print(right_bias_extreme_right.columns)


Index(['user_id', 'bias', 'selected_option_before', 'question_before',
       'numeric_response_before', 'political_stance_before',
       'selected_option_after', 'question_after', 'numeric_response_after',
       'political_stance_after', 'reliability_score'],
      dtype='object')


In [169]:
# Check the columns in the DataFrame
print(right_bias_extreme_right.columns)


Index(['user_id', 'bias', 'selected_option_before', 'question_before',
       'numeric_response_before', 'political_stance_before',
       'selected_option_after', 'question_after', 'numeric_response_after',
       'political_stance_after', 'reliability_score'],
      dtype='object')


In [170]:
# Calculate average reliability score per user
user_avg_reliability = right_bias_extreme_right.groupby('user_id')['reliability_score'].mean()

# Display the first few rows of the user average reliability
print(user_avg_reliability.head())


user_id
IDUS103408    0.242424
IDUS103554    0.615385
IDUS103826    0.285714
IDUS104462    0.275862
IDUS104578    0.421053
Name: reliability_score, dtype: float64


In [171]:
# Filter for users with an average reliability score greater than 0.44 (or another threshold)
reliable_users = user_avg_reliability[user_avg_reliability > 0.44].index

# Filter the original DataFrame to include only these reliable users
reliable_right_bias_extreme_right = right_bias_extreme_right[right_bias_extreme_right['user_id'].isin(reliable_users)]

# Check how many reliable users are included
print(f"Number of reliable users: {len(reliable_users)}")


Number of reliable users: 29


In [172]:
# Perform Wilcoxon test for reliable right-wing users exposed to right-biased articles (non-extreme responses)
p_value_reliable_extreme_right_right_bias_non_extreme = perform_non_extreme_wilcoxon_test(
    reliable_right_bias_extreme_right,  # Filtered DataFrame with reliable right-wing users
    'numeric_response_before',          # Before responses (actual user responses before article exposure)
    'numeric_response_after',           # After responses (predicted by LLM or after article exposure)
    'Reliable Extreme Right - Right-Biased Articles (Non-Extreme)'
)

# Output the p-value
print(f"p-value for Reliable Extreme Right users (Right-Biased Articles, Non-Extreme): {p_value_reliable_extreme_right_right_bias_non_extreme}")


Wilcoxon test for Reliable Extreme Right - Right-Biased Articles (Non-Extreme):
Test statistic: 2436.0, p-value: 0.5200795218671146
p-value for Reliable Extreme Right users (Right-Biased Articles, Non-Extreme): 0.5200795218671146


In [173]:
# Calculate the response change as the difference between numeric_response_after and numeric_response_before
reliable_right_bias_extreme_right['response_change'] = reliable_right_bias_extreme_right['numeric_response_after'] - reliable_right_bias_extreme_right['numeric_response_before']

# Check if the response_change column has been added correctly
print(reliable_right_bias_extreme_right[['numeric_response_before', 'numeric_response_after', 'response_change']].head())


    numeric_response_before  numeric_response_after  response_change
8                         5                       1               -4
9                         4                       5                1
10                        2                       1               -1
11                        4                       5                1
12                        6                       6                0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reliable_right_bias_extreme_right['response_change'] = reliable_right_bias_extreme_right['numeric_response_after'] - reliable_right_bias_extreme_right['numeric_response_before']


In [174]:
# Analyze direction of change for reliable right-wing users exposed to right-biased articles
mean_change_reliable_extreme_right_right_bias, pos_changes_reliable_extreme_right_right_bias, neg_changes_reliable_extreme_right_right_bias, no_changes_reliable_extreme_right_right_bias = analyze_direction_of_change(
    reliable_right_bias_extreme_right,  # Filtered DataFrame with reliable right-wing users
    "Reliable Extreme Right - Right-Biased Articles"
)

# Output the results
print(f"Mean change for reliable right-wing users: {mean_change_reliable_extreme_right_right_bias}")
print(f"Positive changes: {pos_changes_reliable_extreme_right_right_bias}")
print(f"Negative changes: {neg_changes_reliable_extreme_right_right_bias}")
print(f"No changes: {no_changes_reliable_extreme_right_right_bias}")


Analysis of response changes for Reliable Extreme Right - Right-Biased Articles:
Mean change: -0.20689655172413793
Number of positive changes: 60
Number of negative changes: 42
Number of no changes: 130
Mean change for reliable right-wing users: -0.20689655172413793
Positive changes: 60
Negative changes: 42
No changes: 130
