In [58]:
import pandas as pd
import re
from collections import Counter
from citeproc import CitationStylesStyle, CitationStylesBibliography
from citeproc import CitationItem

In [59]:
# Load one safety-features CSV per platform into a module-level DataFrame.
# NOTE(review): the paths are absolute and point into a single user's OneDrive
# folder on Windows, so this cell only runs where that directory layout exists.
FACEBOOK = pd.read_csv(r"C:\Users\patri\OneDrive - University of Toronto\SEXTORTION\UTORONTO-SEXTORTION-RESEARCH\SAFETY\CSV\SAFETY FEATURES - Facebook.csv")
INSTAGRAM = pd.read_csv(r"C:\Users\patri\OneDrive - University of Toronto\SEXTORTION\UTORONTO-SEXTORTION-RESEARCH\SAFETY\CSV\SAFETY FEATURES - Instagram.csv")
TIKTOK = pd.read_csv(r"C:\Users\patri\OneDrive - University of Toronto\SEXTORTION\UTORONTO-SEXTORTION-RESEARCH\SAFETY\CSV\SAFETY FEATURES - TikTok.csv")
TWITTER = pd.read_csv(r"C:\Users\patri\OneDrive - University of Toronto\SEXTORTION\UTORONTO-SEXTORTION-RESEARCH\SAFETY\CSV\SAFETY FEATURES - Twitter.csv")

In [60]:
# Name of the column that extract_numbers_from_columns adds to each DataFrame.
EXTRACTED_NUMBERS_COLUMN = "Extracted Numbers"


def extract_numbers_with_period_from_text(text):
    """
    Collect every integer in a value that is directly followed by a period.

    :param text: Value to scan. It is converted with ``str()`` first, so
        non-string values are accepted.
    :return: List of the matched numbers as ints, in order of appearance.
    """
    found = []
    # A match is a run of digits that starts at a word boundary and is
    # immediately followed by a literal ".".
    for match in re.finditer(r'\b(\d+)\.', str(text)):
        found.append(int(match.group(1)))
    return found


def extract_numbers_from_row(row, target_columns):
    """
    Gather the period-terminated numbers found in several fields of one row.

    :param row: A single row from the DataFrame, indexable by column name.
    :param target_columns: Column names to scan, in the order given.
    :return: One flat list holding the numbers from every scanned column,
        column by column.
    """
    return [
        value
        for column_name in target_columns
        for value in extract_numbers_with_period_from_text(row[column_name])
    ]


def extract_numbers_from_columns(df, target_columns):
    """
    Extract numbers with periods immediately after them from the specified columns of a
    DataFrame and store them, one list per row, in the EXTRACTED_NUMBERS_COLUMN column.

    The DataFrame is modified in place and also returned.

    :param df: The DataFrame to process. It may have zero rows, in which case
        an empty column is added.
    :param target_columns: List of columns from which to extract numbers.
    :return: The same DataFrame object, with the extracted-numbers column added.
    """
    # Build the per-row lists explicitly instead of using df.apply(..., axis=1):
    # on a frame with no rows, apply returns an empty DataFrame rather than a
    # Series, which cannot be reliably assigned to a single column.
    extracted = [
        extract_numbers_from_row(row, target_columns) for _, row in df.iterrows()
    ]
    # An object-dtype Series on the frame's own index keeps each list as one
    # cell and lines the values up with the rows they came from.
    df[EXTRACTED_NUMBERS_COLUMN] = pd.Series(extracted, index=df.index, dtype=object)
    return df


def process_all_dataframes(dataframes, target_columns):
    """
    Run the number extraction over each DataFrame in turn.

    :param dataframes: Iterable of DataFrames to process.
    :param target_columns: Column names to scan in every DataFrame.
    :return: List with the result of extract_numbers_from_columns for each
        input, in the same order.
    """
    processed = []
    for frame in dataframes:
        processed.append(extract_numbers_from_columns(frame, target_columns))
    return processed


# Run the extraction over every platform table. extract_numbers_from_columns
# adds the new column to each DataFrame in place and returns that same object,
# so processed_dataframes holds the same four DataFrames as dataframes.
dataframes = [FACEBOOK, INSTAGRAM, TIKTOK, TWITTER]
columns_to_check = ['Safety Feature', 'Description', 'Purpose', 'Motivation']
processed_dataframes = process_all_dataframes(dataframes, columns_to_check)


In [61]:
# NOTE(review): this function is defined again in a later cell of this file;
# once both cells have run, the later definition is the one in effect.
def replace_numbers_with_citations(df, extracted_numbers_column="Extracted Numbers", target_columns=None):
    """
    Replace numbered references ("<digits>.") in text columns with full citations.

    Every run of digits that starts at a word boundary and is immediately
    followed by a period is looked up in the citation legend below. If the
    number is in the legend, the digits and the period are replaced by the
    citation text; otherwise the text is left as it is. Non-string cells are
    returned unchanged.

    :param df: DataFrame containing the data to process. The target columns
        are overwritten in place.
    :param extracted_numbers_column: Not used by this function; kept so that
        existing callers that pass it keep working.
    :param target_columns: List of DataFrame column names where numbers should
        be replaced. ``None`` (the default) means no columns are touched.
    :return: The same DataFrame object, with citations replacing numbers.
    :raises KeyError: If a name in ``target_columns`` is not a column of ``df``.
    """
    # Citation legend for replacement
    citation_mapping = {
        1: "U.S. Department of Veterans Affairs. 2025. “Social Media, the Safe Way.” DigitalVA (blog). 2025. https://digital.va.gov/cyber-spot/social-media-the-safe-way/.",
        2: "Berkovich, Sari. 2023. “The History of Trust and Safety | Evolution of Online Protection.” ActiveFence. September 14, 2023. https://www.activefence.com/blog/the-history-of-trust-and-safety/.",
        3: "“Facebook Safety Check.” 2024. In Wikipedia. https://en.wikipedia.org/w/index.php?title=Facebook_Safety_Check&oldid=1257904682.",
        4: "Gleit, Naomi, Sharon Zeng, and Peter Cottle. 2014. “Introducing Safety Check.” Meta (blog). October 16, 2014. https://about.fb.com/news/2014/10/introducing-safety-check/.",
        5: "Dowdy, Tracey. 2016. “Facebook Updates Safety Check Feature.” The Online Mom (blog). December 29, 2016. https://www.theonlinemom.com/facebook-updates-safety-check-feature/.",
        6: "NNEDV and Facebook. 2017. “A Guide to Staying Safe on Facebook.” Meta. https://about.fb.com/wp-content/uploads/2017/12/a-guide-to-staying-safe-on-facebook.pdf."
    }

    # Same pattern the extraction step uses: digits at a word boundary
    # followed directly by a period.
    reference_pattern = re.compile(r'\b(\d+)\.')

    def citation_for(match):
        """Return the citation for a matched reference, or the match itself if unmapped."""
        return citation_mapping.get(int(match.group(1)), match.group(0))

    def replace_in_text(text):
        """
        Replace numbered references in a string with citations.
        """
        if not isinstance(text, str):
            return text
        # A single pass with a callable replacement: text inserted from a
        # citation is never rescanned, and the citation is inserted literally
        # rather than being interpreted as a regex replacement template.
        return reference_pattern.sub(citation_for, text)

    if target_columns is None:
        target_columns = []

    # Apply replacements to the specified columns
    for col in target_columns:
        df[col] = df[col].apply(replace_in_text)
    return df

In [62]:
# NOTE(review): this cell re-declares replace_numbers_with_citations, which is
# also defined in the previous cell; being later in the file, this definition
# is the one in effect once both cells have run.
def replace_numbers_with_citations(df, extracted_numbers_column="Extracted Numbers", target_columns=None):
    """
    Replace numbered references ("<digits>.") in text columns with full citations.

    Every run of digits that starts at a word boundary and is immediately
    followed by a period is looked up in the citation legend below. If the
    number is in the legend, the digits and the period are replaced by the
    citation text; otherwise the text is left as it is. Non-string cells are
    returned unchanged.

    :param df: DataFrame containing the data to process. The target columns
        are overwritten in place.
    :param extracted_numbers_column: Not used by this function; kept so that
        existing callers that pass it keep working.
    :param target_columns: List of DataFrame column names where numbers should
        be replaced. ``None`` (the default) means no columns are touched.
    :return: The same DataFrame object, with citations replacing numbers.
    :raises KeyError: If a name in ``target_columns`` is not a column of ``df``.
    """
    # Citation legend for replacement
    citation_mapping = {
        1: "U.S. Department of Veterans Affairs. 2025. “Social Media, the Safe Way.” DigitalVA (blog). 2025. https://digital.va.gov/cyber-spot/social-media-the-safe-way/.",
        2: "Berkovich, Sari. 2023. “The History of Trust and Safety | Evolution of Online Protection.” ActiveFence. September 14, 2023. https://www.activefence.com/blog/the-history-of-trust-and-safety/.",
        3: "“Facebook Safety Check.” 2024. In Wikipedia. https://en.wikipedia.org/w/index.php?title=Facebook_Safety_Check&oldid=1257904682.",
        4: "Gleit, Naomi, Sharon Zeng, and Peter Cottle. 2014. “Introducing Safety Check.” Meta (blog). October 16, 2014. https://about.fb.com/news/2014/10/introducing-safety-check/.",
        5: "Dowdy, Tracey. 2016. “Facebook Updates Safety Check Feature.” The Online Mom (blog). December 29, 2016. https://www.theonlinemom.com/facebook-updates-safety-check-feature/.",
        6: "NNEDV and Facebook. 2017. “A Guide to Staying Safe on Facebook.” Meta. https://about.fb.com/wp-content/uploads/2017/12/a-guide-to-staying-safe-on-facebook.pdf."
    }

    # Same pattern the extraction step uses: digits at a word boundary
    # followed directly by a period.
    reference_pattern = re.compile(r'\b(\d+)\.')

    def citation_for(match):
        """Return the citation for a matched reference, or the match itself if unmapped."""
        return citation_mapping.get(int(match.group(1)), match.group(0))

    def replace_in_text(text):
        """
        Replace numbered references in a string with citations.
        """
        if not isinstance(text, str):
            return text
        # A single pass with a callable replacement: text inserted from a
        # citation is never rescanned, and the citation is inserted literally
        # rather than being interpreted as a regex replacement template.
        return reference_pattern.sub(citation_for, text)

    if target_columns is None:
        target_columns = []

    # Apply replacements to the specified columns
    for col in target_columns:
        df[col] = df[col].apply(replace_in_text)
    return df