# Linguistic Analysis: pronoun usage

In [None]:
!pip install spacy

In [None]:
!python -m spacy download en_core_web_sm

In [None]:
import pandas as pd

import spacy
from tqdm import tqdm
from collections import Counter

# Load the small English spaCy pipeline once at module level;
# get_pronouns_counts reuses this object for every text it analyzes.
nlp = spacy.load("en_core_web_sm")

## Reading the data

In [None]:
# Read the tab-separated dataset, naming its two columns explicitly.
data = pd.read_csv('/data/metahate.csv', sep='\t', names=['label', 'text'])

# Split the texts by label: rows labelled 1 go to text_hate, rows labelled 0 to text_no_hate.
hate_mask = data['label'] == 1
no_hate_mask = data['label'] == 0

text_hate = data.loc[hate_mask, 'text'].tolist()
text_no_hate = data.loc[no_hate_mask, 'text'].tolist()

## Function for counting pronouns in text

In [None]:
def get_pronouns_counts(text):
    """
    Tally personal pronouns in ``text`` by grammatical person and number.

    The text is run through the module-level spaCy pipeline ``nlp``; only
    tokens tagged ``PRON`` are considered, compared case-insensitively.
    Only these surface forms are tallied (other forms such as "me" or
    "them" are not counted):

    - first person singular: "i"
    - first person plural: "we"
    - second person: "you"
    - third person singular: "he", "she", "it"
    - third person plural: "they"

    Parameters:
    - text (str): Input text to analyze.

    Returns:
    Tuple[int, int, int, int, int]: Counts in the order first-person singular,
    first-person plural, second person, third-person singular, third-person plural.
    """
    pronoun_forms = [tok.text.lower() for tok in nlp(text) if tok.pos_ == "PRON"]
    tally = Counter(pronoun_forms)

    # A Counter yields 0 for forms that never occurred, so no key checks are needed.
    third_singular = sum(tally[form] for form in ("he", "she", "it"))

    return tally["i"], tally["we"], tally["you"], third_singular, tally["they"]

## Processing the text data

In [None]:
# Running totals for each pronoun category, accumulated over the whole corpus.
total_first_person_singular_count = 0
total_first_person_plural_count = 0
total_second_person_singular_count = 0
total_third_person_singular_count = 0
total_third_person_plural_count = 0

# Iterating through each text in the 'text_hate' list (and 'text_no_hate' later)
for text in tqdm(text_hate):
    # get_pronouns_counts returns exactly five counts, in this order; unpacking
    # any other number of names raises ValueError on the first iteration.
    (
        first_person_singular_count,
        first_person_plural_count,
        second_person_singular_count,
        third_person_singular_count,
        third_person_plural_count,
    ) = get_pronouns_counts(text)

    total_first_person_singular_count += first_person_singular_count
    total_first_person_plural_count += first_person_plural_count
    total_second_person_singular_count += second_person_singular_count
    total_third_person_singular_count += third_person_singular_count
    total_third_person_plural_count += third_person_plural_count

## Calculating the total count and the percentages of each pronoun category

In [None]:
# Overall number of counted pronouns across the five categories.
total_pronouns = sum((
    total_first_person_singular_count,
    total_first_person_plural_count,
    total_second_person_singular_count,
    total_third_person_singular_count,
    total_third_person_plural_count,
))

# Share of each category as a percentage of all counted pronouns.
# When nothing was counted, every percentage falls back to 0 to avoid dividing by zero.
if total_pronouns > 0:
    first_person_singular_percentage = total_first_person_singular_count / total_pronouns * 100
    first_person_plural_percentage = total_first_person_plural_count / total_pronouns * 100
    second_person_singular_percentage = total_second_person_singular_count / total_pronouns * 100
    third_person_singular_percentage = total_third_person_singular_count / total_pronouns * 100
    third_person_plural_percentage = total_third_person_plural_count / total_pronouns * 100
else:
    first_person_singular_percentage = 0
    first_person_plural_percentage = 0
    second_person_singular_percentage = 0
    third_person_singular_percentage = 0
    third_person_plural_percentage = 0

## Printing metrics

In [None]:
# Emit the summary as one line per metric; percentages are shown with two decimals.
print(
    f"Total Pronouns: {total_pronouns}",
    f"First Person Singular Percentage: {first_person_singular_percentage:.2f}%",
    f"First Person Plural Percentage: {first_person_plural_percentage:.2f}%",
    f"Second Person Singular Percentage: {second_person_singular_percentage:.2f}%",
    f"Third Person Singular Percentage: {third_person_singular_percentage:.2f}%",
    f"Third Person Plural Percentage: {third_person_plural_percentage:.2f}%",
    sep="\n",
)