# Linguistic Analysis: verb usage

In [None]:
!pip install spacy

In [None]:
!python -m spacy download en_core_web_sm

In [None]:
import pandas as pd

import spacy
from tqdm import tqdm

# Load the spaCy "en_core_web_sm" pipeline once, up front; get_verb_counts() reuses
# this single instance for every text instead of reloading the model per call.
nlp = spacy.load("en_core_web_sm")

## Reading the data

In [None]:
# Tab-separated input with two columns. The column names are supplied explicitly,
# so pandas reads the first line of the file as data rather than as a header.
data = pd.read_csv(
    '/data/metahate.csv',
    sep='\t',
    names=['label', 'text'],
)

# Split the texts by class: label 1 -> hate, label 0 -> no hate.
text_hate = data['text'][data['label'] == 1].tolist()
text_no_hate = data['text'][data['label'] == 0].tolist()

## Function for counting verbs in text

In [None]:
def get_verb_counts(text):
    """
    Count the occurrences of past, present, and future tense verbs in the given text.

    Tense is approximated from each token's fine-grained part-of-speech tag:
    "VBD" counts as past, "VBP"/"VBZ" as present, and "MD" as future.
    Note that "MD" is the Penn Treebank tag for every modal auxiliary (not only
    "will"/"shall"), so the future count is an approximation.

    Parameters:
    - text (str): Input text to analyze. Non-string values (e.g. the NaN that
      pandas yields for a missing field) and blank strings produce all-zero counts.

    Returns:
    Tuple[int, int, int]: A tuple containing counts of past, present, and future tense verbs.
    """
    # Missing fields arrive from pandas as float NaN, which the pipeline would
    # reject with an exception; treat them (and blank text) as having no verbs.
    if not isinstance(text, str) or not text.strip():
        return 0, 0, 0

    past_count, present_count, future_count = 0, 0, 0

    for token in nlp(text):
        tag = token.tag_
        if tag == "VBD":  # Past tense
            past_count += 1
        elif tag in ("VBP", "VBZ"):  # Present tense (non-3rd / 3rd person singular)
            present_count += 1
        elif tag == "MD":  # Modal (used here as a proxy for future)
            future_count += 1

    return past_count, present_count, future_count

## Processing the text data

In [None]:
# Corpus-wide running totals, one per tense
total_past_count = total_present_count = total_future_count = 0

# Accumulate per-text counts over 'text_hate' (swap in 'text_no_hate' to analyse the other class)
for text in tqdm(text_hate):
    n_past, n_present, n_future = get_verb_counts(text)
    total_past_count, total_present_count, total_future_count = (
        total_past_count + n_past,
        total_present_count + n_present,
        total_future_count + n_future,
    )

## Calculating the total count and the percentages of each verb tense

In [None]:
# Total number of tense-bearing tokens counted across the corpus
total_verbs = total_past_count + total_present_count + total_future_count

if total_verbs:
    # Share of each tense among all counted verbs, as a percentage
    past_percentage = (total_past_count / total_verbs) * 100
    present_percentage = (total_present_count / total_verbs) * 100
    future_percentage = (total_future_count / total_verbs) * 100
else:
    # Nothing was counted (empty corpus or no matching tags): report 0% for
    # every tense instead of failing with ZeroDivisionError.
    past_percentage = present_percentage = future_percentage = 0.0

## Printing metrics

In [None]:
# Emit the summary in a single call; sep="\n" puts each metric on its own line
print(
    f"Total Verbs: {total_verbs}",
    f"Past Tense Percentage: {past_percentage:.2f}%",
    f"Present Tense Percentage: {present_percentage:.2f}%",
    f"Future Tense Percentage: {future_percentage:.2f}%",
    sep="\n",
)