# Analyzing Women's Suffrage and Anti-Suffrage Rhetoric with Word Frequency Analysis

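This notebook demonstrates basic NLP analysis: it tokenizes pro-suffrage and anti-suffrage texts from the late 19th and early 20th centuries and compares the most frequent words in each group. The input CSV is expected to provide a `text` column holding each document and a `stance` column labelled `pro` or `anti`.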

In [None]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
import matplotlib.pyplot as plt
from collections import Counter

# Fetch the NLTK data used below: tokenizer models for word_tokenize and the
# stop-word lists for stopwords.words.
# Recent NLTK releases load the tokenizer from "punkt_tab" instead of the older
# "punkt" package, so both are requested to keep a fresh Restart & Run All
# working on either release line.
nltk.download("punkt")
nltk.download("punkt_tab")
nltk.download("stopwords")

## Load the Data

In [None]:
# Read the corpus into a DataFrame and preview its first rows.
df = pd.read_csv(filepath_or_buffer="../data/suffrage_texts.csv")
df.head(n=5)

## Preprocess the Text

In [None]:
# English stop words from NLTK, held in a set for fast membership tests.
stop_words = {*stopwords.words("english")}

def preprocess(text):
    """Tokenize one document into lowercase, alphabetic, non-stop-word tokens.

    Parameters
    ----------
    text : str
        Raw document text. Any non-string value (for example the NaN that
        ``pd.read_csv`` produces for an empty cell) is treated as an empty
        document instead of raising ``AttributeError`` on ``.lower()``.

    Returns
    -------
    list of str
        Tokens from ``word_tokenize`` that are purely alphabetic and are not
        in the module-level ``stop_words`` set, in their original order.
    """
    # Missing values arrive as float NaN / None, which have no .lower().
    if not isinstance(text, str):
        return []
    tokens = word_tokenize(text.lower())
    return [word for word in tokens if word.isalpha() and word not in stop_words]

# Tokenize every document and store the token lists beside the raw text.
df["tokens"] = df["text"].map(preprocess)
df.head(n=5)

## Compare Word Frequencies

In [None]:
# Flatten each stance's per-document token lists into one list of words.
# A comprehension replaces Series.sum(): summing lists re-copies the growing
# list for every row, and for an empty selection sum() returns the integer 0,
# which Counter cannot iterate.
pro_words = [word for doc in df.loc[df["stance"] == "pro", "tokens"] for word in doc]
anti_words = [word for doc in df.loc[df["stance"] == "anti", "tokens"] for word in doc]

# most_common(15) yields (word, count) pairs, highest count first.
pro_freq = Counter(pro_words).most_common(15)
anti_freq = Counter(anti_words).most_common(15)

pro_freq, anti_freq

## Visualize the Results

In [None]:
def plot_freq(freq_data, title):
    """Draw a bar chart of word frequencies and show it.

    Parameters
    ----------
    freq_data : sequence of (word, count) pairs
        Pairs in the order the bars should appear, e.g. the output of
        ``Counter.most_common``.
    title : str
        Title placed above the chart.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``. When ``freq_data`` is
        empty a short message is printed and nothing is drawn.
    """
    # zip(*[]) yields nothing, so unpacking it into two names would raise
    # ValueError; report the empty case explicitly instead.
    if not freq_data:
        print(f"No word frequencies to plot for: {title}")
        return

    words, counts = zip(*freq_data)
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.bar(words, counts)
    ax.tick_params(axis="x", labelrotation=45)
    ax.set_title(title)
    ax.set_ylabel("Frequency")
    fig.tight_layout()
    plt.show()

# One chart per stance, built from the top-word lists computed earlier.
plot_freq(freq_data=pro_freq, title="Top Words in Pro-Suffrage Texts")
plot_freq(freq_data=anti_freq, title="Top Words in Anti-Suffrage Texts")