In [2]:
def get_top_words(bigram_file, n):
    """
    Return the n lowest- and n highest-scoring phrases from a bigram score file.

    The file is read as latin-1 text with one record per line in the format:
    phrase|score
    The first line is skipped as a header/sample line and blank lines are
    ignored. Each phrase is lowercased and its spaces are replaced with
    underscores. The score is taken from the text after the LAST '|' so a
    phrase that itself contains '|' is still parsed.

    Args:
        bigram_file: Path to the phrase score file.
        n: Number of phrases to return from each end of the ranking.

    Returns:
        Two lists:
          - democrat_words: top n phrases with the most negative scores,
            most negative first (associated with Democrats)
          - republican_words: top n phrases with the most positive scores,
            most positive first (associated with Republicans)
        Each list is shorter than n when the file has fewer than n records,
        and both are empty for a file with no data lines.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank data line has no '|' separator or its
            score is not parseable as a float.
    """
    phrases = []
    with open(bigram_file, 'r', encoding='latin-1') as f:
        # Skip header/sample line; the default keeps a completely empty file
        # from raising StopIteration.
        next(f, None)
        for line in f:
            line = line.strip()
            if not line:
                continue
            # Split once from the right: the score is always the final field.
            phrase, score_str = line.rsplit('|', 1)
            score = float(score_str)
            # Convert spaces to underscores and lowercase the phrase.
            phrases.append((phrase.lower().replace(" ", "_"), score))

    # Both sorts are stable, so phrases with equal scores keep file order.
    ascending = sorted(phrases, key=lambda item: item[1])
    descending = sorted(phrases, key=lambda item: item[1], reverse=True)
    democrat_words = [phrase for phrase, _ in ascending[:n]]
    republican_words = [phrase for phrase, _ in descending[:n]]
    return democrat_words, republican_words

# Cutoffs to report for every session (currently just the top 100).
N = [100]

# Sessions 97 through 114; the upper bound of range() is exclusive.
for session in range(97, 115):
    # Zero-pad to three digits to match the file names: "097", ..., "114".
    session_str = format(session, "03d")

    bigram_file = (
        "C:\\Users\\Maxfield Evers\\Desktop\\Thesis\\Data\\gentzkow_work"
        "\\phrase_partisanship\\partisan_phrases_" + session_str + ".txt"
    )

    for n in N:
        n_str = str(n)
        # Fetch both ranked lists for this session, then drop repeated
        # phrases (first occurrence wins, order kept) and cap each at n.
        dem_words, rep_words = (
            list(dict.fromkeys(ranked))[:n]
            for ranked in get_top_words(bigram_file, n)
        )
        print(f"Top {n_str} Democrat words for session {session_str}:", dem_words)
        print(f"Top {n_str} Republican words for session {session_str}:", rep_words)

Top 100 Democrat words for session 097: ['interest_rate', 'high_interest', 'feder_reserv', 'human_right', 'administr_propos', 'el_salvador', 'reserv_board', 'school_lunch', 'oil_compani', 'nuclear_war', 'unemploy_rate', 'minimum_benefit', 'base_mode', 'senior_citizen', 'million_american', 'secur_benefit', 'mx_missil', 'lunch_program', 'saudi_arabia', 'defens_spend', 'nurs_home', 'oil_gas', 'econom_polici', 'farm_incom', 'safeti_net', 'increas_defens', 'budget_propos', 'econom_assumpt', 'make_sens', 'great_depress', 'monetari_polici', 'billion_dollar', 'world_war', 'equal_right', 'medic_care', 'militari_budget', 'habea_corpus', 'vietnam_veteran', 'organ_crime', 'persian_gulf', 'peopl_countri', 'tax_expenditur', 'militari_aid', 'dens_pack', 'war_ii', 'nation_guard', 'militari_spend', 'billion_deficit', 'defens_budget', 'nuclear_arm', 'unemploy_benefit', 'administr_budget', 'arm_control', 'wast_fraud', 'loan_program', 'sinc_great', 'oil_industri', 'tax_break', 'farmer_home', 'hazard_wast'