In [7]:
!conda install pandas nltk spacy -y
!conda install numpy<2 -y
!conda install h5py -y
!conda install spacy -y
!python -m spacy download en_core_web_sm

Channels:
 - defaults
 - conda-forge
Platform: osx-arm64
Collecting package metadata (repodata.json): done
Solving environment: done

# All requested packages already installed.

zsh:1: no such file or directory: 2
Channels:
 - defaults
 - conda-forge
Platform: osx-arm64
Collecting package metadata (repodata.json): done
Solving environment: done

# All requested packages already installed.

Channels:
 - defaults
 - conda-forge
Platform: osx-arm64
Collecting package metadata (repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_w

In [17]:
import spacy
import pandas as pd
from collections import Counter

# Load spaCy model (adjust to the one you're using)
nlp = spacy.load('en_core_web_sm')

# Increase max_length to handle large texts
nlp.max_length = 2000000

class TrumpSpeechCooccurrenceAnalyzer:
    def __init__(self, trump_speeches_file):
        self.trump_speeches_file = trump_speeches_file
        self.target_words = [
            # Crisis Terms
            "emergency", "threat", "disaster", "broken system", 
            "danger", "urgency", "peril", "instability", "at risk", 
            "system failure", "terror",
            
            # Optmistic Terms
            "progress", "prosperity", "innovation", "future", "hope", 
            "advancement", "vision", "american", "national", "pride", 
            "proud", "constitution", "constitutional", "liberty", 
            "america", "justice", "freedom", "values",
            
            # Equality Terms
            "inequality", "equity", "integrity", "equality", 
            "compassion", "civil liberties", "civil rights",
            
            # Economic Terms
            "jobs", "cost", "taxes", "cost-effective", "rising costs", 
            "inflation", "wealth", "financial", "business", 
            "unemployment", "tax", "economy", "economic", 
            "green new deal", "climate", "emission",
            
            # Healthcare and Social Terms
            "health", "health care", 
            
            # Border and Security Terms
            "security", "borders", "immigration", "asylum", 
            "deportation", "undocumented", "fencing", 
            "detention center", "border", "military", "criminal", "arms"
            
            # Political Institutions
            "supreme court", "senate", "bill"
        ] # Example target words
        self.stop_words = set(nlp.Defaults.stop_words)

    def preprocess_text(self, text):
        text = text.lower()
        doc = nlp(text)
        tokens = [
            token.lemma_ for token in doc
            if (token.lemma_ not in self.stop_words 
                and len(token.lemma_) > 2 
                and token.is_alpha)
        ]
        return tokens

    def find_cooccurrences(self, speeches, target_word, window_size=5):
        cooccurrences = Counter()

        for speech in speeches:
            tokens = self.preprocess_text(speech)
            target_indices = [
                i for i, token in enumerate(tokens)
                if token == target_word.lower() or target_word.lower() in token
            ]

            # Loop through each target word index and look for co-occurrences within the window
            for idx in target_indices:
                start = max(0, idx - window_size)
                end = min(len(tokens), idx + window_size + 1)
                window = tokens[start:end]

                for word in window:
                    if word != target_word.lower():
                        cooccurrences[word] += 1
        return cooccurrences

    def analyze_trump_speeches(self):
        # Load the Trump speeches data
        trump_speeches = pd.read_csv(self.trump_speeches_file)

        results = {}
        
        # Analyze Trump speeches for each target word
        for target in self.target_words:
            print(f"Analyzing co-occurrences for {target} in Trump's speeches...")
            results[target] = self.find_cooccurrences(
                trump_speeches['RawText'].tolist(), target
            )

        return results

    def generate_report(self, results):
        # This is a simple report generation method
        report = {}
        for target, cooccurrences in results.items():
            # Sort by the most common co-occurrences
            report[target] = cooccurrences.most_common(10)
        return report


# Run the analysis for Trump
def main_trump():
    analyzer = TrumpSpeechCooccurrenceAnalyzer('trump_speeches.csv')

    # Perform analysis
    trump_results = analyzer.analyze_trump_speeches()

    # Generate and save report
    trump_report = analyzer.generate_report(trump_results)
    print(trump_report)

if __name__ == "__main__":
    main_trump()

Analyzing co-occurrences for emergency in Trump's speeches...
Analyzing co-occurrences for threat in Trump's speeches...
Analyzing co-occurrences for disaster in Trump's speeches...
Analyzing co-occurrences for broken system in Trump's speeches...
Analyzing co-occurrences for danger in Trump's speeches...
Analyzing co-occurrences for urgency in Trump's speeches...
Analyzing co-occurrences for peril in Trump's speeches...
Analyzing co-occurrences for instability in Trump's speeches...
Analyzing co-occurrences for at risk in Trump's speeches...
Analyzing co-occurrences for system failure in Trump's speeches...
Analyzing co-occurrences for terror in Trump's speeches...
Analyzing co-occurrences for progress in Trump's speeches...
Analyzing co-occurrences for prosperity in Trump's speeches...
Analyzing co-occurrences for innovation in Trump's speeches...
Analyzing co-occurrences for future in Trump's speeches...
Analyzing co-occurrences for hope in Trump's speeches...
Analyzing co-occurrenc

In [19]:
import csv

# Example data structure
data = {'emergency': [('national', 143), ('declare', 98), ('federal', 75), ('act', 62), ('order', 61), ('include', 52), ('use', 48), ('president', 44), ('executive', 43), ('fema', 43)], 'threat': [('threaten', 204), ('security', 58), ('foreign', 57), ('national', 46), ('people', 46), ('terrorist', 45), ('citizen', 45), ('america', 41), ('united', 40), ('continue', 35)], 'disaster': [('know', 72), ('nafta', 64), ('country', 60), ('president', 60), ('state', 58), ('want', 57), ('trump', 52), ('major', 50), ('deal', 48), ('china', 43)], 'broken system': [], 'danger': [('dangerous', 343), ('people', 72), ('country', 61), ('know', 54), ('world', 42), ('criminal', 40), ('great', 38), ('city', 35), ('think', 31), ('thing', 31)], 'urgency': [('quickly', 2), ('ask', 2), ('choose', 2), ('greatness', 2), ('year', 2), ('administration', 2), ('historic', 2), ('speed', 2), ('confront', 2), ('problem', 2)], 'peril': [('perilous', 7), ('american', 5), ('new', 5), ('danger', 2), ('ignore', 2), ('grave', 2), ('broad', 2), ('bright', 2), ('star', 2), ('fight', 2)], 'instability': [('people', 2), ('public', 2), ('terrorize', 1), ('innocent', 1), ('community', 1), ('border', 1), ('spread', 1), ('hemisphere', 1), ('week', 1), ('ago', 1)], 'at risk': [], 'system failure': [], 'terror': [('terrorist', 397), ('world', 121), ('terrorism', 120), ('number', 78), ('applause', 73), ('kill', 70), ('soleimani', 66), ('radical', 61), ('american', 60), ('people', 58)], 'progress': [('lot', 80), ('tremendous', 53), ('thank', 51), ('great', 45), ('think', 38), ('year', 37), ('president', 36), ('know', 35), ('country', 26), ('applause', 23)], 'prosperity': [('security', 34), ('american', 32), ('nation', 30), ('record', 26), ('poverty', 25), ('return', 25), ('work', 24), ('peace', 23), ('economic', 22), ('future', 22)], 'innovation': [('american', 12), ('technology', 9), ('future', 6), ('act', 5), ('states', 5), ('administration', 5), ('governor', 5), ('like', 4), ('pro', 4), ('growth', 4)], 'future': [('great', 151), ('america', 98), ('people', 87), ('american', 85), ('applause', 82), ('country', 77), ('americans', 76), ('bright', 75), ('nation', 72), ('president', 64)], 'hope': [('hopefully', 328), ('great', 170), ('people', 152), ('know', 147), ('think', 127), ('president', 123), ('use', 113), ('right', 105), ('country', 100), ('want', 97)], 'advancement': [('american', 3), ('health', 3), ('education', 2), ('citizen', 2), ('incredible', 2), ('people', 2), ('disparity', 2), ('thrive', 2), ('educational', 2), ('come', 2)], 'vision': [('television', 333), ('provision', 235), ('division', 121), ('order', 98), ('great', 96), ('watch', 84), ('know', 80), ('president', 79), ('section', 70), ('people', 67)], 'american': [('americans', 2336), ('people', 963), ('great', 773), ('african', 768), ('applause', 758), ('country', 639), ('hispanic', 604), ('year', 544), ('president', 495), ('want', 489)], 'national': [('security', 286), ('president', 185), ('international', 181), ('emergency', 175), ('united', 149), ('guard', 142), ('states', 127), ('act', 124), ('applause', 123), ('year', 111)], 'pride': [('american', 79), ('heart', 54), ('man', 45), ('new', 43), ('time', 42), ('york', 39), ('houston', 37), ('stand', 36), ('proud', 35), ('detroit', 34)], 'proud': [('america', 409), ('great', 245), ('american', 239), ('thank', 222), ('applause', 210), ('proudly', 193), ('country', 183), ('know', 180), ('people', 142), ('president', 140)], 'constitution': [('president', 116), ('states', 111), ('united', 108), ('law', 105), ('constitutional', 100), ('authority', 84), ('america', 82), ('judge', 68), ('protect', 57), ('defend', 51)], 'constitutional': [('unconstitutional', 39), ('president', 38), ('authority', 27), ('provision', 24), ('right', 21), ('act', 20), ('law', 16), ('consistent', 14), ('treat', 14), ('include', 14)], 'liberty': [('religious', 153), ('right', 132), ('free', 85), ('defend', 82), ('speech', 77), ('bear', 76), ('arm', 65), ('life', 62), ('applause', 54), ('freedom', 52)], 'america': [('american', 6777), ('americans', 2422), ('great', 1980), ('applause', 1622), ('people', 1407), ('president', 1149), ('thank', 1049), ('country', 979), ('year', 915), ('african', 783)], 'justice': [('court', 241), ('supreme', 196), ('criminal', 181), ('reform', 168), ('great', 134), ('applause', 110), ('law', 94), ('department', 93), ('president', 91), ('thank', 88)], 'freedom': [('american', 105), ('applause', 90), ('people', 77), ('right', 76), ('stand', 70), ('great', 68), ('defend', 64), ('president', 59), ('america', 56), ('religious', 56)], 'values': [], 'inequality': [('tax', 15), ('income', 10), ('look', 6), ('republican', 4), ('wage', 3), ('year', 3), ('fall', 2), ('close', 2), ('million', 2), ('pay', 2)], 'equity': [('benefit', 43), ('substantive', 43), ('procedural', 43), ('enforceable', 43), ('law', 43), ('party', 43), ('united', 43), ('states', 43), ('department', 43), ('agency', 43)], 'integrity': [('election', 34), ('nation', 10), ('vote', 10), ('american', 9), ('goal', 8), ('voter', 8), ('ensure', 8), ('important', 8), ('system', 7), ('applause', 7)], 'equality': [('justice', 22), ('inequality', 20), ('tax', 15), ('freedom', 11), ('americans', 10), ('income', 10), ('party', 10), ('fight', 8), ('opportunity', 7), ('country', 7)], 'compassion': [('people', 17), ('compassionate', 15), ('thank', 13), ('want', 13), ('love', 10), ('woman', 8), ('american', 8), ('life', 8), ('care', 7), ('president', 7)], 'civil liberties': [], 'civil rights': [], 'jobs': [('big', 2), ('care', 1), ('system', 1), ('displace', 1), ('million', 1), ('american', 1), ('jobseeker', 1), ('biden', 1), ('plan', 1), ('institute', 1)], 'cost': [('year', 87), ('drug', 83), ('know', 80), ('billion', 69), ('people', 60), ('dollar', 54), ('time', 53), ('want', 51), ('million', 50), ('money', 46)], 'taxes': [], 'cost-effective': [], 'rising costs': [], 'inflation': [('low', 6), ('percent', 5), ('rate', 5), ('number', 5), ('growth', 5), ('dollar', 5), ('year', 5), ('leave', 4), ('interest', 4), ('applause', 4)], 'wealth': [('america', 288), ('wealthy', 235), ('country', 103), ('commonwealth', 79), ('strong', 73), ('people', 62), ('pennsylvania', 56), ('want', 46), ('come', 45), ('proud', 44)], 'financial': [('financially', 30), ('support', 18), ('provide', 16), ('think', 14), ('service', 14), ('institution', 12), ('assistance', 11), ('country', 10), ('president', 10), ('want', 10)], 'business': [('small', 293), ('great', 179), ('know', 174), ('people', 174), ('want', 118), ('job', 112), ('big', 102), ('lot', 97), ('country', 93), ('think', 91)], 'unemployment': [('low', 290), ('american', 267), ('rate', 211), ('history', 153), ('hispanic', 130), ('number', 127), ('african', 126), ('good', 119), ('reach', 118), ('asian', 107)], 'tax': [('taxis', 765), ('cut', 649), ('raise', 313), ('want', 312), ('regulation', 214), ('country', 167), ('big', 167), ('history', 160), ('year', 150), ('know', 132)], 'economy': [('great', 335), ('good', 187), ('world', 185), ('history', 171), ('country', 157), ('job', 125), ('people', 115), ('strong', 113), ('president', 113), ('build', 112)], 'economic': [('year', 204), ('country', 151), ('great', 136), ('economically', 78), ('president', 69), ('world', 68), ('history', 68), ('job', 52), ('good', 52), ('american', 52)], 'green new deal': [], 'climate': [('paris', 63), ('accord', 48), ('change', 20), ('country', 16), ('want', 14), ('president', 13), ('sided', 11), ('disaster', 11), ('applause', 10), ('job', 9)], 'emission': [('carbon', 16), ('zero', 16), ('net', 10), ('year', 8), ('mandate', 7), ('country', 6), ('think', 5), ('reduce', 5), ('want', 4), ('energy', 4)], 'health': [('healthcare', 506), ('care', 408), ('people', 196), ('want', 177), ('free', 154), ('million', 136), ('service', 125), ('public', 117), ('great', 114), ('insurance', 113)], 'health care': [], 'security': [('national', 279), ('social', 238), ('border', 216), ('want', 154), ('protect', 143), ('homeland', 124), ('united', 121), ('president', 117), ('states', 107), ('medicare', 107)], 'borders': [], 'immigration': [('illegal', 100), ('system', 91), ('border', 57), ('law', 48), ('want', 45), ('people', 43), ('country', 42), ('american', 42), ('plan', 40), ('safe', 27)], 'asylum': [('country', 12), ('people', 9), ('loophole', 9), ('release', 8), ('applause', 8), ('fraud', 8), ('end', 7), ('claim', 7), ('catch', 7), ('member', 7)], 'deportation': [('country', 5), ('want', 5), ('border', 5), ('criminal', 3), ('release', 3), ('city', 3), ('crazy', 3), ('bernie', 3), ('detention', 3), ('biden', 3)], 'undocumented': [('immigrant', 5), ('crime', 4), ('crossing', 3), ('low', 3), ('commit', 3), ('president', 2), ('level', 2), ('massive', 2), ('number', 2), ('criminal', 2)], 'fencing': [('wall', 6), ('new', 3), ('outside', 3), ('know', 2), ('especially', 2), ('powerful', 2), ('money', 1), ('properly', 1), ('lot', 1), ('brand', 1)], 'detention center': [], 'border': [('want', 520), ('open', 356), ('wall', 325), ('country', 277), ('southern', 273), ('patrol', 252), ('people', 245), ('come', 214), ('security', 213), ('secure', 200)], 'military': [('great', 273), ('rebuild', 211), ('know', 172), ('united', 166), ('states', 160), ('year', 156), ('people', 153), ('applause', 151), ('want', 150), ('country', 148)], 'criminal': [('alien', 187), ('justice', 176), ('reform', 160), ('country', 94), ('release', 91), ('people', 88), ('year', 80), ('want', 78), ('violent', 70), ('sanctuary', 70)], 'armssupreme court': [], 'senate': [('house', 113), ('thank', 56), ('great', 53), ('president', 44), ('vote', 44), ('people', 43), ('run', 40), ('win', 40), ('think', 36), ('know', 36)], 'bill': [('billion', 3278), ('dollar', 783), ('year', 621), ('china', 493), ('pay', 391), ('know', 307), ('farmer', 302), ('want', 260), ('deal', 241), ('money', 221)]}

# File path to save the CSV
output_file = 'trump_keywords.csv'

# Write the data to a CSV file
with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    # Write the header
    writer.writerow(['Category', 'Word', 'Frequency'])
    # Write the data
    for category, words in data.items():
        for word, frequency in words:
            writer.writerow([category, word, frequency])

print(f"Keywords successfully saved to {output_file}.")

Keywords successfully saved to trump_keywords.csv.
