In [3]:
import datetime
import random
import requests
import time
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from collections import defaultdict
import pandas as pd
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import nltk
import spacy
from collections import Counter
from string import punctuation

from pandas import json_normalize

## Scrape Data from Fox News and CNN

Below we create a function that scrapes the articles of the day for both Fox News and CNN and creates a pandas data frame using the content that is pulled from the articles. This function pulls the daily articles, and we ran it every day for five consecutive weekdays to get a full business week's worth of data for our topic modeling. This function is here for purposes of showing our methods, but we will ultimately construct the data frame to be cleaned in the cell beneath it by concatenating the five CSV files that were pulled. 

In [4]:
def return_text_if_not_none(element):
    return element.get_text(separator=' ', strip=True) if element else None

current_year = datetime.datetime.now().year

source = {'cnn': "https://www.cnn.com/politics",
          'foxnews': "https://www.foxnews.com/politics"}

news_pages = defaultdict(list)  

for source_name, source_page in source.items():
    
    # request the page and sleep
    r = requests.get(source_page)
    
    time.sleep(5 + 10 * random.random())
    
    soup = BeautifulSoup(r.content, 'html.parser')
    
    links = soup.find_all('a', href=True)
    
    for link in links:
        
        href = link['href']
        # Convert relative URLs to absolute URLs
        full_url = urljoin(source_page, href)
        
        # Check if the link contains "/politics/" and does not contain "/gallery/"
        if "/politics/" in full_url and "/gallery/" not in full_url:
            
            # Check if it's CNN and the URL has the format 'cnn.com/{}/'
            if source_name == 'cnn' and f"cnn.com/{current_year}/" in full_url:
                
                # Fetch the news content
                content_r = requests.get(full_url)
                
                content_soup = BeautifulSoup(content_r.content, 'html.parser')
                
                article_content = return_text_if_not_none(content_soup.find('div', {'class': 'article__content'}))
                
                news_pages[source_name].append({'url': full_url, 'content': article_content})
                
            # Check if it's FOXNEWS and the URL does not contain "/category/"
            elif source_name == 'foxnews' and "/category/" not in full_url:
                
                # Fetch the news content
                content_r = requests.get(full_url)
                
                content_soup = BeautifulSoup(content_r.content, 'html.parser')
                
                article_content = return_text_if_not_none(content_soup.find('div', {'class': 'article-content'}))
                
                news_pages[source_name].append({'url': full_url, 'content': article_content})
# Create a DataFrame

sample_data = pd.DataFrame([(source_name, item['url'], item['content']) for source_name, items in 
                   news_pages.items() for item in items], columns=['source', 'url', 'content'])

sample_data = sample_data.drop_duplicates()


Now that we have seen how we pulled the data, let us dig into the data that we pulled from 02/12/2024 through 02/16/2024 

In [5]:
# File paths
files = ['MSADS509_News_Project_Dataset/news_0212.csv', 'MSADS509_News_Project_Dataset/news_0213.csv', 
         'MSADS509_News_Project_Dataset/news_0214.csv', 'MSADS509_News_Project_Dataset/news_0215.csv',
        'MSADS509_News_Project_Dataset/news_0216.csv']  # Add paths to your files

# Read each CSV file into a DataFrame and store them in a list
file_dfs = [pd.read_csv(file) for file in files]

# Concatenate all DataFrames into one
df = pd.concat(file_dfs, ignore_index=True)


### Checking the Results of the Web Scraping

Confirming that the CNN content was scraped successfully

In [6]:
df[df['source']=='cnn'].head()

Unnamed: 0,source,url,content
0,cnn,https://www.cnn.com/2024/02/12/politics/cq-bro...,CNN — Chairman of the Joint Chiefs of Staff Ge...
1,cnn,https://www.cnn.com/2024/02/12/politics/trump-...,CNN — Former President Donald Trump has endors...
2,cnn,https://www.cnn.com/2024/02/12/politics/senate...,The Senate is inching closer to final passage ...
3,cnn,https://www.cnn.com/2024/02/12/politics/bidens...,Washington CNN — President Joe Biden and King ...
4,cnn,https://www.cnn.com/2024/02/12/politics/trump-...,CNN — Former President Donald Trump on Monday ...


Confirming that the Fox News content was scraped successfully

In [7]:
df[df['source']=='foxnews'].head()

Unnamed: 0,source,url,content
47,foxnews,https://www.foxnews.com/politics/biden-takes-j...,close Video Biden takes jab at special counsel...
48,foxnews,https://www.foxnews.com/politics/rfk-jr-apolog...,close Video RFK Jr. drops surprise campaign ad...
49,foxnews,https://www.foxnews.com/politics/bidens-upcomi...,close Video Biden won't take cognitive test in...
50,foxnews,https://www.foxnews.com/politics/kamala-harris...,close Video Marc Thiessen questions whether Bi...
51,foxnews,https://www.foxnews.com/politics/climate-activ...,close Video Biden’s export suspension on lique...


Let's take a full look at one of the rows for both CNN and Fox to see if there are any obvious steps that stand out that we want to clean up.

In [8]:
# Set pandas to display the full content of a column
# We will do this only temporarily and reset it after
# testing one column
pd.set_option('display.max_colwidth', None)

In [9]:
# Display one row from CNN
print("CNN Article Content:")
print(df[df['source'] == 'cnn'].iloc[0])

CNN Article Content:
source                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             

In [10]:
# Display one row from CNN
print("Fox Article Content:")
print(df[df['source'] == 'foxnews'].iloc[0])

Fox Article Content:
source                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             

In [11]:
# Reset the columns so that we go back to truncating the "content" column
pd.reset_option('display.max_colwidth')

## Data Cleaning, Tokenizing, and Normalizing

### Removing Unwanted Prefixes

We see from looking at the first 5 rows of the CNN and Fox records that the content of the articles starts with "CNN --" or "(city name) CNN" for CNN and "close Video" for Fox. Since this is noise in our attempt to topic model, we will remove this part of the content body using the function below.

In [12]:
# Function to remove prefixes

def remove_prefix(row):
    # Pattern to match "CNN —" if it includes a city's name before it
    cnn_pattern = r'^(?:[\w\s]+\s)?CNN — '
    
    # For CNN, remove pattern if it matches
    if row['source'] == 'cnn':
        return re.sub(cnn_pattern, '', row['content'])
    
    # For Fox News, remove "close Video " prefix
    elif row['source'] == 'foxnews' and row['content'].startswith('close Video '):
        return row['content'][12:]
    
    # Return original content if no prefix to remove
    else:
        return row['content']

df['content'] = df.apply(remove_prefix, axis=1)

In [13]:
# Checking to see how CNN looks without the prefixes
df[df['source']=='cnn']

Unnamed: 0,source,url,content
0,cnn,https://www.cnn.com/2024/02/12/politics/cq-bro...,Chairman of the Joint Chiefs of Staff Gen. CQ ...
1,cnn,https://www.cnn.com/2024/02/12/politics/trump-...,Former President Donald Trump has endorsed Nor...
2,cnn,https://www.cnn.com/2024/02/12/politics/senate...,The Senate is inching closer to final passage ...
3,cnn,https://www.cnn.com/2024/02/12/politics/bidens...,President Joe Biden and King Abdullah II of Jo...
4,cnn,https://www.cnn.com/2024/02/12/politics/trump-...,Former President Donald Trump on Monday asked ...
...,...,...,...
328,cnn,https://www.cnn.com/2024/02/15/politics/emhoff...,Second gentleman Doug Emhoff slipped into New ...
329,cnn,https://www.cnn.com/2024/02/15/politics/trump-...,Former President Donald Trump made his final p...
330,cnn,https://www.cnn.com/2024/02/15/politics/white-...,The White House wrote to Attorney General Merr...
331,cnn,https://www.cnn.com/2024/02/15/politics/mike-t...,House Intelligence Chairman Mike Turner is fac...


In [14]:
# Checking to see how Fox looks without the prefixes
df[df['source']=='foxnews']

Unnamed: 0,source,url,content
47,foxnews,https://www.foxnews.com/politics/biden-takes-j...,Biden takes jab at special counsel report with...
48,foxnews,https://www.foxnews.com/politics/rfk-jr-apolog...,RFK Jr. drops surprise campaign ad during Supe...
49,foxnews,https://www.foxnews.com/politics/bidens-upcomi...,Biden won't take cognitive test in physical ex...
50,foxnews,https://www.foxnews.com/politics/kamala-harris...,Marc Thiessen questions whether Biden is capab...
51,foxnews,https://www.foxnews.com/politics/climate-activ...,Biden’s export suspension on liquefied natural...
...,...,...,...
348,foxnews,https://www.foxnews.com/politics/fox-news-poli...,Welcome to Fox News’ Politics newsletter with ...
349,foxnews,https://www.foxnews.com/politics/fox-news-poli...,Welcome to Fox News’ Politics newsletter with ...
350,foxnews,https://www.foxnews.com/politics/fox-news-poli...,Welcome to Fox News’ Politics newsletter with ...
351,foxnews,https://www.foxnews.com/politics/democrats-win...,Dems flipping NY House seat threatens GOP majo...


### Remove Unwanted First Sentences

We see that some of the CNN articles begin with the following sentences: "A version of this story appeared in CNN’s What Matters newsletter. To get it in your inbox, sign up for free here." We also see that some of the Fox articles begin with the phrase "Welcome to Fox News" in the first sentence. To remove this noise, we will write a function below that handles it.

In [15]:
def remove_first_sentence(row):
    # Split the content into sentences based on '.', '?', and '!'
    sentences = re.split(r'(?<=[.!?]) +', row['content'])
    
    # Initialize updated_content with the original content in case none of the conditions apply
    updated_content = row['content']
    
    if len(sentences) > 1:  # Check if there's more than one sentence
        first_sentence = sentences[0]  # Get the first sentence
        
        if 'Welcome to Fox News' in first_sentence:
            # Join all sentences except the first one. We start at 2 because the word "D.C." as in "Washington D.C."
            updated_content = ' '.join(sentences[2:])
            
        elif 'A version of this story appeared' in first_sentence:
            # Removing 2 sentences since CNN includes 2 unwanted sentences in this instance.
            updated_content = ' '.join(sentences[2:])
            
        elif first_sentence.strip().startswith("What's Happening?"):
            # Directly check if the first sentence is exactly "What's Happening?" and remove it if so
            updated_content = ' '.join(sentences[1:])

    # Remove sentences containing the phrase 'CLICK HERE TO GET THE FOX NEWS APP'
    updated_sentences = [sentence for sentence in sentences if 'FOX NEWS APP' not in sentence]
    updated_sentences = [sentence for sentence in sentences if 'Foxnews.com' not in sentence]
    updated_sentences = [sentence for sentence in sentences if 'Getty Images' not in sentence]
    updated_sentences = [sentence for sentence in sentences if 'CLICK HERE TO GET THE FOX NEWS APP' not in sentence]

    # Join the updated sentences back into content
    updated_content = ' '.join(updated_sentences)
            
    return updated_content

df['content'] = df.apply(remove_first_sentence, axis=1)

Let's check to see how our content looks now without the unwanted first and second sentences found in some of the articles.

In [16]:
# Checking to see how CNN looks without the unwanted first sentences.
df[df['source']=='cnn']

Unnamed: 0,source,url,content
0,cnn,https://www.cnn.com/2024/02/12/politics/cq-bro...,Chairman of the Joint Chiefs of Staff Gen. CQ ...
1,cnn,https://www.cnn.com/2024/02/12/politics/trump-...,Former President Donald Trump has endorsed Nor...
2,cnn,https://www.cnn.com/2024/02/12/politics/senate...,The Senate is inching closer to final passage ...
3,cnn,https://www.cnn.com/2024/02/12/politics/bidens...,President Joe Biden and King Abdullah II of Jo...
4,cnn,https://www.cnn.com/2024/02/12/politics/trump-...,Former President Donald Trump on Monday asked ...
...,...,...,...
328,cnn,https://www.cnn.com/2024/02/15/politics/emhoff...,Second gentleman Doug Emhoff slipped into New ...
329,cnn,https://www.cnn.com/2024/02/15/politics/trump-...,Former President Donald Trump made his final p...
330,cnn,https://www.cnn.com/2024/02/15/politics/white-...,The White House wrote to Attorney General Merr...
331,cnn,https://www.cnn.com/2024/02/15/politics/mike-t...,House Intelligence Chairman Mike Turner is fac...


In [17]:
# Checking to see how Fox looks without the unwanted first sentences.
df[df['source']=='foxnews']

Unnamed: 0,source,url,content
47,foxnews,https://www.foxnews.com/politics/biden-takes-j...,Biden takes jab at special counsel report with...
48,foxnews,https://www.foxnews.com/politics/rfk-jr-apolog...,RFK Jr. drops surprise campaign ad during Supe...
49,foxnews,https://www.foxnews.com/politics/bidens-upcomi...,Biden won't take cognitive test in physical ex...
50,foxnews,https://www.foxnews.com/politics/kamala-harris...,Marc Thiessen questions whether Biden is capab...
51,foxnews,https://www.foxnews.com/politics/climate-activ...,Biden’s export suspension on liquefied natural...
...,...,...,...
348,foxnews,https://www.foxnews.com/politics/fox-news-poli...,Welcome to Fox News’ Politics newsletter with ...
349,foxnews,https://www.foxnews.com/politics/fox-news-poli...,Welcome to Fox News’ Politics newsletter with ...
350,foxnews,https://www.foxnews.com/politics/fox-news-poli...,Welcome to Fox News’ Politics newsletter with ...
351,foxnews,https://www.foxnews.com/politics/democrats-win...,Dems flipping NY House seat threatens GOP majo...


We see that after removing some of the first sentence filler in the CNN content, we have more instances of "CNN --" to begin the content. We will run the same function again to remove this. 

In [18]:
# Running the remove_prefix function again
df['content'] = df.apply(remove_prefix, axis=1)

In [19]:
# Checking to see that we got rid of the "CNN --" prefixes again
df[df['source']=='cnn']

Unnamed: 0,source,url,content
0,cnn,https://www.cnn.com/2024/02/12/politics/cq-bro...,Chairman of the Joint Chiefs of Staff Gen. CQ ...
1,cnn,https://www.cnn.com/2024/02/12/politics/trump-...,Former President Donald Trump has endorsed Nor...
2,cnn,https://www.cnn.com/2024/02/12/politics/senate...,The Senate is inching closer to final passage ...
3,cnn,https://www.cnn.com/2024/02/12/politics/bidens...,President Joe Biden and King Abdullah II of Jo...
4,cnn,https://www.cnn.com/2024/02/12/politics/trump-...,Former President Donald Trump on Monday asked ...
...,...,...,...
328,cnn,https://www.cnn.com/2024/02/15/politics/emhoff...,Second gentleman Doug Emhoff slipped into New ...
329,cnn,https://www.cnn.com/2024/02/15/politics/trump-...,Former President Donald Trump made his final p...
330,cnn,https://www.cnn.com/2024/02/15/politics/white-...,The White House wrote to Attorney General Merr...
331,cnn,https://www.cnn.com/2024/02/15/politics/mike-t...,House Intelligence Chairman Mike Turner is fac...


Next, we look at the end of the articles as the content will often end with contributing author information or other material that is not relevant to the topic of the body content. We show the dataframe ending previews and then write a function to remove last sentences if they contain information that is not relevant. 

In [20]:
# Set pandas to display the full content of a column
pd.set_option('display.max_colwidth', None)

In [21]:
# Create a new column 'content_end_preview' to show the last part of the content
df['content_end_preview'] = df['content'].apply(lambda x: x[-500:])

In [22]:
# Checking the end of CNN articles
df[['source', 'url', 'content_end_preview']] [df['source'] == 'cnn']

Unnamed: 0,source,url,content_end_preview
0,cnn,https://www.cnn.com/2024/02/12/politics/cq-brown-nato-trump/index.html,"eir objectives,” the chairman said. “At the same time, not looking for a broader conflict with the United States.” There have been at least 170 attacks on US and coalition forces in Iraq, Syria and Jordan since October 17. The Pentagon said Monday that those attacks have resulted in 186 wounded or killed in action — including 130 traumatic brain injuries. Three US soldiers were killed in a drone attack in January on a US outpost in Jordan. This story has been updated with additional information."
1,cnn,https://www.cnn.com/2024/02/12/politics/trump-endorse-michael-whatley-lara-trump-rnc/index.html,"great job in his home state of North Carolina, and is committed to election integrity, which we must have to keep fraud out of our election so it can’t be stolen,” Trump said in a statement. “My very talented daughter-in-law, Lara Trump, has agreed to run as the RNC Co-Chair. Lara is an extremely talented communicator and is dedicated to all that MAGA stands for. She has told me she wants to accept this challenge and would be GREAT!” he also said. This is a developing story and will be updated."
2,cnn,https://www.cnn.com/2024/02/12/politics/senate-foreign-aid-bill-ukraine/index.html,"y be part of the bill, but went on to reject the bipartisan deal amid forceful attacks on the measure by Trump and top House Republicans. Over the weekend, Trump also wrote on Truth Social that the US should stop providing foreign aid unless it is structured as a loan, another sign of the political pressure Republicans continue to face amid efforts to send funding to US allies. This story and headline have been updated with additional developments. CNN’s Kate Sullivan contributed to this report."
3,cnn,https://www.cnn.com/2024/02/12/politics/bidens-meeting-with-jordanian-king-comes-at-flashpoint-in-israel-hamas-war/index.html,"ions toward an agreement would continue despite the Israeli prime minister’s comments, which Blinken said were referencing the “absolute non-starters” in the proposal. The full Hamas response proposes three phases, each lasting 45 days, including the withdrawal of Israeli troops from Gaza, a massive humanitarian effort, and freedom of movement for people throughout Gaza, according to a copy obtained by CNN. CNN’s MJ Lee, Priscilla Alvarez, Betsy Klein and Kevin Liptak contributed to this report."
4,cnn,https://www.cnn.com/2024/02/12/politics/trump-supreme-court-immunity-filing/index.html,"nist ban.” The court may have to decide how it wants to handle the former president’s immunity claim at the same time it is drafting an opinion in the ballot case. Together, the cases have thrust the court into the middle of this year’s presidential election in a way it has largely managed to avoid since its decision in Bush v. Gore effectively decided the 2000 election between former President George W. Bush and former Vice President Al Gore. This story has been updated with additional details."
...,...,...,...
328,cnn,https://www.cnn.com/2024/02/15/politics/emhoff-meeting-jewish-arab-youth-gaza/index.html,"out the need to keep building coalitions and executing on the national strategy to combat antisemitism and the coming strategy on combating Islamophobia, the person in the room said. An aide to the US mission to the United Nations explained that the names of the participants were kept private to protect sensitive discussions. “Obviously everyone didn’t agree on policy, but everyone agreed on the need to combat hate and to work against dehumanization,” said another person familiar with the event."
329,cnn,https://www.cnn.com/2024/02/15/politics/trump-makes-final-pitch-to-supreme-court-in-fraught-immunity-case/index.html,"preme Court. Two days later – ahead of deadline – Smith argued in his own brief that Trump had not met the standard to pause proceedings in his case. It generally takes support from five justices to secure such a pause. “The charged crimes strike at the heart of our democracy,” Smith wrote in a filing Wednesday. “The public interest in a prompt trial is at its zenith where, as here, a former president is charged with conspiring to subvert the electoral process so that he could remain in office.”"
330,cnn,https://www.cnn.com/2024/02/15/politics/white-house-letter-special-counsel/index.html,"released, Bauer and Richard Sauber, special counsel to the president, responded to Weinsheimer – to once again disagree with the DOJ’s assessment that Hur’s report was consistent with the department’s policy and practice. In this letter, the lawyers cited – among others – former Attorney General Eric Holder, who said that the Hur report “contains way too many gratuitous remarks and is flatly inconsistent with long standing DOJ traditions.” This story has been updated with additional information."
331,cnn,https://www.cnn.com/2024/02/15/politics/mike-turner-republican-reaction/index.html,"that it should be declassified and made public. One Democratic member with deep national security experience said Wednesday that they had never before received that kind of urgent summons over a national security matter during their time in Congress — and that the intelligence they saw when they arrived was not urgent enough to justify Turner’s comments. CNN’s Lauren Fox, Morgan Rimmer, Manu Raju and Sam Fossum contributed to this report. This story has been updated with additional developments."


In [23]:
# Checking the end of Fox News articles
df[['source', 'url', 'content_end_preview']] [df['source'] == 'foxnews']

Unnamed: 0,source,url,content_end_preview
47,foxnews,https://www.foxnews.com/politics/biden-takes-jab-hur-report-joke-memory-returns-speech-one-more-thing-forgot,"ogress."" CLICK TO GET THE FOX NEWS APP ""The recent Washington Post headline summed it up,"" Biden added, quoting the newspaper's story titled, ""Falling Inflation and Rising Growth Give the U.S. the World's Best Recovery."" ""The world's best recovery!"" Biden said. ""It's because you implemented what we did. You made it work."" Danielle Wallace is a reporter for Fox News Digital covering politics, crime, police and more. Story tips can be sent to danielle.wallace@fox.com and on Twitter: @danimwallace."
48,foxnews,https://www.foxnews.com/politics/rfk-jr-apologizes-family-super-bowl-ad-claims-no-involvement,"icks to stop him. The public sees through it all and won’t stand for it."" Kennedy initially sought to challenge President Biden in the 2024 Democratic presidential primary, but the DNC said it would not hold primary debates and stood behind the incumbent president. Fox News' Bradford Betz contributed to this report. Anders Hagstrom is a reporter with Fox News Digital covering national politics and major breaking news events. Send tips to Anders.Hagstrom@Fox.com, or on Twitter: @Hagstrom_Anders."
49,foxnews,https://www.foxnews.com/politics/bidens-upcoming-physical-exam-will-not-include-cognitive-test-white-house-says,"has been my experience with this president,"" she said. Biden's age is a major concern among U.S. voters, 86% of whom say he is too old to serve a second term, according to an ABC poll. A Sunday poll from ABC/Ipsos found that 86% of Americans believe Biden is too old to serve another term, including 73% of Democrats. Anders Hagstrom is a reporter with Fox News Digital covering national politics and major breaking news events. Send tips to Anders.Hagstrom@Fox.com, or on Twitter: @Hagstrom_Anders."
50,foxnews,https://www.foxnews.com/politics/kamala-harris-ready-serve-democrats-sound-alarm-about-bidens-age,"s crying and wet the bed,"" Begala quipped on CNN last Friday. ""This is terrible for Democrats. And anybody with a functioning brain knows that,"" he declared. GOP CAMPAIGN ARM LAUNCHES MEDIA BLITZ AGAINST DEMS WHO OPPOSED VIOLENT CRIME BILL AS CRISIS IN DC SPIRALS Then-Democrat presidential candidate Hillary Clinton makes a concession speech after being defeated by Donald Trump in New York on November 9, 2016. Brandon Gillespie is an associate editor at Fox News. Follow him on X at @BGillespieAL."
51,foxnews,https://www.foxnews.com/politics/climate-activists-arrested-shutting-down-biden-campaign-hq,"power plant electricity generation, push electric vehicles and incentivize the electrification of the residential sector. ""I mean, it literally is the existential threat. It’s even more consequential than nuclear power, nuclear war,"" he added. ""That would be horrible and awful, and it would just make the environment incredibly worse. But it’s about the environment."" The Biden campaign didn't immediately respond to a request for comment. Thomas Catenacci is a politics writer for Fox News Digital."
...,...,...,...
348,foxnews,https://www.foxnews.com/politics/fox-news-politics-trump-vows-appeal,"run for president, serve as Manchin's VP ...Read more 'COMMONSENSE CONSERVATIVE': Former special forces soldier lands big endorsement in race to flip House seat ...Read more 'RACE OF HIS LIFE' : Dem Sen blasts GOP for not caring about immigration; record comes back to haunt him ...Read more Subscribe now to get Fox News Politics newsletter in your inbox. Get the latest updates from the 2024 campaign trail, exclusive interviews and more on FoxNews.com . This article was written by Fox News staff."
349,foxnews,https://www.foxnews.com/politics/fox-news-politics-judge-and-fury,"for more oversight of Department of Archives and History …Read more 'ESTABLISH THE RECORD': Testimony to begin on possible da Fani Willis disqualification …Read more TRUMP IN COURT: Trump to appear in New York City court for hearing in Manhattan District Attorney Alvin Bragg case …Read more Subscribe now to get Fox News Politics newsletter in your inbox. Get the latest updates from the 2024 campaign trail, exclusive interviews and more on FoxNews.com . This article was written by Fox News staff."
350,foxnews,https://www.foxnews.com/politics/fox-news-politics-borderline-impeachment,"ld Trump …Read more 'INCOMPETENT': New poll reveals the top 2 issues negatively affecting voter confidence in Biden …Read more Across America PENCE HITS BACK: Former VP Mike Pence's policy think tank pushed back on Vance claim that foreign aid has a hidden 'impeachment time bomb' …Read more Subscribe now to get Fox News Politics newsletter in your inbox. Get the latest updates from the 2024 campaign trail, exclusive interviews and more on FoxNews.com . This article was written by Fox News staff."
351,foxnews,https://www.foxnews.com/politics/democrats-win-seat-republicans-win-impeachment-two-presidents-clash-over-nato,"of the president’s mental acuity. And everyone is getting sustained exposure to a system that generally favors political maneuvering over actual results. Get the latest updates from the 2024 campaign trail, exclusive interviews and more at our Fox News Digital election hub. Howard Kurtz is the host of FOX News Channel's MediaBuzz (Sundays 11 a.m.-12 p.m. ET). Based in Washington, D.C., he joined the network in July 2013 and regularly appears on Special Report with Bret Baier and other programs."


We see that indeed some articles end with information about the authors or otherwise irrelevant information. Below is our function to handle some of the instances.

In [24]:
def remove_last_sentence(row):
    sentences = row['content'].split('. ')
    
    if len(sentences) > 1:  # Check if there's more than one sentence
        remove_sentence = '. '.join(sentences[-1:])  
        if 'This story has been updated with additional information' in remove_sentence \
                or 'follow him on' in remove_sentence \
                or 'Follow him on' in remove_sentence \
                or '@fox.com' in remove_sentence \
                or 'Fox News Politics newsletter' in remove_sentence \
                or 'follow him on' in remove_sentence \
                or 'Follow him on' in remove_sentence \
                or '@fox.com' in remove_sentence \
                or '@Fox.com' in remove_sentence \
                or 'FoxNews.com ' in remove_sentence \
                or 'Fox News Digital' in remove_sentence \
                or 'contributed to this' in remove_sentence \
                or 'will be updated' in remove_sentence \
                or 'have been updated' in remove_sentence \
                or 'APP Fox News' in remove_sentence \
                or 'Fox News' in remove_sentence \
                or 'FoxNews.com ' in remove_sentence \
                or '@Fox.com' in remove_sentence \
                or 'Fox News Digital' in remove_sentence \
                or 'Fox News Channel and FOX Business' in remove_sentence \
                or 'Fox News Politics newsletter' in remove_sentence \
                or 'email' in remove_sentence:
            updated_content = '. '.join(sentences[:-1])  # Join all sentences except the last one
            return updated_content
    return row['content']

# Apply the function to the DataFrame
df['content'] = df.apply(remove_last_sentence, axis=1)
df['content_end_preview'] = df.apply(remove_last_sentence, axis=1)

In [25]:
# Checking the end of Fox News articles after we run our function
df[['source', 'url', 'content_end_preview']] [df['source'] == 'foxnews']

Unnamed: 0,source,url,content_end_preview
47,foxnews,https://www.foxnews.com/politics/biden-takes-jab-hur-report-joke-memory-returns-speech-one-more-thing-forgot,"Biden takes jab at special counsel report with joke about his memory President Biden on Monday joked about his memory and age during a speech to the National Association of Counties in Washington, D.C. Join Fox News for access to this content Plus get unlimited access to thousands of articles, videos and more with your free account! Please enter a valid email address. By entering your email, you are agreeing to Fox News Terms of Service and Privacy Policy , which includes our Notice of Financial Incentive . To access the content, check your email and follow the instructions provided. President Biden attempted a joke about his memory during a speech in Washington, D.C., Monday, seemingly taking a jab at Special Counsel Robert Hur's report. Delivering remarks at the National Association of Counties Legislative Conference, Biden spoke about his bipartisan infrastructure law, which he credits for allowing his administration to continue ""making the biggest investment in climate change ever anywhere in the entire world."" ""After devastating floods, tornadoes, wildfires and hurricane, we're going to keep working together to respond, to rebuild and boost resilience to extreme weather. My administration is also helping install rooftop solar to build a national network of electric vehicle charging stations for revitalizing fenceline communities smothered by the legacy of pollution like where I lived in Claymont,"" Biden said, referring to where his family moved in Delaware during the early 1950s. ""We're promoting clean energy in industries of the future made here in America. Made in America,"" he said while transitioning. BIDEN ALLIES GO ON DEFENSE BLITZ FOLLOWING HUR REPORT: 'BUCKET OF BS' ""What I didn't realize, and I've been around, I know it don't look like it, but I've been around a while. I do remember that,"" Biden said, garnering laughter and applause. President Biden gave a speech to the National Association of Counties Legislative Conference, Monday, Feb. 12, 2024, in Washington. (AP Photo/Evan Vucci) The joke was somewhat undercut by Biden concluding the speech – and then adding an interjection at the end, admitting, ""I forgot something,"" before making a final comment. In building his argument for why no charges were recommended following an investigation into Biden's mishandling of classified documents, Hur, who was appointed by Attorney General Merrick Garland, detailed in part that Biden's defense of any potential charges could possibly be that, ""Mr. Biden would likely present himself to a jury, as he did during our interview of him, as a sympathetic, well-meaning, elderly man with a poor memory."" The report cited examples when investigators said the president's memory lapsed, including over when his older son Beau had died. Biden's age and mental fitness have already been a concern for voters. President Biden after delivering remarks to the National Association of Counties Legislative Conference, Monday, Feb. 12, 2024, in Washington. (AP Photo/Evan Vucci) MAYORKAS DUCKS RESPONSIBILITY ON BORDER CRISIS, MIGRANT FIGURES: 'CONGRESS IS THE ONLY ONE WHO CAN FIX THIS' During his speech, Biden also criticized his 2024 rival, former President Trump, and Republicans for opposing a $118 billion supplemental spending agreement that included aid for Ukraine, Israel and Taiwan, as well as an ambitious border security and immigration package. The border package drew widespread opposition from conservative Republicans in both chambers since its release just days earlier. The Senate voted against the supplemental 50-49 Wednesday. It needed 60 votes to pass. The vote went mostly along party lines, except for five Democrats voting no and four Republicans voting yes. ""Some of my extreme Republican friends – and by the way, this is not your father's Republican Party … I'm not taking on all Republicans. I really mean it. The MAGA Republicans, a minority, but a powerful minority. They went out and they killed the deal. My predecessor said he didn't like it. It was a loss for him. We have to end the political games, folks,"" Biden said Monday. President Biden, left, greets NatCo president and commissioner in Ramsey County, Minnesota, Mary Jo McGuire, before he delivers remarks to the National Association of Counties Legislative Conference, Monday, Feb. 12, 2024. (AP Photo/Evan Vucci) The president also claimed a victory for the economy. ""It's clear we have the strongest economy in the world. Nearly 15 million new jobs since I came to office,"" Biden said. ""The longest stretch of under 4% in 50 years. Growth is strong. Rising wages are rising, inflation is down. In fact, the costs have fallen from everything from a gallon of gas to a gallon of milk. We know prices are still too high because of what I call greed-inflation and shrinkflation,"" he said, referring to companies charging the same amount for a product while reducing quantity. ""I'm calling on corporations to pass their savings on to consumers, for God's sake. We're making real progress."" CLICK TO GET THE FOX NEWS APP ""The recent Washington Post headline summed it up,"" Biden added, quoting the newspaper's story titled, ""Falling Inflation and Rising Growth Give the U.S. the World's Best Recovery."" ""The world's best recovery!"" Biden said. ""It's because you implemented what we did"
48,foxnews,https://www.foxnews.com/politics/rfk-jr-apologizes-family-super-bowl-ad-claims-no-involvement,"RFK Jr. drops surprise campaign ad during Super Bowl The ad elicited mixed reactions from viewers and sent 'RFK Jr."" skyrocketing on Google Trends. (Credit: Kennedy 2024) Robert F. Kennedy Jr. apologized to members of his family for a surprise Super Bowl ad that heavily featured his connection to his uncle, former President John F. Kennedy. RFK Jr. argued that the group responsible for the ad, the American Values Super PAC, did not consult with him or his campaign when making the ad. RFK Jr., who is running for president as an independent, nevertheless pinned the video to the top of his profile on X. RFK Jr.'s cousin, Bobby Shriver, first complained about the ad in a post Sunday night, writing that his uncles and mother would never have approved of RFK Jr.'s ""deadly health care views."" ""My cousin’s Super Bowl ad used our uncle’s faces- and my Mother’s. She would be appalled by his deadly health care views. Respect for science, vaccines, & health care equity were in her DNA. She strongly supported my health care work … which he opposes,"" Shriver wrote. BIDEN LASHES OUT AT REPORTERS ASKING ABOUT AGE CONCERNS AFTER SPECIAL COUNSEL REPORT: ‘THAT IS YOUR JUDGMENT!’ ""Bobby. I’m so sorry if that advertisement caused you pain. The ad was created and aired by the American Values Superpac without any involvement or approvals from my campaign. Federal rules prohibit Superpacs from consulting with me or my staff. I send you and your family my sincerest apologies. God bless you,"" RFK Jr. replied. RFK Jr. followed up the statement with a more general apology to any of his family members who were hurt by the ad, stating once again that ""FEC rules prohibit Super PACs from consulting with me or my staff."" Robert F. Kennedy Jr. apologized to members of his family for a surprise Super Bowl ad that heavily featured his connection to his uncle, former President John F. Kennedy. (Rebecca Noble/Getty Images) American Values 2024 ran the 30-second ad for $7 million. The clip is a throwback to an ad used by his uncle, JFK, in the 1960 presidential campaign . TRUMP TEAM MADE ‘EARLY ON’ ATTEMPTS TO RECRUIT RFK JR. AS FORMER PRESIDENT'S RUNNING MATE: REPORT The ad, replacing JFK’s face with that of RFK Jr., implores viewers to ""Vote Independent."" ""The panicked DC power brokers are working overtime to keep Kennedy off the ballot because they know he can and will end their culture of greed and corruption,"" American Values 2024 co-founder Tony Lyons said in a statement provided to Fox News Digital. Robert F. Kennedy Jr. is running for president as an independent, and many Democrats fear he could serve as a spoiler for President Biden. (Nathan Posner/Anadolu Agency via Getty Images) ""They offer us soaring inflation, forever wars, and chronic disease. RFK Jr. offers us real change along with freedom, trust and hope. Like his uncle and his father, Kennedy is a corruption fighter, and it's no wonder the DNC is trying every old trick and inventing new tricks to stop him. The public sees through it all and won’t stand for it."" Kennedy initially sought to challenge President Biden in the 2024 Democratic presidential primary, but the DNC said it would not hold primary debates and stood behind the incumbent president. Fox News' Bradford Betz contributed to this report"
49,foxnews,https://www.foxnews.com/politics/bidens-upcoming-physical-exam-will-not-include-cognitive-test-white-house-says,"Biden won't take cognitive test in physical exam: White House White House press secretary Karine Jean-Pierre on Monday said that President Biden will not be taking a cognitive test during his regular physical exam. President Biden will not take a cognitive test as part of his upcoming physical exam, the White House confirmed Monday. White House press secretary Karine Jean-Pierre stated that Biden's physician, Dr. Kevin O'Connor, does not believe a cognitive test is necessary. She said O'Connor believes Biden proves his cognitive ability ""every day [in] how he operates and how he thinks."" Reporters pressed Jean-Pierre on the issue due to last week's report from Special Counsel Robert Hur that found Biden has significant memory issues. ""Does the White House think that the idea of the president taking a cognitive test as a part of this physical is a legitimate idea?"" a reporter asked. BIDEN LEAD OVER TRUMP SHRINKS WITH THIRD PARTY CANDIDATES ADDED President Biden will not take a cognitive test as part of his upcoming physical exam, the White House confirmed Monday. (AP/Evan Vucci) ""I'm just gonna say what Dr. O'Connor said to me about a year ago when [Biden's physical] was released,"" Jean-Pierre responded. ""The president proves every day [in] how he operates and how he thinks, by dealing with world leaders, by making difficult decisions on behalf of the American people – whether it's domestic or it's national security."" HALEY ARGUES TRUMP AND BIDEN ARE ‘GRUMPY OLD MEN’ ""That is how Dr. O'Connor sees it, and that is how I'm going to leave it,"" she added. Reporters pressed Jean-Pierre on Biden's health due to last week's report from Special Counsel Robert Hur that found Biden has significant memory issues. (Al Drago/Bloomberg via Getty Images) Jean-Pierre gave a more full-throated defense of Biden when reporters continued to press her on the topic. She said she has known Biden for more than a decade and continues to find him to be ""sharp"" and ""on top of things."" ""When we have meetings with him and his staff he is constantly pushing us, trying to get more information, and so that has been my experience with this president,"" she said. Biden's age is a major concern among U.S. voters, 86% of whom say he is too old to serve a second term, according to an ABC poll. A Sunday poll from ABC/Ipsos found that 86% of Americans believe Biden is too old to serve another term, including 73% of Democrats"
50,foxnews,https://www.foxnews.com/politics/kamala-harris-ready-serve-democrats-sound-alarm-about-bidens-age,"Marc Thiessen questions whether Biden is capable of finishing current term Fox News contributor Marc Thiessen joins 'Fox & Friends' to discuss the significance of the special counsel's report on Biden's age after a recent poll showed 86% of Americans now believe he is too old to serve as president. Vice President Kamala Harris says she is ""ready to serve"" as concerns over President Biden's age and mental fitness continue to grow, notably among members of their own party. Harris made the statement during a a recent interview last week before the Thursday release of Special Counsel Robert Hur's report on Biden's mishandling of classified documents, which described the president’s memory as having ""significant limitations."" ""I am ready to serve. There’s no question about that,"" Harris told The Wall Street Journal , adding that everyone who observes her work ""walks away fully aware of my capacity to lead."" BIDEN CONSIDERED RESIGNING VICE PRESIDENCY ‘IN PROTEST’ OVER OBAMA'S AFGHANISTAN POLICY: HUR REPORT President Biden and Vice President Harris. (Getty Images) Hur's report recommended no charges against Biden over his actions pertaining to the mishandling of classified documents, partly because he would have a defense at trial as a ""sympathetic, well-meaning, elderly man with a poor memory."" Republicans piled on Biden following the Hur report's release last week, but Democrats have also begun voicing — or at least admitting — greater concern surrounding Biden's age and whether that could hurt his chances at winning re-election this year. Former President Bill Clinton's lead strategist, James Carville, argued Saturday that the White House has little confidence in Biden after he turned down a Super Bowl Sunday interview. TREASURY CONFIRMS TERMS LIKE ‘MAGA,’ TRUMP,' KAMALA,' ‘BIDEN’ USED IN PRIVATE BANK TRANSACTION SEARCHES Democrat strategist James Carville. (Getty Images) ""It’s the biggest television audience, not even close, and you get a chance to do a 20-25-minute interview on that day,"" Carville said. ""And you don’t do it? That’s a kind of sign that the staff, or yourself, doesn’t have much confidence in you. There’s no other way to read this."" Another former Clinton strategist, Paul Begala, said Hur’s indictment of Biden’s memory and Biden's subsequent response was ""terrible for Democrats."" ""Oh yeah. Look, I’m a Biden supporter, and I slept like a baby last night: I woke up every two hours crying and wet the bed,"" Begala quipped on CNN last Friday. ""This is terrible for Democrats. And anybody with a functioning brain knows that,"" he declared. GOP CAMPAIGN ARM LAUNCHES MEDIA BLITZ AGAINST DEMS WHO OPPOSED VIOLENT CRIME BILL AS CRISIS IN DC SPIRALS Then-Democrat presidential candidate Hillary Clinton makes a concession speech after being defeated by Donald Trump in New York on November 9, 2016"
51,foxnews,https://www.foxnews.com/politics/climate-activists-arrested-shutting-down-biden-campaign-hq,"Biden’s export suspension on liquefied natural gas could spike your heat bill Fox News congressional correspondent Chad Pergram reports on the Biden administration’s controversial decision to suspend exports of liquefied natural gas. Join Fox News for access to this content Plus get unlimited access to thousands of articles, videos and more with your free account! Please enter a valid email address. By entering your email, you are agreeing to Fox News Terms of Service and Privacy Policy , which includes our Notice of Financial Incentive . To access the content, check your email and follow the instructions provided. A group of youth climate activists were arrested Monday for blocking the entrance to President Biden's campaign headquarters in Delaware while demanding his administration ""end the era of fossil fuels."" According to the left-wing climate group Sunrise Movement, 21 of its members were arrested and roughly 80 others were involved in the protest in Wilmington, Delaware, on Monday. The activists warned that, if Biden failed to take definitive action combating climate change, millions of young voters would stay home in November. ""Climate change is at our doorstep. Our homes are flooding, we’re breathing in toxic air, Black people like me are dying while the President expands oil and gas production to record levels,"" Sunrise campaign director Kidus Girma said in a statement after the protest. ""Then President Biden goes around and claims he’s a climate president and wants our votes? That’s bulls---."" While Girma failed to specify which actions Biden has taken to boost greater reliance on fossil fuels, the Sunrise Movement has repeatedly called for him to formally declare global warming a national emergency, an action he has resisted throughout his presidency. A climate emergency declaration would enable Biden to bypass Congress and take a number of executive actions not normally granted to the White House. Sunrise Movement activists were arrested Monday for blocking the entrance to President Biden's campaign headquarters in Delaware. (Sunrise Movement/X/Video screenshot | Anna Moneymaker/Getty Images) MODERATE DEMS SILENT AS BIDEN SKIRTS SENATE CONFIRMATION FOR JOHN KERRY'S REPLACEMENT In addition, the Sunrise Movement broadly opposes all oil, gas and coal development, calling instead for an aggressive nationwide transition to green energy sources like wind and solar. Currently, fossil fuels generate the majority of domestic electricity and support both the transportation and manufacturing sectors in America, according to federal data. ""There are dozens of things he could sign into law tomorrow if he wanted to protect low-income communities of color like mine that live next to fossil fuel sites that are killing us through cancer and asthma,"" Sunrise Movement volunteer Ariela Lara said. ""He could make sure that every time a climate disaster hits, no one gets evicted and everyone has access to free health care,"" she continued. ""He could end the fossil fuel era by refusing to approve a single oil or gas well in this country. Biden can’t build renewables on Monday, build fossil fuels on Tuesday, and then claim to be climate president. That’s not how science works, and young voters know it."" AOC DECLARES VICTORY IN FIGHT FOR GREEN NEW DEAL 5 YEARS LATER: 'SOCIAL AND ECOLOGICAL TRANSFORMATION' But while Biden has yet to declare a climate emergency , he has repeatedly taken aim at the fossil fuel industry while seeking to boost green energy. President Biden delivers remarks during a press conference on the grounds of National Renewable Energy Laboratory on Sept. 14, 2021, in Arvada, Colorado. (Helen H. Richardson/The Denver Post via Getty Images) Shortly after taking office, the president signed executive orders to pause all oil and gas leasing on federal lands and waters, and canceled permits for the Keystone XL oil pipeline project. Although a federal court eventually intervened to force the Biden administration to continue leasing, it has dragged its feet on holding new lease sales and created regulatory roadblocks for producers. And Biden signed the Inflation Reduction Act, which earmarks tens of billions of dollars for green energy programs. His administration has also unleashed an onslaught of environmental regulations to curb fossil fuel power plant electricity generation, push electric vehicles and incentivize the electrification of the residential sector. ""I mean, it literally is the existential threat. It’s even more consequential than nuclear power, nuclear war,"" he added. ""That would be horrible and awful, and it would just make the environment incredibly worse. But it’s about the environment."" The Biden campaign didn't immediately respond to a request for comment"
...,...,...,...
348,foxnews,https://www.foxnews.com/politics/fox-news-politics-trump-vows-appeal,"Welcome to Fox News’ Politics newsletter with the latest political news from Washington D.C. and updates from the 2024 campaign trail. What's Happening? - NY judge orders Trump to pay hundreds of millions, bars him from operating business… - DA Fani Willis did not testify Friday after fiery courtroom appearance… - Biden visits East Palestine, Ohio, more than a year later… Trump vows appeal L - Former President Donald Trump R - New York Judge Arthur Engoron (Fox News) Former President Trump blasted ""clubhouse politician"" Judge Arthur Engoron Friday after he barred him from operating his business in New York for three years and fined him more than $350 million, defending the ""great company"" he built and telling Fox News Digital that the ruling is yet another example of Democrats ""trying to stop"" him, but that ""they will not be successful."" Engoron handed down his ruling Friday after a months-long civil fraud trial beginning in October and stemming from New York Attorney General Letitia James' lawsuit alleging the former president inflated his assets and committed fraud. Trump spoke exclusively to Fox News Digital shortly after Engoron's ruling was made public Friday afternoon. ""A crooked New York judge working with the very corrupt attorney general of New York State, who ran on the basis of ‘I will get trump’ before knowing me — before even knowing anything about me — just ruled that I have to pay a fine of $355 million based on absolutely nothing,"" Trump told Fox News Digital. ""No victims. No damages. Great financial statements, with full disclaimer clauses, only success."" Southern scandal 'SOUTHERN GENTLEMAN' : Meet Nathan Wade, the special prosecutor accused of having 'improper' affair with Fani Willis ...Read more FIERY DAY 2 : DA Fani Willis doesn't take the stand in second day of the hearing ...Read more SEE IT: Top moments from the DA Fani Willis hearing over 'improper' affair with Nathan Wade so far ...Read more White House 'MENTAL DECLINE' : Republicans pressure Biden to take cognitive test, calling it a 'national security concern' ...Read more 'OUTRAGEOUS EFFORT': Biden calls for an end to impeachment inquiry after indictment of FBI informant: 'Outrageous effort' ...Read more ‘LASER FOCUS’ : Biden insist's he's committed to helping East Palestine a year after toxic train derailment …Read more Tails from the campaign trail 'FAR OVERDUE' : Trump-endorsed Ohio Senate candidate blasts Biden's visit to East Palestine ...Read more MANCHIN OUT: West Virginia senator declines third-party presidential run ...Read more 'WE'RE GOOD FRIENDS': Romney reveals whether he has plans to run for president, serve as Manchin's VP ...Read more 'COMMONSENSE CONSERVATIVE': Former special forces soldier lands big endorsement in race to flip House seat ...Read more 'RACE OF HIS LIFE' : Dem Sen blasts GOP for not caring about immigration; record comes back to haunt him ...Read more Subscribe now to get Fox News Politics newsletter in your inbox"
349,foxnews,https://www.foxnews.com/politics/fox-news-politics-judge-and-fury,"Welcome to Fox News’ Politics newsletter with the latest political news from Washington D.C. and updates from the 2024 campaign trail. What's Happening? - Fulton County DA Fani Willis shocks in raucous courtroom testimony - Date set for first Trump criminal trial - Nearly half of Americans think Biden should be replaced as Dem nominee Fulton County DA Fani Willis (Getty Images) Embattled Fulton County district attorney Fani Willis took the stand to testify against allegations she had an ""improper"" affair with special prosecutor Nathan Wade , which could derail her case against former President Donald Trump. Willis was notably agitated and at points raised her voice during a raucous several hours of testimony. At one point, Willis held up a printed copy the allegations against her in both hands and turned to the judge yelling, ""it is a lie!"" Willis also called defense attorney Ashleigh Merchan's interests ""contrary to democracy."" Judge Scott McAfee presiding over the proceeding, called for a five-minute recess at one point. When court was back in session he told lawyers to stop ""talking over each other"" and cautioned Willis, saying: ""We have to listen to the questions as asked. And if this happens again and again, I'm going to have no choice but to strike your testimony."" Willis is expected back on the stand Friday for further testimony. Trump had his own courtroom drama Thursday. He appeared in a Manhattan court for a pretrial hearing in the criminal ""hush-money"" case against him, where the judge set a date for the beginning of the first trial on criminal charges. The trial is set to start March 25, a few weeks after Super Tuesday but before the end of the GOP primary. Trump said he would be campaigning in the evening after attending the trial. ""I'll be here during the day and I'll be campaigning during the night. Biden should be doing the same thing but he’ll be sleeping,"" Trump said outside the courtroom. Tales from the Campaign Trail ‘BADLY TAINTED’: Trump reacts to dramatic Fani Willis testimony ...Read more ROMNEY FOR VP?: Sen. Joe Manchin floats potential running mates in hypothetical third party presidential bid …Read more EXTRA AMMUNITION: Haley takes aim at Trump as trial of former president to start next month …Read more ONE YEAR LATER: Haley's final stand against Trump? …Read more White House GUN CONTROL: Biden, Harris call for gun control in separate comments after deadly shooting at Kansas City Chiefs' parade …Read more 'DEFERRED ENFORCED DEPARTURE': White House announces Palestinians will be protected from deportation …Read more Capitol Hill HUR TO TESTIFY: Special counsel to appear before House committee after report revealing Biden memory issues …Read more DOUBLING DOWN: Sen. JD Vance's office spars with Pence group over 'impeachment time bomb' in foreign aid bill …Read more 'ILL ADVISED': Cotton demands information from DOD on contract with Chinese-owned company …Read more TAKING TIME: Republican Speaker Johnson on $95B Ukraine, Israel bill: House will not be 'rushed' …Read more SHUTTING IT DOWN: Capitol Police arrest pro-Palestinian demonstrators inside Cannon House Office Building …Read more 'SLUSH FUND': GOP lawmaker reveals how he will target progressives to pay for foreign aid package …Read more Across America 2024 SUCCESSION?: Nearly half of Americans think Biden could be replaced as Dem nominee: poll …Read more 'POISED TO ATTACK': FBI Director Wray warns allies of hacking by Chinese proxy groups …Read more EYES IN SPACE: US launches missile detection satellites into orbit amid concerns about Russian weapons …Read more ALABAMA VOTES: After LGBTQ lecture, Alabama Senate votes for more oversight of Department of Archives and History …Read more 'ESTABLISH THE RECORD': Testimony to begin on possible da Fani Willis disqualification …Read more TRUMP IN COURT: Trump to appear in New York City court for hearing in Manhattan District Attorney Alvin Bragg case …Read more Subscribe now to get Fox News Politics newsletter in your inbox"
350,foxnews,https://www.foxnews.com/politics/fox-news-politics-borderline-impeachment,"Welcome to Fox News’ Politics newsletter with the latest political news from Washington D.C. and updates from the 2024 campaign trail. What's Happening? - House Republican warns of serious national security threat related to space - Mayorkas becomes first cabinet secretary to be impeached since the 19th century - Democrat Tom Suozzi triumphs in special election to replace Santos Mayorkas impeachment succeeds by one vote U.S. Department of Homeland Security Secretary Alejandro Mayorkas speaks to the media about an overview of public safety plans for Super Bowl week at the Mandalay Bay Convention Center on February 07, 2024 in Las Vegas, Nevada. (Candice Ward/Getty Images) On the eve of Valentine's Day and Ash Wednesday, Homeland Security Secretary Alejandro Mayorkas was impeached in a historic vote over his mishandling of the illegal immigration crisis at the U.S.-Mexico border. T hree Republican lawmakers, Reps. Mike Gallagher, R-Wis., Ken Buck, R-Colo., and Tom McClintock, R-Calif., jumped the aisle to vote against the impeachment. The 214-213 vote came after the first impeachment failed earlier this month. Every House Democrat showed up to protect the cabinet secretary, including Rep. Al Green, D-Texas, who temporarily left the hospital where he was recovering from surgery to cast his vote. Once Rep. Steve Scalise returned to D.C. from a cancer treatment, the GOP had the numbers to advance the articles of impeachment. House Republicans have accused Mayorkas of violating his oath to defend the U.S. by failing to secure the border and ""refusing"" to enforce immigration laws. The three Republicans who voted against impeachment on Tuesday have criticized Mayorkas' handling of the border but expressed reservations over whether it rose to the level of impeachment. McClintock warned it could set a precedent for political impeachments that could harm Republican officials in the future. This is the first time a Cabinet secretary has been impeached by the U.S. Congress since 1876. It's now up to the Senate — where Democrats run the show — to hold an impeachment trial. Reps. Mike Gallagher, R-Wis., Ken Buck, R-Colo., and Tom McClintock, R-Calif. (Getty Images) Capitol Hill 'SERIOUS' THREAT: House Intel Chair Turner issues vague warning on 'serious national security threat,' urges Biden to declassify …Read more 'WILLING TO TRADE': Republican senators rally support to add 'meaningful' border security to House's foreign aid package …Read more White House 'DEVASTATING REPUDIATION': White House claims Suozzi's victory in NY special election speaks volumes about GOP, Trump …Read more WHITE HOUSE DODGE: Sullivan avoids details when pressed on 'serious national security threat' …Read more 'HEY EVERYBODY!': WH compares Ronny Jackson to 'Simpsons' character in email to Fox Digital …Read more Tales from the Campaign Trail Former U.S. Rep. Tom Suozzi, Democratic candidate for New York's 3rd congressional district, speaks at his election night party Tuesday, Feb. 13, 2024, in Woodbury, N.Y. (AP Photo/Stefan Jeremiah) BACK AGAIN: Who is Tom Suozzi? A look at the Democrat who flipped Santos' seat blue …Read more BLAME GAME: Haley faults Trump for New York special election loss …Read more 'EVERY SINGLE PENNY': Lara Trump says RNC funds will go to electing father-in-law Donald Trump …Read more 'INCOMPETENT': New poll reveals the top 2 issues negatively affecting voter confidence in Biden …Read more Across America PENCE HITS BACK: Former VP Mike Pence's policy think tank pushed back on Vance claim that foreign aid has a hidden 'impeachment time bomb' …Read more Subscribe now to get Fox News Politics newsletter in your inbox"
351,foxnews,https://www.foxnews.com/politics/democrats-win-seat-republicans-win-impeachment-two-presidents-clash-over-nato,"Dems flipping NY House seat threatens GOP majority Fox News’ Bryan Llenas reports on Democrat Tom Suozzi's victory in the New York special election to fill former Rep. George Santos’ seat. If the second attempt to impeach the Homeland Security chief had taken place a short time later, the Republicans would have failed again. Instead, they managed to impeach Alejandro Mayorkas–the first sitting Cabinet secretary to draw that sanction–by a single vote. But after a victory in George Santos’ old district, the Democrats would have had the extra vote to stop the impeachment. TRUMP’S NATO COMMENTS TRIGGER FIERCE MEDIA AND EUROPEAN OPPOSITION: HOW SERIOUS IS HE? Tom Suozzi beat Republican Mazi Pilip in Tuesday’s special election on Long Island, unleashing a tidal wave of punditry about his winning formula–openly tackling such issues as illegal migration and crime rather than avoiding them. I always caution against drawing sweeping conclusions in one-off local races, and this election in a snowstorm is no exception. Former U.S. Rep. Tom Suozzi, Democratic candidate for New York's 3rd congressional district, speaks at his election night party Tuesday, Feb. 13, 2024, in Woodbury, N.Y. (AP Photo/Stefan Jeremiah) The underlying factor was Santos, the outlandish, lying, fabricating lawmaker who won the seat with a made-up resume, was expelled by the House and is under indictment. Voters felt hosed by the Republican publicity hound, and maybe the Dems were more motivated to vote. Sure, Suozzi deserves credit for seizing on illegal migration and crime rather than avoiding such explosive issues – and doggedly distancing himself from President Biden. But he also has to run again in the fall. NATO CHIEF SAYS TRUMP CRITICISM 'DOES UNDERMINE THE SECURITY OF ALL OF US' Trump, for his part, blamed Pilip, ""running in a race where she didn’t endorse me and tried to ‘straddle the fence,’ when she would have easily WON if she understood anything about MODERN DAY politics in America…I STAYED OUT OF THE RACE, ‘I WANT TO BE LOVED!’"" A subtle Valentine’s Day message? Republican presidential hopeful and former US President Donald Trump looks on, flanked by son Eric Trump (L) and daughter-in-law Lara Trump, during an Election Night Party in Nashua, New Hampshire, on January 23, 2024. Donald Trump won the key New Hampshire primary Tuesday, moving him ever closer to locking in the Republican presidential nomination and securing an extraordinary White House rematch with Joe Biden. (Photo by TIMOTHY A. CLARY/AFP via Getty Images)) The move against Mayorkas, the first against a Cabinet officer in 150 years, is about the politics of symbolism. Republicans know full well the Democratic-controlled Senate is not going to convict him. This was about keeping the spotlight on one of the GOP’s best issues . But if the press saddled Johnson with a humiliating defeat last week, it has to credit him with a big win now. Both episodes shed light on the fractious politics of the Hill. Just when it looked like the Senate might pass a bipartisan border security bill–which included military aid to Ukraine and Israel–Donald Trump ripped it and the package was dead. YOU DON'T NEED COMPREHENSIVE IMMIGRATION REFORM TO SECURE THE BORDER: MARC THIESSEN Now the Senate appears ready to pass a stand-alone military aid bill by a filibuster-proof majority. But Johnson says he won’t bring it up for a House floor vote. That would bury it, unless a handful of Republicans join with Democrats to force a vote through a discharge petition. Think about it: the United States, unable to help two major allies because of election-year politics, especially Ukraine, which remains under siege by Vladimir Putin. And that’s why Biden took the rare step of delivering a televised speech on Tuesday. His predecessor gave him an opening by saying he wouldn’t protect any NATO member who didn’t pay its fair share in military costs. And if that were the case, Putin and Russia could ""do whatever the hell they want."" WASHINGTON, DC - FEBRUARY 13: U.S. President Joe Biden speaks on the Senate's recent passage of the National Security Supplemental Bill, which provides military aid to Ukraine, Israel and Taiwan, in the State Dining Room of the White House on February 13, 2024, in Washington, DC. During his remarks Biden urged House Republicans and U.S. Speaker of the House Mike Johnson (R-LA) to move the legislation through the House of Representatives. (Anna Moneymaker) Biden, in his speech , accused Trump of siding with the Russian dictator, calling the comments ""dumb,"" ""shameful,"" ""dangerous"" and ""un-American."" Put aside whether Biden is right or Trump is trying to pressure delinquent allies. Joe Biden passed up a softball Super Bowl interview. So why is he getting in front of the cameras now? One, he’s trying to get push Congress to pass the military aid bill. Two, he’s trying to change the subject from his own questionable memory in that wake of that stinging special counsel’s report. Three, he is finally heeding the advice of those who say he needs to do more television to prove his competence and dim the focus on every gaffe or misstatement. What’s fascinating is the spin of each party when it comes to backing their candidate. Democrats are hitting the airwaves saying Biden is sharp and laser-focused in private, and counsel Robert Hur has no business airing his personal criticism of the president’s mental acuity. And everyone is getting sustained exposure to a system that generally favors political maneuvering over actual results. Get the latest updates from the 2024 campaign trail, exclusive interviews and more at our Fox News Digital election hub. Howard Kurtz is the host of FOX News Channel's MediaBuzz (Sundays 11 a.m.-12 p.m. ET). Based in Washington, D.C., he joined the network in July 2013 and regularly appears on Special Report with Bret Baier and other programs."


In [26]:
# Checking the end of CNN articles after we run our function
df[['source', 'url', 'content_end_preview']] [df['source'] == 'cnn']

Unnamed: 0,source,url,content_end_preview
0,cnn,https://www.cnn.com/2024/02/12/politics/cq-brown-nato-trump/index.html,"Chairman of the Joint Chiefs of Staff Gen. CQ Brown said Monday that “US credibility is at stake” in the wake of comments from former President Donald Trump that he would encourage Russia to “do whatever the hell they want” to NATO partners that don’t meet spending guidelines on defense. Asked by NBC News about Trump’s admission that he would not abide by the collective-defense clause at the heart of NATO if reelected, Brown said that the alliance is strong and has been around for 75 years. “I think we have a responsibility to uphold those alliances,” Brown told NBC’s Lester Holt in an interview airing Monday evening. “US credibility is at stake with each of our alliances, and US leadership is still needed, wanted, and watched.” “The US is committed,” Brown added. “And that’s the message I communicate, and that’s the message that’s been received.” Brown’s remarks come as Trump, the 2024 Republican front-runner, has come under fire for his comments over the weekend indicating he does not intend to defend NATO allies from Russian attack if he is reelected. Former President Donald Trump speaks as he holds a campaign rally at Coastal Carolina University ahead of the South Carolina Republican presidential primary in Conway, South Carolina, on February 10. Sam Wolfe/Reuters Related article Trump will pull US out of NATO if he wins election, ex-adviser warns At a rally in South Carolina Saturday, Trump recalled a conversation he had while president with “one of the presidents of a big country,” who asked him whether the US would defend them from a Russian invasion even if they “don’t pay.” “No, I would not protect you,” he recalled saying. “In fact, I would encourage them to do whatever the hell they want. You got to pay. You got to pay your bills.” According to NBC , Brown said that he realizes there will be “various dialogue in discussions at the political level,” but that he will focus on “continuing to build and strengthen our relationship with NATO.” “My job is to make sure that we are doing everything we can with our NATO allies on the military aspect,” he said, “and I’ll continue to do that throughout.” Brown, who was sworn in as the chairman of the Joint Chiefs of staff last fall, serves as the principal military advisor to the president. The military officer who serves in the role does so at the pleasure of the president, meaning that if Trump were reelected this year, Brown could serve as his chairman unless he appointed another officer. Gen. Mark Milley , Brown’s predecessor and Trump’s chairman of the Joint Chiefs throughout his tenure, was a frequent target of Trump’s ire, and the two have taken verbal - sometimes subtle - shots at each other after Trump left the White House. In late September, at the change of command ceremony between Milley and Brown, the outgoing chairman made it a point to underscore where his loyalty and the loyalty of the military should lie. “We don’t take an oath to a king, or a queen, or a tyrant or a dictator. And we don’t take an oath to a wannabe dictator,” Milley said at the time, in a veiled reference to Trump. “We take an oath to the Constitution and we take an oath to the idea that is America – and we’re willing to die to protect it.” Milley chose to deliver the scathing criticism of his former boss in his last address as the nation’s top general as he stood next to President Joe Biden. In a continuation of the acrimonious feud between the two, Trump fired back on social media, calling Milley a “moron” and “STUPID & VERY DANGEROUS!” Brown addressed recent criticism of his boss Monday following mention of apparent memory lapses in special counsel Robert Hur’s report , calling Biden “pretty sharp.” Asked whether he was surprised by the comments made about the president’s memory, Brown said he was, adding they were “not characteristic of what I’ve seen.” “He’s got a very good grasp of the issues,” Brown said. “He asks, I think, very pertinent questions.” Iran likely ‘not looking for a broader conflict’ with US As the US is navigating increasingly high tensions in the Middle East — from Iran-backed groups in Iraq and Syria, the Iran-backed Houthis in Yemen, and Israel’s campaign in Gaza against Hamas — Brown echoed comments made by other US officials that deterrence is key. Broadly, the US is focused on deterring “any further aggression,” Brown said, while also protecting American forces. Asked whether he believed Iran wants a war with the US, Brown responded, “I don’t know that they do.” “Having watched Iran operate, they will do things through their militia groups and others to put pressure, to achieve their objectives,” the chairman said. “At the same time, not looking for a broader conflict with the United States.” There have been at least 170 attacks on US and coalition forces in Iraq, Syria and Jordan since October 17. The Pentagon said Monday that those attacks have resulted in 186 wounded or killed in action — including 130 traumatic brain injuries. Three US soldiers were killed in a drone attack in January on a US outpost in Jordan"
1,cnn,https://www.cnn.com/2024/02/12/politics/trump-endorse-michael-whatley-lara-trump-rnc/index.html,"Former President Donald Trump has endorsed North Carolina Republican Party chairman Michael Whatley to succeed Ronna McDaniel as Republican National Committee chair. Alongside endorsing Whatley, Trump said in a statement that he will also throw his support behind Lara Trump, his daughter-in-law, for the role of co-chairwoman at the RNC. Trump’s endorsement solidifies his effort to bring about a leadership change and exert control over the committee. As part of the plan, Chris LaCivita, Trump’s co-campaign manager, has been tapped to serve as the RNC chief operating officer but will continue in his current role. “For these reasons, I think my friend Michael Whatley should be the RNC’s next leader. Michael h﻿as been with me from the beginning, has done a great job in his home state of North Carolina, and is committed to election integrity, which we must have to keep fraud out of our election so it can’t be stolen,” Trump said in a statement. “My very talented daughter-in-law, Lara Trump, has agreed to run as the RNC Co-Chair. Lara is an extremely talented communicator and is dedicated to all that MAGA stands for. She has told me she wants to accept this challenge and would be GREAT!” he also said"
2,cnn,https://www.cnn.com/2024/02/12/politics/senate-foreign-aid-bill-ukraine/index.html,"The Senate is inching closer to final passage of a $95.3 billion foreign aid bill with assistance for Ukraine and Israel after working through the weekend to make progress on the package. But in a sign of the grim odds facing the bill in the House, Speaker Mike Johnson has harshly criticized the package – and many House Republicans are opposed to further aid to Ukraine. The Senate has continued to move forward on the bill as Trump has argued the US should not grant foreign aid unless it is a loan, signaling opposition to the legislation. Trump has also indicated he would encourage Russian aggression against any NATO member country that doesn’t pay enough. In a statement on Monday, Johnson criticized the bill, citing its lack of border provisions, saying, the Senate “should have gone back to the drawing board to amend the current bill to include real border security provisions that would actually help end the ongoing catastrophe.” Johnson had previously opposed a broader bill that did include border provisions. The speaker has characterized those provisions as insufficient, despite the fact that they were the product of bipartisan negotiations and included restrictive border measures. A number of Senate Republicans either defended or downplayed Trump’s NATO comments on Monday. “I’m 100% behind him,” Sen. Tommy Tuberville said of Trump saying NATO members should be paying their dues or risk Putin invading their country. The Alabama Republican also suggested European allies should be “very worried” about an invasion, saying they should protect themselves and not rely on the US, adding the country “can’t protect everybody.” Sen. Roger Marshall of Kansas said European allies who are worried about Trump’s comments need to “get over it.” “You know, they need to get over it. They need to stand up and be tough. We need to secure our own border first. We need to take care of things here at home first. When we secure our own border, when we take care of home, great. Let’s help other people as well,” the Republican from Kansas said. In the Senate, consideration of the bill has crawled along for days. There is still no time agreement to speed up passage of the bill as Sen. Rand Paul has vowed to drag out the timeline in protest over the legislation. Any one senator can slow down the process and force the Senate to take time-consuming votes to arrive at final passage. Paul continued to dig in on Sunday, saying that he will hold out until “hell freezes over.” He indicated he is ready to hold the floor by speaking on the issue of national debt and other matters. “I love to talk. That’s one of my favorite things to do,” he said. “We do this for a purpose,” Paul said. “I don’t like being here. … I’m not here because it’s fun, I’m here because I don’t think enough people are talking about the danger of the debt.” On Sunday, just hours before the start of the Super Bowl, the chamber took a key vote to move the package forward by a tally of 67 to 27 . There are expected to be two more procedural votes Monday evening. The foreign aid package includes billions of dollars to support Ukraine and for security assistance for Israel, as well as humanitarian assistance for civilians in Gaza, the West Bank and Ukraine. The bill includes $60 billion to support Ukraine in its fight against Russia, $14.1 billion in security assistance for Israel, $9.2 billion in humanitarian assistance and $4.8 billion to support regional partners in the Indo-Pacific region, among other provisions, according to the Senate Appropriations Committee. Lawmakers are moving forward with the foreign aid bill after Republicans blocked the broader bill that would have combined the foreign aid with a bipartisan border deal. Republicans had initially demanded that border security be part of the bill, but went on to reject the bipartisan deal amid forceful attacks on the measure by Trump and top House Republicans. Over the weekend, Trump also wrote on Truth Social that the US should stop providing foreign aid unless it is structured as a loan, another sign of the political pressure Republicans continue to face amid efforts to send funding to US allies"
3,cnn,https://www.cnn.com/2024/02/12/politics/bidens-meeting-with-jordanian-king-comes-at-flashpoint-in-israel-hamas-war/index.html,"President Joe Biden and King Abdullah II of Jordan met Monday aiming to figure out how to move the Israel-Hamas war into a new phase in which Israeli hostages are released and fighting stops for a prolonged period of time. “The key elements of the deal are on the table,” Biden said while addressing reporters alongside the king at the White House. “There are gaps that remain, but I’ve encouraged Israeli leaders to keep working to achieve the deal. The United States will do everything possible to make it happen.” Biden did not elaborate on what the “gaps” in the deal are. He added that a planned Israeli operation in the southern Gaza city of Rafah should not go forward without a “credible plan” to protect civilians. “Many people there have been displaced, displaced multiple times,” Biden said from the White House on Monday, “fleeing the violence to the north, and now they’re packed into Rafah, exposed and vulnerable.” “They need to be protected,” Biden added. Biden added that he and Abdullah discussed “a hostage deal between Israel and Hamas, which would bring immediate and sustained period of calm into Gaza, for at least six weeks, which we could then take the time to build into something more enduring,” during the meeting. But Abdullah, the first Arab leader to visit the White House since Hamas’ attack on Israel on October 7, went further – reiterating his call for a complete ceasefire, which Biden has so far resisted. “We cannot stand by and let this continue,” the Jordanian king told reporters. “We need a lasting ceasefire now. This war must end.” He said it was essential the United Nations Relief and Works Agency for Palestine Refugees in the Near East, the main United Nations agency responsible for Gaza, continues to receive funding. The agency warned earlier this month it may have to halt its work in Gaza after the US and other nations withdrew support last month over allegations some of its staff were involved with Hamas’ October 7 attack. Abdullah also said in his remarks that a ground operation in Rafah would amount to devastation, adding it would “produce another humanitarian catastrophe.” “The situation is already unbearable for over a million people who have been pushed into Rafah since the war started,” Abdullah said. While the Jordanian king called Biden a “dear friend” and said the president’s leadership is “key to addressing this conflict,” the open rifts between Biden and Abdullah underscored the delicate diplomatic balance the president is facing as the war in Gaza enters its fifth month – and as he faces a possible inflection point in his presidency. In the wake of special counsel Robert Hur’s report , which contained politically embarrassing passages about the president’s memory, Biden is facing perhaps the most scrutiny of his presidency over his mental acuity. The 81-year-old president’s age is his biggest political problem, and the special counsel’s report has struck a nerve, as evidenced by Biden’s amped-up news conference just hours after it published. But that domestic political pressure has not obscured the foreign crises that have occupied much of the president’s term in office, and it was during that news conference that Biden leveled his latest ramped-up criticism at Israel, calling its response to the October 7 terror attack “over the top.” The president was aiming to show off his command of the issues at hand when King Abdullah, a key regional ally who has been critical of Israel’s campaign in Gaza, comes to the West Wing. The president’s supporters have frequently pointed to the Israel-Hamas war as evidence of Biden’s mental faculties being in good shape. Vice President Kamala Harris on Friday pushed back against concerns about the president’s age as she recounted in detail the experience serving alongside Biden in the aftermath of Hamas’ October 7 attack, noting that she was in “almost every meeting” with him and his national security team in the days that followed. Biden sat for interviews with Hur on October 8 and 9. “The president was in front of and on top of it all, asking questions and requiring that America’s military and intelligence community and diplomatic community would figure out and know – how many people are dead, how many Americans, how many hostages, is the situation stable?” Harris said. And Democratic Rep. Daniel Goldman of New York, who spoke by phone with Biden a day ahead of his October 8 interview with Hur, said the president was “sharper than anyone I’ve spoken to” about the situation in the Middle East. In his meeting with King Abdullah, Biden had some high-pressure issues to work through as the Jordanians have called on the White House to put more pressure on Israel over its campaign against Hamas in Gaza, which has taken an immense humanitarian toll. Jordan, Egypt and the Palestinian Authority canceled a planned meeting with Biden less than 24 hours before a planned four-way summit in the Jordanian capital, Amman, in October when he traveled to Israel. The cancellation followed a massive blast in Gaza’s Al-Ahli Baptist Hospital that reportedly killed hundreds of Palestinians. The Jordanian and Egyptian governments have called for a ceasefire in Gaza, while Biden has resisted making a similar call. And the meeting came just weeks after three American soldiers were killed during an attack at a base in Jordan last month, prompting the US to launch dozens of retaliatory strikes that targeted Iran-backed militias. But the top order of business was how to achieve a cessation to fighting that also involves the release of hostages still held by Hamas since the October 7 terror attack on Israel. There are 136 hostages being held in Gaza, including 132 who were captured during Hamas’ October 7 attack. Twenty-nine of the hostages are dead, according to the Israeli prime minister’s office. Biden and Israeli Prime Minister Benjamin Netanyahu discussed a deal to secure the release of hostages in Gaza at length on Sunday, according to a senior administration official, who cautioned that while a framework is in place, gaps remain. Over the last several months, the US has attempted to put more pressure on the Israeli government to support a “humanitarian pause” in its war against Hamas. But those efforts have yielded little success. Last week, Netanyahu called Hamas’ recent proposals for a ceasefire and hostage deal in Gaza “delusional.” Secretary of State Antony Blinken previously said negotiations toward an agreement would continue despite the Israeli prime minister’s comments, which Blinken said were referencing the “absolute non-starters” in the proposal. The full Hamas response proposes three phases, each lasting 45 days, including the withdrawal of Israeli troops from Gaza, a massive humanitarian effort, and freedom of movement for people throughout Gaza, according to a copy obtained by CNN"
4,cnn,https://www.cnn.com/2024/02/12/politics/trump-supreme-court-immunity-filing/index.html,"Former President Donald Trump on Monday asked the Supreme Court to step into the charged dispute over whether he may claim immunity from prosecution, once again pressing the nine justices to resolve a question that could undermine his campaign for a second term. Trump asked the Supreme Court to temporarily block a scathing and unanimous decision from the DC Circuit handed down last week that flatly rejected his claims of immunity from election subversion charges brought by special counsel Jack Smith. “Conducting a months-long criminal trial of President Trump at the height of election season will radically disrupt President Trump’s ability to campaign against President Biden,” Trump’s attorneys wrote in their request. RENO, NEVADA - DECEMBER 17: Republican presidential candidate former U.S. President Donald Trump delivers remarks during a campaign rally at the Reno-Sparks Convention Center on December 17, 2023 in Reno, Nevada. Former U.S. President Trump held a campaign rally as he battles to become the Republican Presidential nominee for the 2024 Presidential election. (Photo by Justin Sullivan/Getty Images) Justin Sullivan/Getty Images Related article Takeaways from the scathing appeals court ruling denying immunity to Donald Trump The DC Circuit’s ruling “threatens immediate irreparable injury to the First Amendment interests of President Trump and tens of millions of American voters, who are entitled to hear President Trump’s campaign message as they decide how to cast their ballots in November.” The emergency filing instantly shoves the Supreme Court into the unusual position of having to juggle two politically fraught matters involving the front-runner for the Republican presidential nomination. Four days ago, the court heard a historic argument over whether Trump should be disqualified from the ballot for his actions on January 6, 2021. How the Supreme Court responds to Trump’s request will have huge influence on whether – and how quickly – the former president will be put on trial for criminal allegations as he seeks the Republican nomination and presidency again. Trump took the unusual step of saying he might seek to two avenues to appeal the earlier decision: An appeal to the Supreme Court and, potentially, a separate rehearing before the full DC Circuit. That move underscored Trump’s unusual position in a case where he was incentivized to file at the Supreme Court but also wants to preserve his ability to keep the immunity question before the appeals court. “Former President Trump is making it rather clear that his goal is to stretch out these proceedings as much as possible,” said Steve Vladeck, CNN Supreme Court analyst and professor at the University of Texas School of Law. While that won’t necessarily stop the justices from treating the filing as an appeal, “it underscores Trump’s unambiguous endgame here, which is to play out the string for as long as the courts will let him,” Vladeck added. A panel of three DC Circuit judges set an aggressive timeline for Trump to appeal, nudging him directly to the Supreme Court and effectively requiring him to make his request by Monday. A key part of Trump’s legal strategy has been to delay his criminal cases until after the 2024 election. The fight over Trump’s immunity had already prompted US District Judge Tanya Chutkan to postpone a trial date previously set for March 4. Chief Justice John Roberts, who handles emergency appeals from the DC Circuit, will likely set out a schedule quickly that will offer clues about the timing. Smith, eager to avoid further delay in starting a trial, is expected to push for a quick resolution. Generally, it takes the votes of five justices to grant a stay. At issue is a striking 57-page unanimous opinion from the DC Circuit on Tuesday that allowed Trump to face charges for actions he took while in office and brushed aside his claims that former presidents are immune from such prosecution. “We cannot accept that the office of the Presidency places its former occupants above the law for all time thereafter,” the court wrote. “Former President Trump lacked any lawful discretionary authority to defy federal criminal law and he is answerable in court for his conduct.” Broadly, Trump argued in court that presidents might be hesitant to act if they were concerned about the prospect of criminal charges after they left office. His criminal indictment in the 2020 election interference probe, if allowed to stand, would have a “chilling effect” on future administrations, he said. But US Circuit Judges Karen LeCraft Henderson, Florence Pan and J. Michelle Childs rejected all of Trump’s arguments that those principles conferred sweeping protections to a former president. The judges were clear that the allegations against Trump are serious and left no question they believe the charges can be prosecuted. The panel repeatedly eviscerated Trump’s alleged behavior after the 2020 presidential election as unpresidential and constituting an assault on American institutions. The case has already made its way to the Supreme Court once before. In December, the justices rejected a request from Smith to leapfrog the DC Circuit and decide the immunity question on an expedited basis. At that time, the court did not explain its reasoning and there were no noted dissents. Trump’s lawyers referenced the earlier decision in the very first line of their brief, quoting the renowned late Yankees catcher Yogi Berra. “This application is ‘déjà vu all over again,’” Trump’s attorneys wrote. During more than two hours of oral arguments Thursday in the separate ballot case, most of the justices appeared willing to side with Trump on the question of whether he can appear on the ballot or if his actions on January 6 made him ineligible under the 14th Amendment’s “insurrectionist ban.” The court may have to decide how it wants to handle the former president’s immunity claim at the same time it is drafting an opinion in the ballot case. Together, the cases have thrust the court into the middle of this year’s presidential election in a way it has largely managed to avoid since its decision in Bush v. Gore effectively decided the 2000 election between former President George W. Bush and former Vice President Al Gore. This story has been updated with additional details."
...,...,...,...
328,cnn,https://www.cnn.com/2024/02/15/politics/emhoff-meeting-jewish-arab-youth-gaza/index.html,"Second gentleman Doug Emhoff slipped into New York on Thursday for a small meeting with youth leaders selected from across the Jewish American, Muslim American and Palestinian American communities, marking the administration’s latest move in its evolving response to the Israel-Hamas war . The event, hosted by United Nations Ambassador Linda Thomas-Greenfield and Rashad Hussain, the US ambassador-at-large for international religious freedom, was kept quiet by administration officials hoping to facilitate a conversation, rather than a debate or another opportunity for the protests that have followed President Joe Biden and Vice President Kamala Harris across the country. But Emhoff is not involved with administration policy, so the discussion was not about a ceasefire nor negotiations over a hostage release nor the White House’s maneuvering with Israeli Prime Minister Benjamin Netanyahu. Nor was it about ongoing political fallout, according to a person in the room. It focused on how the situation has affected people in America, billed as “a roundtable discussion on the sharp rise in Antisemitism, Islamophobia, and other forms of hate after the October 7 terrorist attacks in Israel.” In January, the Anti-Defamation League recorded a 361% rise in reported antisemitic incidents in the three months after the Hamas attack began on October 7 compared to the same period in 2022. In November, the Council on American-Islamic Relations said it had received an “unprecedented” rise in reported anti-Arab and anti-Muslim bias incidents in the month following the start of the war. “These communities have felt real pain, fear, and isolation,” Thomas-Greenfield said in a statement to CNN. “In this moment in which it’s all too easy to focus on what divides us, we must carve out moments like today’s roundtable to remember what unites us: to find shared humanity, and shared purpose, in one another’s stories, and work together to create a lasting peace for all.” Though he was working on the issue before, Emhoff’s portfolio as second gentleman has increasingly been consumed with countering hate across the country since October 7. He’s had community conversations with leaders in Michigan, California and at Cornell University in New York. But Thursday’s event – coordinated by Andrew Weinstein, who was appointed by President Joe Biden in 2022 to serve as a public delegate to the United Nations – was the first structured specifically around youth leaders, and across Jewish Americans and Arab Americans at once. The event itself was kept closed to the public and press, but the person in the room described an honest conversation defined by participants talking about their own experiences over the last few months, with Emhoff, Thomas-Greenfield and Hussain mostly listening. Emhoff finished by talking about the need to keep building coalitions and executing on the national strategy to combat antisemitism and the coming strategy on combating Islamophobia, the person in the room said. An aide to the US mission to the United Nations explained that the names of the participants were kept private to protect sensitive discussions. “Obviously everyone didn’t agree on policy, but everyone agreed on the need to combat hate and to work against dehumanization,” said another person familiar with the event."
329,cnn,https://www.cnn.com/2024/02/15/politics/trump-makes-final-pitch-to-supreme-court-in-fraught-immunity-case/index.html,"Former President Donald Trump made his final pitch Thursday to the Supreme Court in his effort to pause a trial over the election subversion charges brought by special counsel Jack Smith. “There are overwhelming reasons why the case should not go to trial ‘in three months or less,’” Trump told the Supreme Court in a 16-page filing . “With any other defendant, it would be virtually unthinkable for the case to go to trial so soon, and ‘wildly unfair’ to do so.” Trump claims former presidents must have immunity from such charges to avoid political reprisals when they leave office. So far, two lower federal courts have balked at that argument. The Supreme Court is expected to decide on Trump’s request within a few days. With Trump’s final brief in hand, a question of timing is now squarely before the high court. The justices could deny Trump’s emergency request to temporarily block a DC Circuit ruling against him on the immunity issue or they could agree to further consider his case. A view of the U.S. Supreme Court on Thursday morning January 4, 2024 in Washington, DC. Drew Angerer/Getty Images Related article Special counsel urges Supreme Court to deny Trump’s request for delay in immunity case The first outcome would clear the way for a US District court in Washington, DC, to schedule a trial on the underlying election charges, which Smith is eager to resolve before the November election. The second could signal the justices may schedule oral arguments in the case later this year, potentially pushing a trial back several months. On Monday, Trump asked the Supreme Court to block a unanimous decision from the DC Circuit handed down last week that rejected his claims of immunity from the election subversion charges. “Without immunity from criminal prosecution, the presidency as we know it will cease to exist,” Trump told the Supreme Court. Two days later – ahead of deadline – Smith argued in his own brief that Trump had not met the standard to pause proceedings in his case. It generally takes support from five justices to secure such a pause. “The charged crimes strike at the heart of our democracy,” Smith wrote in a filing Wednesday. “The public interest in a prompt trial is at its zenith where, as here, a former president is charged with conspiring to subvert the electoral process so that he could remain in office.”"
330,cnn,https://www.cnn.com/2024/02/15/politics/white-house-letter-special-counsel/index.html,"The White House wrote to Attorney General Merrick Garland the day before special counsel Robert Hur’s report was released, vehemently objecting to aspects of the report – including its “multiple denigrating statements” about the president’s memory. The letter was the culmination of a months-long attempt by Biden’s team to weigh in on the investigation into President Joe Biden’s handling of classified documents through letters to Hur and eventually to Hur’s boss, the attorney general. Ultimately, Hur did not appear to heed calls by Biden’s lawyers to produce a narrow and concise report limited to the facts of the charging decision. Instead, the 388-page report — which declined to bring charges — sparked a political firestorm by making repeated references to Biden’s inability to recall dates and details. “We object to the multiple denigrating statements about President Biden’s memory which violate longstanding DOJ practice and policy,” Biden’s personal lawyer, Bob Bauer, and White House counsel, Edward Siskel, wrote in a three-page letter to Garland on February 7. “This report goes further to include allegations that the President has a failing memory in a general sense, an allegation that has no law enforcement purpose.” The lawyers wrote that “a global and pejorative judgment on the President’s powers of recollection in general is uncalled for and unfounded.” In the February 7 letter, Biden’s lawyers likened Hur’s efforts in his report to those of James Comey, the FBI director in 2016 who criticized then-candidate Hillary Clinton for her use of a private email server, despite not bringing charges. The lawyers wrote Hur’s report “mirrors one of the most widely-recognized examples in recent history of inappropriate prosecutor criticism of uncharged conduct.” “The FBI and DOJ personnel’s criticism of uncharged conduct during investigations in connection with the 2016 election was found to violate ‘long-standing Department practice and protocol,’” the lawyers wrote. Politico and The New York Times published copies of the letter. The White House counsel’s office declined to comment. The letter underscores the deep anger generated by Hur’s report inside the White House and lays bare tensions between Biden’s team and Garland. CNN has previously reported that frustrations have run high inside the White House regarding some of Garland’s decisions related to the classified documents matter. The letter to Garland also objected to Hur’s characterization of Biden’s practice of retaining personal diaries, saying that even though “prior Presidents have done exactly the same thing,” the special counsel described the behavior as “totally irresponsible.” Hur’s report, which concluded that criminal charges were not warranted against Biden regarding his handling of classified documents, made numerous references to the president’s memory and recall issues that were allegedly exhibited during his multi-hour interview with Hur in October. Since the report’s release, the White House and the president’s lawyers have strenuously objected to the handling of the investigation and, in particular, the references to Biden’s memory issues as gratuitous and inappropriate. A day after Hur’s report was released publicly, a top Justice Department official responded by defending the document and its inclusion of details about Biden’s memory. “The context in which this information is used in the report makes it appropriate under Department policy and the Special Counsel regulations. The identified language is neither gratuitous nor unduly prejudicial because it is not offered to criticize or demean the President,” wrote Associate Deputy Attorney General Bradley Weinsheimer to Biden’s team. Weinsheimer, the department’s top career official, also pushed back on Biden’s lawyers’ comparison to Comey, calling it “inapt,” as Comey was an FBI director and not a special counsel. He said Hur’s report was “readily distinguishable from Director Comey’s press conference” about the Clinton email server. On February 12, four days after the Hur report was released, Bauer and Richard Sauber, special counsel to the president, responded to Weinsheimer – to once again disagree with the DOJ’s assessment that Hur’s report was consistent with the department’s policy and practice"
331,cnn,https://www.cnn.com/2024/02/15/politics/mike-turner-republican-reaction/index.html,"House Intelligence Chairman Mike Turner is facing sharp criticism – including from within his own party – as some GOP lawmakers are complaining that a statement the Ohio Republican made Wednesday was alarmist, “constituted poor judgement” and should be investigated. Some of Turner’s colleagues believe he was drawing attention to a Russian threat to build support for Ukraine aid and others believe it was his effort to push for his version of a Foreign Intelligence Surveillance Act overhaul that was facing a vote later in the day. “It’s f***ing bullsh*t,” one prominent House Republican told CNN of Turner’s move. “He ought to lose his job. He did that to get his way on FISA.” Others were just as unnerved by the move. “I don’t know what his motives were it just to me, it just looks bad,” Republican Rep. Tim Burchett of Tennessee told CNN’s Manu Raju. “I have a bad feeling about this.” Another Tennessee Republican, Rep. Andy Ogles, sent a scathing letter to House Speaker Mike Johnson on Thursday calling for an investigation into Turner for his vague statements, calling his disclosure “reckless” and asserting he disregarded of the consequences. “This act constituted poor judgement at a minimum and a complete breach of trust influenced by the pursuit of a political agenda at a maximum … it is with great reticence that I formally request an inquiry as to any impact the Chairman’s statements may have had on U.S. foreign and domestic policy,” Ogles wrote. CNN has reached out to Johnson for comment on the letter. Not long after Turner released his statement warning of a “serious national security threat,” Johnson arranged a last-minute news conference in the Capitol to make clear there’s “no need for public alarm” and clarifying he had already requested a briefing on the matter a month ago. Turner did not give the House speaker’s office a heads up prior to making his public statement characterizing the matter as a “serious national security threat,” according to a source familiar with the matter. Turner did, however, inform Johnson he would be requesting that the Biden administration declassify information about the threat, another source said. GOP Rep. Dan Crenshaw of Texas, a member of the Intelligence Committee, defended Turner’s actions and blasted members who want him investigated. Asked about the letter from Ogles, Crenshaw said, Ogles “needs to f***ing check himself.” “He doesn’t know what he is talking about,” Crenshaw said. “It’s absurd. It’s a deeply absurd action. I am tired of people making extremely passionate, opinionated actions based on no knowledge.” Asked about some of the theories members are floating that Turner was using his position to try and drum up support for Ukraine aid and other theories as to why Turner released the statement, Crenshaw said the timing was just coincidence. “It’s coincidence,” he said. “There was notifications from the intelligence community that made it just clearer now than before. This was just the time.” Turner issued a statement defending his decision to make the national security threat available to members, but the statement does not address why Turner decided to make his statement public, which has caused backlash. Turner said he worked with the Biden administration to notify Congress about the national security threat and that the administration cleared his statement with the top Democrat on the panel before it got sent to lawmakers. The committee voted on Tuesday to release the information to lawmakers, and Turner said that vote was 23-1. Turner’s initial statement came hours before the House was scheduled to take a vote related to reauthorizing the Foreign Intelligence Surveillance Act, a vote that was later postponed due to opposition from the conservative House Freedom Caucus, whose members had been pushing for amendment votes. Members of the Freedom Caucus later tanked a procedural vote on unrelated legislation Wednesday evening. Ogles tied Turner’s statement to trying to drum up support for passing the FISA package. “In hindsight, it has become clear that the intent was not to ensure the safety of our homeland and the American people, but rather to ensure additional funding for Ukraine and passage of an unreformed Foreign Intelligence Surveillance Act (FISA).” GOP Rep. Matt Gaetz accused Turner of “gaslighting the country” about the seriousness of the security threat members were briefed on Thursday in order to promote Ukraine funding. “We put out our concern with Chairman Turner gaslighting the country on these things,” Gaetz told CNN’s Manu Raju. “I worry that the motivation to draw so much attention to this is less about intelligence and national security and more about a politician who wants to send $60 billion to Ukraine,” he said, “and wants to reauthorize the Foreign Intelligence Surveillance Act that we want to see substantially curtailed because of the spying abuses.” CNN reported Wednesday that the US has new intelligence on Russian military capabilities related to its efforts to deploy a nuclear anti-satellite system in space, according to multiple sources familiar with the intelligence. The intelligence was briefed to Congress and key US allies, and some lawmakers say it is serious enough that it should be declassified and made public. One Democratic member with deep national security experience said Wednesday that they had never before received that kind of urgent summons over a national security matter during their time in Congress — and that the intelligence they saw when they arrived was not urgent enough to justify Turner’s comments. CNN’s Lauren Fox, Morgan Rimmer, Manu Raju and Sam Fossum contributed to this report. This story has been updated with additional developments."


Now that we have finished setting up our remove_last_sentence function, we can remove the "content_end_preview" column and reset the pandas display setting.

In [27]:
# Drop the content end preview column
df.drop(columns=['content_end_preview'], inplace=True)

# Reset the columns so that we go back to truncating the "content" column
pd.reset_option('display.max_colwidth')

### Standardizing Entity Names

Let's start by combining specified word pairs so that we handle cases where two or more words refer to a single entity, such as "Hunter Biden" or "Supreme Court." 

In [28]:
# Combine specified word pairs

df['content'] = df['content'].str.replace(r'\bHunter\s+Biden\b', 'HunterBiden', regex=True)
df['content'] = df['content'].str.replace(r'\bHUNTER\s+Biden\b', 'HunterBiden', regex=True)
df['content'] = df['content'].str.replace(r'\bSouth\s+Carolina\b', 'SouthCarolina', regex=True)
df['content'] = df['content'].str.replace(r'\bSupreme\s+Court\b', 'SupremeCourt', regex=True)
df['content'] = df['content'].str.replace(r'\bsupreme\s+court\b', 'SupremeCourt', regex=True)
df['content'] = df['content'].str.replace(r'\bCourt\s+House\b', 'CourtHouse', regex=True)
df['content'] = df['content'].str.replace(r'\bcourt\s+house\b', 'CourtHouse', regex=True)
df['content'] = df['content'].str.replace(r'\bHouse\s+Representative\b', 'HouseRep', regex=True)
df['content'] = df['content'].str.replace(r'\bhouse\s+representative\b', 'HouseRep', regex=True)
df['content'] = df['content'].str.replace(r'\bHouse\s+Rep\b', 'HouseRep', regex=True)
df['content'] = df['content'].str.replace(r'\bhouse\s+rep\b', 'HouseRep', regex=True)
df['content'] = df['content'].str.replace(r'\bvoters\b', 'voter', regex=True)
df['content'] = df['content'].str.replace(r'\bvotes\b', 'vote', regex=True)
df['content'] = df['content'].str.replace(r'\bdemocratic(?:s)?\b', 'Democrat', case=False, regex=True)
df['content'] = df['content'].str.replace(r'\bDemocrats\b', 'Democrat', regex=True)
df['content'] = df['content'].str.replace(r'\brepublicans\b', 'Republican', regex=True)
df['content'] = df['content'].str.replace(r'\bRepublicans\b', 'Republican', regex=True)
df['content'] = df['content'].str.replace(r'\bwhite\s+house\b', 'WhiteHouse', regex=True)
df['content'] = df['content'].str.replace(r'\bWhite\s+house\b', 'WhiteHouse', regex=True)
df['content'] = df['content'].str.replace(r'\bNew\s+York\b', 'NewYork', regex=True)

We now need to account for variations in Biden's and Trump's names. This is because we want the model to see the different spellings as referring to the same thing. We accomplish this with the following code. 

In [29]:
biden_variations = df['content'].str.findall(
    r'\bPresident\s+Joe\s+Biden\b|'  
    r'\bPresident\s+Biden\b|'         
    r'\bJoe\s+Biden(?:’s)?\b|'             
    r'\bBiden(?:’s|s)?\b|'  
    r'\bBIDEN\b|' 
    r'\bBiden\'s\b'                  
)
# Flatten the list of variations
biden_variations = [item for sublist in biden_variations for item in sublist]

# Count occurrences of each variation
biden_variation_counts = Counter(biden_variations)

# Replace variations of Biden's name with 'Biden' in the content column
df['content'] = df['content'].str.replace(
    r'\bPresident\s+Joe\s+Biden\b|'  
    r'\bPresident\s+Biden\b|'         
    r'\bJoe\s+Biden(?:’s)?\b|'             
    r'\bBiden(?:’s|s)?\b|'  
    r'\bBIDEN\b|' 
    r'\bBiden\'s\b'    
    , 'Biden', regex=True)

print("Occurrences of different variations of Biden's name:")
for variation, count in biden_variation_counts.items():
    print(f"{variation}: {count}")

Occurrences of different variations of Biden's name:
President Joe Biden: 205
Biden: 1587
Biden’s: 328
President Biden: 192
Bidens: 34
Joe Biden: 232
Joe Biden’s: 22
President Joe Biden: 1
BIDEN: 134


In [30]:
# Count occurrences of 'Biden' after replacement
biden_count_after = df['content'].str.count('Biden').sum()

print("Occurrences of Biden after replacement:", biden_count_after)

Occurrences of Biden after replacement: 2921


In [31]:
# Find all variations of Trump's name in the content column
trump_variations = df['content'].str.findall(
    r'\bPresident\s+Donald\s+Trump\b|'  
    r'\bPresident\s+Trump\b|'         
    r'\bDonald\s+Trump(?:’s)?\b|'             
    r'\bTrump(?:’s)?\b|'   
    r'\bTRUMP(?:’S)?\b|'  
    r'\bFormer\s+President\s+Donald\s+Trump\b|' 
    r'\bDonald\s+J(?:ohn)?\s+Trump\b'            
)

# Flatten the list of variations
trump_variations = [item for sublist in trump_variations for item in sublist]

# Count occurrences of each variation
trump_variation_counts = Counter(trump_variations)

# Replace variations of Trump's name with 'Trump' in the content column
df['content'] = df['content'].str.replace(
    r'\bPresident\s+Donald\s+Trump\b|'  
    r'\bPresident\s+Trump\b|'         
    r'\bDonald\s+Trump(?:’s)?\b|'             
    r'\bTrump(?:’s)?\b|'   
    r'\bTRUMP(?:’S)?\b|'  
    r'\bFormer\s+President\s+Donald\s+Trump\b|' 
    r'\bDonald\s+J(?:ohn)?\s+Trump\b'               
    , 'Trump', regex=True) 

print("Occurrences of different variations of Trump's name:")
for variation, count in trump_variation_counts.items():
    print(f"{variation}: {count}")

Occurrences of different variations of Trump's name:
President Donald Trump: 115
Trump’s: 671
Trump: 2486
Former President Donald Trump: 64
President Trump: 105
Donald Trump: 151
Donald Trump’s: 31
Donald Trump: 1
TRUMP: 40
TRUMP’S: 2
President Trump: 1


In [32]:
# Count occurrences of 'Trump' after replacement
trump_count_after = df['content'].str.count('Trump').sum()

print("Occurrences of Trump after replacement:", trump_count_after)

Occurrences of Trump after replacement: 3677


### Data Preprocessing Pipeline

Next, we remove stop words and punctuation, and then we tokenize and prepare data for use in the model. 

In [33]:
punctuation = set(punctuation) # speeds up comparison
sw = stopwords.words("english")
extra_sw = ['cnn', 'fox', 'news', 'said', '–', '-', '--', '—','told', 'would', '…read', 'get', 'could', 
            'also', "it’s", 'think', 'time', 'even', 'former', 'party', 'i', '“i', 'she’s', 'says', 
            'images', 'getty', 'im', 'this', 'we', 'it', 'digital', 'the', 'that', 'story', 'doesn']
sw.extend(extra_sw)
whitespace_pattern = re.compile(r"\s+")

def remove_stop(tokens) :
    
    return [t for t in tokens if t.lower() not in sw]

def remove_punctuation(text, punct_set=punctuation) : 
    
    return("".join([ch for ch in text if ch not in punct_set]))

def tokenize(text) : 
    
    return re.split(whitespace_pattern, text)

def prepare(text, pipeline) : 
    
    tokens = str(text)
    
    for transform in pipeline : 
        tokens = transform(tokens)
        
    return(tokens)

pipeline = [str.lower, remove_punctuation, tokenize, remove_stop]

In [34]:
# Tokenize and preprocess each row
df['tokens'] = df['content'].apply(lambda x: prepare(x, pipeline=pipeline))

# Print the resulting dataframe
df.head()

Unnamed: 0,source,url,content,tokens
0,cnn,https://www.cnn.com/2024/02/12/politics/cq-bro...,Chairman of the Joint Chiefs of Staff Gen. CQ ...,"[chairman, joint, chiefs, staff, gen, cq, brow..."
1,cnn,https://www.cnn.com/2024/02/12/politics/trump-...,Trump has endorsed North Carolina Republican P...,"[trump, endorsed, north, carolina, republican,..."
2,cnn,https://www.cnn.com/2024/02/12/politics/senate...,The Senate is inching closer to final passage ...,"[senate, inching, closer, final, passage, 953,..."
3,cnn,https://www.cnn.com/2024/02/12/politics/bidens...,Biden and King Abdullah II of Jordan met Monda...,"[biden, king, abdullah, ii, jordan, met, monda..."
4,cnn,https://www.cnn.com/2024/02/12/politics/trump-...,Trump on Monday asked the SupremeCourt to step...,"[trump, monday, asked, supremecourt, step, cha..."


## Basic Descriptive Statistics

Below we write a function that allows us to view the results of our preprocessed data from CNN and Fox News. We find the total words (tokens), unique words, total characters, lexical diversity, and most common words for each news organization.

In [35]:
def descriptive_stats(tokens, num_tokens = 50, verbose=True) :
    """
        Given a list of tokens, print number of tokens, number of unique tokens, 
        number of characters, lexical diversity (https://en.wikipedia.org/wiki/Lexical_diversity), 
        and num_tokens most common tokens. Return a list with the number of tokens, number
        of unique tokens, lexical diversity, and number of characters. 
    
    """
    num_tokens = len(tokens)
    num_unique_tokens = len(set(tokens))  
    lexical_diversity = num_unique_tokens / num_tokens
    num_characters = sum(len(s) for s in tokens)
    
    if verbose :        
        print(f"There are {num_tokens} tokens in the data.")
        print(f"There are {num_unique_tokens} unique tokens in the data.")
        print(f"There are {num_characters} characters in the data.")
        print(f"The lexical diversity is {lexical_diversity:.3f} in the data.")        
        print (f"The ten most common words are:")
        print(Counter(tokens).most_common(10))
        
    return([num_tokens, num_unique_tokens,
            lexical_diversity,
            num_characters])

In [36]:
# calls to descriptive_stats here

print("CNN News Stats\n")

descriptive_stats(
    [token for tokens in df[df['source'] == 'cnn']['tokens']for token in tokens])

print('\n')
print("FoxNews Stats\n")

descriptive_stats(
    [token for tokens in df[df['source'] == 'foxnews']['tokens']for token in tokens])

CNN News Stats

There are 141795 tokens in the data.
There are 11384 unique tokens in the data.
There are 932028 characters in the data.
The lexical diversity is 0.080 in the data.
The ten most common words are:
[('trump', 3003), ('biden', 1599), ('republican', 1047), ('election', 869), ('democrat', 823), ('president', 800), ('house', 735), ('us', 731), ('senate', 527), ('case', 487)]


FoxNews Stats

There are 50972 tokens in the data.
There are 7478 unique tokens in the data.
There are 331598 characters in the data.
The lexical diversity is 0.147 in the data.
The ten most common words are:
[('biden', 924), ('house', 476), ('trump', 443), ('president', 282), ('republican', 272), ('democrat', 242), ('us', 221), ('special', 213), ('bobulinski', 200), ('senate', 196)]


[50972, 7478, 0.1467079965471239, 331598]

## Saving the Data

Below we create a csv file to use for modeling.

In [37]:
# save df for next step

df.to_csv('MSADS509_News_Project_Dataset/cleaned.csv', index=False)