In [291]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import nltk
nltk.download('all')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

from nltk.corpus import stopwords

from nltk.tokenize import word_tokenize

from nltk.stem import WordNetLemmatizer


[nltk_data] Error loading all: <urlopen error [SSL:
[nltk_data]     CERTIFICATE_VERIFY_FAILED] certificate verify failed:
[nltk_data]     unable to get local issuer certificate (_ssl.c:1000)>


In [85]:
def get_author_and_date(url_series):
    result = []
    for url in url_series:
        page = requests.get(url)
        soup = BeautifulSoup(page.content, 'html.parser')
        article_date = soup.select('div[data-testid="article-body-timestamp"] time')[0].contents[0]
        article_author = soup.select('span.byline-name')[0].text
        result.append((article_author, article_date))
    return result

In [86]:
def get_NBC(limit=None):
    url = 'https://www.nbcnews.com/Israel-Hamas-war-Gaza-Strip-conflict'
    response = requests.get(url)
    if response.status_code == 200:
        html = response.text
        soup = BeautifulSoup(html, 'html.parser')
        df_NBC = pd.DataFrame(columns=['headline', 'description', 'url'])
        article_wrappers = soup.find_all('div', class_='wide-tease-item__info-wrapper')
        headlines_NBC = [article.find('h2', class_='wide-tease-item__headline') for article in article_wrappers]
        df_NBC['headline'] = [headline_NBC.text for headline_NBC in headlines_NBC]
        df_NBC['description'] = [article.find('div', class_='wide-tease-item__description').text for article in article_wrappers]
        df_NBC['url'] = [headline.parent['href'] for headline in headlines_NBC]
        authors_and_dates = get_author_and_date(df_NBC['url'])
        df_NBC['author'] = [author_and_date[0] for author_and_date in authors_and_dates]
        df_NBC['date'] = [author_and_date[1] for author_and_date in authors_and_dates]
        
        if limit:
            df_NBC = df_NBC.head(limit)
        return df_NBC
    else:
        print("Error: Response code", response.status_code)

In [87]:
results = get_NBC()
print(results)

                                             headline  \
0   Hamas releases video of Israeli hostage Hersh ...   
1   Aid into Gaza increased after Israel killed 7 ...   
2   GOP senator bashes Marjorie Taylor Greene: 'Sh...   
3   Biden signs Ukraine aid, Israel funding and Ti...   
4   Orphaned by an airstrike and saved from her de...   
5   Deadline for deal to end Columbia protest enca...   
6   Nancy Pelosi says Israel's Netanyahu 'should r...   
7   Satellite photos suggest Iran air defense rada...   
8   People taken into custody at NYU as pro-Palest...   
9   Police arrest pro-Palestinian supporters at en...   
10  Columbia holds classes virtually as Jewish lea...   

                                          description  \
0   The Israeli American 23-year-old angrily asks ...   
1   The increase is “nothing that we’re celebratin...   
2   Sen. Thom Tillis said Greene was damaging the ...   
3   The Senate on Tuesday passed the package of fo...   
4   “I don’t know if we should

In [292]:
# Function to perform sentiment analysis
def analyze_sentiment(text):
    sid = SentimentIntensityAnalyzer()
    scores = sid.polarity_scores(text)
    if scores['compound'] >= 0.05:
        return 'Positive'
    elif scores['compound'] <= -0.05:
        return 'Negative'
    else:
        return 'Neutral'

# Function to get data from NBC News and perform sentiment analysis
def get_NBC_sentiment(url, limit=None):
    response = requests.get(url)
    if response.status_code == 200:
        html = response.text
        soup = BeautifulSoup(html, 'html.parser')
        df_NBC = pd.DataFrame(columns=['headline', 'description', 'url'])
        article_wrappers = soup.find_all('div', class_='wide-tease-item__info-wrapper')
        headlines_NBC = [article.find('h2', class_='wide-tease-item__headline') for article in article_wrappers]
        descriptions_NBC = [article.find('div', class_='wide-tease-item__description').text for article in article_wrappers]
        urls_NBC = [headline.parent['href'] for headline in headlines_NBC]
        
        # Analyze sentiment for each description
        sentiments = [analyze_sentiment(description) for description in descriptions_NBC]

        # Create DataFrame
        df_NBC['headline'] = [headline.text for headline in headlines_NBC]
        df_NBC['description'] = descriptions_NBC
        df_NBC['url'] = urls_NBC
        df_NBC['sentiment'] = sentiments
        
        if limit:
            df_NBC = df_NBC.head(limit)
        
        return df_NBC
    else:
        print("Error: Response code", response.status_code)
        return None

# Example usage
url = 'https://www.nbcnews.com/Israel-Hamas-war-Gaza-Strip-conflict'
df_NBC_sentiment = get_NBC_sentiment(url, limit=5)
print(df_NBC_sentiment)

                                            headline  \
0  Campus protests live updates: Camps cleared ou...   
1  Patriots owner says 'Jew hatred' on U.S. colle...   
2  Orphaned by an airstrike and saved from her de...   
3  U.S. and 17 other countries demand that Hamas ...   
4  Share of college students blaming Hamas for Oc...   

                                         description  \
0  Columbia University said earlier in the day th...   
1  The longtime Columbia University supporter als...   
2  Her miraculous birth brought brief hope to her...   
3  The Hamas leader in Gaza has so far rejected a...   
4  The single biggest share of college students (...   

                                                 url sentiment  
0  https://www.nbcnews.com/news/us-news/live-blog...  Negative  
1  https://www.nbcnews.com/business/business-news...  Positive  
2  https://www.nbcnews.com/news/world/israel-hama...  Positive  
3  https://www.nbcnews.com/politics/national-secu...  Negative  
4

In [36]:
#Ukraine
def get_NBC_Ukraine(limit=None):
    url = 'https://www.nbcnews.com/world/russia-ukraine-news'
    response = requests.get(url)
    if response.status_code == 200:
        html = response.text
        soup = BeautifulSoup(html, 'html.parser')
        df_NBC_Ukraine = pd.DataFrame(columns=['headline', 'description', 'url'])
        article_wrappers = soup.find_all('div', class_='wide-tease-item__info-wrapper')
        headlines_NBC = [article.find('h2', class_='wide-tease-item__headline') for article in article_wrappers]
        df_NBC_Ukraine['headline'] = [headline_NBC.text for headline_NBC in headlines_NBC]
        df_NBC_Ukraine['description'] = [article.find('div', class_='wide-tease-item__description').text for article in article_wrappers]
        df_NBC_Ukraine['url'] = [headline.parent['href'] for headline in headlines_NBC]
        authors_and_dates = get_author_and_date(df_NBC_Ukraine['url'])
        df_NBC_Ukraine['author'] = [author_and_date[0] for author_and_date in authors_and_dates]
        df_NBC_Ukraine['date'] = [author_and_date[1] for author_and_date in authors_and_dates]
        
        if limit:
            df_NBC_Ukraine = df_NBC_Ukraine.head(limit)
        print(df_NBC_Ukraine)
        return df_NBC_Ukraine
        
    else:
        print("Error: Response code", response.status_code)
    
results = get_NBC_Ukraine()

                                             headline  \
0   House passes critical aid to Ukraine, Israel a...   
1   Congress passes bill that could unlock billion...   
2   House advances Ukraine and Israel aid with rar...   
3   Man suspected of aiding Zelenskyy assassinatio...   
4   Germany arrests men accused of spying on U.S. ...   
5   House Republicans release aid bills for Israel...   
6   Ukraine sees an 'extremely strange' double sta...   
7   Mike Johnson unveils plan to move Israel, Ukra...   
8   Mike Johnson signals he won't put Senate suppl...   
9   Luxury yachts and other myths: How Republican ...   
10  Russian military trainers arrive in Niger as r...   

                                          description  \
0   The bills amount to a $95 billion aid package ...   
1   More than $6 billion of the $300 billion in fr...   
2   The foreign aid package moved forward with the...   
3   If convicted, the man could face up to eight y...   
4   Prosecutors accuse the two

In [295]:
# Performing the sentiment analysis
def analyze_sentiment(text):
    sid = SentimentIntensityAnalyzer()
    scores = sid.polarity_scores(text)
    if scores['compound'] >= 0.05:
        return 'Positive'
    elif scores['compound'] <= -0.05:
        return 'Negative'
    else:
        return 'Neutral'

#  getting data from NBC News and perform sentiment analysis
def get_NBC_Ukraine_sentiment(url, limit=None):
    response = requests.get(url)
    if response.status_code == 200:
        html = response.text
        soup = BeautifulSoup(html, 'html.parser')
        df_NBC = pd.DataFrame(columns=['headline', 'description', 'url'])
        article_wrappers = soup.find_all('div', class_='wide-tease-item__info-wrapper')
        headlines_NBC = [article.find('h2', class_='wide-tease-item__headline') for article in article_wrappers]
        descriptions_NBC = [article.find('div', class_='wide-tease-item__description').text for article in article_wrappers]
        urls_NBC = [headline.parent['href'] for headline in headlines_NBC]
        
        # Analyze sentiment for each description
        sentiments = [analyze_sentiment(description) for description in descriptions_NBC]

        # DataFrame
        df_NBC['headline'] = [headline.text for headline in headlines_NBC]
        df_NBC['description'] = descriptions_NBC
        df_NBC['url'] = urls_NBC
        df_NBC['sentiment'] = sentiments
        
        if limit:
            df_NBC = df_NBC.head(limit)
        
        return df_NBC
    else:
        print("Error: Response code", response.status_code)
        return None

# Example usage
url = 'https://www.nbcnews.com/world/russia-ukraine-news'
df_NBC_Ukraine_sentiment = get_NBC_Ukraine_sentiment(url, limit=5)
print(df_NBC_Ukraine_sentiment)

                                            headline  \
0  New U.S. military aid is finally heading to Uk...   
1  Russia detains ally of Defense Minister Shoigu...   
2  Mitch McConnell says Tucker Carlson and Trump'...   
3  They were promised jobs in Russia. They ended ...   
4  Rep. Marjorie Taylor Greene says Speaker Mike ...   

                                         description  \
0  For many in Kyiv, the news that the aid packag...   
1  Deputy Defense Minister Timur Ivanov was accus...   
2  The Senate Republican leader pointed to Carlso...   
3  Frustrated families of Indian migrant workers ...   
4  Greene indicated she’ll move forward with trig...   

                                                 url sentiment  
0  https://www.nbcnews.com/news/world/us-military...  Negative  
1  https://www.nbcnews.com/news/world/russia-ukra...  Positive  
2  https://www.nbcnews.com/politics/congress/mcco...   Neutral  
3  https://www.nbcnews.com/news/world/russia-ukra...  Positive  
4

In [42]:
def get_author_and_date_NYT(url_series):
    result = []
    for url in url_series:
        page = requests.get(url)
        soup = BeautifulSoup(page.content, 'html.parser')
        # Extracting article date
        article_date_element = soup.select('div.css-agrsgss')
        article_date = article_date_element[0].contents[0] if article_date_element else None
        # Extracting article author
        article_author_element = soup.select('div.css-1i4y2t3')
        article_author = article_author_element[0].text if article_author_element else None
        result.append((article_author, article_date))
    return result

In [72]:
def get_NYT_IsraelPalestine(limit=None):
    url = 'https://www.nytimes.com/news-event/israel-hamas-gaza'
    response = requests.get(url)
    if response.status_code == 200:
        html = response.text
        soup = BeautifulSoup(html, 'html.parser')
        df_NYT_IsraelPalestine = pd.DataFrame(columns=['headline', 'description', 'url'])
        article_wrappers = soup.find_all('article', class_='css-1l4spti')
        headlines_NYTIsraelPalestine = [article.find('h3', class_='css-1kv6qi') for article in article_wrappers]
        dateNYTs = soup.find("span",{"data-testid": "todays-date"})
        df_NYT_IsraelPalestine['headline'] = [headline_NYT.text.strip() if headline_NYT else None for headline_NYT in headlines_NYTIsraelPalestine]
        df_NYT_IsraelPalestine['description'] = [article.find('p', class_='css-1pga48a').text.strip() if article.find('p', class_='css-1pga48a') else None for article in article_wrappers]
        df_NYT_IsraelPalestine['url'] = [headline.parent['href'] for headline in headlines_NYTIsraelPalestine]
        #df_NYT_IsraelPalestine['date'] = [dateNYTs.text.strip() if dateNYTs else None]
        # authors_and_dates = get_author_and_date(df_NYT_IsraelPalestine['url'])
        # df_NYT_IsraelPalestine['author'] = [author_and_date[0] for author_and_date in authors_and_dates]
        # df_NYT_IsraelPalestine['date'] = [author_and_date[1] for author_and_date in authors_and_dates]
        if limit:
            df_NYT_IsraelPalestine = df_NYT_IsraelPalestine.head(limit)
        print(df_NYT_IsraelPalestine)
        return df_NYT_IsraelPalestine
    else:
        print("Error: Response code", response.status_code)

get_NYT_IsraelPalestine()

                                            headline  \
0  Pro-Palestinian Encampments Surface at Campuse...   
1  Biden Says Aid Bill for Ukraine and Israel Wil...   
2  The Venice Biennale and the Art of Turning Bac...   
3         Why Is This Seder Unlike All Other Seders?   
4  Robert Kraft’s Fight Against Antisemitism Bega...   
5  ‘A Good Day for World Peace’: Biden Signs Aid ...   
6            Senate Passes $95.3 Billion Aid Package   
7  Seder Is About Family, Food, Freedom. And Now,...   
8  Summer Lee, ‘Squad’ Member, Wins Democratic Ho...   
9  At Brooklyn Seder Protest, Jewish New Yorkers ...   

                                         description  \
0  Many students have been demanding that their s...   
1  President Biden signed an aid package for Ukra...   
2  Every art institution now speaks of progress, ...   
3  The dinner parties held by Shtick, a pop-up se...   
4  The New England Patriots owner said this week ...   
5  The $95.3 billion measure comes after months

Unnamed: 0,headline,description,url
0,Pro-Palestinian Encampments Surface at Campuse...,Many students have been demanding that their s...,/2024/04/24/us/pro-palestinian-encampments-pro...
1,Biden Says Aid Bill for Ukraine and Israel Wil...,President Biden signed an aid package for Ukra...,/video/us/politics/100000009431377/biden-ukrai...
2,The Venice Biennale and the Art of Turning Bac...,"Every art institution now speaks of progress, ...",/2024/04/24/arts/design/venice-biennale-review...
3,Why Is This Seder Unlike All Other Seders?,"The dinner parties held by Shtick, a pop-up se...",/2024/04/24/style/passover-seder-shtick.html
4,Robert Kraft’s Fight Against Antisemitism Bega...,The New England Patriots owner said this week ...,/2024/04/24/business/robert-kraft-donation-col...
5,‘A Good Day for World Peace’: Biden Signs Aid ...,The $95.3 billion measure comes after months o...,/2024/04/24/us/politics/biden-ukraine-israel-a...
6,Senate Passes $95.3 Billion Aid Package,The bipartisan bill includes $60.8 billion for...,/video/us/politics/100000009430626/senate-bipa...
7,"Seder Is About Family, Food, Freedom. And Now,...","At Passover Seders, many families addressed th...",/2024/04/23/nyregion/seder-israel-gaza-war.html
8,"Summer Lee, ‘Squad’ Member, Wins Democratic Ho...","A first-term representative, Ms. Lee defeated ...",/2024/04/23/us/politics/lee-pennsylvania-prima...
9,"At Brooklyn Seder Protest, Jewish New Yorkers ...",Approximately 200 were arrested after pro-Pale...,/2024/04/23/nyregion/schumer-seder-protest-bro...


In [73]:
#NYT Ukraine_Russia
def get_NYT_UkraineRussia(limit=None):
    url = 'https://www.nytimes.com/news-event/ukraine-russia'
    response = requests.get(url)
    if response.status_code == 200:
        html = response.text
        soup = BeautifulSoup(html, 'html.parser')
        df_NYT_Ukraine_Russia = pd.DataFrame(columns=['headline', 'description', 'url'])
        article_wrappers = soup.find_all('article', class_='css-1l4spti')
        headlines_NYTUkraineRussia = [article.find('h3', class_='css-1kv6qi') for article in article_wrappers]
        df_NYT_Ukraine_Russia['headline'] = [headline_NYT.text if headline_NYT else None for headline_NYT in headlines_NYTUkraineRussia]
        #df_NYT_IsraelPalestine['headline'] = [headline_NYT.text for headline_NYT in headlines_NYTIsraelPalestine]
        df_NYT_Ukraine_Russia['description'] = [article.find('p', class_='css-1pga48a').text.strip() if article.find('p', class_='css-1pga48a') else None for article in article_wrappers]
        #df_NYT_IsraelPalestine['description'] = [article.find('p', class_='css-1n8orw4').text for article in article_wrappers]
        df_NYT_Ukraine_Russia['url'] = [headline.parent['href'] for headline in headlines_NYTUkraineRussia]
        # authors_and_dates = get_author_and_date(df_NYT_Ukraine_Russia['url'])
        # df_NYT_Ukraine_Russia['author'] = [author_and_date[0] for author_and_date in authors_and_dates]
        # df_NYT_Ukraine_Russia['date'] = [author_and_date[1] for author_and_date in authors_and_dates]
        
        if limit:
            df_NYT_Ukraine_Russia = df_NYT_Ukraine_Russia.head(limit)
        print(df_NYT_Ukraine_Russia)
        return df_NYT_Ukraine_Russia
        
    else:
        print("Error: Response code", response.status_code)

get_NYT_UkraineRussia()

                                            headline  \
0                    Ukraine’s Race to Hold the Line   
1  Russian Attacks Crush Factories and Way of Lif...   
2  Ukraine Aid Divides Republicans, After Trump T...   
3  U.S. Military Aid for Ukraine Could Soon Flow ...   
4     Do Tanks Have a Place in 21st-Century Warfare?   
5  Mariinsky Dancers Barred From Youth Ballet Gal...   
6  Ukrainians Wait, Nervously, to See if U.S. Wil...   
7  Johnson Turns to Democrats to Bring Up Ukraine...   
8  Germany Arrests 2 in Spying and Sabotage Case ...   
9  Ukraine’s Bold Biennale Show, Two Years Into t...   

                                         description  \
0  The days of lightning battlefield breakthrough...   
1  The assaults have all but destroyed the factor...   
2  His most vocal allies in the House, however, w...   
3  What would $60 billion buy? Lots of air-defens...   
4  As explosive drones gain battlefield prominenc...   
5  Two dancers from the Russian company were se

Unnamed: 0,headline,description,url
0,Ukraine’s Race to Hold the Line,The days of lightning battlefield breakthrough...,/interactive/2024/04/22/world/europe/ukraine-w...
1,Russian Attacks Crush Factories and Way of Lif...,The assaults have all but destroyed the factor...,/2024/04/21/world/europe/ukraine-industry-donb...
2,"Ukraine Aid Divides Republicans, After Trump T...","His most vocal allies in the House, however, w...",/2024/04/20/us/politics/trump-ukraine-house-vo...
3,U.S. Military Aid for Ukraine Could Soon Flow ...,What would $60 billion buy? Lots of air-defens...,/2024/04/20/us/politics/weapons-aid-ukraine-mi...
4,Do Tanks Have a Place in 21st-Century Warfare?,As explosive drones gain battlefield prominenc...,/2024/04/20/world/europe/tanks-ukraine-drones-...
5,Mariinsky Dancers Barred From Youth Ballet Gal...,Two dancers from the Russian company were set ...,/2024/04/19/arts/dance/mariinsky-dancers-barre...
6,"Ukrainians Wait, Nervously, to See if U.S. Wil...","From the battlefield to battered cities, soldi...",/2024/04/19/world/europe/ukraine-aid-congress....
7,Johnson Turns to Democrats to Bring Up Ukraine...,"Democrats, who stepped in late Thursday to sav...",/2024/04/18/us/politics/democrats-vote-ukraine...
8,Germany Arrests 2 in Spying and Sabotage Case ...,"The two men, dual citizens of both countries, ...",/2024/04/18/world/europe/germany-russia-spying...
9,"Ukraine’s Bold Biennale Show, Two Years Into t...","At the art world’s seminal event, national pav...",/2024/04/18/arts/design/venice-biennale-ukrain...


In [None]:
def analyze_sentiment(text):
    sid = SentimentIntensityAnalyzer()
    sentiment_scores = sid.polarity_scores(text)
    return sentiment_scores['compound']

In [213]:
#get author and date CNN
def get_author_and_date_CNN(main_url):
    main_url = 'https://www.cnn.com/world/middleeast/israel'
    anchor_class = 'container__link'
    response = requests.get(main_url)
    soup = BeautifulSoup(response.content, 'html.parser')
    links = soup.find_all('a', href=True)
    for link in links:
        link['class'] = anchor_class
    authors = {}
    for link in links:
        url = link['href']
        linked_page_response = requests.get(url)
        linked_page_soup = BeautifulSoup(linked_page_response.content, 'html.parser')
        
        # Extract author information from the linked page
        author_element = linked_page_soup.find('span', class_='byline__name')
        if author_element:
            author = author_element.text.strip()
            authors['url'] = author
    print(authors)
    return authors
get_author_and_date_CNN() 

    

TypeError: get_author_and_date_CNN() missing 1 required positional argument: 'main_url'

In [93]:
def get_author_and_date_CNN(url_series):
    result = []
    for url in url_series:
        page = requests.get(url)
        soup = BeautifulSoup(page.content, 'html.parser')
        div_timestamp = soup.select('div.timestamp')[0]
        article_date = div_timestamp['data-uri'].split('/')[-1]
        article_author = soup.select('span.byline__name')[0].text
        result.append((article_author, article_date))
    return result
urls = ['https://www.cnn.com/2024/04/23/politics/senate-vote-foreign-aid/index.html', 'https://www.cnn.com/2024/04/24/middleeast/israeli-american-hostage-hersh-goldberg-intl/index.html']
author_and_date = get_author_and_date_CNN(urls)
print(author_and_date)
# <div class="timestamp" data-uri="cms.cnn.com/_components/timestamp/instances/clvb5ojq8000m07pa39dw40ek@published" data-editable="settings">


[('Morgan Rimmer', 'clvb5ojq8000m07pa39dw40ek@published'), ('Eyad Kourdi', 'clvdyf3id000m3np9e2ns5jnx@published')]


In [270]:
def get_CNN_Israel(limit=None):
    url = 'https://www.cnn.com/world/middleeast/israel'
    response = requests.get(url)
    if response.status_code == 200:
        html = response.text
        soup = BeautifulSoup(html, 'html.parser')
        df_CNN_Israel = pd.DataFrame(columns=['headline', 'description', 'urls', 'author_and_date']) 
        article_wrappers = soup.find_all('a', class_='container___link')
        # df_CNN_Israel['headline'] = soup.find('span', class_='container__headline-text')
        # headlines = [headline.text.strip() for headline in headlines_CNN]
        df_CNN_Israel['headline'] = [article.find('div', class_='container__headline').text.strip() for article in article_wrappers]  
        df_CNN_Israel['urls'] = [link['href'] for link in soup.find_all('div', class_='zone__items')[0].find_all('a', href=True)]
        # authors_and_dates = soup.find_all('div.class="byline__names') #get_author_and_date_CNN(df_CNN_Israel['url'])
        # if limit is not None:
        #     df_CNN_Israel = df_CNN_Israel.head(limit)
        
        return df_CNN_Israel

        # #headlines_CNN_Israel['url'] = [headline.parent['href'] for headline in headlines_CNN_Israel]
        # df_CNN_Israel['url'] = [headline.parent["a", "href"] for headline in headlines_CNN_Israel]
    #print(authors_and_dates)
    
    df_CNN_Israel()
    print(df_CNN_Israel)

In [271]:
get_CNN_Israel()


Unnamed: 0,headline,description,urls,author_and_date
0,,,/2024/04/25/middleeast/hamas-officials-say-gro...,
1,,,/2024/04/25/middleeast/hamas-officials-say-gro...,
2,,,/2024/04/24/middleeast/israeli-american-hostag...,
3,,,/2024/04/24/middleeast/israeli-american-hostag...,
4,,,/2024/04/25/politics/donald-trump-charlottesvi...,
5,,,/2024/04/25/politics/donald-trump-charlottesvi...,
6,,,/middleeast/live-news/israel-hamas-war-gaza-ne...,
7,,,/middleeast/live-news/israel-hamas-war-gaza-ne...,
8,,,/2024/04/24/politics/trump-looms-large-over-bi...,
9,,,/2024/04/24/politics/trump-looms-large-over-bi...,


In [272]:
# def get_CNN_Israel(limit=None):
#     url = "https://www.cnn.com/world/middleeast/israel"
#     response = requests.get(url)
#     if response.status_code == 200:
#         html = response.text
#         soup = BeautifulSoup(html, 'html.parser')
#         headlines_CNN = soup.find_all('span', class_='container__headline-text')
#         headlines = [headline.text.strip() for headline in headlines_CNN]
#         url = 
#         df_CNN_Israel = pd.DataFrame(columns=['headline', 'description', 'url', 'author_and_date'])
#         if limit:
#             headlines_CNN = headlines_CNN[:limit]
#         return headlines
#     else:
#         print("Error: Response code", response.status_code)
#     #print(df_CNN_Israel)
#     return []
        
# get_CNN_Israel()

SyntaxError: invalid syntax (992184547.py, line 9)

In [210]:
def get_CNN_Israel(limit=None):
    url = "https://www.cnn.com/world/middleeast/israel"
    response = requests.get(url)
    if response.status_code == 200:
        html = response.text
        soup = BeautifulSoup(html, 'html.parser')
        headlines_CNN = soup.find_all('span', class_='container__headline-text')
        headlines = [headline.text.strip() for headline in headlines_CNN]
        if limit:
            headlines = headlines[:limit]
        df_CNN_Israel = pd.DataFrame({'headline': headlines})
        return df_CNN_Israel
    else:
        print("Error: Response code", response.status_code)
        return pd.DataFrame()  # Return an empty DataFrame if there's an error

df = get_CNN_Israel()
print(df)

                                             headline
0   Hamas officials say group willing to disarm if...
1   Hamas releases video of hostage Hersh Goldberg...
2   Trump downplays deadly Charlottesville rally a...
3                   April 24, 2024 - Israel-Hamas war
4   Trump looms large over Biden’s long-shot talks...
5   Biden signs foreign aid bill providing crucial...
6   How the White House convinced Mike Johnson to ...
7   UN demands investigation after mass graves fou...
8   Here’s what’s in the foreign aid package for U...
9   How each US senator voted on the $95 billion f...
10                 6 surprises from a major news week
11  Washington wrestles with ‘new equation’ of dir...
12  Israeli and Iranian strikes transform Middle E...
13  US imposes sanctions on two groups for fundrai...
14  Police arrest pro-Palestinian protesters at Em...
15  Police arrest pro-Palestinian protesters at Em...
16  Police arrest pro-Palestinian protesters at Em...
17  Police arrest pro-Palest

In [215]:
def get_CNN_Ukraine(limit=None):
    url = "https://www.cnn.com/world/middleeast/israel"
    response = requests.get(url)
    if response.status_code == 200:
        html = response.text
        soup = BeautifulSoup(html, 'html.parser')
        headlines_CNN = soup.find_all('span', class_='container__headline-text')
        headlines = [headline.text.strip() for headline in headlines_CNN]
        if limit:
            headlines = headlines[:limit]
        df_CNN_Ukraine= pd.DataFrame({'headline': headlines})
        return df_CNN_Ukraine
    else:
        print("Error: Response code", response.status_code)
        return pd.DataFrame()  # Return an empty DataFrame if there's an error

df = get_CNN_Israel()
print(df)

['Hamas officials say group willing to disarm if Palestinian state is established', 'Hamas releases video of hostage Hersh Goldberg-Polin in proof he survived Oct. 7 injuries', 'Trump downplays deadly Charlottesville rally as a ‘peanut’ compared to Israel-Gaza protests', 'April 24, 2024 - Israel-Hamas war', 'Trump looms large over Biden’s long-shot talks on Israel-Saudi normalization', 'Biden signs foreign aid bill providing crucial military assistance to Ukraine', 'How the White House convinced Mike Johnson to back Ukraine aid', 'UN demands investigation after mass graves found at Gaza hospitals raided by Israel', 'Here’s what’s in the foreign aid package for\xa0Ukraine, Israel', 'How each US senator voted on the $95 billion foreign aid package', '6 surprises from a major news week', 'Washington wrestles with ‘new equation’ of direct attacks between Iran and Israel', 'Israeli and Iranian strikes transform\xa0Middle East geopolitics', 'US imposes sanctions on two groups for fundraising

In [287]:
def get_CNN_Ukraine(limit=None):
    url = 'https://www.cnn.com/world/europe/ukraine'
    response = requests.get(url)
    if response.status_code == 200:
        html = response.text
        soup = BeautifulSoup(html, 'html.parser')
        df_CNN_Ukraine = pd.DataFrame(columns=['headline', 'description', 'urls', 'author_and_date']) 
        article_wrappers = soup.find_all('a', class_='container___link')
        headlines_CNN = [article.find('div', class_='container__text').text.strip() for article in article_wrappers]
        df_CNN_Ukraine['headline'] = [headline_CNN.text if headlines_CNN else None for headline_CNN in headlines_CNN]
        df_CNN_Ukraine['urls'] = [link['href'] for link in soup.find_all('div', class_='zone__items')[0].find_all('a', href=True)]
        
    return df_CNN_Ukraine
    
# df_CNN_Ukraine()
# print(df_CNN_Ukraine)
# # [article.find('h3', class_='css-1kv6qi') for article in article_wrappers]
#         df_NYT_Ukraine_Russia['headline'] = [headline_NYT.text if headline_NYT else None for headline_NYT in headlines_NYTUkraineRussia]

In [288]:
get_CNN_Ukraine()

Unnamed: 0,headline,description,urls,author_and_date
0,,,/2024/04/24/europe/russian-minister-timur-ivan...,
1,,,/2024/04/24/europe/russian-minister-timur-ivan...,
2,,,/2024/04/24/politics/us-secretly-sent-long-ran...,
3,,,/2024/04/24/politics/us-secretly-sent-long-ran...,
4,,,/2024/04/24/world/ukraine-army-military-aid-in...,
5,,,/2024/04/24/world/ukraine-army-military-aid-in...,
6,,,/2024/04/23/europe/ukraine-consulates-mobiliza...,
7,,,/2024/04/23/europe/ukraine-consulates-mobiliza...,
8,,,/2024/04/24/politics/biden-signs-foreign-aid-b...,
9,,,/2024/04/24/politics/biden-signs-foreign-aid-b...,


In [283]:
df_CNN_Ukraine = get_CNN_Ukraine()
print(df_CNN_Ukraine)

    headline description                                               urls  \
0        NaN         NaN  /2024/04/24/europe/russian-minister-timur-ivan...   
1        NaN         NaN  /2024/04/24/europe/russian-minister-timur-ivan...   
2        NaN         NaN  /2024/04/24/politics/us-secretly-sent-long-ran...   
3        NaN         NaN  /2024/04/24/politics/us-secretly-sent-long-ran...   
4        NaN         NaN  /2024/04/24/world/ukraine-army-military-aid-in...   
5        NaN         NaN  /2024/04/24/world/ukraine-army-military-aid-in...   
6        NaN         NaN  /2024/04/23/europe/ukraine-consulates-mobiliza...   
7        NaN         NaN  /2024/04/23/europe/ukraine-consulates-mobiliza...   
8        NaN         NaN  /2024/04/24/politics/biden-signs-foreign-aid-b...   
9        NaN         NaN  /2024/04/24/politics/biden-signs-foreign-aid-b...   
10       NaN         NaN  /2024/04/24/politics/white-house-ukraine-fundi...   
11       NaN         NaN  /2024/04/24/politics/white