In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import datetime

headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
    }

def get_feed_article_titles_df(feedname,url):
    """
    Get article titles and create a DataFrame.
    
    Args:
        feedname (str): Name of the feed.
        url (str): URL of the XML feed.
    
    Returns:
        pandas.DataFrame: DataFrame containing article titles and feed name.
    """
    try:
        result = requests.get(url, headers=headers)

        soup = BeautifulSoup(result.text, "xml")        
        article_urls = [i.text for i in soup.findAll('link')]

        #The verge has the links in the id tag, if the list is empty with the link tag, try the id tag
        if len([item for item in article_urls if bool(item)])  == 0: 
            article_urls = [i.text for i in soup.findAll('id')]      
        
       
        #Parse it as html to get the links correctly, other wise In some websites, <media:title> is also returned as a link
        soup = BeautifulSoup(result.text, "html.parser")
        article_titles = [i.text for i in soup.findAll('title')]      
        
        df = pd.DataFrame({'Article_title': article_titles, 'Article_URL': article_urls[-len(article_titles):], 'Feedname': feedname})
        
        #Remove homepage from url list and empty url rows
        homepage = url.split('.com')[0] + '.com/'
        df = df[(df['Article_URL'] != homepage) & (df['Article_URL'] != '') ]        
        
        # Drop duplicate URLs
        df = df.drop_duplicates(subset=['Article_URL'], keep='first')

        df['Fetch_Date'] = str(datetime.datetime.now())

        return df

    except Exception as e:
        print("Error getting feed: ", e)
        return pd.DataFrame()

def get_article_text(url):
    try:
        result = requests.get(url[0][0], headers=headers)
        soup = BeautifulSoup(result.text, "html.parser")   
        return (soup.text)     
    except Exception as e:        
        try:
            result = requests.get(url[0], headers=headers)
            soup = BeautifulSoup(result.text, "html.parser")   
            return (soup.text)    
        except Exception as e:
            try:
                result = requests.get(url, headers=headers)
                soup = BeautifulSoup(result.text, "html.parser")   
                return (soup.text)    
            except Exception as e:
                print(e)
                return None


In [2]:
import sqlite3

db_name = 'RssFeeds.db'

def get_connection():
    """
    Establish a connection to a SQLite database.
    
    Args:
        db_name (str): Name of the SQLite database file.
    
    Returns:
        sqlite3.Connection: Connection object to the SQLite database.
    """
    try:
        con = sqlite3.connect(db_name)
        return con
    except sqlite3.Error as e:
        print("Error connecting to database: ", e)
        return None

def create_db():
    """
    Create a new SQLite database and execute the given query to create tables.
    
    Args:
        db_name (str): Name of the SQLite database file.
        query (str): SQL query to create tables in the database.
    
    Returns:
        sqlite3.Connection: Connection object to the SQLite database.
    """
    
    query = ["CREATE TABLE IF NOT EXISTS FEEDS( Feedname, Article_title UNIQUE, Article_URL, Duplicate, Fetch_Date, Summary)",
    "CREATE TABLE IF NOT EXISTS SUMMARY( Feedname, Article_URL,Summary)"]

    con = get_connection()
    
    if con is None:
        return None
    
    try:
        for item in query:
            cur = con.cursor()
            cur.execute(item)
            con.commit()
        #return con
    except sqlite3.Error as e:
        print("Error creating database: ", e)
        con.close()
        #return None
    
    print("DB created successfully")

def insert_to_db(data, query):
    """
    Insert data into SQLite database using executemany.
    
    Args:
        con (sqlite3.Connection): Connection object to the SQLite database.
        data (list of tuples): Data to be inserted into the database.
        query (str): SQL query for insertion.
    
    Returns:
        None
    """
    con = get_connection()

    if not data:
        print("No data to insert.")
        return
    
    try:
        cur = con.cursor()
        cur.executemany(query, data)
        con.commit()
    except sqlite3.Error as e:
        print("Error inserting data into database: ", e)
        con.rollback()

def insert_to_FEEDS(data):
    con = get_connection()

    if len(data) == 0:
        print("No data to insert.")
        return
    
    try:
        cur = con.cursor()

        query = "INSERT OR REPLACE INTO FEEDS(Article_title,Article_URL,Feedname,Fetch_Date) VALUES (?, ?, ?, ?)"

        cur.executemany(query, data)
        con.commit()
        con.close()
    except sqlite3.Error as e:
        print("Error inserting data into database: ", e)
        con.rollback()
        con.close()

def insert_to_FEEDS_with_summary(data):
    con = get_connection()

    if len(data) == 0:
        print("No data to insert.")
        return
    
    try:
        cur = con.cursor()

        query = "INSERT OR REPLACE INTO FEEDS(Feedname, Article_title,Article_URL,Duplicate,Fetch_Date,Summary) VALUES (?, ?, ?, ?, ?, ?)"

        cur.executemany(query, data)
        con.commit()
        con.close()
    except sqlite3.Error as e:
        print("Error inserting data into database: ", e)
        con.rollback()
        con.close()

def insert_to_Summary(data):
    con = get_connection()

    if len(data) == 0:
        print("No data to insert.")
        return
    
    try:
        cur = con.cursor()

        query = "INSERT OR REPLACE INTO SUMMARY(Feedname, Article_URL,Summary) VALUES (?, ?, ?)"

        cur.executemany(query, data)
        con.commit()
        con.close()
    except sqlite3.Error as e:
        print("Error inserting data into database: ", e)
        con.rollback()
        con.close()


def delete_from_db(tablename):
    """
    Delete data from SQLite database.
    
    Args:
        con (sqlite3.Connection): Connection object to the SQLite database.
        query (str): SQL query for deletion.
    
    Returns:
        None
    """
    con = get_connection()
    query = "DROP TABLE IF EXISTS " + tablename
    try:
        cur = con.cursor()
        cur.execute(query)
        con.commit()
    except sqlite3.Error as e:
        print("Error deleting data from database: ", e)
        con.rollback()


def query_db(query):
    """
    Execute a SQL query and fetch results from SQLite database.
    
    Args:
        con (sqlite3.Connection): Connection object to the SQLite database.
        query (str): SQL query to be executed.
    
    Returns:
        list of tuples: Result set fetched from the database.
    """
    con = get_connection()

    try:
        cur = con.cursor()
        cur.execute(query)
        return cur.fetchall()
    except sqlite3.Error as e:
        print("Error executing query: ", e)
        return []
    

In [3]:
from langchain_community.llms import HuggingFaceHub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import OpenAI




def get_article_summary(article_text):
    # Define the LLM
    #llm = HuggingFaceHub(repo_id='tiiuae/falcon-7b-instruct', huggingfacehub_api_token=huggingfacehub_api_token)
    llm = OpenAI(temperature=0, openai_api_key=openai_api_key,model = "gpt-3.5-turbo-instruct")		

    # Create a chat prompt template
    prompt = PromptTemplate.from_template("You are an English major with good command of the language. You are able to \
        succintly summarize the meaning behind large bodies of text. Using these skills summarize the text: {article_text}")
    #output_parser = StrOutputParser()

    chain = prompt | llm #| output_parser
    try:
        result = chain.invoke({"article_text": article_text})
        return result
    except Exception as e:
        print(e)
        return None


In [60]:
feedlist={'Engadget':'https://www.engadget.com/rss.xml', 
          'The Verge':'https://www.theverge.com/rss/index.xml',
          'Techcrunch':'https://techcrunch.com/feed/',
          'Ars Technica':'https://feeds.arstechnica.com/arstechnica/index',
          'Jalopnik':'https://jalopnik.com/rss'}  

def refresh_feeds():
    """
    Refresh feeds by retrieving and inserting data for each feed in the feedlist.

    This function iterates over each feed in the feedlist, retrieves article data,
    and inserts it into the corresponding database table.

    Note:
    - The feedlist dictionary should contain feed names as keys and their URLs as values.
    - The `get_feed_articles_df` function is expected to return a DataFrame with article titles
      and URLs for a given feed.
    - The `insert_to_FEEDS` function is expected to insert the DataFrame values into the 
      corresponding database table.

    Example usage:
    refresh_feeds()

    """
    new_feeds_df = pd.DataFrame()

    for feed in feedlist:
        print('Getting and inserting data for', feed)
        if new_feeds_df.shape[0] == 0:
            new_feeds_df = get_feed_article_titles_df(feed, feedlist[feed])        
        else:
            new_feeds_df = pd.concat([new_feeds_df,get_feed_article_titles_df(feed, feedlist[feed])])


    #Get new articles and remove existing articles from the fetched feed list. Logic is below
    '''
        dfA=pd.DataFrame({'A':[1,2,3],'B':[2,3,4]}) # new data
        dfB=pd.DataFrame({'A':[1,6],'B':[2,7]}) # existing data

        print(dfA.head())
        dfA.set_index('A',inplace=True)
        dfB.set_index('A',inplace=True)
        newdf=dfA.drop(dfB.index,errors='ignore')
    '''
    query = "select Article_title, Article_URL, feedname, Fetch_date from FEEDS where feedname = " + "'Jalopnik'"
    existing_feeds_df = pd.read_sql(query,get_connection())
    
    df = new_feeds_df.set_index('Article_URL').drop(existing_feeds_df['Article_URL'], errors='ignore').reset_index(drop=False)


    # for feed in feedlist:
    #     print('Getting and inserting data for', feed)
    #     df = get_feed_article_titles_df(feed, feedlist[feed])        
    #     insert_to_FEEDS(df.values)
    insert_to_FEEDS(df.values)   
    
    print('Added ', len(df), 'new Articles')

    df = pd.read_sql("Select * from FEEDS", get_connection())
    df.to_csv('feeds.csv', index=False,mode='a')

def summarize_feeds_and_store_in_db(n=None):
  
  df = pd.read_sql("Select * from FEEDS where Summary is null", get_connection())
  print('There are ', len(df), 'feeds that need to be summarized')

  for i in range(len(df)):    
    if i == n:
        break

    url = df.iloc[i]['Article_URL']
    print('Fetching and summarizing ', url)

    try:
        article_text = get_article_text(url)
    except Exception as e:
        print("Error fetching the article text")
        print(e)
        continue

    try:
        summary = get_article_summary(article_text)
        df.loc[df['Article_URL'] == url,'Summary'] = summary.strip()  
    except Exception as e:
        print("Error getting article summary")
        print(e)
        continue 
  
  df1 = df[df['Summary'].notnull()]

  insert_to_FEEDS_with_summary(df1.values)

#   df2 = pd.read_sql("Select * from FEEDS ", get_connection())
#   df2.to_csv('feeds.csv', index=False)


In [5]:
create_db()

DB created successfully


In [66]:
refresh_feeds()
summarize_feeds_and_store_in_db(10)

Getting and inserting data for Engadget


  k = self.parse_starttag(i)
  k = self.parse_starttag(i)


Getting and inserting data for The Verge
Getting and inserting data for Techcrunch


  k = self.parse_starttag(i)
  k = self.parse_starttag(i)


Getting and inserting data for Ars Technica
Getting and inserting data for Jalopnik


  k = self.parse_starttag(i)


Added  132 new Articles
There are  384 feeds that need to be summarized
Fetching and summarizing  https://arstechnica.com/?p=2022821
Fetching and summarizing  https://arstechnica.com/?p=2022851
Fetching and summarizing  https://arstechnica.com/?p=2022820
Fetching and summarizing  https://arstechnica.com/?p=2022823
Fetching and summarizing  https://arstechnica.com/?p=2022790
Fetching and summarizing  https://arstechnica.com/?p=2022794
Fetching and summarizing  https://arstechnica.com/?p=2022789
Fetching and summarizing  https://arstechnica.com/?p=2022688
Fetching and summarizing  https://www.engadget.com/jack-dorsey-claims-bluesky-is-repeating-all-the-mistakes-he-made-at-twitter-234326121.html?src=rss
Fetching and summarizing  https://www.engadget.com/apple-apologizes-for-its-tone-deaf-ad-that-crushed-human-creativity-to-make-an-ipad-211116524.html?src=rss


In [67]:
df = pd.read_sql("Select * from FEEDS ", get_connection())

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 469 entries, 0 to 468
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Feedname       469 non-null    object
 1   Article_title  469 non-null    object
 2   Article_URL    469 non-null    object
 3   Duplicate      1 non-null      object
 4   Fetch_Date     469 non-null    object
 5   Summary        95 non-null     object
dtypes: object(6)
memory usage: 22.1+ KB


In [65]:
summarize_feeds_and_store_in_db(10)

There are  395 feeds that need to be summarized
Fetching and summarizing  https://www.theverge.com/2024/5/9/24152926/threads-view-count-feature-now-available
Fetching and summarizing  https://www.theverge.com/2024/5/9/24152918/maryland-kids-code-online-privacy-act-netchoice
Fetching and summarizing  https://www.theverge.com/24152765/lego-retro-radio-icons
Fetching and summarizing  https://www.theverge.com/2024/5/9/24152782/amazon-apple-ipad-deal-sale
Fetching and summarizing  https://techcrunch.com/2024/05/09/retell-ai-lets-companies-build-agents-to-answer-their-calls/
Fetching and summarizing  https://techcrunch.com/2024/05/09/tiktok-automatically-label-ai-generated-content-created-other-platforms/
Fetching and summarizing  https://techcrunch.com/2024/05/09/india-weighs-delaying-caps-on-upi-market-share-in-win-for-phonepe-google-pay/
Fetching and summarizing  https://techcrunch.com/2024/05/08/thai-food-delivery-app-line-man-wongnai-weighs-ipo-in-thailand-us-in-2025/
Fetching and summa

Experiments below

In [47]:
new_feeds_df = pd.DataFrame()

for feed in feedlist:
    print('Getting and inserting data for', feed)
    if new_feeds_df.shape[0] == 0:
        new_feeds_df = get_feed_article_titles_df(feed, feedlist[feed])        
    else:
        new_feeds_df = pd.concat([new_feeds_df,get_feed_article_titles_df(feed, feedlist[feed])])


#Get all articles from the DB, and remove existing articles from the fetched feed list. Logic is below
'''
    dfA=pd.DataFrame({'A':[1,2,3],'B':[2,3,4]}) # new data
    dfB=pd.DataFrame({'A':[1,6],'B':[2,7]}) # existing data

    print(dfA.head())
    dfA.set_index('A',inplace=True)
    dfB.set_index('A',inplace=True)
    newdf=dfA.drop(dfB.index,errors='ignore')
'''
query = "select Article_title, Article_URL, feedname, Fetch_date from FEEDS where feedname = " + "'Jalopnik'"
existing_feeds_df = pd.read_sql(query,get_connection())

df = new_feeds_df.set_index('Article_URL').drop(existing_feeds_df['Article_URL'], errors='ignore').reset_index(drop=False)
df.info()

Getting and inserting data for Engadget


  k = self.parse_starttag(i)
  k = self.parse_starttag(i)


Getting and inserting data for The Verge
Getting and inserting data for Techcrunch


  k = self.parse_starttag(i)
  k = self.parse_starttag(i)


Getting and inserting data for Ars Technica
Getting and inserting data for Jalopnik
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102 entries, 0 to 101
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Article_URL    102 non-null    object
 1   Article_title  102 non-null    object
 2   Feedname       102 non-null    object
 3   Fetch_Date     102 non-null    object
dtypes: object(4)
memory usage: 3.3+ KB


  k = self.parse_starttag(i)


In [63]:
df = pd.read_sql("Select * from FEEDS ", get_connection())

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 469 entries, 0 to 468
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Feedname       469 non-null    object
 1   Article_title  469 non-null    object
 2   Article_URL    469 non-null    object
 3   Duplicate      1 non-null      object
 4   Fetch_Date     469 non-null    object
 5   Summary        74 non-null     object
dtypes: object(6)
memory usage: 22.1+ KB


In [64]:
df.to_csv('feeds.csv',index=False)

In [38]:
dfA=pd.DataFrame({'A':[1,2,3],'B':[2,3,4]}) # new data
dfB=pd.DataFrame({'A':[1,6],'B':[2,7]}) # existing data

print(dfA.head())
dfA.set_index('A',inplace=True)
dfB.set_index('A',inplace=True)
newdf=dfA.drop(dfB.index,errors='ignore')

newdf.index

   A  B
0  1  2
1  2  3
2  3  4


Index([2, 3], dtype='int64', name='A')

In [39]:
dfA.head()

Unnamed: 0_level_0,B
A,Unnamed: 1_level_1
1,2
2,3
3,4


In [40]:

print('Getting and inserting data for', 'Jalopnik')
new_feeds_df = get_feed_article_titles_df('Jalopnik', feedlist['Jalopnik'])        
new_feeds_df.info()
#insert_to_FEEDS(df.values)

# df = pd.read_sql("Select * from FEEDS", get_connection())
# df.to_csv('feeds.csv', index=False,mode='a')

   

Getting and inserting data for Jalopnik
<class 'pandas.core.frame.DataFrame'>
Index: 51 entries, 1 to 51
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Article_title  51 non-null     object
 1   Article_URL    51 non-null     object
 2   Feedname       51 non-null     object
 3   Fetch_Date     51 non-null     object
dtypes: object(4)
memory usage: 2.0+ KB


  k = self.parse_starttag(i)


In [41]:
query = "select Article_title, Article_URL, feedname, Fetch_date from FEEDS where feedname = " + "'Jalopnik'"
existing_feeds_df = pd.read_sql(query,get_connection())
existing_feeds_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75 entries, 0 to 74
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Article_title  75 non-null     object
 1   Article_URL    75 non-null     object
 2   Feedname       75 non-null     object
 3   Fetch_Date     75 non-null     object
dtypes: object(4)
memory usage: 2.5+ KB


In [42]:
newDf = new_feeds_df.set_index('Article_URL').drop(existing_feeds_df['Article_URL'], errors='ignore').reset_index(drop=False)
newDf

Unnamed: 0,Article_URL,Article_title,Feedname,Fetch_Date


In [281]:
df3[df3.duplicated(subset=['Article_URL'])]

Unnamed: 0,Article_title,Article_URL,Feedname,Fetch_Date,Duplicate,Summary


In [233]:
df = pd.read_sql("Select * from FEEDS where Summary is null", get_connection())
df

Unnamed: 0,Feedname,Article_title,Article_URL,Duplicate,Fetch_Date,Summary
0,Engadget,Alienware m16 R2 review: When less power makes...,https://www.engadget.com/alienware-m16-r2-revi...,,2024-05-09 11:58:49.748816,
1,Engadget,Netflix and Roblox team up for a digital theme...,https://www.engadget.com/netflix-and-roblox-te...,,2024-05-09 11:58:49.748816,
2,Engadget,Nintendo is done paying Elon Musk for X integr...,https://www.engadget.com/nintendo-is-done-payi...,,2024-05-09 11:58:49.748816,
3,Engadget,Ember's Travel Mug 2+ with Find My support dro...,https://www.engadget.com/embers-travel-mug-2-w...,,2024-05-09 11:58:49.748816,
4,Engadget,Dungeons and Dragons is coming to Dead by Dayl...,https://www.engadget.com/dungeons-and-dragons-...,,2024-05-09 11:58:49.748816,
...,...,...,...,...,...,...
148,Jalopnik,Who Is The Greatest Racing Driver Of All Time?,https://jalopnik.com/who-is-the-greatest-racin...,,2024-05-09 11:58:50.932554,
149,Jalopnik,"Lighter, More Powerful 2025 BMW M4 CS Is The K...",https://jalopnik.com/lighter-more-powerful-202...,,2024-05-09 11:58:50.932554,
150,Jalopnik,More Americans Watched The Miami Grand Prix Th...,https://jalopnik.com/more-americans-watched-th...,,2024-05-09 11:58:50.932554,
151,Jalopnik,You Should At Least Bid On This Special McLare...,https://jalopnik.com/you-should-at-least-bid-o...,,2024-05-09 11:58:50.932554,


In [213]:
df.head()

Unnamed: 0,Feedname,Article_title,Article_URL,Duplicate,Fetch_Date,Summary
0,Techcrunch,Google DeepMind debuts huge AlphaFold update a...,https://techcrunch.com/2024/05/08/google-deepm...,,2024-05-08 20:24:46.061625,
1,Engadget,Alienware m16 R2 review: When less power makes...,https://www.engadget.com/alienware-m16-r2-revi...,,2024-05-09 11:20:34.562828,
2,Engadget,Netflix and Roblox team up for a digital theme...,https://www.engadget.com/netflix-and-roblox-te...,,2024-05-09 11:20:34.562828,
3,Engadget,Nintendo is done paying Elon Musk for X integr...,https://www.engadget.com/nintendo-is-done-payi...,,2024-05-09 11:20:34.562828,
4,Engadget,Ember's Travel Mug 2+ with Find My support dro...,https://www.engadget.com/embers-travel-mug-2-w...,,2024-05-09 11:20:34.562828,


In [215]:
df1 = df[df['Summary'].notnull()]

insert_to_FEEDS_with_summary(df1.values)

In [232]:
temp = pd.read_sql("Select * from FEEDS where Summary is null", get_connection(),)
temp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 153 entries, 0 to 152
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Feedname       153 non-null    object
 1   Article_title  153 non-null    object
 2   Article_URL    153 non-null    object
 3   Duplicate      0 non-null      object
 4   Fetch_Date     153 non-null    object
 5   Summary        0 non-null      object
dtypes: object(6)
memory usage: 7.3+ KB


In [238]:
df2 = pd.read_sql("Select * from FEEDS ", get_connection())
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 239 entries, 0 to 238
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Feedname       239 non-null    object
 1   Article_title  239 non-null    object
 2   Article_URL    239 non-null    object
 3   Duplicate      1 non-null      object
 4   Fetch_Date     239 non-null    object
 5   Summary        86 non-null     object
dtypes: object(6)
memory usage: 11.3+ KB


In [245]:
df = pd.read_csv('feeds.csv')
df.head()

Unnamed: 0,Feedname,Article_title,Article_URL,Duplicate,Fetch_Date,Summary
0,Engadget,OpenAI says it can detect images made by its o...,https://www.engadget.com/openai-says-it-can-de...,,2024-05-08 20:24:45.726655,OpenAI has developed a tool to detect images c...
1,Engadget,The M4 iPad Pro is literally lighter than Air,https://www.engadget.com/the-m4-ipad-pro-is-li...,,2024-05-08 20:24:45.726655,The M4 iPad Pro is the latest release from App...
2,Engadget,Everything announced at Apple's Let Loose iPad...,https://www.engadget.com/everything-announced-...,,2024-05-08 20:24:45.726655,Advertisement Advertisement Advertisement A...
3,Engadget,What the heck is going on with Helldivers 2?,https://www.engadget.com/what-the-heck-is-goin...,,2024-05-08 20:24:45.726655,Advertisement Advertisement Advertisement A...
4,Engadget,TikTok is suing the US government to stop its ...,https://www.engadget.com/tiktok-is-suing-the-u...,,2024-05-08 20:24:45.726655,TikTok is taking legal action against the US g...


In [246]:
insert_to_FEEDS_with_summary(df.values)

In [253]:
df = pd.read_sql("Select Feedname, Article_URL,Summary from FEEDS where Summary is not null",get_connection())
df.info()

insert_to_Summary(df[1:].values)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 86 entries, 0 to 85
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Feedname     86 non-null     object
 1   Article_URL  86 non-null     object
 2   Summary      86 non-null     object
dtypes: object(3)
memory usage: 2.1+ KB


In [254]:
pd.read_sql("Select Feedname, Article_URL,Summary from SUMMARY",get_connection())

Unnamed: 0,Feedname,Article_URL,Summary
0,Engadget,https://www.engadget.com/openai-says-it-can-de...,OpenAI has developed a tool to detect images c...
1,Engadget,https://www.engadget.com/the-m4-ipad-pro-is-li...,The M4 iPad Pro is the latest release from App...
2,Engadget,https://www.engadget.com/everything-announced-...,Advertisement Advertisement Advertisement A...
3,Engadget,https://www.engadget.com/what-the-heck-is-goin...,Advertisement Advertisement Advertisement A...
4,Engadget,https://www.engadget.com/tiktok-is-suing-the-u...,TikTok is taking legal action against the US g...
...,...,...,...
80,Jalopnik,https://jalopnik.com/the-best-hybrid-suvs-for-...,"The Best Hybrid SUVs For Less Than $45,000 Acc..."
81,Jalopnik,https://jalopnik.com/the-faa-is-investigating-...,The Federal Aviation Administration is investi...
82,Techcrunch,https://techcrunch.com/2024/05/08/google-deepm...,Google DeepMind has released a new version of ...
83,The Verge,https://www.theverge.com/2024/5/9/24152675/son...,Sonos recently released a controversial redesi...
