In [1]:
!pip install nltk
!pip install six
!pip install lxml
!pip install beautifulsoup4
!pip install requests
!pip install scrapy



In [1]:
import os 
import requests
import json
import urllib.request
from dotenv import load_dotenv
from newspaper import Article
import nltk
import requests
import chromadb
from langchain.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma
from chromadb import Client
from chromadb.utils import embedding_functions
from langchain_ollama import OllamaLLM
from sentence_transformers import SentenceTransformer
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.chains import RetrievalQA

In [2]:
def splitting(
    source_path="./current.txt",
    persist_directory="./vector_db",
    device="mps",
    k=4,
    reset_store=True,
):
    """Index the scraped articles and publish a retriever over them.

    Loads ``source_path``, splits it on blank lines into chunks, embeds the
    chunks with the ``all-MiniLM-L6-v2`` model and stores them in a Chroma
    collection persisted under ``persist_directory``. The resulting retriever
    is assigned to the module-level name ``retriever``.

    Args:
        source_path: Text file holding the article records.
        persist_directory: Directory where Chroma keeps the collection.
        device: Torch device string handed to the embedding model. The
            default ``"mps"`` only works on Apple-silicon machines; pass
            ``"cpu"`` or ``"cuda"`` elsewhere.
        k: Number of chunks the retriever returns per query (4 is
            LangChain's default top-k).
        reset_store: When true, drop the previously persisted collection
            before indexing so only the current file's articles are
            retrievable.

    Returns:
        None. The retriever is exposed through the global ``retriever``.
    """
    global retriever

    documents = TextLoader(source_path).load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=150,
        separators=['\n\n'],
    )
    d_chunk = text_splitter.split_documents(documents)

    embeddings = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={"device": device},
    )

    if reset_store:
        # Chroma.from_documents *adds* to whatever collection already lives in
        # persist_directory, so without this every run would leave stale and
        # duplicated articles behind for the retriever to return.
        Chroma(
            persist_directory=persist_directory,
            embedding_function=embeddings,
        ).delete_collection()

    vectorstore = Chroma.from_documents(
        documents=d_chunk,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    retriever = vectorstore.as_retriever(search_kwargs={"k": k})

In [3]:
# Load the GNews API key from the .env file and fail early with a clear
# message instead of sending a request that carries "apikey=None".
load_dotenv()
GNEWS_API_KEY = os.getenv("GNEWS_API_KEY")
if not GNEWS_API_KEY:
    raise RuntimeError("GNEWS_API_KEY is not set; add it to your .env file")

# https://docs.python.org/3/library/json.html
# This library will be used to parse the JSON data returned by the API.

# https://docs.python.org/3/library/urllib.request.html#module-urllib.request
# This library will be used to fetch the API.

apikey = GNEWS_API_KEY
category = "technology"
url = f"https://gnews.io/api/v4/top-headlines?category={category}&lang=en&country=us&max=30&apikey={apikey}"

# The timeout keeps a stalled connection from hanging the notebook forever.
with urllib.request.urlopen(url, timeout=30) as response:
    data = json.loads(response.read().decode("utf-8"))
articles = data["articles"]

# Opening with "w" both creates the file on a first run and discards the
# previous run's content, so no separate (and failure-prone) os.remove is
# needed. The file is only replaced once the headlines were fetched.
with open("current.txt", "w") as f:
    for article in articles:
        description = article.get("description")
        article_url = article.get("url")

        print(description)
        try:
            parsed = Article(article_url)
            parsed.download()
            parsed.parse()
            # NOTE(review): nlp() presumably needs NLTK tokenizer data to be
            # downloaded beforehand - verify on a fresh environment.
            parsed.nlp()
        except Exception as exc:
            # Best effort: one unreachable or unparsable page must not abort
            # the whole run, but the failure should be visible.
            print(f"Skipping {article_url}: {exc}")
            continue

        print("Title:", parsed.title)
        print("\nPublish Date:", parsed.publish_date)
        print("\nSummary:\n", parsed.summary)
        print("\n\n")
        # The trailing blank line separates records; splitting() splits the
        # file on '\n\n', so without it the articles would not be separated.
        f.write("Title: "+parsed.title+'\n'+"Summary: "+parsed.summary+'\n'+"URL: "+article_url+'\n\n')

Nexusmods has been sold to an unknown party after 24 years.
Title: End of an era: Nexus Mods has change in ownership after 24 years following "stress-related health issues"

Publish Date: 2025-06-17 12:18:05.049744+00:00

Summary:
 Nexus Mods has new ownership after 24 years under the stewardship of website founder Dark0ne.
Nexus Mods stands as the largest hub for video game mods for PC users, with a library of 716,500 mods across 3,768 games.
"So, I firmly believe that the best thing for the future of Nexus Mods is for me to step aside and bring in new leadership to steer the business forward with renewed energy to make Nexus Mods the modding community we all truly deserve."
The blog only links to their Nexus Mods accounts page, rather than any information on previous business experience.
As an interesting aside, that fourth name on Chosen's Linkedin page is especially interesting.



From a fresh-looking Marvel fighting game to a horror classic reborn and a brand new Resident Evil, t

In [4]:
# Index ./current.txt into the vector store; splitting() exposes the result
# as the global `retriever` used when the QA chain is built.
splitting()

In [5]:
# System prompt sent with every request: it fixes the output layout
# (numbered Title / Summary / URL entries) and the de-duplication rules.
system_messages = """
You are a helpful assistant. Your job is to extract insights from news articles and present them to tech savvy users cleanly without duplicates.

Format:
- Present each article as: 
"Title: [title]" followed by 
"Summary: [summary]" and "URL: [url]"
- Each article should be separated by a blank line
- Number each article (1., 2., 3., ...)

Rules:
- Summarize in maximum 7 sentences
- Remove filler content like advertisements, subscription prompts, social media references
- If multiple articles have the same URL, combine their content into one
- Do not duplicate URLs - each URL should appear only once
- Focus only on the provided content
- Do not make up information
- Do not hallucinate

Output:
1. Article Title
   Summary: [Clean summary here]
   URL: [URL]

2. Article Title
   Summary: [Clean summary here]  
   URL: [URL]
"""
# The user turn carries the retrieved text ({context}) followed by the
# caller's question ({question}).
prompt = ChatPromptTemplate.from_messages([
    ("system", system_messages),
    ("user", "{context}\nUser: {question}")
])
# Ollama model reached over HTTP on the local machine (127.0.0.1:11434).
ollama_llm = OllamaLLM(
    model="qwen3:4b",
    base_url="http://127.0.0.1:11434",
    temperature=0.5
)
# `retriever` is the global assigned inside splitting(), so splitting() must
# have been run before this cell. document_variable_name matches the
# {context} placeholder of the prompt above.
qa_chain = RetrievalQA.from_chain_type(
    llm=ollama_llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={
        "prompt":prompt,
        "document_variable_name": "context"
    },
    return_source_documents=False
)
import re 
def clean_output(text):
    """Return ``text`` without model reasoning or the stock preamble.

    Every ``<think>...</think>`` section (which may span several lines) is
    dropped, then the fixed "Here is a summary ..." lead-in together with the
    whitespace following it, and finally surrounding whitespace is trimmed.
    """
    think_section = re.compile(r'<think>.*?</think>', re.DOTALL)
    lead_in = re.compile(r'Here is a summary of all the articles presented in the files, including their URLs:\s*')

    without_reasoning = think_section.sub('', text)
    return lead_in.sub('', without_reasoning).strip()

def ask(input_s: str) -> str:
    """Run ``input_s`` through the retrieval QA chain and return the cleaned answer.

    A fixed suffix is appended to the question before it is sent; the chain's
    answer is then passed through ``clean_output``.
    """
    # NOTE(review): "/no_think" is presumably the model's switch for
    # suppressing its reasoning output - confirm against the model docs.
    reminder = "/no_think. REMEMBER: Do not repeat any URL twice. Each URL should appear exactly once."
    response = qa_chain.invoke({"query": input_s + reminder})
    return clean_output(response[qa_chain.output_key])

# Store the generated digest in send.txt, replacing any previous content.
with open("send.txt", "w") as digest_file:
    digest_file.write(
        ask("Summarize all articles presented in the files and include the URL")
    )

In [6]:
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

def send_via_gmail():
    """Email the contents of ``send.txt`` through Gmail's SMTP server.

    The sender address, sender password and recipient address are read from
    the ``sender_email``, ``sender_password`` and ``recipient_email``
    environment variables. Missing variables and any failure while talking to
    the SMTP server are reported with ``print`` rather than raised.

    Returns:
        None.

    Raises:
        OSError: If ``send.txt`` cannot be opened (e.g. it does not exist).
    """
    # Email configuration
    sender_email = os.getenv("sender_email")
    sender_password = os.getenv("sender_password")  # app password
    recipient_email = os.getenv("recipient_email")

    required = {
        "sender_email": sender_email,
        "sender_password": sender_password,
        "recipient_email": recipient_email,
    }
    missing = [name for name, value in required.items() if not value]
    if missing:
        # Without this check the failure only shows up later as an obscure
        # error raised from inside the SMTP login.
        print(f"❌ Error: missing environment variable(s): {', '.join(missing)}")
        return

    # Read the body with a context manager so the file handle is released.
    with open("send.txt") as f:
        body = f.read()

    # Create message
    msg = MIMEMultipart()
    msg['From'] = sender_email
    msg['To'] = recipient_email
    msg['Subject'] = 'Tech newspaper summary'
    msg.attach(MIMEText(body, 'plain'))

    try:
        # The context manager closes the connection even when starttls,
        # login or sendmail raises; the timeout prevents an indefinite hang.
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:
            server.starttls()  # Enable encryption
            server.login(sender_email, sender_password)
            server.sendmail(sender_email, recipient_email, msg.as_string())

        print("✅ Email sent successfully via Gmail!")

    except Exception as e:
        print(f"❌ Error: {e}")

# Email the digest stored in send.txt to the configured recipient.
send_via_gmail()

✅ Email sent successfully via Gmail!
