## Purpose:
This code uses llama or DeepSeek-both are opensource model, to summarize the latest crypto news from 10 best crypto websites and emails the result to my mailbox.
Ollama is downloaded and installed from ollama.com
The Ollama server should runs locally on http://localhost:11434/ . To run it locally on the server user **ollama serve**
Ollama package was use instead of the local server. It does basically thesame thing as use using the server

In [None]:
# !pip install selenium
# !pip install undetected-chromedriver

In [1]:
# imports

import os
import requests
from dotenv import load_dotenv
from IPython.display import Markdown, display
import ollama

# import web scrappers libraries
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time

### Define a website scrapper class to collect data from any website

In [2]:
class WebsiteCrawler:
    def __init__(self, url, wait_time=20, chrome_binary_path=None):
        """
        Initialize the WebsiteCrawler using Selenium to scrape JavaScript-rendered content.
        """
        self.url = url
        self.wait_time = wait_time

        options = uc.ChromeOptions()
        options.add_argument("--disable-gpu")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--disable-blink-features=AutomationControlled")
        options.add_argument("start-maximized")
        options.add_argument(
            "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
        )
        if chrome_binary_path:
            options.binary_location = chrome_binary_path

        self.driver = uc.Chrome(options=options)

        try:
            # Load the URL
            self.driver.get(url)

            # Wait for Cloudflare or similar checks
            time.sleep(10)

            # Ensure the main content is loaded
            WebDriverWait(self.driver, self.wait_time).until(
                EC.presence_of_element_located((By.TAG_NAME, "main"))
            )

            # Extract the main content
            main_content = self.driver.find_element(By.CSS_SELECTOR, "main").get_attribute("outerHTML")

            # Parse with BeautifulSoup
            soup = BeautifulSoup(main_content, "html.parser")
            self.title = self.driver.title if self.driver.title else "No title found"
            self.text = soup.get_text(separator="\n", strip=True)

        except Exception as e:
            print(f"Error occurred: {e}")
            self.title = "Error occurred"
            self.text = ""

        finally:
            self.driver.quit()

### Prompt to summarize news from a single website for the model

In [3]:
# See how this function creates exactly the format above

# define a system prompt
system_prompt = "You are a crypto investor that searches website to extract and summarize daily latest \
crypto news that will impact the price of crypto. Ignore text that are not crypto related or that are not current news. \
Respond in markdown."

# define a user prompt
def user_prompt_for(website):
    user_prompt = f"You are looking at a website titled {website.title}" 
    user_prompt += "\nThe contents of this website is as follows; please highlight top 3 breaking news related to cryptocurrencies.\n"
    user_prompt += website.text
    return user_prompt

def messages_for(website):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for(website)}
    ]

In [4]:
# set constants used by by ollama

OLLAMA_API = "http://locahost:11434/api/chat"
HEADERS = {"Content-Type": "application/json"}
MODEL = "deepseek-r1:1.5b" #"llama3.2"

chrome_path = "C:/Program Files/Google/Chrome/Application/chrome.exe"

# to check if model is loaded
#!ollama pull llama3.2
!ollama pull deepseek-r1:1.5b

[?25l[?25h[?25lpulling manifest â ‹ [?25h[?25l[2K[1Gpulling manifest â ™ [?25h[?25l[2K[1Gpulling manifest â ¹ [?25h[?25l[2K[1Gpulling manifest â ¸ [?25h[?25l[2K[1Gpulling manifest â ¼ [?25h[?25l[2K[1Gpulling manifest â ´ [?25h[?25l[2K[1Gpulling manifest â ¦ [?25h[?25l[2K[1Gpulling manifest â § [?25h[?25l[2K[1Gpulling manifest â ‡ [?25h[?25l[2K[1Gpulling manifest â � [?25h[?25l[2K[1Gpulling manifest 
pulling aabd4debf0c8... 100% â–•â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–� 1.1 GB                         
pulling 369ca498f347... 100% â–•â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–�  387 B                         
pulling 6e4c38e1172f... 100% â–•â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–� 1.1 KB                         
pulling f4d24e9138dd... 100% â–•â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–�  148 B                         
pulling a85fe2a2e58e... 100% â–•â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–�  487 B

### Get latest crypto news from individual website

In [5]:
def new_summary(url, chrome_path):
    web = WebsiteCrawler(url, 30, chrome_path)
    message = messages_for(web)
    try:
        #response = requests.post(OLLAMA_API, json={"model":MODEL,"messages":message,"stream":False}, headers=HEADERS)
        response = ollama.chat(model=MODEL, messages=message)
    except:
        print("Unable to use llama")
    print(response)
    #web_summary = response.json()['message']['content']
    web_summary = response['message']['content']
    
    return web_summary

In [6]:
summary = new_summary("https://www.coindesk.com", chrome_path)

model='deepseek-r1:1.5b' created_at='2025-02-03T05:16:58.4669196Z' done=True done_reason='stop' total_duration=275583084900 load_duration=5999276100 prompt_eval_count=2048 prompt_eval_duration=125211000000 eval_count=1219 eval_duration=143060000000 message=Message(role='assistant', content="<think>\nOkay, so I'm trying to figure out how the mainnet of Tether is different from their smart contract version called Layer-2. I know that Tether has both a mainnet and a Layer-2 network, but I'm not exactly sure what each one does. Let me start by recalling some basic crypto knowledge.\n\nTether is a stablecoin that's pegged to the US dollar. It can be either on its mainnet or as a smart contract on Layer-2. The mainnet allows holders to trade Tether directly with other cryptocurrencies, while Layer-2 transactions are done via smart contracts without involving actual wallets. I remember hearing that Layer-2 is faster and more transparent.\n\nI think in the mainnet, users can perform standard t

In [7]:
summary

"<think>\nOkay, so I'm trying to figure out how the mainnet of Tether is different from their smart contract version called Layer-2. I know that Tether has both a mainnet and a Layer-2 network, but I'm not exactly sure what each one does. Let me start by recalling some basic crypto knowledge.\n\nTether is a stablecoin that's pegged to the US dollar. It can be either on its mainnet or as a smart contract on Layer-2. The mainnet allows holders to trade Tether directly with other cryptocurrencies, while Layer-2 transactions are done via smart contracts without involving actual wallets. I remember hearing that Layer-2 is faster and more transparent.\n\nI think in the mainnet, users can perform standard transactions like buying or selling Tether, which probably allows them to engage in direct cryptocurrency trading. On the other hand, Layer-2 only handles smart contract interactions. Maybe the mainnet has less transparency because of the complexity of smart contracts? But I also recall that

### Get Top Crypto news from 10 crypto website

In [None]:
website_list = [
    "https://www.coindesk.com",
    "https://www.cointelegraph.com/",
    "https://www.u.today/",
    "https://www.decrypt.co/",
    "https://www.cryptotimes.io",
    "https://www.coinbureau.com",
    "https://www.blockworks.co",
    "https://www.bloomberg.com/crypto",
    "https://www.news.bitcoin.com/",
    "https://www.cryptonews.com/"
    ] 

news_stack = {}
for website in website_list:
    # try:
    summary = new_summary(website, chrome_path)
    # except:
    # print(f"Unable to access {website}")
    news_stack[website] = summary

In [None]:
news_stack

### Prompt to summarize the result of multiple website's news highlights

In [None]:
# define a system prompt
system_prompt = "You are a crypto news analyst named Amy, that reports crypto news. \
Prepare the news in a format that can be emailed to crypto Enthusiast. \
Add the source to each news"

# define a user prompt
def user_prompt(all_news):
    user_prompt = f"You are looking at the content of a Python dictionary where the key is the source website and \
    the values are the top 3 latest news from that website." 
    user_prompt += "\nThe contents of this dictionary are as follows; Please combine and summarize \
    all the news avoiding duplicating the information. Highlight the top 12 latest news that \
    have a high impact on cryptocurrencies price. \
    When a news appears in multiple sources, identify all the website sources it appeared in.\n"
    for source, news in all_news.items():
        user_prompt += f"source={source}, news = {news}"
    return user_prompt

def messages(all_news):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt(all_news)}
    ]

### Summarize all highlighted news from all the websites

In [None]:
def final_summary(news):
    web = WebsiteCrawler(url, 30, chrome_path)
    message = messages_for(web)
    try:
        #response = requests.post(OLLAMA_API, json={"model":MODEL,"messages":message,"stream":False}, headers=HEADERS)
        response = ollama.chat(model=MODEL, messages=message)
    except:
        print("Unable to use llama")
    print(response)
    #web_summary = response.json()['message']['content']
    web_summary = response['message']['content']
    
    return web_summary

In [None]:
# call the final_summary() into a variable 'email' which contains the summarized news
email = final_summary(news_stack)

### Email latest news

In [None]:
# this is what the email looks like in string format
email

In [None]:
from email.message import EmailMessage
import ssl
import smtplib

In [None]:
# identify the email sender and receiver
sender = "chiamy694@gmail.com"
password = os.environ.get('EMAIL_PASSWORD')
receiver = "priscacare20@gmail.com"

In [None]:
# extract the subject of the email
subject = email.split('\n')[0][9:]

In [None]:
# extract the body of the email
body = '\n'.join(email.split('\n')[1:])

In [None]:
Markdown(body)

In [None]:
# sending the email using emailmessage package
email_message = EmailMessage()
email_message["From"] = sender
email_message["To"] = receiver
email_message["Subject"] = subject
email_message.set_content(body)

# to add a layer of security
context = ssl.create_default_context()
#send email with smtp
with smtplib.SMTP_SSL("smtp.gmail.com", 465, context=context) as smtp:
    smtp.login(sender, password)
    smtp.sendmail(sender, receiver, email_message.as_string())