Using Word2vec

In [None]:
!pip install gensim spacy requests




In [None]:
# Load the pretrained 'word2vec-google-news-300' vectors through gensim's
# downloader and keep them in `wv`; determine_tag() queries `wv` for
# word-to-keyword similarity.
import gensim.downloader as api

wv = api.load('word2vec-google-news-300')


In [None]:
import spacy
import requests
from bs4 import BeautifulSoup
import gensim.downloader as api

# Load the spaCy 'en_core_web_sm' pipeline used to tokenize article text.
# The Word2Vec vectors (`wv`) are loaded separately, in the preceding cell.
nlp = spacy.load('en_core_web_sm')

# --- Configuration ---------------------------------------------------------
API_KEY = "<ur-api-key>"  # sent to the news API as `api_token`
BASE_URL = "https://api.thenewsapi.com/v1/news/top"
COMPANIES = ["Apple Inc.", "LinkedIn", "Tesla"]  # one search per entry
ALERTS = {"CXO News", "Cybersecurity", "AI", "Finance", "Tech"}

# Seed keywords per alert tag; article words are scored against these.
ALERT_KEYWORDS = {
    "CXO News": [
        "CEO", "executive", "chief", "president",
    ],
    "Cybersecurity": [
        "cybersecurity", "hacking", "malware", "phishing", "security",
    ],
    "AI": [
        "AI", "artificial intelligence", "machine learning",
        "deep learning", "neural networks",
    ],
    "Finance": [
        "finance", "investment", "stock", "trading", "economic", "market",
    ],
    "Tech": [
        "technology", "tech", "innovation", "startup", "software", "hardware",
    ],
}

# Seed keywords for the coarse Business-vs-Consumer classification.
CLASSIFICATION_KEYWORDS = {
    "Business": [
        "corporate", "industry", "enterprise", "business",
        "stock", "market", "investment", "economy",
    ],
    "Consumer": [
        "customer", "consumer", "product", "service",
        "retail", "shopping", "brand", "lifestyle",
    ],
}

def fetch_news(company):
    """Query the news API for English 'general' articles matching *company*.

    Args:
        company: Search term sent as the ``search`` query parameter.

    Returns:
        The list stored under the response's ``data`` key. An empty list is
        returned when the request fails, the status code is not 200, the
        body is not a JSON object, or ``data`` is missing or empty.
    """
    params = {
        'api_token': API_KEY,
        'search': company,
        'language': 'en',
        'limit': 20,
        'categories': 'general'
    }
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(BASE_URL, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"Error fetching news for {company}: {exc}")
        return []

    if response.status_code != 200:
        return []

    try:
        payload = response.json()
    except ValueError:
        # The JSON decoding errors raised by requests derive from ValueError.
        return []

    if not isinstance(payload, dict):
        return []
    # `or []` also covers an explicit `"data": null` in the payload.
    return payload.get('data') or []

def fetch_article_content(url):
    """Download *url* and return the text of its ``<p>`` elements.

    This is a best-effort fallback for articles without a description: any
    exception is printed rather than raised.

    Args:
        url: Address of the article page.

    Returns:
        The paragraph texts joined by single spaces, or ``''`` when the
        status code is not 200 or an error occurred.
    """
    try:
        # Article hosts are arbitrary third parties; never wait forever on one.
        response = requests.get(url, timeout=10)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            return ' '.join(p.get_text() for p in soup.find_all('p'))
    except Exception as e:
        # Deliberately broad: a bad page must not abort the whole run.
        print(f"Error fetching article content: {e}")
    return ''

def _average_similarity(words, keywords):
    """Return the mean ``wv`` similarity over every (word, keyword) pair.

    Both sequences must already be restricted to entries present in ``wv``.
    Returns ``None`` when there is no pair to compare.
    """
    if not words or not keywords:
        return None
    total = 0
    for word in words:
        for keyword in keywords:
            total += wv.similarity(word, keyword)
    return total / (len(words) * len(keywords))


def _best_positive(scores):
    """Return the key with the highest score, or ``None`` if none is > 0."""
    best = max(scores, key=scores.get, default=None)
    return best if scores.get(best, 0) > 0 else None


def determine_tag(description, api_categories):
    """Pick an alert tag and a Business/Consumer class for an article.

    Each candidate label is scored by the average Word2Vec similarity
    between the alphabetic, lower-cased words of *description* and the
    label's seed keywords. Alert tags also gain one point for every entry
    of *api_categories* that equals the tag exactly.

    Args:
        description: Article text to analyse.
        api_categories: Iterable of category names reported by the news
            API; ``None`` is treated as empty.

    Returns:
        A ``(tag, classification)`` tuple. Either element is ``None`` when
        no candidate reached a score above zero.
    """
    doc = nlp(description)
    words = [token.text.lower() for token in doc if token.is_alpha]
    # Check vocabulary membership once per word instead of once per
    # (word, keyword) pair.
    known_words = [word for word in words if word in wv]

    tag_scores = {tag: 0 for tag in ALERTS}
    classification_scores = {"Business": 0, "Consumer": 0}

    # NOTE(review): this is an exact, case-sensitive comparison. If the API
    # reports lower-case names such as "tech", it never matches "Tech".
    for category in api_categories or []:
        if category in ALERTS:
            tag_scores[category] += 1

    score_tables = (
        (tag_scores, ALERT_KEYWORDS),
        (classification_scores, CLASSIFICATION_KEYWORDS),
    )
    for scores, keyword_table in score_tables:
        for label, keywords in keyword_table.items():
            # Keywords missing from the vocabulary are skipped.
            known_keywords = [keyword for keyword in keywords if keyword in wv]
            average = _average_similarity(known_words, known_keywords)
            if average is not None:
                scores[label] += average

    return _best_positive(tag_scores), _best_positive(classification_scores)

def process_news():
    """Print title, tag, classification and URL for each company's articles.

    When an article has no description, its page text is fetched instead.
    If that is empty too, the article is reported without a tag or
    classification.
    """
    for company in COMPANIES:
        print(f"\nProcessing news for {company}...")
        for article in fetch_news(company):
            text = article.get('description') or fetch_article_content(article['url'])

            tag = classification = None
            if text:
                tag, classification = determine_tag(text, article.get('categories', []))

            print(f"Title: {article['title']}")
            print(f"Tag: {tag or 'No relevant tag found'}")
            print(f"Classification: {classification or 'No classification found'}")
            print(f"URL: {article['url']}")

# Run the Word2Vec pipeline only when executed directly, not on import.
if __name__ == "__main__":
    process_news()



Processing news for Apple Inc....
Title: Swiss court upholds Apple Inc. trademark appeal
Tag: Tech
Classification: Consumer
URL: https://www.swissinfo.ch/eng/business/swiss-court-upholds-apple-inc--trademark-appeal/48726930
Title: Chinese National Sentenced to Prison for Defrauding Apple Inc.
Tag: Finance
Classification: Business
URL: https://defence.pk/pdf/threads/chinese-national-sentenced-to-prison-for-defrauding-apple-inc.736623/
Title: Alphabet Inc., Amazon.com Inc., Apple Inc. and more among the key companies in the market- Technavio - Bubblear News
Tag: Finance
Classification: Business
URL: https://bubblear.com/podcast-market-in-europe-to-grow-by-usd-1-12-billion-from-2022-to-2027-alphabet-inc-amazon-com-inc-apple-inc-and-more-among-the-key-companies-in-the-market-technavio/4325/

Processing news for LinkedIn...
Title: linkedin: 3 photos that tell how scam on LinkedIn works
Tag: Cybersecurity
Classification: Consumer
URL: https://www.gadgetsnow.com/social/3-photos-that-tell-how

Now Using an *LLM*

In [None]:
# openai is pinned to 0.28 because the next cell calls `openai.ChatCompletion.create`.
# NOTE(review): presumably that interface is unavailable in openai>=1.0 — confirm before unpinning.
!pip install -qqq openai==0.28

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/76.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/76.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━[0m [32m61.4/76.5 kB[0m [31m747.2 kB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m741.8 kB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import requests
import openai
from spacy.lang.en import English

# Create a blank spaCy English pipeline for basic text processing.
# NOTE(review): `nlp` is not referenced by the functions visible in this cell — confirm it is still needed.
nlp = English()

# --- Credentials and endpoint ---------------------------------------------
API_KEY_GPT = "<ur-api-key>"   # passed as `api_key` to the OpenAI calls
API_KEY_NEWS = "<ur-api-key>"  # sent to the news API as `api_token`
BASE_URL = "https://api.thenewsapi.com/v1/news/top"
COMPANIES = [
    "Apple Inc.",
    "LinkedIn",
    "Tesla",
]

# Function to fetch news articles
def fetch_news(company):
    """Query the news API for English articles matching *company* (limit 5).

    Args:
        company: Search term sent as the ``search`` query parameter.

    Returns:
        The list stored under the response's ``data`` key. An empty list is
        returned when the request fails, the status code is not 200, the
        body is not a JSON object, or ``data`` is missing or empty.
    """
    # No 'categories' filter is sent. The original passed an empty list,
    # which requests leaves out of the query string anyway.
    params = {
        'api_token': API_KEY_NEWS,
        'search': company,
        'language': 'en',
        'limit': 5,
    }
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(BASE_URL, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"Error fetching news for {company}: {exc}")
        return []

    if response.status_code != 200:
        return []

    try:
        payload = response.json()
    except ValueError:
        # The JSON decoding errors raised by requests derive from ValueError.
        return []

    if not isinstance(payload, dict):
        return []
    # `or []` also covers an explicit `"data": null` in the payload.
    return payload.get('data') or []

# Labels the model is asked to choose from; they mirror the prompts below.
_TAG_LABELS = ("CXO News", "Cybersecurity", "AI", "Finance", "Tech")
_CATEGORY_LABELS = ("Business", "Consumer")

_SYSTEM_PROMPT = "You are a highly trained AI capable of understanding complex topics and providing insights."


def _ask_gpt(prompt):
    """Send *prompt* as a single user message and return the reply text."""
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": _SYSTEM_PROMPT},
            {"role": "user", "content": prompt}
        ],
        api_key=API_KEY_GPT
    )
    return response.choices[0].message['content']


def _strip_boilerplate(reply, phrases):
    """Trim *reply*, then remove each of *phrases* and all double quotes."""
    cleaned = reply.strip()
    for phrase in phrases:
        cleaned = cleaned.replace(phrase, '')
    return cleaned.replace('"', '')


def _match_label(reply, labels):
    """Return the entry of *labels* that occurs earliest in *reply*.

    Matching is case-insensitive and anchored on word boundaries, so a full
    sentence or trailing punctuation still yields the bare label. When no
    label occurs, *reply* is returned unchanged.
    """
    import re  # local import keeps this block self-contained

    earliest = None
    for label in labels:
        found = re.search(r'\b' + re.escape(label) + r'\b', reply, re.IGNORECASE)
        if found and (earliest is None or found.start() < earliest[0]):
            earliest = (found.start(), label)
    return earliest[1] if earliest else reply


# Use GPT-3.5 to tag and categorize news articles
def tag_and_categorize_article(description, url):
    """Ask gpt-3.5-turbo for an alert tag and a Business/Consumer category.

    Two separate chat completions are issued, one per question.

    Args:
        description: Article description. When empty, a placeholder that
            mentions *url* is sent instead.
        url: Article URL, used only for that placeholder.

    Returns:
        A ``(tag, category)`` tuple of strings. Each is the canonical label
        when the reply contains one, otherwise the reply with known
        boilerplate phrases and double quotes removed.
    """
    if not description:
        # NOTE(review): only this text reaches the model; nothing in this
        # function fetches the page behind the URL.
        description = f"Please check the URL for content: {url}"

    tag_reply = _ask_gpt(
        f"Please tag the following news article description with one of these categories and only give one tag: CXO News, Cybersecurity, AI, Finance, Tech.\nDescription: {description}"
    )
    tag = _strip_boilerplate(tag_reply, (
        'Category: ',
        'This news article falls under the category of ',
        'This news article description would be best tagged as "',
    ))
    tag = _match_label(tag, _TAG_LABELS)

    # Prompt typo fixed here: the original read "'Consumer'in one one word".
    category_reply = _ask_gpt(
        f"Please categorize the following news article description as either 'Business' or 'Consumer' in one word.\nDescription: {description}"
    )
    category = _strip_boilerplate(category_reply, (
        'Category: ',
        'This news article falls under the category of ',
        'This news article description would be best categorized as "',
    ))
    category = _match_label(category, _CATEGORY_LABELS)

    return tag, category

# Main function to process and print news data
def process_news():
    """Print title, tag, category and URL for each company's articles.

    Every article returned by ``fetch_news`` is labelled through
    ``tag_and_categorize_article``.
    """
    for company in COMPANIES:
        print(f"\nProcessing news for {company}...")
        for article in fetch_news(company):
            summary = article.get('description', '')
            tag, category = tag_and_categorize_article(summary, article['url'])

            print(f"Title: {article['title']}")
            print(f"Tag: {tag or 'No relevant tag found'}")
            print(f"Category: {category or 'No relevant category found'}")
            print(f"URL: {article['url']}")

# Run the LLM-based pipeline only when executed directly, not on import.
if __name__ == "__main__":
    process_news()



Processing news for Apple Inc....
Title: Swiss court upholds Apple Inc. trademark appeal
Tag: Finance
Category: Business
URL: https://www.swissinfo.ch/eng/business/swiss-court-upholds-apple-inc--trademark-appeal/48726930
Title: Chinese National Sentenced to Prison for Defrauding Apple Inc.
Tag: Cybersecurity
Category: Business
URL: https://defence.pk/pdf/threads/chinese-national-sentenced-to-prison-for-defrauding-apple-inc.736623/
Title: Alphabet Inc., Amazon.com Inc., Apple Inc. and more among the key companies in the market- Technavio - Bubblear News
Tag: Finance
Category: Business
URL: https://bubblear.com/podcast-market-in-europe-to-grow-by-usd-1-12-billion-from-2022-to-2027-alphabet-inc-amazon-com-inc-apple-inc-and-more-among-the-key-companies-in-the-market-technavio/4325/

Processing news for LinkedIn...
Title: linkedin: 3 photos that tell how scam on LinkedIn works
Tag: Cybersecurity
Category: Consumer
URL: https://www.gadgetsnow.com/social/3-photos-that-tell-how-scam-on-linked