In [32]:
import os
import json
import requests
import pandas as pd
from datetime import datetime, timedelta
from typing import List, Dict, Optional

from dotenv import load_dotenv
load_dotenv(override=True)

True

In [33]:
class NewsAPIClient:
    """Base class for news API clients"""
    def __init__(self, api_key: str):
        self.api_key = api_key
        self.session = requests.Session()

class BingNewsAPI(NewsAPIClient):
    """Client for Bing News Search API"""
    def __init__(self, api_key: str):
        super().__init__(api_key)
        self.base_url = "https://api.bing.microsoft.com/v7.0/news/search"
        self.session.headers.update({
            'Ocp-Apim-Subscription-Key': self.api_key
        })

    def search_news(self, query: str, count: int = 10) -> List[Dict]:
        """
        Search news articles using Bing News API
        
        Args:
            query: Search term
            count: Number of results to return (max 100)
            
        Returns:
            List of news articles
        """
        params = {
            'q': query,
            'count': min(count, 100),
            'freshness': 'Day'
        }
        
        response = self.session.get(self.base_url, params=params)
        response.raise_for_status()
        
        results = response.json()
        return results.get('value', [])

class RedditAPI(NewsAPIClient):
    """Client for Reddit API"""
    def __init__(self, client_id: str, client_secret: str):
        self.client_id = client_id
        self.client_secret = client_secret
        self.base_url = "https://oauth.reddit.com"
        self.session = requests.Session()
        self._get_access_token()

    def _get_access_token(self):
        """Authenticate with Reddit and get access token"""
        auth = requests.auth.HTTPBasicAuth(self.client_id, self.client_secret)
        data = {
            'grant_type': 'client_credentials'
        }
        headers = {
            'User-Agent': 'NewsAggregator/1.0'
        }
        
        response = requests.post(
            'https://www.reddit.com/api/v1/access_token',
            auth=auth,
            data=data,
            headers=headers
        )
        response.raise_for_status()
        
        self.session.headers.update({
            'Authorization': f"Bearer {response.json()['access_token']}",
            'User-Agent': 'NewsAggregator/1.0'
        })

    def get_subreddit_top(self, subreddit: str, limit: int = 10) -> List[Dict]:
        """
        Get top posts from a subreddit
        
        Args:
            subreddit: Subreddit name without 'r/'
            limit: Number of posts to return
            
        Returns:
            List of posts
        """
        url = f"{self.base_url}/r/{subreddit}/top"
        params = {
            'limit': limit,
            't': 'day'  # Time filter: hour, day, week, month, year, all
        }
        
        response = self.session.get(url, params=params)
        response.raise_for_status()
        
        posts = response.json()['data']['children']
        return [post['data'] for post in posts]

class NewsAggregator:
    """Aggregate news from multiple sources"""
    def __init__(self, 
                 bing_api_key: Optional[str] = None,
                 reddit_client_id: Optional[str] = None,
                 reddit_client_secret: Optional[str] = None):
        self.clients = {}
        
        if bing_api_key:
            self.clients['bing'] = BingNewsAPI(bing_api_key)
        if reddit_client_id and reddit_client_secret:
            self.clients['reddit'] = RedditAPI(reddit_client_id, reddit_client_secret)

    def get_news(self, query: str, sources: List[str] = None) -> Dict[str, List[Dict]]:
        """
        Get news from all configured sources
        
        Args:
            query: Search term
            sources: List of sources to query (defaults to all configured sources)
            
        Returns:
            Dictionary mapping source names to lists of articles
        """
        if sources is None:
            sources = self.clients.keys()
            
        results = {}
        
        for source in sources:
            if source not in self.clients:
                continue
                
            try:
                if source == 'bing':
                    results[source] = self.clients[source].search_news(query)
                elif source == 'reddit':
                    # For Reddit, use the query as a subreddit name
                    results[source] = self.clients[source].get_subreddit_top(query)
            except Exception as e:
                print(f"Error fetching from {source}: {str(e)}")
                results[source] = []
                
        return results

def main():
    # Load API keys from environment variables
    bing_api_key = os.getenv('BING_API_KEY')
    reddit_client_id = os.getenv('REDDIT_CLIENT_ID')
    reddit_client_secret = os.getenv('REDDIT_CLIENT_SECRET')
    
    # Initialize aggregator
    aggregator = NewsAggregator(
        bing_api_key=bing_api_key,
        reddit_client_id=reddit_client_id,
        reddit_client_secret=reddit_client_secret
    )
    
    # Example usage
    results = aggregator.get_news(
        query="artificial intelligence",
        sources=['bing', 'reddit']
    )
    
    # Print results
    for source, articles in results.items():
        print(f"\n=== {source.upper()} NEWS ===")
        for article in articles[:3]:  # Print first 3 articles
            if source == 'bing':
                print(f"Title: {article.get('name')}")
                print(f"Description: {article.get('description')}")
                print(f"URL: {article.get('url')}\n")
            elif source == 'reddit':
                print(f"Title: {article.get('title')}")
                print(f"Score: {article.get('score')}")
                print(f"URL: https://reddit.com{article.get('permalink')}\n")

In [34]:
import praw
from datetime import datetime
import pandas as pd

def get_trending_topics():
    # Initialize Reddit instance
    # You'll need to replace these with your own credentials from https://www.reddit.com/prefs/apps
    reddit = praw.Reddit(
        client_id=os.getenv('REDDIT_CLIENT_ID'),
        client_secret=os.getenv('REDDIT_CLIENT_SECRET'),
        user_agent="trending_topics_script_v1.0"
    )
    
    # Get trending posts from r/all
    trending_posts = []
    
    # Get top posts from the past 24 hours
    for submission in reddit.subreddit('all').hot(limit=50):
        post_data = {
            'title': submission.title,
            'subreddit': submission.subreddit.display_name,
            'score': submission.score,
            'comments': submission.num_comments,
            'url': f'https://reddit.com{submission.permalink}',
            'created_utc': datetime.fromtimestamp(submission.created_utc).strftime('%Y-%m-%d %H:%M:%S'),
            'upvote_ratio': submission.upvote_ratio
        }
        trending_posts.append(post_data)
    
    # Convert to DataFrame for easier analysis
    df = pd.DataFrame(trending_posts)
    
    # Sort by score to get the most popular posts
    df = df.sort_values('score', ascending=False)
    
    return df

def analyze_trends(df):
    # Get most active subreddits
    subreddit_counts = df['subreddit'].value_counts().head(10)
    
    print("\nMost Active Subreddits:")
    for subreddit, count in subreddit_counts.items():
        print(f"{subreddit}: {count} posts")
    
    print("\nTop 10 Trending Posts:")
    for _, post in df.head(10).iterrows():
        print(f"\nTitle: {post['title']}")
        print(f"Subreddit: r/{post['subreddit']}")
        print(f"Score: {post['score']:,}")
        print(f"Comments: {post['comments']:,}")
        print(f"URL: {post['url']}")

if __name__ == "__main__":
    # Get trending topics
    trending_df = get_trending_topics()
    
    # Analyze and display trends
    analyze_trends(trending_df)
    
    # Optionally save to CSV
    trending_df.to_csv('trending_topics.csv', index=False)


Most Active Subreddits:
BrandNewSentence: 1 posts
memes: 1 posts
meirl: 1 posts
shittymoviedetails: 1 posts
WorkReform: 1 posts
nextfuckinglevel: 1 posts
mildlyinfuriating: 1 posts
Futurology: 1 posts
clevercomebacks: 1 posts
MurderedByWords: 1 posts

Top 10 Trending Posts:

Title: Imagine…
Subreddit: r/BrandNewSentence
Score: 55,555
Comments: 741
URL: https://reddit.com/r/BrandNewSentence/comments/1h88k64/imagine/

Title: It gets dumber 
Subreddit: r/memes
Score: 42,457
Comments: 666
URL: https://reddit.com/r/memes/comments/1h86ihq/it_gets_dumber/

Title: meirl
Subreddit: r/meirl
Score: 42,452
Comments: 88
URL: https://reddit.com/r/meirl/comments/1h884xw/meirl/

Title: The Austin Powers series is a parody of early James Bond movies, this is emphasised by the fact Austin respects a women’s consent 
Subreddit: r/shittymoviedetails
Score: 34,634
Comments: 541
URL: https://reddit.com/r/shittymoviedetails/comments/1h8a25g/the_austin_powers_series_is_a_parody_of_early/

Title: Rep. Dean Ph

In [35]:
bing_news_api = BingNewsAPI(os.getenv('BING_API_KEY'))

In [36]:
bing_data = bing_news_api.search_news('AI trends')
bing_data_df = pd.DataFrame(bing_data)

KeyboardInterrupt: 

In [None]:
bing_data_df

In [None]:
bing_data_df.iloc[0].url

In [None]:
import ssl
import time
import logging
import asyncio
import platform
import nest_asyncio
from typing import List, Dict, Set

import certifi
import aiohttp
from bs4 import BeautifulSoup
from aiohttp import ClientTimeout
from fake_useragent import UserAgent
from aiohttp_retry import RetryClient, ExponentialRetry

# Apply nest_asyncio to allow nested event loops
nest_asyncio.apply()

class FastNewsScraper:
    """Asynchronous news content scraper optimized for speed"""
    
    def __init__(self, max_concurrent: int = 50, timeout: int = 10):
        self.max_concurrent = max_concurrent
        self.timeout = ClientTimeout(total=timeout)
        self.ua = UserAgent()
        self.seen_urls: Set[str] = set()
        
        # Configure logging
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)
        
        # Setup SSL context
        self.ssl_context = ssl.create_default_context(cafile=certifi.where())

    def _get_random_headers(self) -> Dict[str, str]:
        """Generate random headers for each request"""
        return {
            'User-Agent': self.ua.random,
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Connection': 'keep-alive',
        }

    async def _extract_content(self, html: str) -> Dict[str, str]:
        """Extract content from HTML using BeautifulSoup with minimal parsing"""
        try:
            soup = BeautifulSoup(html, 'html.parser')
            
            # Quick removal of unnecessary elements
            for tag in soup(['script', 'style', 'nav', 'footer', 'iframe', 'aside']):
                tag.decompose()
            
            # Fast content extraction using common patterns
            content = {
                'title': '',
                'text': '',
                'metadata': {}
            }
            
            # Quick title extraction
            title_tag = (
                soup.find('h1') or 
                soup.find('meta', property='og:title') or
                soup.find('title')
            )
            if title_tag:
                content['title'] = title_tag.get_text() if hasattr(title_tag, 'get_text') else title_tag.get('content', '')
            
            # Quick main content extraction
            article_tag = (
                soup.find('article') or
                soup.find('div', class_=['article-content', 'story-content', 'post-content']) or
                soup.find('div', {'itemprop': 'articleBody'})
            )
            
            if article_tag:
                # Extract text efficiently
                paragraphs = article_tag.find_all('p')
                content['text'] = '\n'.join(p.get_text(strip=True) for p in paragraphs)
            
            return content
            
        except Exception as e:
            self.logger.error(f"Content extraction error: {str(e)}")
            return {'title': '', 'text': '', 'metadata': {}}

    async def _fetch_url(self, session: aiohttp.ClientSession, url: str) -> Dict[str, str]:
        """Fetch and process a single URL"""
        try:
            retry_options = ExponentialRetry(attempts=2)
            retry_client = RetryClient(client_session=session, retry_options=retry_options)
            
            async with retry_client.get(
                url,
                headers=self._get_random_headers(),
                timeout=self.timeout,
                ssl=self.ssl_context
            ) as response:
                if response.status == 200:
                    html = await response.text()
                    content = await self._extract_content(html)
                    content['url'] = url
                    content['status'] = 'success'
                    return content
                else:
                    return {'url': url, 'status': 'error', 'error': f'HTTP {response.status}'}
                    
        except asyncio.TimeoutError:
            return {'url': url, 'status': 'error', 'error': 'timeout'}
        except Exception as e:
            return {'url': url, 'status': 'error', 'error': str(e)}

    async def scrape_urls(self, urls: List[str]) -> List[Dict[str, str]]:
        """
        Scrape multiple URLs concurrently
        
        Args:
            urls: List of URLs to scrape
            
        Returns:
            List of dictionaries containing scraped content
        """
        # Remove duplicates and already seen URLs
        unique_urls = list(set(urls) - self.seen_urls)
        self.seen_urls.update(unique_urls)
        
        if not unique_urls:
            return []
        
        # Create connection pool
        conn = aiohttp.TCPConnector(
            limit=self.max_concurrent,
            ttl_dns_cache=300,
            ssl=self.ssl_context
        )
        
        async with aiohttp.ClientSession(connector=conn) as session:
            tasks = [
                self._fetch_url(session, url)
                for url in unique_urls
            ]
            
            # Use semaphore to limit concurrent requests
            semaphore = asyncio.Semaphore(self.max_concurrent)
            async def bounded_fetch(task):
                async with semaphore:
                    return await task
            
            # Gather results with timeout
            results = await asyncio.gather(
                *(bounded_fetch(task) for task in tasks),
                return_exceptions=True
            )
            
            # Filter out failures and exceptions
            valid_results = [
                result for result in results
                if isinstance(result, dict) and result.get('status') == 'success'
            ]
            
            return valid_results

def scrape_batch(urls: List[str], max_concurrent: int = 50, timeout: int = 10) -> List[Dict[str, str]]:
    """
    Synchronous wrapper for async scraping
    
    Args:
        urls: List of URLs to scrape
        max_concurrent: Maximum number of concurrent requests
        timeout: Timeout in seconds for each request
        
    Returns:
        List of dictionaries containing scraped content
    """
    scraper = FastNewsScraper(max_concurrent=max_concurrent, timeout=timeout)
    
    # Create new event loop
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    
    try:
        results = loop.run_until_complete(scraper.scrape_urls(urls))
    finally:
        # Clean up
        if platform.system() != 'Windows':  # Windows has issues with loop cleanup
            loop.close()
    
    return results

In [None]:
bing_data_df.url.tolist()

In [None]:
try:
    urls = bing_data_df.url.tolist()
    
    start_time = time.time()
    results = scrape_batch(urls)
    end_time = time.time()
    
    print(f"Scraped {len(results)} articles in {end_time - start_time:.2f} seconds")
    
    # Print first article preview
    if results:
        article = results[0]
        print(f"\nExample article:")
        print(f"Title: {article['title'][:100]}")
        print(f"Content preview: {article['text'][:200]}...")
        
except Exception as e:
    print(f"An error occurred: {str(e)}")
    raise

An error occurred: name 'time' is not defined


NameError: name 'time' is not defined

In [None]:
print(bing_data_df.iloc[0].url)
print(bing_data_df.iloc[1].url)

In [None]:
results

In [None]:
print(results[0]['url'])
print(results[0]['text'])

In [None]:
import praw
from datetime import datetime
import pandas as pd

class RedditNewsSearch:
    def __init__(self):
        # Initialize Reddit instance
        self.reddit = praw.Reddit(
            client_id=os.getenv('REDDIT_CLIENT_ID'),
            client_secret=os.getenv('REDDIT_CLIENT_SECRET'),
            user_agent="news_search_script_v1.0"
        )
    
    def search_reddit(self, query, limit=25, sort='relevance', time_filter='month'):
        """
        Search across all of Reddit for specific topics
        
        Parameters:
        - query: Search term
        - limit: Number of results to return
        - sort: One of 'relevance', 'hot', 'top', 'new', 'comments'
        - time_filter: One of 'all', 'day', 'hour', 'month', 'week', 'year'
        """
        search_results = []
        
        # Search across all subreddits
        for submission in self.reddit.subreddit('all').search(
            query,
            sort=sort,
            time_filter=time_filter,
            limit=limit
        ):
            post_data = {
                'title': submission.title,
                'subreddit': submission.subreddit.display_name,
                'score': submission.score,
                'comments': submission.num_comments,
                'url': submission.url,
                'reddit_url': f'https://reddit.com{submission.permalink}',
                'created_utc': datetime.fromtimestamp(submission.created_utc).strftime('%Y-%m-%d %H:%M:%S'),
                'upvote_ratio': submission.upvote_ratio,
                'is_self': submission.is_self  # True if it's a text post
            }
            
            # Get the post text if it's a self post
            if submission.is_self:
                post_data['text'] = submission.selftext
            
            search_results.append(post_data)
        
        return pd.DataFrame(search_results)

    def filter_news_sources(self, df):
        """Filter results to focus on news-related content"""
        news_subreddits = [
            'news', 'worldnews', 'politics', 'technology',
            'science', 'environment', 'business', 'economics',
            'finance', 'health', 'education'
        ]
        
        # Filter for news subreddits or posts containing news URLs
        news_domains = ['reuters.com', 'apnews.com', 'bbc.com', 'nytimes.com', 
                       'theguardian.com', 'bloomberg.com', 'wsj.com']
        
        return df[
            (df['subreddit'].isin(news_subreddits)) |
            (df['url'].str.contains('|'.join(news_domains), case=False, na=False))
        ]

In [None]:
searcher = RedditNewsSearch()

# Get user input
query = 'How AI will change of way to live'
include_only_news = False

# Perform search
results = searcher.search_reddit(
    query=query,
    limit=50,
    sort='relevance',
    time_filter='month'
)

if include_only_news:
    results = searcher.filter_news_sources(results)

# Display results
if len(results) == 0:
    print("No results found.")
    
print(f"\nFound {len(results)} results:")
for _, post in results.iterrows():
    print(f"\nTitle: {post['title']}")
    print(f"Subreddit: r/{post['subreddit']}")
    print(f"Score: {post['score']:,} | Comments: {post['comments']:,}")
    print(f"Posted: {post['created_utc']}")
    print(f"URL: {post['url']}")
    print(f"Reddit Discussion: {post['reddit_url']}")
    if post['is_self'] and len(post['text']) > 200:
        print(f"Text Preview: {post['text'][:200]}...")
    print("-" * 80)

# Save results to CSV
filename = f"reddit_search_{query.replace(' ', '_')}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
results.to_csv(filename, index=False)
print(f"\nResults saved to {filename}")

In [None]:
results

**dist**

In [37]:
import requests
import pandas as pd

class BingNewsAPI():
    """Client for Bing News Search API"""
    def __init__(self, api_key: str):
        self.api_key = api_key
        self.session = requests.Session()

        self.base_url = "https://api.bing.microsoft.com/v7.0/news/search"
        self.session.headers.update({
            'Ocp-Apim-Subscription-Key': self.api_key
        })

    def search_news(self, query: str, count: int = 10) -> List[Dict]:
        """
        Search news articles using Bing News API
        
        Args:
            query: Search term
            count: Number of results to return (max 100)
            
        Returns:
            List of news articles
        """
        params = {
            'q': query,
            'count': min(count, 100),
            'freshness': 'Day'
        }
        
        response = self.session.get(self.base_url, params=params)
        response.raise_for_status()
        
        results = response.json()
        return results.get('value', [])

In [38]:
import ssl
import logging
import asyncio
import platform
import nest_asyncio
from typing import List, Dict, Set

import certifi
import aiohttp
from bs4 import BeautifulSoup
from aiohttp import ClientTimeout
from fake_useragent import UserAgent
from aiohttp_retry import RetryClient, ExponentialRetry

# Apply nest_asyncio to allow nested event loops
nest_asyncio.apply()

class FastNewsScraper:
    """Asynchronous news content scraper optimized for speed"""
    
    def __init__(self, max_concurrent: int = 50, timeout: int = 10):
        self.max_concurrent = max_concurrent
        self.timeout = ClientTimeout(total=timeout)
        self.ua = UserAgent()
        self.seen_urls: Set[str] = set()
        
        # Configure logging
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)
        
        # Setup SSL context
        self.ssl_context = ssl.create_default_context(cafile=certifi.where())

    def _get_random_headers(self) -> Dict[str, str]:
        """Generate random headers for each request"""
        return {
            'User-Agent': self.ua.random,
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Connection': 'keep-alive',
        }

    async def _extract_content(self, html: str) -> Dict[str, str]:
        """Extract content from HTML using BeautifulSoup with minimal parsing"""
        try:
            soup = BeautifulSoup(html, 'html.parser')
            
            # Quick removal of unnecessary elements
            for tag in soup(['script', 'style', 'nav', 'footer', 'iframe', 'aside']):
                tag.decompose()
            
            # Fast content extraction using common patterns
            content = {
                'title': '',
                'text': '',
                'metadata': {}
            }
            
            # Quick title extraction
            title_tag = (
                soup.find('h1') or 
                soup.find('meta', property='og:title') or
                soup.find('title')
            )
            if title_tag:
                content['title'] = title_tag.get_text() if hasattr(title_tag, 'get_text') else title_tag.get('content', '')
            
            # Quick main content extraction
            article_tag = (
                soup.find('article') or
                soup.find('div', class_=['article-content', 'story-content', 'post-content']) or
                soup.find('div', {'itemprop': 'articleBody'})
            )
            
            if article_tag:
                # Extract text efficiently
                paragraphs = article_tag.find_all('p')
                content['text'] = '\n'.join(p.get_text(strip=True) for p in paragraphs)
            
            return content
            
        except Exception as e:
            self.logger.error(f"Content extraction error: {str(e)}")
            return {'title': '', 'text': '', 'metadata': {}}

    async def _fetch_url(self, session: aiohttp.ClientSession, url: str) -> Dict[str, str]:
        """Fetch and process a single URL"""
        try:
            retry_options = ExponentialRetry(attempts=2)
            retry_client = RetryClient(client_session=session, retry_options=retry_options)
            
            async with retry_client.get(
                url,
                headers=self._get_random_headers(),
                timeout=self.timeout,
                ssl=self.ssl_context
            ) as response:
                if response.status == 200:
                    html = await response.text()
                    content = await self._extract_content(html)
                    content['url'] = url
                    content['status'] = 'success'
                    return content
                else:
                    return {'url': url, 'status': 'error', 'error': f'HTTP {response.status}'}
                    
        except asyncio.TimeoutError:
            return {'url': url, 'status': 'error', 'error': 'timeout'}
        except Exception as e:
            return {'url': url, 'status': 'error', 'error': str(e)}

    async def scrape_urls(self, urls: List[str]) -> List[Dict[str, str]]:
        """
        Scrape multiple URLs concurrently
        
        Args:
            urls: List of URLs to scrape
            
        Returns:
            List of dictionaries containing scraped content
        """
        # Remove duplicates and already seen URLs
        unique_urls = list(set(urls) - self.seen_urls)
        self.seen_urls.update(unique_urls)
        
        if not unique_urls:
            return []
        
        # Create connection pool
        conn = aiohttp.TCPConnector(
            limit=self.max_concurrent,
            ttl_dns_cache=300,
            ssl=self.ssl_context
        )
        
        async with aiohttp.ClientSession(connector=conn) as session:
            tasks = [
                self._fetch_url(session, url)
                for url in unique_urls
            ]
            
            # Use semaphore to limit concurrent requests
            semaphore = asyncio.Semaphore(self.max_concurrent)
            async def bounded_fetch(task):
                async with semaphore:
                    return await task
            
            # Gather results with timeout
            results = await asyncio.gather(
                *(bounded_fetch(task) for task in tasks),
                return_exceptions=True
            )
            
            # Filter out failures and exceptions
            valid_results = [
                result for result in results
                if isinstance(result, dict) and result.get('status') == 'success'
            ]
            
            return valid_results

In [39]:
def scrape_batch(urls: List[str], max_concurrent: int = 50, timeout: int = 10) -> List[Dict[str, str]]:
    """
    Synchronous wrapper for async scraping
    
    Args:
        urls: List of URLs to scrape
        max_concurrent: Maximum number of concurrent requests
        timeout: Timeout in seconds for each request
        
    Returns:
        List of dictionaries containing scraped content
    """
    scraper = FastNewsScraper(max_concurrent=max_concurrent, timeout=timeout)
    
    # Create new event loop
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    
    try:
        results = loop.run_until_complete(scraper.scrape_urls(urls))
    finally:
        # Clean up
        if platform.system() != 'Windows':  # Windows has issues with loop cleanup
            loop.close()
    
    return results

In [40]:
bing_news_api = BingNewsAPI(os.getenv('BING_API_KEY'))
bing_data = bing_news_api.search_news('AI trends', 5)
bing_data_df = pd.DataFrame(bing_data)

In [43]:
scraper = FastNewsScraper(max_concurrent=5, timeout=10)

# Create new event loop
try:
    loop = asyncio.get_event_loop()
except RuntimeError:
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

try:
    results = loop.run_until_complete(scraper.scrape_urls(bing_data_df.url.tolist()))
finally:
    # Clean up
    if platform.system() != 'Windows':  # Windows has issues with loop cleanup
        loop.close()

In [46]:
type(results[0])

dict

In [47]:
import os
import re

# Sample list of dictionaries
data = results

# Output directory
output_dir = "../data/raw/news/"
os.makedirs(output_dir, exist_ok=True)

# Function to clean title and create a valid filename
def clean_title(title):
    # Replace non-alphanumeric characters with underscores
    clean_name = re.sub(r'[^\w\s]', '', title)  # Remove special characters
    clean_name = re.sub(r'\s+', '_', clean_name.strip())  # Replace spaces with underscores
    return clean_name

# Write each dictionary to a .txt file
for item in data:
    filename = clean_title(item['title']) + ".txt"
    filepath = os.path.join(output_dir, filename)
    with open(filepath, 'w', encoding='utf-8') as file:
        file.write(item['text'])

print(f"Files have been saved in the directory: {output_dir}")

Files have been saved in the directory: ../data/raw/news/


In [44]:
results

[{'title': 'Lately: The year’s biggest YouTube trends, AI weather forecasts and brain rot',
  'text': 'Welcome back to Lately, The Globe’s weekly tech newsletter. If you have feedback or just want to say hello to a real-life human,send me an e-mail.\n\U0001fae0 ‘Brain rot’ is the word of the year\n👩🏻\u200d💻 The biggest YouTubetrends in 2024\n🖼️ Can an AI-generated image be copyrighted?\n✂️ The future of the online harms bill\nYou know that feeling of defeat or ashamed boredom that comes when you’re scrolling mindlessly online, and you’re viewing scores of sloppy, AI-generated memes, unhinged TikTok livestreams or posts of strangers fighting on Facebook? There’s a word for that feeling: brain rot.\nThe Oxford University Press dubbedthe evocative phrase the word of the year, noting it “gained new prominence in 2024.” Oxford defines brain rot as “the supposed deterioration of a person’s mental or intellectual state, especially viewed as the result of overconsumption of material (now parti

In [2]:
results = [{'title': 'Lately: The year’s biggest YouTube trends, AI weather forecasts and brain rot',
  'text': 'Welcome back to Lately, The Globe’s weekly tech newsletter. If you have feedback or just want to say hello to a real-life human,send me an e-mail.\n\U0001fae0 ‘Brain rot’ is the word of the year\n👩🏻\u200d💻 The biggest YouTubetrends in 2024\n🖼️ Can an AI-generated image be copyrighted?\n✂️ The future of the online harms bill\nYou know that feeling of defeat or ashamed boredom that comes when you’re scrolling mindlessly online, and you’re viewing scores of sloppy, AI-generated memes, unhinged TikTok livestreams or posts of strangers fighting on Facebook? There’s a word for that feeling: brain rot.\nThe Oxford University Press dubbedthe evocative phrase the word of the year, noting it “gained new prominence in 2024.” Oxford defines brain rot as “the supposed deterioration of a person’s mental or intellectual state, especially viewed as the result of overconsumption of material (now particularly online content) considered to be trivial or unchallenging.” “Brain rot” was chosen by a combination of public vote and language analysis by Oxford lexicographers. It beat five other finalists: demure, slop, dynamic pricing, romantasy and lore.\nThe photo on the left was used to create the image on the right using an AI image generator.Supplied\nIn Canada’s copyright database, you’ll find an image of a blurry, Van Gogh-esque sunset titledSuryast. It’s registered under two authors, Ankit Sahni and the RAGHAV Artificial Intelligence Painting App, the program Sahni used to create the image. For some, this is a problem. A legal clinic at the University of Ottawa is trying to expunge the registration, arguing that only humans can be authors.As Joe Castaldo reports, the rise of generative AI has brought up many thorny questions, including if AI-generated content deserves copyright protection and how much human involvement is required. These very questions are what inspired Sahni, who is an IP lawyer, to createSuryastin the first place, launching a consequential debate about copyright in the age of generative AI.\nBack in September, the federal government introduced the online harms bill, a law that aimed to combat online child abuse and hate crime by making changes to Canada’s criminal code and putting the onus on platforms to remove harmful content. The legislation faced criticism from Conservatives who said the bill amounted to censorship, and it’s been held up by filibustering in the House of Commons.\nNow, in an effort to pass the bill before a potential federal election,the Liberals are splitting the bill: the first bill would focus on kids’ safety and the second bill would include the new online hate-crime penalties. Child safety advocates, including Carol Todd, the mother of Amanda Todd who died by suicide after falling victim to cyberbullying, have been calling on MPs to end the filibuster and pass the bill. But even though the bill is now split from the contentious hate-crime measures, the Conservatives have indicated they’re still unlikely to support the legislation.\nDeepMind, Google’s AI offshoot, claims its new tool GenCast canoutperform the world’s best weather predicting model, devising 15-day forecasts faster and more accurately.In a new study published inNature, DeepMind reports that in a comparative test between GenCast and the European Center for Medium-Range Weather Forecasts – the premier service that 35 countries around the world rely on to produce their own forecasts – the AI agent was more accurate 97.2 per cent of the time. Accurately predicting the weather could have major benefits: A University of Arizona study from last year found that making forecasts 50 per cent more accurate would save 2,200 lives a year in the U.S., primarily from extreme heat or cold.\nCompanies in Mexico embrace AI to resurrect the dead(Rest of World)\nStop using generative AI as a search engine(The Verge)\nTrans Americans are turning to TikTok to crowdfund their relocations(Wired)\nSupplied\nFilterworld by Kyle Chayka, $37.99\nThis week the Globe’s art section rounded upthe most giftable booksfor everyone on your list, including “the extremely online” person. One of the books they suggest isFilterworldby Kyle Chayka, a New Yorker staff writer who reports on internet culture and technology. In this non-fiction book, he explores how algorithmic feeds have homogenized how we create and consume culture, making it less interesting and fulfilling. Sure, it’s not the lightest read to curl up with next to the fire, but it’ll make you think deeply about how we spend our time online.\nSabrina Carpenter performs in September.Brendan McDermid/Reuters\nEvery December, online platforms release their year-in-review recaps, listing the top artists, influencers and trends of the previous 12 months. YouTube did something a bit differently this year. Rather than listing the most-watched videos of the year, it identified the top trending topics, based on an analysis including the number of views, uploads and activity by creators. This new methodology reflects the nature of YouTube: People aren’t just watching their favourite creators on the platform. They’re making their own reaction videos, posting commentary and making mash-ups, becoming content creators themselves in the process.\nThis year’s top trending topicswere a mix of video games, niche animated series and big news stories. The only celebrity to make the list was pop star Sabrina Carpenter, who had a breakthrough with her single Espresso, and there was only one reference to Hollywood–blockbusterDeadpool & Wolverine. This shows how YouTube nurtures entertainment outside of the pop culture mainstream, but also how traditional media is becoming less influential in our online spaces.',
  'metadata': {},
  'url': 'https://www.theglobeandmail.com/business/article-lately-the-years-biggest-youtube-trends-ai-weather-forecasts-and-brain/',
  'status': 'success'},
 {'title': '8 Game-Changing Manufacturing Trends That Will Define 2025',
  'text': "The manufacturing sector is no stranger to innovation. In fact, it’s always been at the forefront of digital transformation, taking the arrival of robotics, the internet and new developments in material science in its stride.\nHowever, in 2025, it still finds itself braced for disruption, as manufacturers around the world grapple with the implications of artificial intelligence. There’s also the growing importance of improving sustainability as the climate crisis deepens and building resilience in the face of political and societal uncertainty.\nIn order to meet these challenges, the companies responsible for creating products we use every day are enthusiastically investing in breakthrough technologies as well as adapting to cultural changes, such as the need to rethink skills and training.\nSo let’s take a look at how this will unfold over the next year by overviewing the key trends and opportunities:\nIn 2025, we will see manufacturers rolling out many use cases for generative AI designed to speed up and drive efficiency in manufacturing processes. By leveraging the power of generative design, it will be possible to create stronger, lighter components that make more efficient use of available materials. We’ve already seen aerospace companies leveraging genAI to create newaircraft parts, and automotive manufacturers are using it to optimize vehicle designs. I believe we will see many more innovative use cases in the coming year as manufacturers increasingly integrate genAI into their operations.\nRobots are not new in manufacturing; in fact, robots have been working in factories for more than 50 years. What is new, however, is the new generation of intelligent robots that are able to work safely and effectively alongside humans, apply themselves to different tasks, and learn to become more efficient at their jobs and navigating their environments. As robots move away from the assembly line andinto the workforce, humans will develop new skills around leading and interacting with automated co-workers – sometimes referred to as “cobots”.\nPositioning a business asa leaderrather than simply a follower or even a laggard in the AI era will become a growing priority for many manufacturing companies. Put simply, it’s no longer good enough to simply adopt new technologies like robotics, predictive maintenance and automation. As the barriers to entry continue to fall, it has to be done in a way that’s more innovative, effective and efficient than the competition. Developing the capacity not just to follow trends but to identify opportunities to blaze a trail will increasingly become a priority in 2025.\nThere are numerous reasons why sustainability is quickly becoming a business priority for manufacturers in 2025. They include consumer demand, stricter regulations and the simple fact that we’re increasingly seeing the impact of climate disruption in the world around us. Due to this, we can expect to see a strategic switch towards cleaner and greener operations, such as the use of renewable energy, recyclable materials, reductions in emissions, excessive packaging, and water use.\nThere’s a widely-acknowledged skills crisis among industries hoping to reap the opportunities offered by AI, robotics, advanced data analytics and automation. Bridging this skills gap will require manufacturers to rethink the way they hire and train staff, and for many, this will become a critical business priority in 2025. Addressing this challenge may involve investing in upskilling and reskilling, developing apprenticeship programs or forging new relationships with educators and academia.\nThis year, smartphone manufacturer Xiaomiswitched onits fully autonomous dark factory close to Beijing, capable of producing 10 million handsets a year without human intervention. This model will become increasingly common as manufacturers chase improved efficiency, sustainability and reduced waste. While “lights out” factories have been around for a while, Xiaomi’s factory is the first that is able to learn how to operate more efficiently and optimize its own processes thanks to its AI-powered “brain”.\nThere can undoubtedly be cultural barriers to AI adoption. Some people are worried it will replace them or make them redundant, while others believe that decision-making shouldn't be left to machines. While these are all valid concerns, identifying areas where AI can clearly solve problems or create efficiencies while mitigating its potential for causing harm will be a priority for the manufacturing industry in 2025. This will include planning and delivering initiativesfostering an understandingof AI across a workforce and ensuring its benefits are felt by all.\nThe logistical challenges around sourcing components and managing complex inventories and production infrastructure are perfectly suited to automated, intelligent solutions. AI-powered tools leveraging real-time data analytics will enable more accurate demand forecasting and automated decision-making, helping manufacturers to build supply chains that are more resilient and adaptive to changing market conditions. In 2025, AI will enable manufacturers to anticipate disruption more effectively and identify opportunities to improve efficiency, ultimately leading to improved customer experience and business performance.\nAs we move through 2025, the manufacturing sector stands at a pivotal moment of transformation. While challenges around AI adoption, sustainability, and workforce development remain significant, the convergence of smart technologies, automated systems, and sustainable practices is creating unprecedented opportunities for innovation. Companies that successfully navigate these changes – embracing AI-driven efficiency while building resilient, sustainable operations and investing in their workforce – will be best positioned to thrive in manufacturing's next era. The future of manufacturing isn't just about automation and AI – it's about creating smarter, more sustainable, and more adaptive production systems that can meet the challenges of tomorrow.\n\nOne Community. Many Voices.\xa0Create a free account to share your thoughts.\nOur community is about connecting people through open and thoughtful conversations. We want our readers to share their views and exchange ideas and facts in a safe space.\nIn order to do so, please follow the posting rules in our site'sTerms of Service.We've summarized some of those key rules below. Simply put, keep it civil.\nYour post will be rejected if we notice that it seems to contain:\nUser accounts will be blocked if we notice or believe that users are engaged in:\nSo, how can you be a power user?\nThanks for reading our community guidelines.  Please read the full list of posting rules found in our site'sTerms of Service.",
  'metadata': {},
  'url': 'https://www.forbes.com/sites/bernardmarr/2024/12/06/8-game-changing-manufacturing-trends-that-will-define-2025/',
  'status': 'success'},
 {'title': '\nDeep Dive: AI 2024\n',
  'text': "In this issue, we delve into the transformative impact of AI on healthcare and pharma, featuring insights on key AI trends from the floor of Frontiers Health, the ongoing battle against drift, and the promise of biological software. Plus, we explore AI's role in drug discovery, commercialisation, and medical information, and take an in-depth look at why drugs fail.\nDecoding the future: Key AI trends and innovations at Frontiers Health:From patient autonomy to repurposed drugs, Frontiers Health 2024 showcased the transformative potential of AI in healthcare. Here, we explore five key trends and innovations that caught our attention at this year’s event.\nPharma and the ongoing battle against AI drift:Addressing the challenge of AI drift is critical for maintaining the reliability and effectiveness of AI systems in pharma. To find out more, Pharmaphorum web editor, Nicole Raleigh, examines how companies are navigating algorithmic shifts that threaten operational accuracy.\nBig interview: Inceptive's Jakob Uszkoreit on the promise of biological software:In an exclusive interview, deep learning pioneer, Jakob Uszkoreit, shares his journey from groundbreaking success at Google to the uncharted territory of biological software, and discusses how AI is bridging biology and computation for faster, more effective breakthroughs\nFrom R&D to ROI: AI’s impact on pharma commercialisationAI is transforming R&D ROI by accelerating drug discovery timelines and optimising resource allocation. Here, EVERSANA’s, Scott Snyder, explores how AI-powered analytics are enhancing market access and commercialisation strategies.\nRight to reply: ChatGPT and AI in healthcareMuch has been said about the potential (both good and bad) of AI in healthcare. But, to get both sides of the story, and uncover the true intentions of GenAI, Deep Dive sat down with the notorious player that sparked the current hype cycle: Chat-GPT.\nWhy drugs fail: The unrelenting challenge of finding new drugsDespite advances in technology, drug failure remains a significant hurdle for the industry. Jordan Lane, co-founder and CSO for Ignota Labs, breaks down the scientific and structural barriers that contribute to high drug failure rates and explores how AI may help to address them.\nA strategic framework for evaluating AI in drug discovery:Evaluating AI’s effectiveness in drug discovery requires a robust and strategic framework. Here, L.E.K. Consulting provides a roadmap for assessing AI tools, ensuring they align with scientific and commercial goals.\nNavigating the evolving landscape of medical information with AI-powered solutionsIQVIA’s Simon Johns breaks down how AI-powered solutions are reshaping the way medical information is processed and delivered, improving accessibility and accuracy. Learn about the technologies driving this shift and their implications for the future of medical communication.\nRead Deep Dive:AI 2024in full pharmaphorum’s digital magazineDeep Diveprovides objective, issue-driven views, analysis, high-level interviews and unique research for pharmaceutical companies, biotech firms and the wider healthcare sector. Subscribe tofuture issues of Deep Dive.",
  'metadata': {},
  'url': 'https://pharmaphorum.com/digital/deep-dive-ai-2024',
  'status': 'success'},
 {'title': 'How AI can help you attract, engage and retain the best talent in 2025',
  'text': 'Worawut - stock.adobe.com\nAs we move into 2025, the landscape of human resources (HR) is heading for a significant transformation.Artificial intelligence(AI) is set to revolutionise workforce collaboration, efficiency, and talent management.\nFor HR leaders, harnessing the power of AI will be essential toattract, engage, and retain top talentin an increasingly competitive market.\nAI is reshaping and revamping HRby automating routine and mundane tasks such as interview scheduling, data entry, and CV screenings. This automation allows HR teams to focus on strategic initiatives that add real value to employees, such as developing diverse cultures, offering tailored development programmes, and increasing engagement.\nAI-powered analytics can identify workforce trends, predict employee turnover, and suggest to retain top talent. These insights enable HR leaders to make data-driven decisions to support a high-performance culture, ultimately improving employee engagement and organisational performance.\nJust look at Unilever, which uses AI to streamline its recruitment process. By using AI-driven assessments and video interview analytics, Unilever has significantly reduced time-to-hire while enhancing the candidate experience. Additionally, AI can streamline performance management by providing continuous feedback and personalised development plans. This shift towards real-time performance management fosters a culture of continuous improvement, where the team receives timely feedback and support to achieve their goals, leading to higher engagement levels and better retention rates.\nAs the demand on sourcing talent with scarce skills continues in 2025, attracting top talent needs innovative strategies. AI can play a pivotal role in enhancing the candidate experience. Imagine AI-driven chatbots engaging with candidates in real-time, answering their questions and providing personalised information about the company and the role. This immediate engagement can significantly improve the candidate experience, making the organisation more attractive.\nAI can also help create a more inclusive hiring processes by eliminating unconscious biases from recruitment. AI algorithms can analyse job descriptions to ensure they are free from biased language and assess candidates based on objective criteria. This is an incredibly important step to support organisations in attracting and growing a more diverse and inclusive workforce, which is crucial for driving innovation and business success.\nRetaining your team is equally important as attracting it. AI can help HR leaders identify early signs of people’s disengagement or dissatisfaction. For instance, AI-powered sentiment analysis can monitor employee communications and flag any negative sentiments, allowing HR and managers to intervene proactively. By addressing issues before they escalate, organisations can improve the satisfaction, happiness and ultimately retention of the team.\nAI can also facilitate personalised employee development. By analysing skills, performance data, and career aspirations, AI can recommend tailored development programmes and career paths for each individual. This personalised approach to development can help people feel valued and supported.\n24% of all workersare worried that AI will soon make their job obsolete. HR leaders have a crucial role in addressing these concerns and ensuring their teams are ready for AI integration. Providing training and the right tools to integrate AI smoothly is essential. By fostering a culture of continuous improvement and responsible AI use, HR can drive greater efficiency and empower the entire workforce.\nAI is more likely to enhance roles rather than replace them, and HR leaders should embrace AI ethically and transparently. This involves being clear about how AI is used, ensuring data privacy, and maintaining a human touch in all interactions. By doing so, HR can build trust and create a positive environment where AI is seen as a tool for empowerment rather than a threat.\nAs we approach 2025 and beyond, the integration of AI in HR will continue to evolve. Future trends may include more sophisticated AI-driven talent management systems, enhanced predictive analytics for workforce planning, and even more personalised employee experiences powered by AI. HR leaders who stay ahead of these trends and continually innovate will be well-positioned to lead their organisations into the future.\nLooking to the New Year, AI will play a pivotal role in enhancing HR functions, making them more efficient, strategic, and employee centric. By leveraging AI to attract, engage, and retain top talent, organisations can stay competitive in a rapidly evolving job market. HR leaders who embrace AI responsibly and proactively will be well-positioned to drive their organisations forward, creating workplaces that are both productive and fulfilling for their team.\nToria Walters is chief people officer atANS, a digital transformation provider and Microsoft’s UK Services Partner of the Year 2024. Headquartered in Manchester, it offers public and private cloud, security, business applications, low code, and data services to thousands of customers, from enterprise to SMB and public sector organisations.',
  'metadata': {},
  'url': 'https://www.computerweekly.com/opinion/How-AI-can-help-you-attract-engage-and-retain-the-best-talent-in-2025',
  'status': 'success'},
 {'title': 'At AWS Re:Invent, A Look At Reinventing AI',
  'text': 'This year’s Amazon Web Services conference, re:Invent 2024, confirmed a few trends we’ve been tracking in the artificial intelligence boom. These include massive changes in the cloud infrastructure landscape with investment of hundreds of billions of dollars in an AI infrastructure arms race, as well as the struggle of enterprises to find return on investment in AI.\nThis week, both of these trends had interesting twists. The week started off with the resignation of Intel CEO Pat Gelsinger, demonstrating what happens when you miss a huge market shift. Intel, even before Gelsinger took over, was poorly positioned in graphics processing units, the area that NVIDIA pioneered. But Gelsinger perhaps made things worse by focusing on an approach favoring the buildout of domestic and European manufacturing facilities and a foundry business, all based in part on government funding.\nAWShad plenty of announcements in hardwareto demonstrate that it will stay in front of the AI arms race, including ensuring its supply of chips. In addition to buying chips from AI compute leaders NVIDIA and AMD, AWS has its own line of chips, including Graviton and Trainium. Here at re:Invent, it unveiled mass-scale AI clusters, complete with its own networking and interconnect technology, as well as general availability of its Trainium2 AI training chip.\nLAS VEGAS, NEVADA - DECEMBER 3: Amazon Web Services (AWS) CEO Matt Garman delivers a keynote address... [+]during AWS re:Invent 2024, a conference hosted by Amazon Web Services, at The Venetian Las Vegas on December 3, 2024 in Las Vegas, Nevada. (Photo by Noah Berger/Getty Images for Amazon Web Services)\nOn the enterprise ROI front, I saw several examples of this in the re:Invent keynotes as well as the analyst conference, where companies as diverse as Apple, JP Morgan, The Hartford, Novo Nordisk, and others demonstrated how they are using AI to gain ROI in specific use cases.\nLet’s dive into these two areas in more detail.\nWalking around the re:Invent conference, which hosted 60,000 people, you can talk to anybody from cloud engineers to Fortune 500 CEOs. The conference is of course owned and operated by AWS, so it’s an annual tradition for AWS to launch a barrage of technology announcements—literally hundreds—that demonstrate why it’s still the leading cloud operator.\nThe question is, will AI help or hurt AWS in the long run? In addition to AWS competitors Microsoft Azure and Google Cloud, the AI boom has spawned a raft of AI-focused cloud providers, including Lambda and CoreWeave, the latter of which has achieved a valuation of tens of billions of dollars in just a few years.\nAWS is of course determined not to let AI derail its leadership. Its announcements range from huge hardware projects to feature upgrades of its AI services SageMaker and Bedrock.\nOn the hardware front, AWS showed it has a diversification strategy to provide its own hardware, even though it buys from many suppliers, including NVIDIA. AWS announced general availability of Amazon Elastic Compute Cloud (Amazon EC2) on Trn2 instances, which feature clusters of 16 Trn2 chips. And Trn2 can be scaled up using hardware clustering solutions. A separate Trn2 UltraServer, now in preview, features a cluster of 64 Trn2 chips across four Trn2 instances.\nAWS CEO Matt Garman positioned it as a broadening of the market rather than as a competitive alternative to NVIDIA.\n"NVIDIA is an incredibly important partner of ours. AMD is an incredible partner as well. Today it is true that the vast majority of workloads run on NVIDIA. We\'re so early where GenAI can be, that I think Trainium increases the size of the pie. It\'s not going to be at the expense [of others]."\n\nThe company is also planning a massive supercomputer named Project Rainier in partnership with Anthropic (which AWS has invested in), based on UltraServers. It\'s set to debut in 2025. Anthropic isn’t alone. Other Trn2customers includeDatabricks, Datadog, Ricoh, Hugging Face, and PyTorch.\nIt wouldn’t be re:Invent without lots of geeky upgrades to AWS services. AWS has rebranded its SageMaker ML service as SageMaker AI, which sports a bunch of enhancements designed to grease the wheels of ML-based analytics. These include the SageMaker HyperPod, whichcoordinates modeling workloads.\nIn a keynote talk at the conference this week, Swami Sivasubramanian, VP of AI and data at AWS, outlined a raft of new services that streamline model training and inferencing for enterprise customers.\n“We’re seeing the convergence of big data, analytics, machine learning, and generative AI,” said Sivasubramanian. And AWS has built on a range of past successes to meet demand, he said.\nIn another example, the company’s other AI service, Bedrock, offers a range of large language models. This year featuresmulti-agent collaboration, which allows agents to coordinate across different tasks. Example: For a financial services firm, according to AWS’s online announcement, “specialized agents could coordinate to gather data, analyze trends, and provide actionable recommendations—working in parallel to improve response times and precision.” This feature is currently available in the US East (N. Virginia), US West (Oregon), and Europe (Ireland) AWS Regions.\nQueue up the next topic: enterprise AI. The world is filled with chatter about whether AI can fulfill its promise in delivering ROI to the enterprise.\nThere are of course many skeptics, including Render founder and CEO Anurag Goel, who was also a former executive at Stripe. Render has a cloud automation platform that Goel says is being adopted by “hundreds of thousands of developers per month.” He told me that he hadn’t seen a huge demonstration of ROI at the conference.\n“AWS announced a lot of services around AI but they lack a coherent end-to-end theme,” Goel told me in the hallway of the Venetian. “The business value and the ROI remains unclear.”\nDespite some skepticism about the AI endgame, some attendees said there has been a shift in thinking from “god” products to more tactical technology using AI. Adam Jacob, cofounder and CEO of System Initiative, told me the new thinking embodies a more tactical, focused approach he refers to as “micro AI.”\n“As an industry, we started out by thinking, ‘We\'re going to have a single god robot that we just pump all the exabytes of data to and we just let the god machine sort it out.’ We\'re learning that that creates a terrible user experience and is incredibly hard to track and secure. Instead, we\'re starting to build micro AI experiences that form a series of small experiences that build up to really compelling big ones."\nThe case for micro AI makes sense. Generalized AI is difficult and has key challenges such as accuracy, data security, and data provenance. But if you can focus on specific solutions with an AI tool, you might have a better shot at ROI gratification.\nIn its analyst conference, AWS did a good job of presenting ROI examples from customers, which was a noticeable change from last year, when the market was still riding the pink cloud of AI hype.\nIn its briefing sessions for industry analysts, AWS had examples of compelling ROI-driven case studies. Companies presenting included Apple, JP Morgan, The Hartford, Merck, New York Life, and Novo Nordisk, among others.\nLouise Lind Skov, head of content digitalisation with Danish pharmaceutical giant Novo Nordisk. showed how the company reduced the time to create regulatory documentation from two months to several minutes by building its own AI-driven tool on AWS, which it calls NovoScribe.\n“From a medical writing perspective, this would have required thousands of hours of work,” said Skov.\nNovoScribe seems to be a great example of micro AI. By focusing on a specific use case and process, Novo Nordisk was able to deliver specific efficiencies.\nThis will be a big theme going forward as the AI wave takes a circuitous path to technology nirvana. Nobody has a god AI app yet, but there are certainly many tools available to solve everyday problems and deliver tangible results in focused areas.\n\nOne Community. Many Voices.\xa0Create a free account to share your thoughts.\nOur community is about connecting people through open and thoughtful conversations. We want our readers to share their views and exchange ideas and facts in a safe space.\nIn order to do so, please follow the posting rules in our site\'sTerms of Service.We\'ve summarized some of those key rules below. Simply put, keep it civil.\nYour post will be rejected if we notice that it seems to contain:\nUser accounts will be blocked if we notice or believe that users are engaged in:\nSo, how can you be a power user?\nThanks for reading our community guidelines.  Please read the full list of posting rules found in our site\'sTerms of Service.',
  'metadata': {},
  'url': 'https://www.forbes.com/sites/rscottraynovich/2024/12/06/at-aws-reinvent-a-look-at-reinventing-ai/',
  'status': 'success'}]

In [3]:
import pandas as pd

In [7]:
print('\n\n'.join(pd.DataFrame(results).text.tolist()))

Welcome back to Lately, The Globe’s weekly tech newsletter. If you have feedback or just want to say hello to a real-life human,send me an e-mail.
🫠 ‘Brain rot’ is the word of the year
👩🏻‍💻 The biggest YouTubetrends in 2024
🖼️ Can an AI-generated image be copyrighted?
✂️ The future of the online harms bill
You know that feeling of defeat or ashamed boredom that comes when you’re scrolling mindlessly online, and you’re viewing scores of sloppy, AI-generated memes, unhinged TikTok livestreams or posts of strangers fighting on Facebook? There’s a word for that feeling: brain rot.
The Oxford University Press dubbedthe evocative phrase the word of the year, noting it “gained new prominence in 2024.” Oxford defines brain rot as “the supposed deterioration of a person’s mental or intellectual state, especially viewed as the result of overconsumption of material (now particularly online content) considered to be trivial or unchallenging.” “Brain rot” was chosen by a combination of public vote 