In [2]:
#https://markets.businessinsider.com/news/nvda-stock?p={page}  ->> Nvidia stock news
#https://markets.businessinsider.com/news/amzn-stock?p={page}  ->> Amazon stock news
#https://markets.businessinsider.com/news/tsla-stock?p={page}   ->> Tesla stock news
#https://markets.businessinsider.com/news/googl-stock?p={page}  ->> Google stock news
#https://markets.businessinsider.com/news/adbe-stock?p={page} ->> Adobe stock news
#https://markets.businessinsider.com/news/axp-stock?p={page}  ->> American Express stock news
#https://markets.businessinsider.com/news/meta-stock?p={page}->> Meta stock news
#https://markets.businessinsider.com/news/spot-stock?p={page}->> Spotify stock news
#https://markets.businessinsider.com/news/msft-stock?p={page}     ->> Microsoft stock news
#https://markets.businessinsider.com/news/jpm-stock?p={page}  -->JPMorgan stock news

## Imports

In [2]:
from bs4 import BeautifulSoup
import pandas as pd
import yfinance as yf
import nltk
import nest_asyncio
import asyncio
import aiohttp
from nltk.sentiment import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')
from tqdm.notebook import tqdm
import plotly.graph_objects as go
# Initialize sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\luficerg\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


# Fetcthing data

In [3]:
def fetch_stock_data(ticker):
    """
    This function fetches historical stock price data for a given ticker symbol.

    Args:
    ticker (str): The stock ticker symbol.

    Returns:
    DataFrame: A pandas DataFrame containing the closing prices of the stock.
    """
    print("Fetching stock prices...")

    # Download the stock data for the given ticker for the maximum available period
    stock_data = yf.download(ticker, period="max")

    # Keep only the 'Close' column which contains the closing prices
    stock_data = stock_data[['Close']]

    # Return the DataFrame containing the closing prices
    return stock_data

In [13]:
# Apply nest_asyncio
nest_asyncio.apply()
    
async def fetch_page(session, url, retries=3, delay=2):
    """
    Asynchronously fetch a single page with retry logic.
    
    Args:
    session (aiohttp.ClientSession): The HTTP session to use for the request.
    url (str): The URL of the page to fetch.
    retries (int): Number of retries for the request.
    delay (int): Delay between retries in seconds.

    Returns:
    str: The HTML content of the page.
    """
    for attempt in range(retries):
        try:
            async with session.get(url, timeout=aiohttp.ClientTimeout(total=10)) as response:
                response.raise_for_status()
                return await response.text()
        except (aiohttp.ClientError, aiohttp.ClientResponseError, aiohttp.ServerTimeoutError, asyncio.TimeoutError) as e:
            print(f"Attempt {attempt + 1} failed with error: {e}")
            if attempt < retries - 1:
                print(f"Retrying in {delay} seconds...")
                await asyncio.sleep(delay)
            else:
                raise

async def fetch_news(ticker):
    """
    Asynchronously fetch news headlines for a given ticker symbol from the Business Insider website.

    Args:
    ticker (str): The stock ticker symbol.

    Returns:
    DataFrame: A pandas DataFrame containing the datetime, ticker, source, and headline of the news articles.
    """
    # Define the columns for the DataFrame
    columns = ['datetime', 'ticker', 'source', 'headline']
    df = pd.DataFrame(columns=columns)
    ticker = str(ticker).lower()
    urls = [f'https://markets.businessinsider.com/news/{ticker}-stock?p={page}' for page in range(1, 200)]

    async with aiohttp.ClientSession() as session:
        tasks = [fetch_page(session, url) for url in urls]
        pages = await asyncio.gather(*tasks)

    counter = 0

    for html in pages:
        soup = BeautifulSoup(html, 'lxml')
        articles = soup.find_all('div', class_='latest-news__story')
        for article in articles:
            datetime = article.find('time', class_='latest-news__date').get('datetime')
            title = article.find('a', class_='news-link').text
            source = article.find('span', class_='latest-news__source').text
            df = pd.concat([pd.DataFrame([[datetime, ticker, source, title]], columns=df.columns), df], ignore_index=True)
            counter += 1

    df['datetime'] = pd.to_datetime(df['datetime'])
    df['date'] = df['datetime'].dt.date
    df['time'] = df['datetime'].dt.time
    df.drop(columns=['datetime'], inplace=True)

    print(f'{counter} headlines scraped from 200 pages')
    return df

# Sentiment Analysis

In [5]:
def calculate_sentiment_scores(headlines_df):
    """
    This function calculates sentiment scores for each headline in the DataFrame using a sentiment analyzer.

    Args:
    headlines_df (DataFrame): A pandas DataFrame containing headlines.

    Returns:
    DataFrame: The input DataFrame with an additional 'sentiment' column containing the sentiment scores.
    """
    # Apply the sentiment analyzer to each headline and store the compound sentiment score in a new 'sentiment' column
    headlines_df['sentiment'] = headlines_df['headline'].apply(lambda x: analyzer.polarity_scores(x)['compound'])
    
    # Convert the 'date' column to datetime type
    headlines_df['date'] = pd.to_datetime(headlines_df['date'])
    
    # Return the DataFrame with the sentiment scores
    return headlines_df

def aggregate_sentiment_scores(headlines_df):
    """
    This function aggregates sentiment scores by date, calculating the mean sentiment score for each date.

    Args:
    headlines_df (DataFrame): A pandas DataFrame containing headlines and their sentiment scores.

    Returns:
    Series: A pandas Series with dates as the index and the mean sentiment score for each date as the values.
    """
    # Group the DataFrame by 'date' and calculate the mean sentiment score for each group
    sentiment_summary = headlines_df.groupby('date')['sentiment'].mean()
    
    # Return the aggregated sentiment scores
    return sentiment_summary

def generate_trading_signals(sentiment_summary):
    """
    This function generates trading signals based on the sentiment scores.

    Args:
    sentiment_summary (Series): A pandas Series containing aggregated sentiment scores by date.

    Returns:
    Series: A pandas Series with trading signals (1 for positive sentiment, -1 for negative sentiment, and 0 for neutral sentiment).
    """
    # Apply a function to generate trading signals: 1 for sentiment > 0.2, -1 for sentiment < -0.2, and 0 for sentiment between -0.2 and 0.2
    signals = sentiment_summary.apply(lambda x: 1 if x > 0.2 else (-1 if x < -0.2 else 0))
    
    # Return the generated trading signals
    return signals


# Plot and calculations

In [6]:
def calculate_portfolio_metrics(portfolio):
    """
    This function calculates key metrics for a given trading portfolio.

    Args:
    portfolio (DataFrame): A pandas DataFrame containing the portfolio trades.

    Returns:
    tuple: A tuple containing the total number of trades, the win percentage, the total profit, loss_percentage, and the total loss.
    """
    # Calculate the total number of trades (each trade consists of a buy and a sell)
    total_trades = len(portfolio) // 2

    # Calculate the number of winning trades (sell trades with a positive profit)
    wins = portfolio[portfolio['type'] == 'sell']['profit'] > 0
    loss = portfolio[portfolio['type'] == 'sell']['profit'] < 0
    
    # Calculate the win percentage
    win_percentage = wins.mean() * 100
    loss_percentage = loss.mean() * 100
    
    # Calculate the total profit from sell trades
    total_profit = portfolio[portfolio['type'] == 'sell']['profit'].sum()
    total_loss = portfolio[portfolio['type'] == 'sell']['profit'].sum()
    
    # Return the total trades, win percentage, and total profit
    return total_trades, win_percentage, total_profit, loss_percentage, total_loss

def calculate_sharpe_ratio(portfolio, risk_free_rate=0.01):
    """
    This function calculates the Sharpe Ratio for the portfolio.

    Args:
    portfolio (DataFrame): A pandas DataFrame containing the portfolio trades.
    risk_free_rate (float): The risk-free rate of return (default is 0.01).

    Returns:
    float: The Sharpe Ratio of the portfolio.
    """
    # Get the daily returns from the sell trades
    daily_returns = portfolio[portfolio['type'] == 'sell']['profit']
    
    # Calculate the excess returns by subtracting the risk-free rate
    excess_returns = daily_returns - risk_free_rate
    
    # Calculate the Sharpe Ratio
    sharpe_ratio = excess_returns.mean() / excess_returns.std()
    
    # Return the Sharpe Ratio
    return sharpe_ratio

def calculate_max_drawdown(portfolio):
    """
    This function calculates the maximum drawdown for the portfolio.

    Args:
    portfolio (DataFrame): A pandas DataFrame containing the portfolio trades.

    Returns:
    float: The maximum drawdown value.
    """
    # Calculate the cumulative profit over time
    portfolio['cumulative_profit'] = portfolio['profit'].cumsum()
    
    # Calculate the cumulative maximum profit
    cumulative_max = portfolio['cumulative_profit'].cummax()
    
    # Calculate the drawdown
    drawdown = portfolio['cumulative_profit'] - cumulative_max
    
    # Get the maximum drawdown
    max_drawdown = drawdown.min()
    
    # Return the maximum drawdown
    return max_drawdown

def plot_signals(stock_data, portfolio, ticker):
    """
    This function plots stock prices along with buy and sell signals for a given ticker.

    Args:
    stock_data (DataFrame): A pandas DataFrame containing the stock price data.
    portfolio (DataFrame): A pandas DataFrame containing the portfolio trades.
    ticker (str): The stock ticker symbol.

    Returns:
    None: Displays an interactive plot using Plotly.
    """
    # Get the start and end dates from the portfolio
    start_date = portfolio['date'].min()
    end_date = portfolio['date'].max()

    # Filter the stock data to the date range available in the portfolio
    stock_data = stock_data[(stock_data.index >= start_date) & (stock_data.index <= end_date)]
    
    # Separate buy and sell signals from the portfolio
    buy_signals = portfolio[portfolio['type'] == 'buy']
    sell_signals = portfolio[portfolio['type'] == 'sell']
    
    # Create a new plotly figure
    fig = go.Figure()
    
    # Add stock price trace
    fig.add_trace(go.Scatter(
        x=stock_data.index, 
        y=stock_data['Close'],
        mode='lines',
        name='Stock Price',
        line=dict(color='blue')
    ))

    # Add buy signals
    fig.add_trace(go.Scatter(
        x=buy_signals['date'], 
        y=buy_signals['price'],
        mode='markers',
        name='Buy Signal',
        marker=dict(symbol='triangle-up', color='green', size=10)
    ))

    # Add sell signals
    fig.add_trace(go.Scatter(
        x=sell_signals['date'], 
        y=sell_signals['price'],
        mode='markers',
        name='Sell Signal',
        marker=dict(symbol='triangle-down', color='red', size=10)
    ))

    # Update layout for better presentation
    fig.update_layout(
        title=f'Stock Price with Buy and Sell Signals for {ticker}',
        xaxis_title='Date',
        yaxis_title='Price',
        legend_title='Legend',
        hovermode='x'
    )

    # Display the plot
    fig.show()


# Simulating Trades

In [7]:
def simulate_trades(stock_data, trading_signals, capital):
    """
    This function simulates trades based on trading signals and initial capital.

    Args:
    stock_data (DataFrame): A pandas DataFrame containing stock price data.
    trading_signals (Series): A pandas Series containing trading signals.
    capital (float): The initial capital for trading.

    Returns:
    DataFrame: A pandas DataFrame containing the portfolio of trades.
    """
    # Initialize the portfolio and variables for tracking position and trading details
    portfolio = []
    position = 0
    buy_price = 0
    quantity = 0

    # Iterate through the stock prices by date
    for date, price in stock_data['Close'].items():
        # Check if there is a trading signal for the current date
        if date in trading_signals.index:
            signal = trading_signals.loc[date]
            
            # Execute buy signal if no position is held
            if signal == 1 and position == 0:
                position = 1
                buy_price = price
                quantity = capital / price
                capital = capital % price
                profit = 0
                portfolio.append({"date": date, "type": "buy", "price": buy_price, "capital": capital,'profit': profit})
            
            # Execute sell signal if a position is held and the sell price is higher than the buy price
            elif signal == -1 and position == 1 and (price > buy_price):
                position = 0
                sell_price = price
                profit = (sell_price - buy_price) * quantity
                capital = capital + quantity * sell_price
                portfolio.append({"date": date, "type": "sell", "price": sell_price, "capital": capital, "profit": profit})

    # Return the portfolio as a DataFrame
    return pd.DataFrame(portfolio)

def final(ticker, rate, initial, df):
    """
    This function orchestrates the entire trading strategy pipeline from fetching data to plotting results.

    Args:
    ticker (str): The stock ticker symbol.
    rate (float): The risk-free rate for Sharpe ratio calculation.
    initial (float): The initial capital for trading.
    df (DataFrame): A pandas DataFrame containing news data.

    Returns:
    None: Displays trade metrics and plots trading signals.
    """
    # Fetch stock data
    stock_data = fetch_stock_data(ticker)

    print(stock_data)
    # Calculate sentiment scores
    news_data_with_scores = calculate_sentiment_scores(df)

    print(news_data_with_scores)
    
    # Aggregate sentiment scores by date
    sentiment_summary = aggregate_sentiment_scores(news_data_with_scores)
    
    # Generate trading signals
    trading_signals = generate_trading_signals(sentiment_summary)
    
    # Simulate trades
    print("\nSimulating trades...")
    portfolio = simulate_trades(stock_data, trading_signals, initial)
    
    # Calculate portfolio metrics
    total_trades, win_percentage, total_profit, loss_percentage , total_loss = calculate_portfolio_metrics(portfolio)
    
    # Print the portfolio and key metrics
    print(portfolio)
    print(f"\nInitial capital: ${initial}")
    print(f"Total Trades: {total_trades}")
    print(f"Win Percentage: {win_percentage:.2f}%")
    print(f"Total Portfolio Returns: ${total_profit:.2f}")
    print(f"Loss Percentage: {loss_percentage:.2f}%")
    print(f"Total Portfolio Returns: ${total_loss:.2f}")
    print(f"Sharpe ratio: {calculate_sharpe_ratio(portfolio, rate):.2f} with risk-free rate of {rate}")
    print(f"Max drawdown: {calculate_max_drawdown(portfolio)}")
    
    # Plot buy and sell signals
    plot_signals(stock_data, portfolio, ticker)


In [9]:
ticker = "NVDA"
news = await fetch_news(ticker)
news

8712 headlines scraped from 200 pages


Unnamed: 0,ticker,source,headline,date,time
0,nvda,Reuters,Nvidia CEO talks up mobile chips with brawny P...,2014-01-06,06:49:36
1,nvda,Reuters,Ex-Nvidia manager settles U.S. SEC charges on ...,2014-04-23,23:26:52
2,nvda,Reuters,Nvidia says it sees revenue from cloud computi...,2015-06-01,05:31:48
3,nvda,Reuters,Nvidia says Volvo is first customer for new au...,2016-01-05,06:03:35
4,nvda,Reuters,BRIEF-Nvidia and TomTom develop mapping system...,2016-09-28,15:10:26
...,...,...,...,...,...
8707,nvda,Seeking Alpha,Nvidia agrees to acquire software startup Shor...,2024-06-18,19:23:09
8708,nvda,Benzinga,A Leading Tech ETF Is Shifting The Balance Fro...,2024-06-18,19:41:05
8709,nvda,InvestorPlace,3 Bargain Stocks to Buy Now: June 2024,2024-06-18,20:47:34
8710,nvda,TipRanks,Nvidia’s Growth Potential Anchored in AI Leade...,2024-06-18,21:05:30


In [10]:
final("NVDA", 0.03, 10000, news)

Fetching stock prices...


[*********************100%%**********************]  1 of 1 completed


                 Close
Date                  
1999-01-22    0.041016
1999-01-25    0.045313
1999-01-26    0.041797
1999-01-27    0.041667
1999-01-28    0.041536
...                ...
2024-06-12  125.199997
2024-06-13  129.610001
2024-06-14  131.880005
2024-06-17  130.979996
2024-06-18  135.580002

[6393 rows x 1 columns]
     ticker         source                                           headline  \
0      nvda        Reuters  Nvidia CEO talks up mobile chips with brawny P...   
1      nvda        Reuters  Ex-Nvidia manager settles U.S. SEC charges on ...   
2      nvda        Reuters  Nvidia says it sees revenue from cloud computi...   
3      nvda        Reuters  Nvidia says Volvo is first customer for new au...   
4      nvda        Reuters  BRIEF-Nvidia and TomTom develop mapping system...   
...     ...            ...                                                ...   
8707   nvda  Seeking Alpha  Nvidia agrees to acquire software startup Shor...   
8708   nvda       Benzinga  

In [14]:
news_googl = await fetch_news("GOOGL")
news_googl

Attempt 1 failed with error: 
Retrying in 2 seconds...
Attempt 1 failed with error: 
Retrying in 2 seconds...
Attempt 1 failed with error: 
Retrying in 2 seconds...
Attempt 1 failed with error: 
Retrying in 2 seconds...
Attempt 1 failed with error: 
Retrying in 2 seconds...
Attempt 1 failed with error: 
Retrying in 2 seconds...
Attempt 1 failed with error: 
Retrying in 2 seconds...
Attempt 1 failed with error: 
Retrying in 2 seconds...
Attempt 1 failed with error: 
Retrying in 2 seconds...
Attempt 1 failed with error: 
Retrying in 2 seconds...
Attempt 1 failed with error: 
Retrying in 2 seconds...
Attempt 1 failed with error: 
Retrying in 2 seconds...
Attempt 1 failed with error: 
Retrying in 2 seconds...
Attempt 1 failed with error: 
Retrying in 2 seconds...
Attempt 1 failed with error: 
Retrying in 2 seconds...
Attempt 1 failed with error: 
Retrying in 2 seconds...
Attempt 1 failed with error: 
Retrying in 2 seconds...
Attempt 1 failed with error: 
Retrying in 2 seconds...
Attempt 1 

Unnamed: 0,ticker,source,headline,date,time
0,googl,Business Insider,Google slips after its Pixel 2 and hardware ev...,2017-10-04,20:15:11
1,googl,Business Insider,Google's new $160 wireless earbuds one-up Appl...,2017-10-04,23:24:06
2,googl,Business Insider,Google is using its biggest advantage as a wea...,2017-10-05,02:57:06
3,googl,Business Insider,Google's new $400 speaker is a room-shaking mo...,2017-10-05,14:33:04
4,googl,Business Insider,Here's how Google's approach to its smart spea...,2017-10-05,15:27:08
...,...,...,...,...,...
9945,googl,Seeking Alpha,Least shorted S&P 500 stocks in May,2024-06-18,14:34:48
9946,googl,InvestorPlace,3 AI Stocks to Buy Following the Nvidia Stock ...,2024-06-18,15:56:14
9947,googl,InvestorPlace,3 Tech Stocks to Buy That Are Growing Dividend...,2024-06-18,18:06:21
9948,googl,InvestorPlace,3 Bargain Stocks to Buy Now: June 2024,2024-06-18,20:47:34


In [15]:
final("GOOGL", 0.03, 10000, news_googl)

Fetching stock prices...


[*********************100%%**********************]  1 of 1 completed


                 Close
Date                  
2004-08-19    2.511011
2004-08-20    2.710460
2004-08-23    2.737738
2004-08-24    2.624374
2004-08-25    2.652653
...                ...
2024-06-12  177.789993
2024-06-13  175.160004
2024-06-14  176.789993
2024-06-17  177.240005
2024-06-18  175.089996

[4992 rows x 1 columns]
     ticker            source  \
0     googl  Business Insider   
1     googl  Business Insider   
2     googl  Business Insider   
3     googl  Business Insider   
4     googl  Business Insider   
...     ...               ...   
9945  googl     Seeking Alpha   
9946  googl     InvestorPlace   
9947  googl     InvestorPlace   
9948  googl     InvestorPlace   
9949  googl           RTTNews   

                                               headline       date      time  \
0     Google slips after its Pixel 2 and hardware ev... 2017-10-04  20:15:11   
1     Google's new $160 wireless earbuds one-up Appl... 2017-10-04  23:24:06   
2     Google is using its biggest advan