In [1]:
import os
import requests
from bs4 import BeautifulSoup
from scipy.special import softmax
from transformers import AutoTokenizer
from urllib.parse import unquote
from transformers import AutoModelForSequenceClassification
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import BartTokenizer, BartForConditionalGeneration

In [2]:
def scraping_article(url):
    headers = {
    'User-Agent': 'Your User Agent String',
    }
    r=requests.get(url,headers=headers)
    soup=BeautifulSoup(r.text,'html.parser')
    paragraphs=soup.find_all('p')
    text= [paragraph.text for paragraph in paragraphs]
    words=' '.join(text).split(' ')
    article = ' '.join(words)
    return article

In [3]:
def find_news_url(keyword, start_date, end_date):
    root = "https://www.google.com/"
    search_query = keyword.replace(" ", "+")
    link = f"{root}search?q={search_query}&tbm=nws&tbs=cdr:1,cd_min:{start_date},cd_max:{end_date}"

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}

    response = requests.get(link, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')

    news_links = []

    for article in soup.select('div.SoaBEf'):
        link = article.select_one('a')
        if link and 'href' in link.attrs:
            url = link['href']
            if url.startswith('/url?q='):
                url = unquote(url.split('/url?q=')[1].split('&sa=')[0])
            news_links.append(url)

    return news_links

In [4]:
def to_chunks(data):
    text_splitter=RecursiveCharacterTextSplitter(
        chunk_size=3000,
        chunk_overlap=50
    )
    docs=text_splitter.split_text(data)
    return docs

In [5]:
model_name="facebook/bart-large-cnn"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

In [6]:
def summarize_text(tokenizer, model, text, max_chunk_length, summary_max_length):
    inputs = tokenizer(text, return_tensors="pt", max_length=max_chunk_length, truncation=True)
    summary_ids = model.generate(inputs["input_ids"], max_length=summary_max_length, min_length=200, length_penalty=2.0, num_beams=4, early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

In [7]:
def summarize_article(tokenizer,model,url):
    data = scraping_article(url)
    chunks = to_chunks(data)
    # tokenizer, model=load_pegasus_model("google/pegasus-xsum")
    # tokenizer, model = load_bart_model(model_name)
    summaries = []
    for chunk in chunks:
        chunk_text = chunk
        summary = summarize_text(tokenizer, model, chunk_text,3000,800)
        summaries.append(summary)
    concatenated_summaries = " ".join(summaries)
    #  Second summarization pass: Summarize the concatenated summaries
    intermediate_chunks = [concatenated_summaries[i:i+3000] for i in range(0, len(concatenated_summaries), 3000)]
    final_summaries = []
    for intermediate_chunk in intermediate_chunks:
        final_summary = summarize_text(tokenizer, model, intermediate_chunk,3000,800)
        final_summaries.append(final_summary)
    final_summary_text = " ".join(final_summaries)
    return final_summary_text

In [9]:
for i in range(8):
    url=find_news_url('tesla',"04/17/2024","04/17/2024")[i]
    summary = summarize_article(tokenizer,model,url)
    print(summary)
    print()


Tesla share price extended fall for 2024, pushing its market valuation briefly below $500 billion. Tesla stock price ended 2.7% lower at $157.11 on Tuesday in New York, after hitting a low of $153.75 during the session. Tesla shares have fallen 37% this year so far, becoming the second-biggest decliner on the S&P 500 Index in 2024. The fading interest from consumers, which is plaguing EV makers globally, is a more dire scenario for Tesla shares than for other carmakers. However, Musk himself has said the company will be “worth basically zero" unless it can solve the problem of self-driving cars. The electric vehicle (EV) manufacturer reported first-quarter sales significantly below analysts’ expectations, raising concerns about Tesla’s growth trajectory, which were exacerbated by news that the company intends to scrap plans to make a cheaper EV and focus on building a so-called robotaxi instead, the Bloomberg report added.

Use the weekly Newsquiz to test your knowledge of stories you 

KeyboardInterrupt: 

In [25]:
import yfinance as yf
import pandas as pd
from datetime import datetime

def get_stock_details(ticker, start_date, end_date):
    # Create a Ticker object
    stock = yf.Ticker(ticker)
    
    # Convert dates to the format required by yfinance (YYYY-MM-DD)
    start_date_yf = datetime.strptime(start_date, "%d-%m-%Y").strftime("%Y-%m-%d")
    end_date_yf = datetime.strptime(end_date, "%d-%m-%Y").strftime("%Y-%m-%d")
    
    # Fetch the historical data
    hist = stock.history(start=start_date_yf, end=end_date_yf)
    
    # Convert to DataFrame and reset index to make Date a column
    df = hist.reset_index()
    
    # Convert Date to DD-MM-YYYY format
    df['Date'] = df['Date'].dt.strftime('%d-%m-%Y')
    
    # Round all float columns to 2 decimal places
    float_columns = df.select_dtypes(include=['float64']).columns
    df[float_columns] = df[float_columns].round(2)
    
    # Print the DataFrame
    print(f"Stock details for {ticker} from {start_date} to {end_date}:")
    print(df)
    
    # Print some current stock info
    print("\nCurrent stock info:")
    current_price = stock.info.get('currentPrice', 'N/A')
    week_high = stock.info.get('fiftyTwoWeekHigh', 'N/A')
    week_low = stock.info.get('fiftyTwoWeekLow', 'N/A')
    
    if isinstance(current_price, float):
        current_price = round(current_price, 2)
    if isinstance(week_high, float):
        week_high = round(week_high, 2)
    if isinstance(week_low, float):
        week_low = round(week_low, 2)
    
    print(f"Current Price: {current_price}")
    print(f"52 Week High: {week_high}")
    print(f"52 Week Low: {week_low}")
    
    return df

# Set the stock ticker symbol
ticker_symbol = "tsla"  # Example: Apple Inc.

# Set the date range
start_date = "01-08-2023"
end_date = "03-01-2024"

# Call the function and get the DataFrame
df = get_stock_details(ticker_symbol, start_date, end_date)

Stock details for tsla from 01-08-2023 to 03-01-2024:
           Date    Open    High     Low   Close     Volume  Dividends  \
0    01-08-2023  266.26  266.47  260.25  261.07   83166000        0.0   
1    02-08-2023  255.57  259.52  250.49  254.11  101752900        0.0   
2    03-08-2023  252.04  260.49  252.00  259.32   97569100        0.0   
3    04-08-2023  260.97  264.77  253.11  253.86   99242600        0.0   
4    07-08-2023  251.45  253.65  242.76  251.45  111097900        0.0   
..          ...     ...     ...     ...     ...        ...        ...   
102  26-12-2023  254.49  257.97  252.91  256.61   86892400        0.0   
103  27-12-2023  258.35  263.34  257.52  261.44  106494400        0.0   
104  28-12-2023  263.66  265.13  252.71  253.18  113619900        0.0   
105  29-12-2023  255.10  255.19  247.43  248.48  100615300        0.0   
106  02-01-2024  250.08  251.25  244.41  248.42  104654200        0.0   

     Stock Splits  
0             0.0  
1             0.0  
2        

In [23]:
df.to_csv("new1.csv")