# In [None]:
import os
import pandas as pd
import schedule
import time
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup

def scrape_and_save(
    finviz_url='https://finviz.com/quote.ashx?t=TSLA',
    output_file='tesla_articles.csv',
    timeout=30,
):
    """Scrape the Finviz news table and append unseen articles to a CSV file.

    The CSV has the columns ``date``, ``time``, ``title`` and ``url``. It is
    created (header only) when it does not exist yet. Articles whose URL is
    already present in the file are skipped, so calling this repeatedly only
    ever appends new rows.

    Args:
        finviz_url: Quote page to fetch; the element with id ``news-table``
            on that page is parsed.
        output_file: Path of the CSV file to read from and write to.
        timeout: Seconds to wait on the HTTP request before giving up, so a
            stalled connection cannot block the caller forever.

    Raises:
        urllib.error.URLError: If the page cannot be fetched (propagated
            from ``urlopen``).
    """
    columns = ['date', 'time', 'title', 'url']

    # Check if the CSV file exists, if not, create it
    if not os.path.exists(output_file):
        pd.DataFrame(columns=columns).to_csv(output_file, index=False)

    # Read existing data from the CSV file
    df_existing = pd.read_csv(output_file)
    existing_urls = set(df_existing['url'])

    req = Request(
        url=finviz_url,
        headers={'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'},
    )
    # Parse inside the `with` so the connection is closed as soon as the
    # document has been read.
    with urlopen(req, timeout=timeout) as response:
        html = BeautifulSoup(response, features='html.parser')

    news_table = html.find(id='news-table')
    if news_table is None:
        # Page layout changed or the request was served a different page.
        print("No news table found on the page; nothing was scraped.")
        return

    parsed_data = []
    last_date = None  # most recent date seen while walking the rows

    for row in news_table.find_all('tr'):
        link = row.a
        # Skip rows that carry no article link (e.g. spacer/ad rows) instead
        # of crashing on them.
        if link is None or not link.get('href'):
            continue

        title = link.text
        article_url = link['href']

        # Find the cell containing date and time
        date_time_cell = row.find('td', align='right')
        article_date = None
        article_time = None

        if date_time_cell is not None:
            date_time_text = date_time_cell.get_text(strip=True)
            parts = date_time_text.split()

            if len(parts) == 2:
                article_date, article_time = parts
                last_date = article_date
            elif len(parts) == 1:
                # NOTE(review): Finviz appears to print the date only on the
                # first headline of each day and just the time on the rows
                # below it, so reuse the last date seen -- confirm against
                # the live markup.
                article_date = last_date
                article_time = parts[0]
            else:
                # Unexpected format: keep the raw text rather than guessing.
                article_time = date_time_text

        # Check if the article URL is new, if yes, add it to the parsed_data
        if article_url not in existing_urls:
            parsed_data.append([article_date, article_time, title, article_url])
            existing_urls.add(article_url)

    if parsed_data:
        df_new = pd.DataFrame(parsed_data, columns=columns)
        df_combined = pd.concat([df_existing, df_new], ignore_index=True)
        df_combined.to_csv(output_file, index=False)
        print(f"New data has been scraped and saved to {output_file}.")
    else:
        print("No new articles found.")

def _run_scrape_job():
    """Run ``scrape_and_save`` and report a failure instead of raising.

    An exception raised by a scheduled job propagates out of
    ``schedule.run_pending()`` and would end the polling loop below, so a
    single failed scrape (for example a network error) would stop all
    future runs. Reporting the error here keeps the scheduler alive.
    """
    try:
        scrape_and_save()
    except Exception as exc:  # top-level boundary: report and carry on
        print(f"Scheduled scrape failed: {exc!r}")


# Schedule the scraping to run daily at 08:00
schedule.every().day.at("08:00").do(_run_scrape_job)

# Poll for due jobs once per second; this loop never returns.
while True:
    schedule.run_pending()
    time.sleep(1)
