In [1]:
import sqlite3
import pandas as pd
import numpy as np
import datetime as dt
from StockSentimentAnalysis import news_sentiment
from StockSentimentAnalysis.FinancialModelingPrep.indices import Index
from typing import Union, Optional

# Module-level SQLite handle shared by the cells below; main() writes one
# table per ticker through it when called with output='sql'.
sql_conn = sqlite3.connect(database='./data/spx_news_sentiment_price.db')
# Cursor bound to the same connection, kept at module scope for ad-hoc queries.
cursor = sql_conn.cursor()



In [2]:


# Default value of main()'s ``start_date`` argument: 1 January 2020.
START_DATE = dt.date(year=2020, month=1, day=1)

In [4]:
def main(index: str='SPX', 
    start_date: Union[dt.date, str]=START_DATE,
    print_every: int=10,
    verbose: bool=True,
    output: str='sql') -> None:
    """
    For every member of ``index``, fetch the series returned by
    ``news_sentiment.get_daily_sentiment_series`` and persist its transpose,
    either as a table named after the ticker in the module-level ``sql_conn``
    database or as ``./data/news_price/<ticker>.csv``.

    All arguments are validated before the index members are requested, so a
    bad call fails immediately instead of after fetching data.

    :param index: str, index symbol handed to ``Index.get_members``
    :param start_date: Union[dt.date, str], forwarded as ``start_date`` to the
        sentiment fetcher; strings must be formatted as ``YYYY-MM-DD``
    :param print_every: int, print a progress percentage every ``print_every``
        tickers; 0 disables the progress report
    :param verbose: bool, print each ticker symbol as it is processed
    :param output: str, takes 'sql', 'csv'
    :raises ValueError: if ``output`` is neither 'sql' nor 'csv', or if a
        string ``start_date`` does not match ``YYYY-MM-DD``
    :raises TypeError: if ``start_date`` is neither a date nor a string
    """
    # Reject unknown targets up front: previously a typo such as 'SQL' ran the
    # whole fetch loop and silently threw every result away.
    if output not in ('sql', 'csv'):
        raise ValueError(f"output must be 'sql' or 'csv', got {output!r}")

    if isinstance(start_date, str):
        start_date = dt.datetime.strptime(start_date, '%Y-%m-%d').date()
    elif not isinstance(start_date, dt.date):
        raise TypeError("start_date must be a datetime.date or a string")

    if output == 'csv':
        # to_csv does not create missing parent directories.
        from pathlib import Path
        Path('./data/news_price').mkdir(parents=True, exist_ok=True)

    tickers = Index.get_members(index).index.to_list()
    total = len(tickers)

    for i, ticker in enumerate(tickers):
        if verbose:
            print(ticker)
        # `print_every and ...` avoids the modulo-by-zero when print_every == 0.
        if print_every and i % print_every == 0:
            print(f"{i / total * 100:.2f}% done")
        price_news = news_sentiment.get_daily_sentiment_series(
            ticker, start_date=start_date)
        if output == 'sql':
            # if_exists='replace' drops any table left by a previous run.
            price_news.T.to_sql(ticker, sql_conn, if_exists='replace', index=True)
        else:
            price_news.T.to_csv(f'./data/news_price/{ticker}.csv')

In [None]:
# Run the export with every default: index 'SPX', start_date=START_DATE,
# output='sql' (results go to the database opened as sql_conn).
main()