In [1]:
import os
import requests
import logging
import sqlite3
import pandas as pd
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
logging.getLogger("requests").setLevel(logging.WARNING)

LOGIN_URL = 'https://www.tm3.com/homepage/login.jsf'
CUSIP_EVAL_URL_TEMPLATE = 'https://www.tm3.com/mvsearch/cusipEvalHistoryContent.jsf?cusipId={}'
USERNAME = 'T28395M63'
PASSWORD = 'HOWARD'

DB_FILE = 'mmd.db'

def create_database_table(cursor):
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS evaluation_history (
            cusip TEXT,
            date TEXT,
            price TEXT,
            category TEXT
        )
    ''')

def login(session):
    payload = {}
    result = session.get(LOGIN_URL, verify=False)
    soup = BeautifulSoup(result.content, 'html.parser')
    
    hidden_inputs = soup.find_all('input', type='hidden')
    for hidden_input in hidden_inputs:
        name = hidden_input.get('name')
        payload[name] = hidden_input.get('value')
    
    payload['username'] = USERNAME
    payload['password'] = PASSWORD
    payload['loginButton'] = 'Login'
    
    logger.debug('Payload is %s' % payload)
    logger.info('Attempting to login...')
    
    login_response = session.post(LOGIN_URL, data=payload, timeout=30.0)
    
    if "Invalid login" in login_response.text:
        raise Exception("Login failed. Check credentials.")
    else:
        logger.info("Login successful.")

def scrape_cusip_data(cusip, category, session, cursor, conn, cusips_to_scrape):
    base_url = CUSIP_EVAL_URL_TEMPLATE.format(cusip)

    logger.info(f'Logging in before scraping CUSIP {cusip} at {base_url}...')
    logger.debug(f'Base URL: {base_url}')

    login(session)

    logger.info(f'Scraping CUSIP {cusip} at {base_url}...') #Debug messages about which CUSIP is being scraped
    response = session.get(base_url)
    soup = BeautifulSoup(response.text, 'html.parser')

    config2a_div = soup.find('div', class_='config2a') #For my specific HTML
    groupA_div = config2a_div.find('div', class_='groupA') if config2a_div else None
    group_a_table = groupA_div.find('table', class_='data') if groupA_div else None

    data = []
    if group_a_table:
        for row in group_a_table.select('tbody tr'):
            cells = row.find_all('td')
            date = cells[0].get_text(strip=True)
            price = cells[1].get_text(strip=True)

            cursor.execute('''
                INSERT INTO evaluation_history (cusip, date, price, category)
                VALUES (?, ?, ?, ?)
            ''', (cusip, date, price, category))

            conn.commit()

    df = pd.DataFrame(data)


def main():
    with requests.Session() as session:
        logging.info('Opening session...')
        session.mount('https://', HTTPAdapter(max_retries=3))
        session.verify = False  # Set to True if SSL verification is required

        # Establish a connection and create a cursor
        conn = sqlite3.connect(DB_FILE)
        cursor = conn.cursor()

        create_database_table(cursor)

        # Specify the single file you want to test (e.g., "Agency.csv")
        csv_file_path = 'J:/Python/ksm/ksm2023/CSV/KSU.csv'

        category = os.path.splitext(os.path.basename(csv_file_path))[0]
        cusips_to_scrape = pd.read_csv(csv_file_path, usecols=['CUSIP'], index_col='CUSIP').index.tolist()

        for cusip in cusips_to_scrape:
            logging.info(f'Scraping CUSIP {cusip} in category {category}...')
            scrape_cusip_data(cusip, category, session, cursor, conn, cusips_to_scrape)

        # Close the connection after processing
        conn.close()

    logging.info('Done...')

if __name__ == '__main__':
    main()

INFO:root:Opening session...
DEBUG:urllib3.util.retry:Converted retries value: 3 -> Retry(total=3, connect=None, read=None, redirect=None, status=None)
INFO:root:Scraping CUSIP 49151FZS2 in category Agency...


TypeError: scrape_cusip_data() missing 1 required positional argument: 'cusips_to_scrape'