#### Libraries import

In [None]:
# Libraries
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from fake_useragent import UserAgent
from bs4 import BeautifulSoup as bs
import warnings
warnings.filterwarnings('ignore')
from selenium import webdriver
from lxml import html
from lxml import etree
import os.path
import sqlite3
import time
import os

#### Finance Class
This class scrapes data from two websites.
- Website 1 (money.cnn.com) - gathers the 'Most Actives' ticker symbols
- Website 2 (Google Finance) - looks up each ticker obtained from Website 1 and writes the results, one row per ticker, into a stocks.txt file

In [None]:
class Finance:
    """Scrape the 'Most Actives' tickers and their key stats into stocks.txt.

    Instantiating the class runs the whole pipeline: the scraped rows are
    stored on ``self.data`` and then written to ``stocks.txt``.
    """

    # Marker returned by get_finance_data for a value that could not be scraped.
    _MISSING = '-'
    # Placeholder stored in a row (and written to stocks.txt) for a missing value.
    _NULL = 'null'
    # Magnitude suffixes of abbreviated figures such as "45.30M".
    _SCALE = {'K': 10 ** 3, 'M': 10 ** 6, 'B': 10 ** 9, 'T': 10 ** 12}

    def __init__(self):
        """Scrape all rows, keep them on ``self.data`` and write stocks.txt."""
        self.data = self.get_data_for_stock_txt_file()
        self.stock_txt_file_creation(self.data)

    def get_driver(self):
        """Return a headless Chrome driver with a random user-agent string.

        The driver executable is given as the relative path
        ``chromedriver.exe``.
        NOTE(review): passing the executable path positionally is the
        Selenium 3 calling style; confirm it against the installed Selenium
        version.
        """
        options = Options()
        chrome_flags = (
            '--headless',
            '--no-sandbox',
            '--disable-dev-shm-usage',
            "--disable-logging",
            'log-level=3',
            '--disable-gpu',
            "--start-maximized",
            "--disable-notifications",
            "--disable-infobars",
        )
        for flag in chrome_flags:
            options.add_argument(flag)
        options.add_argument(f'user-agent={UserAgent().random}')
        return webdriver.Chrome("chromedriver.exe", options=options)

    def get_symbols(self):
        """Return the ticker symbols listed under 'Most Actives' on money.cnn.com.

        Returns:
            list of str, in page order. Elements whose text cannot be read
            are skipped.
        """
        url = 'https://money.cnn.com/data/hotstocks/'
        driver = self.get_driver()
        try:
            driver.get(url)
            print("{} got loaded".format(url))
            time.sleep(1)
            sym_elements = driver.find_elements(
                By.XPATH,
                "//h3[text()='Most Actives']/following-sibling::table[1]/descendant::td/child::a",
            )
            company_symbols = []
            for element in sym_elements:
                try:
                    company_symbols.append(element.text)
                except Exception:
                    # Best effort: one unreadable link must not lose the rest.
                    continue
        finally:
            # Always end the browser session, even if the page failed to load.
            driver.quit()
        print("Most Actives: {}".format(company_symbols))
        return company_symbols

    def get_finance_data(self, sym):
        """Look up ``sym`` on Google Finance and return its raw stat strings.

        Args:
            sym: ticker symbol typed into the Google Finance search box.

        Returns:
            ``[previous_close, volume, pe_ratio]`` as the strings shown on the
            page, or ``['-', '-', '-']`` if anything goes wrong.

        NOTE(review): the first value is read from the 'Previous close' row
        although it is later stored as the open price - confirm which figure
        is intended.
        """
        driver = None
        try:
            # Any quote page works as an entry point; the search box is then
            # used to navigate to the requested symbol.
            url = "https://www.google.com/finance/quote/AMD:NASDAQ"
            driver = self.get_driver()
            driver.get(url)
            clear_search = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, '//button[@aria-label="Clear search"]')))
            clear_search.click()
            input_search = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, '//input[@aria-label="Search for stocks, ETFs & more"]')))
            input_search.send_keys(sym)
            input_search.send_keys(Keys.ENTER)
            time.sleep(3)
            driver.refresh()
            time.sleep(2)
            print("Loaded:", driver.title)

            labels = ('Previous close', 'Volume', 'P/E ratio')
            cells = [
                driver.find_element(
                    By.XPATH,
                    "//div[contains(text(),'{}')]/parent::span/following-sibling::div".format(label),
                )
                for label in labels
            ]
            # The value is the text between the first '>' and the next '<'
            # of each cell's outer HTML.
            stats = [
                str(cell.get_attribute('outerHTML')).split('>')[1].split('<')[0]
                for cell in cells
            ]
            print(stats)
            return stats
        except Exception as e:
            print("ERROR in", sym)
            print(e)
            print("**********")
            return ['-', '-', '-']
        finally:
            # Runs on success and on failure, so no browser is left behind.
            if driver is not None:
                driver.quit()

    @classmethod
    def _parse_stat(cls, raw, kind):
        """Convert one scraped figure to ``kind`` (``int`` or ``float``).

        Thousands separators and leading non-numeric characters (such as a
        currency symbol) are removed, and a trailing K/M/B/T suffix is applied
        as a multiplier. Returns ``'null'`` for the missing marker ``'-'`` or
        for text that is not a number.
        """
        text = str(raw).strip().replace(',', '')
        if text == cls._MISSING:
            return cls._NULL
        while text and not (text[0].isdigit() or text[0] in '+-.'):
            text = text[1:]
        multiplier = 1
        if text and text[-1].upper() in cls._SCALE:
            multiplier = cls._SCALE[text[-1].upper()]
            text = text[:-1]
        try:
            value = float(text) * multiplier
        except ValueError:
            print("Could not parse value: {!r}".format(raw))
            return cls._NULL
        # round() before int() so float noise cannot truncate e.g. 45.3M.
        return int(round(value)) if kind is int else value

    def clean_data(self, stats):
        """Turn the raw ``[price, volume, pe_ratio]`` strings into numbers.

        Args:
            stats: three strings as returned by ``get_finance_data``.

        Returns:
            ``[float, int, float]``; any value that is missing or cannot be
            parsed is replaced by the string ``'null'``.
        """
        return [
            self._parse_stat(stats[0], float),
            self._parse_stat(stats[1], int),
            self._parse_stat(stats[2], float),
        ]

    def get_data_for_stock_txt_file(self):
        """Return one ``[symbol, price, volume, pe_ratio]`` row per active ticker."""
        rows = []
        for symbol in self.get_symbols():
            raw_stats = self.get_finance_data(symbol)
            rows.append([symbol] + self.clean_data(raw_stats))
        return rows

    def stock_txt_file_creation(self, data_for_text_file):
        """Write the rows to ``stocks.txt``, replacing any existing file.

        Each row is written as its list representation without the enclosing
        brackets, followed by a newline.

        Args:
            data_for_text_file: iterable of row lists.
        """
        if os.path.exists('stocks.txt'):
            print("stocks.txt already exists...deleting the file")
            os.remove('stocks.txt')
            print("Deleted existing .txt file")
        with open('stocks.txt', "w") as f:
            print("stocks.txt file is created...")
            f.writelines(str(line)[1:-1] + '\n' for line in data_for_text_file)

#### Class DB
This class creates the database and its table, whether or not they already exist.
- If the db already exists, it is deleted and a new db is created

In [None]:
class DB:
    """Rebuild StocksDatabase.db from stocks.txt and print its contents.

    Instantiating the class deletes any existing database file, recreates
    ``StocksTable``, loads every row of the text file into it and prints the
    table.
    """

    _DB_FILE = 'StocksDatabase.db'
    # Same spelling as the file written by the scraper; the name must match
    # exactly on case-sensitive filesystems.
    _TXT_FILE = 'stocks.txt'

    def __init__(self):
        """Rebuild the database, then print the stored rows."""
        self.check_db()
        self.display_data()

    @staticmethod
    def _parse_line(line):
        """Split one text-file line into ``(symbol, price, volume, pe_ratio)``.

        Fields are comma separated; surrounding whitespace and quotes are
        removed. The placeholder ``null`` becomes ``None`` so that it is
        stored as SQL NULL.

        Raises:
            ValueError: if the line does not have exactly four fields or a
                numeric field cannot be converted.
        """
        fields = [part.strip().strip("'\"") for part in line.split(',')]
        if len(fields) != 4:
            raise ValueError(
                "expected 4 comma-separated fields, got {}: {!r}".format(len(fields), line))

        def convert(text, kind):
            return None if text == 'null' else kind(text)

        symbol, open_price, volume, peratio = fields
        return (
            symbol,
            convert(open_price, float),
            # Go through float so a value written as "45.0" is still accepted.
            convert(volume, lambda text: int(float(text))),
            convert(peratio, float),
        )

    def create_db(self):
        """Create StocksTable in a new database and fill it from the text file.

        Lines that cannot be parsed or inserted are reported and skipped.
        The connection is closed even if the text file cannot be opened.
        """
        conn = sqlite3.connect(self._DB_FILE)
        try:
            cursor = conn.cursor()
            cursor.execute("""CREATE TABLE StocksTable (
                            TickerSymb TEXT,
                            OpenPrice REAL,
                            Volume INTEGER,
                            PERatio REAL)""")
            if os.path.exists(self._DB_FILE):
                print("StocksDatabase.db is created")
            with open(self._TXT_FILE, 'r') as f:
                for line in f:
                    if not line.strip():
                        continue  # ignore blank lines
                    try:  # Inserting the data into the StocksTable
                        cursor.execute(
                            'INSERT INTO StocksTable (TickerSymb, OpenPrice, Volume, PERatio) VALUES (?, ?, ?, ?)',
                            self._parse_line(line),
                        )
                    except (ValueError, sqlite3.Error) as e:
                        print("Excpetion occured: {}".format(e))
            print("StocksTable is created....")
            print("Data insertion completed...")
            conn.commit()
        finally:
            conn.close()

    def check_db(self):
        """Delete the database file if it exists, then create a fresh one."""
        if os.path.exists(self._DB_FILE):
            print("StocksDatabase.db already exists....deleting the db")
            os.remove(self._DB_FILE)
            print('db is deleted..')
        else:
            print("StocksDatabase.db doesn't exist")
            print("Creating the db....")
        self.create_db()

    def display_data(self):
        """Print every row of StocksTable; does nothing if the db file is absent."""
        if not os.path.exists(self._DB_FILE):
            return
        conn = sqlite3.connect(self._DB_FILE)
        try:
            cursor = conn.cursor()
            cursor.execute('SELECT * FROM StocksTable')
            print(cursor.fetchall())
        finally:
            conn.close()

### Main function
Console messages are printed at each step so that the progress of the run can be followed.

In [None]:
if __name__ == "__main__":
    # Order matters: constructing Finance scrapes the sites and writes the
    # text file, so it has to run before the database is built.
    finance = Finance()
    # Constructing DB recreates the database, loads the rows and prints them.
    db = DB()