In [None]:
import datetime
import logging
import os

from api import APIConnection
from get_data import GetData

# Configure the root logger for the notebook: INFO and above, with each record
# rendered as "<timestamp> - <level name> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

In [None]:
# Config
# The Reddit API credentials are read from environment variables rather than
# being typed into the notebook, so secrets are never saved with the file.
# A missing variable raises KeyError naming the variable that must be set.
CLIENT_ID = os.environ["REDDIT_CLIENT_ID"]
CLIENT_SECRET = os.environ["REDDIT_CLIENT_SECRET"]
USER_AGENT = os.environ["REDDIT_USER_AGENT"]

# Build the project's API wrapper from the credentials above.
api_inst = APIConnection(client_id=CLIENT_ID,
                         client_secret=CLIENT_SECRET,
                         user_agent=USER_AGENT)

# Client object handed to GetData in the data-collection cell.
reddit_client = api_inst.initialise_client()

In [None]:
# Regular expressions for the bank names and ticker symbols to search for.
# Entries use \b word boundaries so they match whole words only. Literal dots
# and parentheses are escaped so they are not read as regex metacharacters,
# every entry is followed by a comma (adjacent string literals would otherwise
# be silently concatenated into one pattern), and each pattern is listed once.
ticker_patterns = [
    r'\bBank\s+of\s+America\b', r'\bBAC\b',
    r'\bBank\s+of\s+England\b', r'\bBoE\b',  # Central bank, not publicly traded
    r'\bBarclays\b', r'\bBCS\b',
    # NOTE: r'\bC\b' matches any standalone capital "C", so it is very broad.
    r'\bCiti\b', r'\bCitigroup\b', r'\bC\b',
    r'\bCoutts\b',  # Part of NatWest Group, not independently traded
    r'\bCredit\s+Suisse\b', r'\bCS\b',
    r'\bGoldman\s+Sachs\b', r'\bGS\b',
    r'\bHalifax\b',  # Part of Lloyds Banking Group, not independently traded
    r'\bHSBC\b',  # Name and ticker are the same string
    r'\bLloyds\b', r'\bLYG\b',
    r'\bMetro\b', r'\bMTRO\.L\b', r'\bMTRO\b',
    r'\bMorgan\s+Stanley\b', r'\bMS\b',
    r'\bNatWest\b', r'\bNWG\b',
    r'\bPNC\b', r'\bpnc\b',
    r'\bSantander\b', r'\bSAN\b',  # traded in Spain as 'SAN'
    r'\bSilicon\s+Valley\s+Bank\b', r'\bSVB\b', r'\bSIVBQ\b',
    r'\bStandard\s+Chartered\b', r'\bSCBFF\b',
    r'\bTruist\b', r'\bTFC\b',
    r'\bVirgin\s+Money\b', r'\bVMUK\b',
    r'\bWells\s+Fargo\b', r'\bWFC\b',
    # Now rebranded as NatWest Group (its NWG ticker is listed with NatWest above)
    r'\bRoyal\s+Bank\s+of\s+Scotland\b', r'\bRBS\b',
    r'\bThe\s+Co-operative\s+Bank\b',  # Not publicly traded
    r'\bTSB\s+Bank\b',  # TSB Banking Group plc was acquired by Banco Sabadell; not independently traded
    # Part of Clydesdale and Yorkshire Banking Group, traded as 'CYBG' before
    # its acquisition by Virgin Money
    r'\bYorkshire\s+Bank\b', r'\bCYBG\b',
    # Matches both "Allied Irish Bank (UK)" and "Allied Irish Bank UK". The \b
    # sits after "UK" because a boundary after ")" would require a following
    # word character.
    r'\bAllied\s+Irish\s+Bank\s+\(?UK\b\)?', r'\bAIBG\.L\b',
]

# Narrower pattern list covering only Credit Suisse and Silicon Valley Bank
# (company names plus their ticker symbols). It is defined here but not passed
# to anything in the cells shown.
crisis_patterns = [
    # Credit Suisse
    r'\bCredit\s+Suisse\b',
    r'\bCS\b',
    # Silicon Valley Bank
    r'\bSilicon\s+Valley\s+Bank\b',
    r'\bSVB\b',
    r'\bSIVBQ\b',
]


# Timestamp passed to get_comments as `last_run_time` (28 Feb 2023, 10:00,
# naive datetime). NOTE(review): presumably the cut-off for which comments
# are fetched; confirm against GetData.get_comments.
last_run_time = datetime.datetime(year=2023, month=2, day=28, hour=10, minute=0)

# Collector bound to r/wallstreetbets, searching with the ticker/bank patterns.
data_inst = GetData(
    reddit_client=reddit_client,
    subreddit='wallstreetbets',
    search_patterns=ticker_patterns,
)

# Fetch comments with a target of 100; the result is kept in `df`.
df = data_inst.get_comments(
    comment_target=100,
    last_run_time=last_run_time,
)


In [None]:
# Pass the fetched comments through GetData.clean_comments and bind the result
# to a new name; `df` itself is not rebound here.
# NOTE(review): what clean_comments changes, and whether it modifies `df` in
# place, is not visible from this notebook; confirm in get_data.
df_clean = data_inst.clean_comments(df)
# Leftover preview of the raw frame, kept disabled.
#df.head()

In [None]:
import database as db
import sqlite3

# Database file
database = r"reddit-sqlite.db"

# Schema for the comments table, created only if it does not already exist.
# Customize the SQL CREATE TABLE statement according to your data schema
create_table_sql = """
CREATE TABLE IF NOT EXISTS comments (
    comment_id TEXT PRIMARY KEY,
    post_title TEXT,
    subreddit TEXT,
    comment_date TEXT,
    comment_author TEXT,
    comment TEXT,
    matched_phrase TEXT,
    upvotes INTEGER
);
"""

# Create a SQLite connection
conn = sqlite3.connect(database)
try:
    # Make sure the target table exists before appending to it.
    cursor = conn.cursor()
    cursor.execute(create_table_sql)
    conn.commit()

    # Insert DataFrame into the database without index
    # Replace 'comments' with your actual table name
    # NOTE(review): this writes the raw `df`, not the `df_clean` produced by
    # the cleaning cell; confirm which frame is meant to be stored.
    # NOTE(review): rows are appended, and comment_id is the PRIMARY KEY, so
    # re-running this cell with already-stored comments will presumably fail
    # on the uniqueness constraint; confirm.
    df.to_sql('comments', conn, if_exists='append', index=False)
finally:
    # Always release the connection, even when table creation or the insert
    # raises, so the database file is not left open.
    conn.close()

# Only reached when the insert above did not raise.
print("Data inserted successfully.")