## 

In [1]:
# Import dependencies

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service as FirefoxService
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import sqlite3
from flask import Flask, jsonify
import pandas as pd

# Set path Geckodriver executable
geckodriver_path = "/usr/local/bin/geckodriver"  

# Configure Firefox options
firefox_options = Options()
firefox_options.headless = True  

# Set Firefox web driver
firefox_service = FirefoxService(geckodriver_path)
driver = webdriver.Firefox(service=firefox_service, options=firefox_options)

# Url scrape
url = "https://steamdb.info/charts/"

# Load the page
driver.get(url)

# Add a delay of 3 seconds to allow the table to initialize
time.sleep(3)

wait = WebDriverWait(driver, 30)  # Increased the wait time to 30 seconds

# Wait for the table to become visible using XPath and wait for "Initializing table" message to disappear
try:
    wait.until_not(EC.visibility_of_element_located((By.XPATH, "//div[text()='Initializing table']")))
except TimeoutException:
    print("Table initialization timed out.")
    driver.quit()

data_list = []

# Scrape data from the first sheet (button "1" is already selected by default)
table = driver.find_element(By.CSS_SELECTOR, "table.table-products")
rows = table.find_elements(By.XPATH, "//table[@id='table-apps']/tbody/tr")
for row in rows:
    columns = row.find_elements(By.TAG_NAME, "td")
    number = columns[0].text.strip()
    name = columns[2].text.strip()
    current = columns[3].text.strip()
    peak_24h = columns[4].text.strip()
    peak_all_time = columns[5].text.strip()

    game_data = {
        "Number": number,
        "Name": name,
        "Current": current,
        "24h Peak": peak_24h,
        "All-Time Peak": peak_all_time,
    }

    data_list.append(game_data)

# Click the "2" link to switch to the second sheet
try:
    link_2 = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, '2')))
    link_2.click()
except TimeoutException:
    print("Link '2' not found or clickable.")
    driver.quit()

# Add a delay to allow the second sheet to load
time.sleep(3)  # Adjust the delay as needed

# Scrape data from the second sheet
table = driver.find_element(By.CSS_SELECTOR, "table.table-products")
rows = table.find_elements(By.XPATH, "//table[@id='table-apps']/tbody/tr")
for row in rows:
    columns = row.find_elements(By.TAG_NAME, "td")
    number = columns[0].text.strip()
    name = columns[2].text.strip()
    current = columns[3].text.strip()
    peak_24h = columns[4].text.strip()
    peak_all_time = columns[5].text.strip()

    game_data = {
        "Number": number,
        "Name": name,
        "Current": current,
        "24h Peak": peak_24h,
        "All-Time Peak": peak_all_time,
    }

    data_list.append(game_data)

# Close the browser
driver.quit()

# Now, data_list contains the scraped data from both sheets
for game in data_list:
    print(game)


{'Number': '1.', 'Name': 'Counter-Strike: Global Offensive', 'Current': '993,121', '24h Peak': '1,361,281', 'All-Time Peak': '1,818,773'}
{'Number': '2.', 'Name': 'Dota 2', 'Current': '556,964', '24h Peak': '739,724', 'All-Time Peak': '1,295,114'}
{'Number': '3.', 'Name': "Baldur's Gate 3", 'Current': '386,480', '24h Peak': '386,480', 'All-Time Peak': '875,343'}
{'Number': '4.', 'Name': 'Starfield', 'Current': '186,640', '24h Peak': '186,640', 'All-Time Peak': '330,723'}
{'Number': '5.', 'Name': 'Source SDK Base 2007', 'Current': '175,760', '24h Peak': '180,805', 'All-Time Peak': '213,168'}
{'Number': '6.', 'Name': 'Apex Legends', 'Current': '147,106', '24h Peak': '433,064', 'All-Time Peak': '624,473'}
{'Number': '7.', 'Name': 'Cyberpunk 2077', 'Current': '144,986', '24h Peak': '169,711', 'All-Time Peak': '1,054,388'}
{'Number': '8.', 'Name': 'PUBG: BATTLEGROUNDS', 'Current': '142,124', '24h Peak': '402,587', 'All-Time Peak': '3,257,248'}
{'Number': '9.', 'Name': 'Grand Theft Auto V', 

In [2]:
# SQLite Database

# Create a connection to the SQLite database 
conn = sqlite3.connect("steam_data.db")

# Create a cursor object
cursor = conn.cursor()

In [3]:
# Define the table schema
cursor.execute('''
    CREATE TABLE IF NOT EXISTS games (
        Number TEXT,
        Name TEXT,
        Current TEXT,
        Peak_24h TEXT,
        Peak_All_Time TEXT
    )
''')

<sqlite3.Cursor at 0x10a4fb2c0>

In [4]:
# Insert data into the table
for game in data_list:
    cursor.execute('''
        INSERT INTO games (Number, Name, Current, Peak_24h, Peak_All_Time)
        VALUES (?, ?, ?, ?, ?)
    ''', (game["Number"], game["Name"], game["Current"], game["24h Peak"], game["All-Time Peak"]))

In [5]:
# Commit changes
conn.commit()
cursor.close()
conn.close()


In [6]:
# Python Flask API

# Initialize Flask
app = Flask(__name__)




# Define an API endpoint to retrieve Steam Data
@app.route("/api/games", methods=["GET"])
def get_games():
    conn = sqlite3.connect("steam_data.db")
    cursor = conn.cursor()

    # Fetch all game data from the database
    cursor.execute("SELECT * FROM games")
    games = cursor.fetchall()
    
    # Return the data as JSON
    return jsonify({"games": games})
    
if __name__ == "__main__":
    app.run(debug=True, port=5002)  # Use a different port, e.g., 5002



 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


 * Running on http://127.0.0.1:5002
[33mPress CTRL+C to quit[0m
 * Restarting with stat
Traceback (most recent call last):
  File "/Users/terrycleek/anaconda3/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/Users/terrycleek/anaconda3/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/Users/terrycleek/anaconda3/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/Users/terrycleek/anaconda3/lib/python3.10/site-packages/traitlets/config/application.py", line 991, in launch_instance
    app.initialize(argv)
  File "/Users/terrycleek/anaconda3/lib/python3.10/site-packages/traitlets/config/application.py", line 113, in inner
    return method(app, *args, **kwargs)
  File "/Users/terrycleek/anaconda3/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 689, in initialize
    self.init_sockets()
  File "/Users/terrycleek/anacon

SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [7]:
# Create a DataFrame from the SQLite database
conn = sqlite3.connect("steam_data.db")
df = pd.read_sql_query("SELECT * FROM games", conn)
conn.close()
df.head()

Unnamed: 0,Number,Name,Current,Peak_24h,Peak_All_Time
0,1.0,Counter-Strike: Global Offensive,963043,1410713,1818773
1,2.0,Dota 2,388645,716286,1295114
2,3.0,Apex Legends,264291,454151,624473
3,4.0,Baldur's Gate 3,246991,360993,875343
4,5.0,PUBG: BATTLEGROUNDS,208802,400528,3257248


In [8]:

# Connect to the SQLite database
conn = sqlite3.connect("steam_data.db")
cursor = conn.cursor()

# Execute the SQL query
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='games';")

# Fetch the result
result = cursor.fetchone()

# Check if the result is not None (table exists)
if result:
    print("The 'games' table exists in the database.")
else:
    print("The 'games' table does not exist in the database.")

# Close the cursor (optional, but recommended)
cursor.close()

# Close the database connection
conn.close()



The 'games' table exists in the database.
