In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scrape Yahoo Finance's "most active" table into a DataFrame and save it
# as most_active_stocks.csv in the current working directory.
URL = "https://finance.yahoo.com/most-active"

# Seconds to wait for the server before giving up; without a timeout,
# requests.get can block indefinitely.
REQUEST_TIMEOUT = 10

# The highest cell index read from a row is cols[7], so a usable row needs
# at least 8 <td> cells.
MIN_COLUMNS = 8

headers = {
    "User-Agent": "Mozilla/5.0"
}

response = requests.get(URL, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were data.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Find the table. The class name looks auto-generated and may change between
# site builds, so fall back to the first <table> on the page if it is missing.
table = soup.find("table", class_='yf-1570k0a bd')
if table is None:
    table = soup.find("table")
if table is None:
    raise RuntimeError(f"No <table> element found at {URL}")

# Extract the rows from the table, skipping the first (header) row.
rows = table.find_all("tr")[1:]

data = []
for row in rows:
    cols = row.find_all("td")
    if len(cols) < MIN_COLUMNS:  # Ensure every index used below exists
        continue
    # The price cell holds three whitespace-separated tokens:
    # price, absolute change, and percent change. split() with no argument
    # tolerates repeated or non-space whitespace between them.
    price_parts = cols[3].text.split()
    try:
        price = price_parts[0]
        change = price_parts[1]
        percent_change = price_parts[2]
    except IndexError:
        print(f"Skipping row due to IndexError: {row}")
        continue
    stock_data = {
        "Symbol": cols[0].text.strip(),
        "Name": cols[1].text.strip(),
        "Price": price,
        "Change": change,
        "Percent Change": percent_change,
        "Volume": cols[6].text.strip(),
        # NOTE(review): values captured from this column (e.g. 279.548M for
        # NVDA) look more like an average-volume figure than a market cap;
        # confirm the column index against the live table header.
        "Market Cap": cols[7].text.strip(),
    }
    data.append(stock_data)

print(f"Number of valid rows extracted: {len(data)}")

# Create a DataFrame
df = pd.DataFrame(data)
print(df.head())

# Save to CSV
df.to_csv("most_active_stocks.csv", index=False)

Number of valid rows extracted: 25
  Symbol                         Name   Price   Change Percent Change  \
0   APLD  Applied Digital Corporation   10.14    +3.31      (+48.46%)   
1   LCID            Lucid Group, Inc.  2.2000  -0.0300       (-1.35%)   
2   NVDA           NVIDIA Corporation  137.38    +2.25       (+1.67%)   
3      F           Ford Motor Company    9.98    -0.40       (-3.85%)   
4    CLF        Cleveland-Cliffs Inc.    7.18    +1.35      (+23.16%)   

     Volume Market Cap  
0  237.182M    31.485M  
1  200.371M   114.431M  
2  194.625M   279.548M  
3  167.807M   128.663M  
4   115.34M    24.199M  
