In [7]:
# --- Notebook configuration -------------------------------------------------
# Output: file name used when the cleaned DataFrame is pickled.
pickle_file_name = "df_finviz.pkl"

# Input: the worksheet to read and the workbook that contains it.
sheet_name = "2025-03-01"
file_path = r"G:\My Drive\stocks\finviz_scrape.xlsm"

In [8]:
import pandas as pd


# Load the worksheet into `df`.
# A failed load must stop the notebook here: if the error were only printed,
# later cells would either hit a NameError or silently keep working on a stale
# `df` left in the kernel by a previous run.
try:
    df = pd.read_excel(file_path, sheet_name=sheet_name)
except FileNotFoundError:
    print(f"Error: File not found at path: {file_path}")
    raise
except Exception as e:
    print(f"An error occurred: {e}")
    raise

# 'Ticker' must become the index. If it stayed a regular column, the cleaning
# cell (which converts every column except 'Industry' to numeric) would coerce
# the ticker symbols to NaN.
if 'Ticker' not in df.columns:
    raise KeyError("'Ticker' column not found in the Excel file.")

# Set the 'Ticker' column as the index
df = df.set_index('Ticker')
print(df)  # Print the DataFrame to verify it's loaded correctly with the new index

                                        Industry Market Cap    P/E  Fwd P/E  \
Ticker                                                                        
AAPL                        Consumer Electronics   3632.94B  38.45    29.53   
ABBV                Drug Manufacturers - General    369.01B  87.40    15.03   
ABNB                             Travel Services     87.56B  33.59    27.56   
ABT                              Medical Devices    239.35B  18.01    24.19   
ACN              Information Technology Services    217.98B  29.21    24.90   
...                                          ...        ...    ...      ...   
WFC                          Banks - Diversified    257.53B  14.57    11.31   
WM                              Waste Management     93.60B  34.19    26.77   
WMB                          Oil & Gas Midstream     70.94B  24.75    24.70   
WMT                              Discount Stores    792.17B  40.97    33.37   
ZTS     Drug Manufacturers - Specialty & Generic    

In [9]:
import numpy as np

# --- Market Cap: text with a magnitude suffix (e.g. "3632.94B") -> float ----
# Values are normalised to billions, so existing "...B" figures keep exactly
# the same numeric value as before. Other suffixes are scaled rather than
# silently coerced to NaN.
# NOTE(review): assumes K/M/T follow the same convention as B - confirm
# against the scraped sheet.
cap_scale_to_billions = {'K': 1e-6, 'M': 1e-3, 'B': 1.0, 'T': 1e3}

cap_text = (
    df['Market Cap']
    .astype(str)  # Ensure we're working with strings (also makes re-runs safe)
    .str.strip()
    .str.replace(',', '', regex=False)  # Remove thousands separators if present
)

# Look up the multiplier from the last character; NaN means "no known suffix".
cap_scale = cap_text.str[-1].str.upper().map(cap_scale_to_billions)
has_suffix = cap_scale.notna()

# Drop the suffix character only where one was recognised.
cap_number = cap_text.where(~has_suffix, cap_text.str[:-1])

# Vectorised conversion; unparseable text (e.g. "-", "nan") becomes NaN.
# Values without a suffix are taken as already being in billions.
df['Market Cap'] = pd.to_numeric(cap_number, errors='coerce') * cap_scale.fillna(1.0)

# Columns to process (all except 'Industry')
cols_to_process = df.columns.difference(['Industry'])

# Convert every remaining column to numeric. errors='coerce' turns anything
# unparseable into NaN - both the standalone "-" placeholder and malformed
# text such as "150-200" - so no separate hyphen-masking step is needed.
df[cols_to_process] = df[cols_to_process].apply(pd.to_numeric, errors='coerce')

# Switch all columns to pandas' nullable extension dtypes
# (e.g. Industry: object -> string, numeric columns -> Float64).
df = df.convert_dtypes()
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 200 entries, AAPL to ZTS
Data columns (total 28 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Industry       200 non-null    string 
 1   Market Cap     200 non-null    Float64
 2   P/E            200 non-null    Float64
 3   Fwd P/E        200 non-null    Float64
 4   PEG            199 non-null    Float64
 5   P/FCF          184 non-null    Float64
 6   Dividend       168 non-null    Float64
 7   Payout Ratio   199 non-null    Float64
 8   EPS this Y     200 non-null    Float64
 9   EPS next Y     200 non-null    Float64
 10  EPS next 5Y    199 non-null    Float64
 11  Sales past 5Y  200 non-null    Float64
 12  Sales Q/Q      199 non-null    Float64
 13  EPS Q/Q        197 non-null    Float64
 14  Inst Own       200 non-null    Float64
 15  ROA            200 non-null    Float64
 16  ROE            188 non-null    Float64
 17  Curr R         182 non-null    Float64
 18  Quick R     

In [10]:
import os

# Get the current working directory
current_path = os.getcwd()

# Location of the 'temp' directory under the working directory
temp_dir_path = os.path.join(current_path, 'temp')

# Remember whether the directory was already there; this is used only to pick
# the message below - creation itself does not depend on this check.
temp_dir_existed = os.path.isdir(temp_dir_path)

# exist_ok=True makes creation safe to re-run. If 'temp' exists but is a
# regular file, makedirs raises FileExistsError here instead of the cell
# reporting "already exists" and the pickle write failing later.
os.makedirs(temp_dir_path, exist_ok=True)

if temp_dir_existed:
    print(f"Temp directory already exists at: {temp_dir_path}")
else:
    print(f"Created temp directory at: {temp_dir_path}")

Temp directory already exists at: c:\Users\ping\Files_win10\python\py310\stocks\temp


In [11]:
import pickle

# Destination of the pickle: <temp directory>/<pickle file name>
pickle_path = os.path.join(temp_dir_path, pickle_file_name)

# Serialize the DataFrame in binary mode; the context manager closes the
# file handle once the dump has finished.
with open(pickle_path, 'wb') as pickle_file:
    pickle.dump(df, pickle_file)

print(f"DataFrame saved to: {pickle_path}")

DataFrame saved to: c:\Users\ping\Files_win10\python\py310\stocks\temp\df_finviz.pkl
