*   **Action:** Read Excel Sheet to Dataframe
*   **Source:** `G:\My Drive\stocks\finviz_scrape.xlsm`, Sheet: `Syyyymmdd` (e.g. `S20250306`)
*   **Destination:** `c:\Users\ping\Files_win10\python\py310\stocks\temp\df_finviz_stocks.pkl` (written to the `temp` folder under the current working directory)

In [None]:
# Output: file name used when the DataFrame is pickled
pickle_file_name = "df_finviz_stocks.pkl"

# Input: worksheet to load and the workbook that contains it
# (raw string so the backslashes in the Windows path stay literal)
sheet_name = "S20250306"
file_path = r"G:\My Drive\stocks\finviz_scrape.xlsm"

# Echo the inputs so the cell output records what is about to be read
print(f'file_path: "{file_path}"')
print(f'sheet_name: {sheet_name}')

In [None]:
import pandas as pd

# Load the worksheet into a DataFrame. The try body is limited to the one
# call that can fail on I/O, so a load failure is never confused with a
# later processing error.
try:
    df = pd.read_excel(file_path, sheet_name=sheet_name)
except FileNotFoundError:
    print(f"Error: File not found at path: {file_path}")
    # Re-raise: continuing would leave `df` undefined (or stale from an
    # earlier run) for every cell that follows.
    raise
except Exception as e:
    print(f"An error occurred: {e}")
    raise

# 'Ticker' is required as the index. Stop here if it is missing: the
# numeric-conversion cell processes every column except 'Industry', so a
# leftover 'Ticker' column would be coerced like a data column.
if 'Ticker' not in df.columns:
    print("Error: 'Ticker' column not found in the Excel file.")
    raise KeyError("'Ticker' column not found in the Excel file.")

# Set the 'Ticker' column as the index
df = df.set_index('Ticker')
print(df)  # Print the DataFrame to verify it's loaded correctly with the new index

In [None]:
import numpy as np

# Strip the "B" suffix and any thousands separators from 'Market Cap',
# then convert the whole column with one vectorized pd.to_numeric call
# (instead of calling it once per cell through Series.apply).
# NOTE(review): values carrying any other suffix (e.g. "M") are not
# rescaled; they fail to parse and become NaN. Confirm the sheet only
# contains "B" values.
market_cap_text = (
    df['Market Cap']
    .astype(str)  # Ensure we're working with strings
    .str.replace('B', '', regex=False)  # Remove "B" explicitly
    .str.replace(',', '', regex=False)  # Remove commas if present
)
df['Market Cap'] = pd.to_numeric(market_cap_text, errors='coerce')

# Columns to process (all except 'Industry')
cols_to_process = df.columns.difference(['Industry'])

# Convert every remaining column to numeric. errors='coerce' turns any
# value that cannot be parsed into NaN, which covers both the standalone
# "-" placeholder and strings such as "150-200", so no separate masking
# pass over the frame is needed.
df[cols_to_process] = df[cols_to_process].apply(pd.to_numeric, errors='coerce')

# Let pandas pick the best nullable dtype for 'Industry' (object -> string)
df['Industry'] = df['Industry'].convert_dtypes()

df.info()

In [None]:
# Bare expression as the cell's last statement: shows the DataFrame in the cell output for inspection
df

In [None]:
import os

# Get the current working directory
current_path = os.getcwd()

# The pickle output goes into a 'temp' folder under the working directory
temp_dir_path = os.path.join(current_path, 'temp')

# Try to create the directory and react to the outcome, rather than
# checking first: this avoids the gap between the existence check and the
# creation, and lets us detect a non-directory sitting at the same path.
try:
    os.makedirs(temp_dir_path)
except FileExistsError:
    if not os.path.isdir(temp_dir_path):
        # Something that is not a directory already uses this path;
        # saving into it later would fail, so surface the error now.
        raise
    print(f"Temp directory already exists at: {temp_dir_path}")
else:
    print(f"Created temp directory at: {temp_dir_path}")

In [None]:
import pickle

# Destination: the pickle file name inside the temp directory
pickle_path = os.path.join(temp_dir_path, pickle_file_name)

# Serialize the DataFrame in binary mode; the context manager closes the
# file even if dumping raises
with open(pickle_path, 'wb') as pickle_file:
    pickle.dump(df, pickle_file)

print(f"DataFrame saved to: {pickle_path}")