In [1]:
# Workbook to read and the worksheet within it (passed to pd.read_excel by the load cell).
# NOTE(review): absolute, machine-specific path — confirm it exists where this notebook runs.
# NOTE(review): the sheet name looks like a snapshot date — confirm before re-running.
file_path = r"G:\My Drive\stocks\finviz_scrape.xlsm"
sheet_name = "2025-03-01"

# File name the merged DataFrame is pickled to by the final cell
pickle_file_name = "merged_df.pkl"

In [2]:
import pandas as pd


# Load the requested worksheet and index it by ticker symbol.
#
# Every failure is reported and then re-raised: the later cells all operate on
# `df`, so swallowing the error here would let them run against a missing `df`,
# a stale `df` left in the kernel by an earlier run, or a frame that was never
# indexed by ticker.
try:
    df = pd.read_excel(file_path, sheet_name=sheet_name)
except FileNotFoundError:
    print(f"Error: File not found at path: {file_path}")
    raise
except Exception as e:
    print(f"An error occurred: {e}")
    raise

# The rest of the notebook relies on the index holding the ticker symbols.
if 'Ticker' not in df.columns:
    print("Error: 'Ticker' column not found in the Excel file.")
    raise KeyError('Ticker')

# Set the 'Ticker' column as the index
df = df.set_index('Ticker')
print(df)  # Print the DataFrame to verify it's loaded correctly with the new index

                                        Industry Market Cap    P/E  Fwd P/E  \
Ticker                                                                        
AAPL                        Consumer Electronics   3632.94B  38.45    29.53   
ABBV                Drug Manufacturers - General    369.01B  87.40    15.03   
ABNB                             Travel Services     87.56B  33.59    27.56   
ABT                              Medical Devices    239.35B  18.01    24.19   
ACN              Information Technology Services    217.98B  29.21    24.90   
...                                          ...        ...    ...      ...   
WFC                          Banks - Diversified    257.53B  14.57    11.31   
WM                              Waste Management     93.60B  34.19    26.77   
WMB                          Oil & Gas Midstream     70.94B  24.75    24.70   
WMT                              Discount Stores    792.17B  40.97    33.37   
ZTS     Drug Manufacturers - Specialty & Generic    

In [3]:
# Summarise the freshly loaded frame: index range, per-column non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 200 entries, AAPL to ZTS
Data columns (total 28 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Industry       200 non-null    object 
 1   Market Cap     200 non-null    object 
 2   P/E            200 non-null    float64
 3   Fwd P/E        200 non-null    float64
 4   PEG            200 non-null    object 
 5   P/FCF          200 non-null    object 
 6   Dividend       200 non-null    object 
 7   Payout Ratio   200 non-null    object 
 8   EPS this Y     200 non-null    float64
 9   EPS next Y     200 non-null    float64
 10  EPS next 5Y    200 non-null    object 
 11  Sales past 5Y  200 non-null    float64
 12  Sales Q/Q      200 non-null    object 
 13  EPS Q/Q        200 non-null    object 
 14  Inst Own       200 non-null    float64
 15  ROA            200 non-null    float64
 16  ROE            200 non-null    object 
 17  Curr R         200 non-null    object 
 18  Quick R     

In [4]:
import pandas as pd

# 'Market Cap' arrives as text with a magnitude suffix, e.g. "3632.94B".
# Express every value in billions. Stripping only "B" would silently turn a value
# carrying any other suffix into NaN, so the known suffixes are scaled instead.
UNIT_TO_BILLIONS = {'M': 1e-3, 'B': 1.0, 'T': 1e3}

cap_text = (
    df['Market Cap']
    .astype(str)                        # work on text; also keeps the cell re-runnable on numeric data
    .str.strip()
    .str.replace(',', '', regex=False)  # drop thousands separators if present
)

# Trailing unit letter, if any. Values without a recognised suffix are left unscaled.
cap_unit = cap_text.str.extract(r'([MBT])$', expand=False)
cap_scale = cap_unit.map(UNIT_TO_BILLIONS).fillna(1.0)

# Vectorised conversion of the numeric part; anything that is still not a number
# (for example a lone "-") becomes NaN.
cap_value = pd.to_numeric(
    cap_text.str.replace(r'[MBT]$', '', regex=True),
    errors='coerce',
)

df['Market Cap'] = cap_value * cap_scale

print(df)
print("\nData types after conversion:")
print(df.dtypes)

                                        Industry  Market Cap    P/E  Fwd P/E  \
Ticker                                                                         
AAPL                        Consumer Electronics     3632.94  38.45    29.53   
ABBV                Drug Manufacturers - General      369.01  87.40    15.03   
ABNB                             Travel Services       87.56  33.59    27.56   
ABT                              Medical Devices      239.35  18.01    24.19   
ACN              Information Technology Services      217.98  29.21    24.90   
...                                          ...         ...    ...      ...   
WFC                          Banks - Diversified      257.53  14.57    11.31   
WM                              Waste Management       93.60  34.19    26.77   
WMB                          Oil & Gas Midstream       70.94  24.75    24.70   
WMT                              Discount Stores      792.17  40.97    33.37   
ZTS     Drug Manufacturers - Specialty &

In [5]:
import pandas as pd
import numpy as np

# Every column except the text-valued 'Industry' is converted to numbers.
cols_to_process = df.columns.difference(['Industry'])

# True wherever a cell's text form is exactly "-" (a standalone hyphen).
mask = df[cols_to_process].astype(str).eq('-')

# Blank out the standalone hyphens, then coerce each column to numeric;
# any remaining non-numeric text (such as "150-200") also ends up as NaN.
df[cols_to_process] = (
    df[cols_to_process]
    .where(~mask, np.nan)
    .apply(pd.to_numeric, errors='coerce')
)

print("Processed DataFrame:")
print("\nData types:")
print(df.dtypes)

Processed DataFrame:

Data types:
Industry          object
Market Cap       float64
P/E              float64
Fwd P/E          float64
PEG              float64
P/FCF            float64
Dividend         float64
Payout Ratio     float64
EPS this Y       float64
EPS next Y       float64
EPS next 5Y      float64
Sales past 5Y    float64
Sales Q/Q        float64
EPS Q/Q          float64
Inst Own         float64
ROA              float64
ROE              float64
Curr R           float64
Quick R          float64
Gross M          float64
Oper M           float64
Profit M         float64
Beta             float64
ATR              float64
Volatility W     float64
Volatility M     float64
Recom            float64
Target Price     float64
dtype: object


In [6]:
# convert_dtypes() is applied to the whole frame, not only 'Industry':
# 'Industry' becomes `string` and the float columns become nullable `Float64`
# (both visible in the info() output of this cell).
df = df.convert_dtypes()
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 200 entries, AAPL to ZTS
Data columns (total 28 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Industry       200 non-null    string 
 1   Market Cap     200 non-null    Float64
 2   P/E            200 non-null    Float64
 3   Fwd P/E        200 non-null    Float64
 4   PEG            199 non-null    Float64
 5   P/FCF          184 non-null    Float64
 6   Dividend       168 non-null    Float64
 7   Payout Ratio   199 non-null    Float64
 8   EPS this Y     200 non-null    Float64
 9   EPS next Y     200 non-null    Float64
 10  EPS next 5Y    199 non-null    Float64
 11  Sales past 5Y  200 non-null    Float64
 12  Sales Q/Q      199 non-null    Float64
 13  EPS Q/Q        197 non-null    Float64
 14  Inst Own       200 non-null    Float64
 15  ROA            200 non-null    Float64
 16  ROE            188 non-null    Float64
 17  Curr R         182 non-null    Float64
 18  Quick R     

In [7]:
# Write the ticker symbols (the DataFrame index) to a text file, one per line
with open('tickers.txt', 'w') as out_file:
    out_file.writelines(f"{symbol}\n" for symbol in df.index)

# Read the symbols back into a list, stripping surrounding whitespace from each line
with open('tickers.txt', 'r') as in_file:
    tickers = [raw_line.strip() for raw_line in in_file]

print(tickers)


['AAPL', 'ABBV', 'ABNB', 'ABT', 'ACN', 'ADBE', 'ADI', 'ADP', 'ADSK', 'AJG', 'AMAT', 'AMD', 'AMGN', 'AMT', 'AMZN', 'ANET', 'AON', 'APD', 'APH', 'APO', 'APP', 'ARM', 'ASML', 'AVGO', 'AXP', 'AZN', 'AZO', 'BABA', 'BAC', 'BAM', 'BCS', 'BDX', 'BK', 'BKNG', 'BLK', 'BMO', 'BNS', 'BP', 'BSX', 'BTI', 'BUD', 'BX', 'C', 'CDNS', 'CEG', 'CHTR', 'CI', 'CL', 'CM', 'CME', 'CMG', 'CNI', 'COF', 'COP', 'COST', 'CP', 'CRH', 'CRM', 'CRWD', 'CSX', 'CTAS', 'CVS', 'CVX', 'DASH', 'DELL', 'DHR', 'DIS', 'DUK', 'ECL', 'ELV', 'EMR', 'ENB', 'EPD', 'EQIX', 'EQNR', 'ET', 'ETN', 'FDX', 'FI', 'FTNT', 'GD', 'GE', 'GEV', 'GILD', 'GOOG', 'GOOGL', 'GS', 'GSK', 'HCA', 'HDB', 'HLT', 'HON', 'IBKR', 'IBM', 'IBN', 'ICE', 'INTU', 'ISRG', 'JD', 'JNJ', 'JPM', 'KKR', 'KLAC', 'KMI', 'KO', 'LIN', 'LLY', 'LMT', 'LOW', 'LRCX', 'MA', 'MAR', 'MCD', 'MCK', 'MCO', 'MDT', 'MELI', 'MET', 'META', 'MFG', 'MMC', 'MMM', 'MO', 'MRK', 'MS', 'MSFT', 'MSI', 'MU', 'MUFG', 'NEE', 'NFLX', 'NOC', 'NOW', 'NTES', 'NVDA', 'NVO', 'NVS', 'OKE', 'ORCL', 'ORLY'

Append the performance ratios to df

In [8]:
import pandas as pd

# Read the previously saved performance ratios.
# NOTE: unpickling can execute arbitrary code — only load pickle files from a trusted source.
_df = pd.read_pickle("performance_ratios.pkl")

# Peek at the first rows to see how the ratios frame is laid out
print(_df.head())

# Left join on the index: every row of df is kept, and the ratio columns are
# added wherever the index value also exists in _df.
df_merged = df.merge(_df, how='left', left_index=True, right_index=True)

df_merged.info()


      Sharpe Ratio  Sortino Ratio  Omega Ratio
AAPL      1.220949       1.826844     1.236133
ABBV      0.744732       0.995488     1.153213
ABNB     -0.328764      -0.448952     0.939889
ABT       0.791461       1.222935     1.145934
ACN      -0.337535      -0.460173     0.938945
<class 'pandas.core.frame.DataFrame'>
Index: 200 entries, AAPL to ZTS
Data columns (total 31 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Industry       200 non-null    string 
 1   Market Cap     200 non-null    Float64
 2   P/E            200 non-null    Float64
 3   Fwd P/E        200 non-null    Float64
 4   PEG            199 non-null    Float64
 5   P/FCF          184 non-null    Float64
 6   Dividend       168 non-null    Float64
 7   Payout Ratio   199 non-null    Float64
 8   EPS this Y     200 non-null    Float64
 9   EPS next Y     200 non-null    Float64
 10  EPS next 5Y    199 non-null    Float64
 11  Sales past 5Y  200 non-null    Float64

In [9]:
# Display the merged frame (the notebook's rich repr truncates long and wide frames).
df_merged

Unnamed: 0_level_0,Industry,Market Cap,P/E,Fwd P/E,PEG,P/FCF,Dividend,Payout Ratio,EPS this Y,EPS next Y,...,Profit M,Beta,ATR,Volatility W,Volatility M,Recom,Target Price,Sharpe Ratio,Sortino Ratio,Omega Ratio
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAPL,Consumer Electronics,3632.94,38.45,29.53,3.89,36.96,0.0042,0.1611,0.0847,0.1186,...,0.243,1.18,5.89,0.0276,0.0239,2.16,252.73,1.220949,1.826844,1.236133
ABBV,Drug Manufacturers - General,369.01,87.4,15.03,5.79,20.69,0.0311,2.6315,0.2132,0.1329,...,0.0752,0.59,4.1,0.0198,0.0216,1.69,209.96,0.744732,0.995488,1.153213
ABNB,Travel Services,87.56,33.59,27.56,2.5,19.53,,0.0,0.0602,0.1562,...,0.2385,1.09,5.13,0.0286,0.0311,2.74,157.12,-0.328764,-0.448952,0.939889
ABT,Medical Devices,239.35,18.01,24.19,1.7,37.69,0.0172,0.2933,0.1033,0.1074,...,0.3183,0.69,2.44,0.0175,0.018,1.73,135.52,0.791461,1.222935,1.145934
ACN,Information Technology Services,217.98,29.21,24.9,3.2,24.07,0.0164,0.4512,0.0729,0.0918,...,0.1141,1.23,8.45,0.0234,0.0218,1.79,406.52,-0.337535,-0.460173,0.938945
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WFC,Banks - Diversified,257.53,14.57,11.31,1.04,8.67,0.0217,0.2796,0.1072,0.1644,...,0.1473,1.16,1.73,0.0266,0.0211,2.04,85.45,1.341553,2.204764,1.287815
WM,Waste Management,93.6,34.19,26.77,3.58,43.36,0.014,0.4407,0.0558,0.139,...,0.1245,0.76,3.3,0.0146,0.0149,2.04,241.5,0.630657,0.866401,1.132698
WMB,Oil & Gas Midstream,70.94,24.75,24.7,2.4,30.9,0.0346,1.044,0.0998,0.1155,...,0.2089,1.07,1.81,0.0366,0.0309,2.25,60.32,2.221235,3.259412,1.466137
WMT,Discount Stores,792.17,40.97,33.37,4.15,62.57,0.0093,0.3444,0.0504,0.1208,...,0.0285,0.54,2.31,0.0269,0.0207,1.4,110.39,2.604496,4.336011,1.606503


In [10]:
# Persist the merged frame to the pickle file named by `pickle_file_name` in the first cell.
df_merged.to_pickle(pickle_file_name)

# Confirm which file was written.
print(f"DataFrame saved to {pickle_file_name}")

DataFrame saved to merged_df.pkl
