In [5]:
%pip install pandas
# Import the pandas library for data manipulation
import pandas as pd
import numpy as np

Collecting pandas
  Downloading pandas-2.3.1-cp310-cp310-win_amd64.whl (11.3 MB)
     ---------------------------------------- 11.3/11.3 MB 5.5 MB/s eta 0:00:00
Collecting tzdata>=2022.7
  Downloading tzdata-2025.2-py2.py3-none-any.whl (347 kB)
     -------------------------------------- 347.8/347.8 kB 2.0 MB/s eta 0:00:00
Installing collected packages: tzdata, pandas
Successfully installed pandas-2.3.1 tzdata-2025.2
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.2.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
# --- Step 1: Read the New, Larger Data File ---
# Load the dataset from the newly uploaded CSV file with 200+ companies.
#
# If the file is missing we print a friendly hint and then re-raise the
# original FileNotFoundError, so this cell fails visibly and "Run All" stops
# here. (Calling exit() from inside a notebook asks the IPython kernel to shut
# down, which discards every variable instead of just halting this cell.)
try:
    df = pd.read_csv('pharma_data.csv')
except FileNotFoundError:
    print("❌ Error: pharma_data.csv not found. Please ensure the file is uploaded correctly.")
    raise
else:
    # Only reached when the read succeeded.
    print("✅ New data file with 200+ companies loaded successfully.")


✅ New data file with 200+ companies loaded successfully.


In [7]:
# --- Step 2: Prepare the Data for Analysis ---
# Builds `df_selected`: the required columns of `df`, with every metric column
# coerced to a numeric dtype and all rows containing missing values removed.
print("\n⚙️  Preparing data for analysis...")

# We will use the same column names that worked successfully on your last file.
required_columns = [
    'Name',
    'Market Capitalization',
    'Price to Earning',
    'Debt to equity',
    'Sales growth 3Years',
    'Return on capital employed'
]

# Check if all required columns exist in the dataframe
missing_cols = set(required_columns) - set(df.columns)
if missing_cols:
    print(f"❌ Error: The following required columns are missing from this new file: {missing_cols}")
    print("The file format seems to have changed again. Please check your CSV file.")
    # Raise instead of calling exit(): in a notebook exit() asks the kernel to
    # shut down, whereas an exception simply stops this cell (and "Run All").
    raise ValueError(f"Required columns missing from the data file: {sorted(missing_cols)}")

# Select only the columns we need (copy so the raw `df` is never modified).
df_selected = df[required_columns].copy()

# Clean and convert columns to numeric types.
# Every required column except the company name is a numeric metric; deriving
# the list keeps it in sync with `required_columns`.
columns_to_convert = [col for col in required_columns if col != 'Name']
for col in columns_to_convert:
    # errors='coerce' turns any value that cannot be parsed into NaN.
    df_selected[col] = pd.to_numeric(df_selected[col], errors='coerce')

# Remove any rows that have missing values after conversion
# (this includes rows whose 'Name' is missing).
original_rows = len(df_selected)
df_selected = df_selected.dropna()
cleaned_rows = len(df_selected)
print(f"Data cleaned. {original_rows - cleaned_rows} rows with missing data were removed.")




⚙️  Preparing data for analysis...
Data cleaned. 50 rows with missing data were removed.


In [8]:
# --- Step 3: Apply the Screening Criteria ---
print("\n🔬 Applying the same screening filters to the new dataset...")

# Thresholds for the high-quality company screen.
# All comparisons below are strict, so a value equal to a threshold fails.
MIN_MARKET_CAP = 20000
MIN_SALES_GROWTH = 15
MAX_DEBT_TO_EQUITY = 0.5
MIN_ROCE = 15
MAX_PE_RATIO = 50

# Row-wise boolean mask: True only where a company satisfies every criterion.
passes_screen = (
    df_selected['Market Capitalization'].gt(MIN_MARKET_CAP)
    & df_selected['Sales growth 3Years'].gt(MIN_SALES_GROWTH)
    & df_selected['Debt to equity'].lt(MAX_DEBT_TO_EQUITY)
    & df_selected['Return on capital employed'].gt(MIN_ROCE)
    & df_selected['Price to Earning'].lt(MAX_PE_RATIO)
)

# Keep only the companies that pass the whole screen.
screened_df = df_selected.loc[passes_screen]




🔬 Applying the same screening filters to the new dataset...


In [9]:
# --- Step 4: Display the Final Results ---
print("\n\n--- ✅ ANALYSIS COMPLETE: Here is the final shortlist from the 200+ company file ---")

if not screened_df.empty:
    # Order the shortlist from largest to smallest market capitalization,
    # then renumber the rows from 0 so the printed index reads as a rank.
    final_results = screened_df.sort_values(by='Market Capitalization', ascending=False)
    final_results = final_results.reset_index(drop=True)
    print(final_results)
else:
    # Nothing survived the screen; tell the reader how to proceed.
    print("\nNo companies in this larger dataset passed all the screening criteria. You might want to try adjusting the filters in the code.")



--- ✅ ANALYSIS COMPLETE: Here is the final shortlist from the 200+ company file ---
              Name  Market Capitalization  Price to Earning  Debt to equity  \
0   Zydus Lifesci.              100192.08             21.58            0.13   
1  J B Chemicals &               28188.60             44.08            0.01   

   Sales growth 3Years  Return on capital employed  
0                15.43                       24.31  
1                17.35                       25.79  
