In [38]:
import os
import pandas as pd
import numpy as np

In [39]:
# Folder path
folder_path = "Datasets"

# Company name mapping
company_names = {
    "ADEL.csv": "Adani Enterprises",
    "HDBK.csv": "HDFC Bank",
    "INFY.csv": "Infosys",
    "TISC.csv": "Tata Steel",
    "RELI.csv": "Reliance Industries",
    "NEST.csv": "Nestle India",
    "CIPL.csv": "Cipla",
    "HIAE.csv": "Hindustan Aeronautics Limited"
}

# Loop through files
for filename in os.listdir(folder_path):
    if filename.endswith(".csv") and filename in company_names:
        
        company = company_names[filename]
        file_path = os.path.join(folder_path, filename)
        
        df = pd.read_csv(file_path)
        
        print(f"\n\n------------ Company: {company} ------------\n")
        print(df.head(5))



------------ Company: Adani Enterprises ------------

         Date     Price      Open      High       Low     Vol. Change %
0  18-12-2025  2,229.90  2,236.40  2,245.00  2,211.20  501.82K   -0.12%
1  17-12-2025  2,232.50  2,247.60  2,255.00  2,221.00  651.51K   -0.69%
2  16-12-2025  2,247.90  2,270.00  2,276.80  2,232.10  654.71K   -1.36%
3  15-12-2025  2,278.90  2,282.40  2,300.00  2,273.00  573.03K   -0.15%
4  12-12-2025  2,282.40  2,277.70  2,296.80  2,270.20  929.20K    0.21%


------------ Company: Cipla ------------

         Date     Price      Open      High       Low     Vol. Change %
0  18-12-2025  1,496.90  1,494.80  1,511.00  1,494.00  974.09K    0.00%
1  17-12-2025  1,496.90  1,497.60  1,505.30  1,491.00    1.00M   -0.18%
2  16-12-2025  1,499.60  1,500.00  1,508.90  1,496.10  789.49K   -0.56%
3  15-12-2025  1,508.00  1,505.00  1,515.50  1,498.00  855.51K   -0.62%
4  12-12-2025  1,517.40  1,520.00  1,524.70  1,510.20    1.05M    0.34%


------------ Company: HDFC Bank --

In [40]:
def convert_volume(vol):
    
    # If volume is missing, return NaN
    if pd.isna(vol):
        return np.nan
    
    # Convert to string and clean commas/spaces
    vol = str(vol)
    vol = vol.replace(",", "")
    vol = vol.strip()
    
    # Convert based on suffix
    if vol.endswith("K"):
        number = float(vol[:-1])
        return number * 1000
    
    elif vol.endswith("M"):
        number = float(vol[:-1])
        return number * 1000000
    
    elif vol.endswith("B"):
        number = float(vol[:-1])
        return number * 1000000000
    
    else:
        return float(vol)

all_stocks_data = {}

# Loop through all CSV files in the folder
for file in os.listdir(folder_path):
    
    # Only process files that exist in company list
    if file.endswith(".csv") and file in company_names:
        
        print("\nProcessing file:", file)
        
        # Create full file path
        file_path = os.path.join(folder_path, file)
        
        # Read CSV into DataFrame
        original_df = pd.read_csv(file_path)
        
        # Create a copy so original data is untouched
        df = original_df.copy()
        
        # Replace invalid dates *********
        df["Date"] = df["Date"].replace("*********", np.nan)
        
        # Convert to datetime format
        df["Date"] = pd.to_datetime(
            df["Date"],
            format="%d-%m-%Y",
            errors="coerce"
        )
        
        # Convert to Indian date display format
        df["Date"] = df["Date"].dt.strftime("%d-%m-%Y")
        
        price_columns = ["Price", "Open", "High", "Low"]
        
        for col in price_columns:
            
            df[col] = df[col].astype(str)
            
            df[col] = df[col].str.replace(",", "")
 
            df[col] = pd.to_numeric(df[col], errors="coerce")
        
        df["Vol."] = df["Vol."].apply(convert_volume)
       
        df["Change %"] = df["Change %"].str.replace("%", "")
        df["Change %"] = df["Change %"].astype(float)
        df["Change %"] = df["Change %"] / 100
        
        df = df.sort_values("Date")
       
        company = company_names[file]
        all_stocks_data[company] = df
        
        print("\n------------ Company:", company, "------------\n")
        print(df.head(5).to_string(index=False))


print("\nAll stock datasets loaded & cleaned successfully!")


Processing file: ADEL.csv

------------ Company: Adani Enterprises ------------

      Date   Price    Open    High     Low      Vol.  Change %
01-01-2021  491.15  477.00  493.25  477.00 5040000.0    0.0242
01-01-2024 2917.20 2852.30 2947.00 2842.05 2900000.0    0.0240
01-01-2025 2554.85 2536.00 2644.20 2518.25 2880000.0    0.0104
01-02-2021  537.20  510.50  539.50  496.90 4000000.0    0.0603
01-02-2022 1746.90 1727.95 1765.10 1699.15 1570000.0    0.0186

Processing file: CIPL.csv

------------ Company: Cipla ------------

      Date  Price    Open    High     Low       Vol.  Change %
01-01-2021  826.6  822.80  828.95  820.65  2470000.0    0.0081
01-01-2024 1251.0 1252.45 1256.80 1246.70   343580.0    0.0038
01-01-2025 1529.1 1531.95 1547.85 1518.70  1580000.0    0.0001
01-02-2021  806.4  815.00  831.00  779.95 13860000.0   -0.0236
01-02-2022  946.4  948.00  959.00  938.80  2030000.0    0.0015

Processing file: HDBK.csv

------------ Company: HDFC Bank ------------

      Date  Price 