### Import required libraries

In [7]:
import pandas as pd
import re


### Load the laptops CSV file

In [8]:
# Location of the raw laptops dataset (absolute, machine-specific path).
file_path = r"C:\Users\omar\work\ml_projects\laptops\Laptops.csv"
# Read the raw CSV into a DataFrame; later cells read from `df`.
# NOTE(review): the preview below shows an "Unnamed: 0" column, which suggests
# the file carries a previously saved row index - confirm whether it should be
# read with index_col=0 or dropped.
df = pd.read_csv(file_path)

# Preview the first rows of the raw data.
df.head()


Unnamed: 0,brand,model,price_egp,cpu,cpu_cores,cpu_max_speed,gpu,vram,ram,max_ram,...,display_size,refresh_rates,resolution,battery_watt,battery_life_hours,warranty,warranty_duration,windows_11,seller,link_to_product
0,Acer,Nitro V15 ANV15-51-58S2,31499,Intel Core i5-13420H,,4.6GHz,NVIDIA GeForce RTX 2050,4GB,8GB,,...,"15.6""",,1920x1080,,,1.0,2.0,1.0,2B,https://2b.com.eg/en/acer-nitro-v15-anv15-51-5...
1,Lenovo,LOQ 15IAX9 83GS00SKED,33099,Intel Core i5-12450HX,8C,4.4GHz,NVIDIA GeForce RTX 2050,4GB,12GB,,...,"15.6""",,1920x1080,,,1.0,2.0,1.0,2B,https://2b.com.eg/en/lenovo-loq-15iax9-laptop-...
2,MSI,Thin 15 B13UC 9S7-16R831-2615,34799,Intel Core i7-13620H,,4.9GHz,NVIDIA GeForce RTX 3050,4GB,16GB,,...,"15.6""",,1920x1080,,,1.0,1.0,0.0,2B,https://2b.com.eg/en/msi-thin-15-b13uc-laptop-...
3,Lenovo,LOQ 15IAX9 83GS00RXED,35299,Intel Core i5-12600HX,12C,4.6GHz,NVIDIA GeForce RTX 3050,6GB,12GB,,...,"15.6""",,1920x1080,,,1.0,2.0,0.0,2B,https://2b.com.eg/en/lenovo-loq-15iax9-laptop-...
4,HP,Victus 15-fa1162ne C0PQ3EA#ABV,36699,Intel Core i5-13420H,,4.6GHz,NVIDIA GeForce RTX 3050,6GB,8GB,,...,"15.6""",,,,,1.0,1.0,1.0,2B,https://2b.com.eg/en/hp-victus-15-fa1162ne-int...


### Clean the dataset

In [9]:
def normalize_storage(value):
    """Convert a storage size such as '512GB' or '1TB' into whole gigabytes.

    The first number that is directly followed by a ``GB`` or ``TB`` unit
    (case-insensitive, optional whitespace in between) is used, and the
    multiplier comes from that same unit. Matching number and unit together
    keeps unrelated digits or a second size in the same cell from being
    combined with the wrong unit (e.g. '512GB SSD + 1TB HDD' yields 512,
    'DDR5 16GB' yields 16).

    Args:
        value: Raw cell value (string, number, None or NaN).

    Returns:
        The size in GB as an int (1 TB = 1024 GB, fractional results are
        truncated), or None when the value is missing or contains no
        "<number><GB|TB>" pair.
    """
    if pd.isna(value):
        return None
    # Number (integer, decimal, or leading-dot decimal) followed by its unit.
    found = re.search(
        r"(\d+(?:\.\d+)?|\.\d+)\s*(TB|GB)", str(value), flags=re.IGNORECASE
    )
    if found is None:
        return None
    amount = float(found.group(1))
    if found.group(2).upper() == "TB":
        amount *= 1024
    return int(amount)

def normalize_ram(value):
    """Turn a RAM/VRAM size into gigabytes.

    Memory sizes use the same textual format as storage sizes, so the
    conversion is delegated to ``normalize_storage`` and its result is
    returned unchanged.
    """
    gigabytes = normalize_storage(value)
    return gigabytes

def normalize_speed(value):
    """Extract the CPU speed from a value such as '4.6GHz' as a float.

    The first well-formed decimal number in the text is returned; the unit
    suffix is ignored, so no unit conversion is performed.

    Args:
        value: Raw cell value (string, number, None or NaN).

    Returns:
        The first number found as a float, or None when the value is
        missing or contains no digits.
    """
    if pd.isna(value):
        return None
    # Require at least one digit so stray punctuation ("N.A.", "approx.")
    # is skipped instead of being handed to float().
    found = re.search(r"\d+(?:\.\d+)?|\.\d+", str(value))
    return float(found.group()) if found else None

def normalize_display_size(value):
    """Extract the display size from a value such as '15.6"' as a float.

    The first well-formed decimal number in the text is returned; any
    trailing inch mark or unit text is ignored.

    Args:
        value: Raw cell value (string, number, None or NaN).

    Returns:
        The first number found as a float, or None when the value is
        missing or contains no digits.
    """
    if pd.isna(value):
        return None
    # Require at least one digit so a lone "." (e.g. in "approx.") is not
    # passed to float(), which would raise ValueError.
    found = re.search(r"\d+(?:\.\d+)?|\.\d+", str(value))
    return float(found.group()) if found else None

def clean_general(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of ``df``; the input frame is not modified.

    Each column is handled by the first rule whose keyword occurs in its
    lower-cased name (so e.g. 'cpu_max_speed' is treated as a speed, not as
    CPU text):

    * 'ram' (which also covers 'vram')  -> ``normalize_ram``
    * 'storage'                         -> ``normalize_storage``
    * 'speed'                           -> ``normalize_speed``
    * 'display' / 'screen'              -> ``normalize_display_size``
    * 'brand' / 'model' / 'gpu' / 'cpu' -> converted to text and stripped

    Columns matching none of the keywords are left as they are.

    Args:
        df: Raw laptops table.

    Returns:
        A new DataFrame with the matching columns normalised.
    """
    cleaned = df.copy()
    for col in cleaned.columns:
        # str() keeps non-string column labels from breaking .lower().
        key = str(col).lower()
        if "ram" in key:
            cleaned[col] = cleaned[col].apply(normalize_ram)
        elif "storage" in key:
            cleaned[col] = cleaned[col].apply(normalize_storage)
        elif "speed" in key:
            cleaned[col] = cleaned[col].apply(normalize_speed)
        elif "display" in key or "screen" in key:
            cleaned[col] = cleaned[col].apply(normalize_display_size)
        elif any(tag in key for tag in ("brand", "model", "gpu", "cpu")):
            original = cleaned[col]
            stripped = original.astype(str).str.strip()
            # astype(str) would turn missing cells into the literal text
            # "nan"/"None"; keep them missing instead.
            cleaned[col] = stripped.where(original.notna())
    return cleaned

# Run the cleaning step on the raw frame and keep the result under its own name.
cleaned_df = df.pipe(clean_general)

# Preview the first rows of the cleaned data.
cleaned_df.head()


Unnamed: 0,brand,model,price_egp,cpu,cpu_cores,cpu_max_speed,gpu,vram,ram,max_ram,...,display_size,refresh_rates,resolution,battery_watt,battery_life_hours,warranty,warranty_duration,windows_11,seller,link_to_product
0,Acer,Nitro V15 ANV15-51-58S2,31499,Intel Core i5-13420H,,4.6,NVIDIA GeForce RTX 2050,4.0,8,,...,15.6,,1920x1080,,,1.0,2.0,1.0,2B,https://2b.com.eg/en/acer-nitro-v15-anv15-51-5...
1,Lenovo,LOQ 15IAX9 83GS00SKED,33099,Intel Core i5-12450HX,8C,4.4,NVIDIA GeForce RTX 2050,4.0,12,,...,15.6,,1920x1080,,,1.0,2.0,1.0,2B,https://2b.com.eg/en/lenovo-loq-15iax9-laptop-...
2,MSI,Thin 15 B13UC 9S7-16R831-2615,34799,Intel Core i7-13620H,,4.9,NVIDIA GeForce RTX 3050,4.0,16,,...,15.6,,1920x1080,,,1.0,1.0,0.0,2B,https://2b.com.eg/en/msi-thin-15-b13uc-laptop-...
3,Lenovo,LOQ 15IAX9 83GS00RXED,35299,Intel Core i5-12600HX,12C,4.6,NVIDIA GeForce RTX 3050,6.0,12,,...,15.6,,1920x1080,,,1.0,2.0,0.0,2B,https://2b.com.eg/en/lenovo-loq-15iax9-laptop-...
4,HP,Victus 15-fa1162ne C0PQ3EA#ABV,36699,Intel Core i5-13420H,,4.6,NVIDIA GeForce RTX 3050,6.0,8,,...,15.6,,,,,1.0,1.0,1.0,2B,https://2b.com.eg/en/hp-victus-15-fa1162ne-int...


### Save the cleaned DataFrame to a new CSV file

In [10]:
# Destination for the cleaned dataset (absolute, machine-specific path).
output_path = r"C:\Users\omar\work\ml_projects\laptops\Laptops_cleaned.csv"
# index=False keeps the DataFrame's row index out of the written file.
cleaned_df.to_csv(output_path, index=False)

# Confirm where the file was written.
print(f"✅ Cleaned CSV saved to: {output_path}")


✅ Cleaned CSV saved to: C:\Users\omar\work\ml_projects\laptops\Laptops_cleaned.csv
