In [1]:
import re
import pandas as pd

In [2]:
# Remove pandas' row and column display limits so cell outputs are never truncated.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [3]:
# Location of the raw CSV. NOTE(review): this is an absolute path that looks like
# a Colab Google Drive mount - confirm the drive is mounted before running.
raw_csv_path = "/content/drive/MyDrive/Data_Analysis_Projects/my_data_analysis_project/data/raw/df.csv"
df = pd.read_csv(raw_csv_path)

To clean the Brand column with a threshold of 15 and preserve the names of brands grouped into "Other", we can follow these steps:


Identify brands with counts below the threshold.

Group these brands into "Other" in the Brand column.

Preserve the original brand names in a new column for reference.

In [4]:
# Brands with fewer than `threshold` rows are collapsed into "Other"; the
# untouched names remain available in 'Original_Brand'.
threshold = 15

# Snapshot the raw brand names only once. On a re-run 'Brand' already contains
# "Other", so copying it again would overwrite the names we want to preserve.
if 'Original_Brand' not in df.columns:
    df['Original_Brand'] = df['Brand']

# Count from the preserved names so the result does not depend on how many
# times this cell has been executed (or on a changed threshold).
brand_counts = df['Original_Brand'].value_counts()

# Identify brands to be grouped into "Other"
brands_to_group = brand_counts[brand_counts < threshold].index.tolist()

# Rebuild 'Brand' from the originals, replacing only the rare brands
is_rare_brand = df['Original_Brand'].isin(brands_to_group)
df['Brand'] = df['Original_Brand'].where(~is_rare_brand, 'Other')

# Display the updated brand counts for the 'Brand' column.
# rename_axis + reset_index(name=...) yields the columns 'Brand' and 'Count'
# regardless of how the installed pandas version names value_counts() output.
updated_brand_counts = df['Brand'].value_counts()
updated_brand_counts_df = updated_brand_counts.rename_axis('Brand').reset_index(name='Count')
updated_brand_counts_df


Unnamed: 0,Brand,Count
0,Asus,685
1,Lenovo,558
2,HP,508
3,Dell,292
4,MSI,230
5,Acer,122
6,Other,96
7,Apple,62
8,Samsung,53
9,Infinix,39


In [5]:
# Inspect the column labels of df
df.columns

Index(['Name', 'Brand', 'Model_Name', 'Price', 'Price_Range', 'Spec Score',
       'Processor', 'Processor_Brand', 'Core Configuration', 'Series',
       'Clock-speed', 'Operating System', 'OS Type', 'RAM Type',
       'Graphics Processor', 'Graphics_Brand', 'Display Size (Inches)',
       'Resolution Width', 'Resolution Height', 'PPI', 'Aspect Ratio',
       'Capacity', 'Weight(kg)', 'Touchscreen', 'Screen_Protection',
       'Colour(s)', 'Outlier_Flag', 'Original_Brand'],
      dtype='object')

In [6]:
# Standardize naming conventions in the Model_Name column:
# lower-case, drop every character that is neither a word character nor
# whitespace, then trim. Trimming has to come last: removing punctuation at the
# start or end of a name can expose whitespace, and a leading space would stop
# the `re.match(r'(\w+)', ...)` based series extraction from matching.
# Note that dropping punctuation fuses hyphenated tokens
# (the listing 'Dell G15-5530' ends up as 'g155530').
df['Model_Name'] = (
    df['Model_Name']
    .str.lower()
    .str.replace(r'[^\w\s]', '', regex=True)
    .str.strip()
)

# Display some sample model names to verify
df['Model_Name'].sample(10)

1249                              vostro 3510
2539                         ideapad gaming 3
1836                         legion s7 15ach6
1627                       thinkpad p14s gen3
1090                               15fc0028au
1581                         inspiron 14 7430
1786    macbook pro m3 pro mrw23hna ultrabook
765                      rog strix g17 g713rc
1009                   ideapad slim 5i 14iah8
1419                       creator z16 a11uet
Name: Model_Name, dtype: object

In [7]:
# Function to extract series and model number
def extract_series_model(name):
    """Split a model name into its first word token and the remaining text.

    Parameters
    ----------
    name : str
        Model name such as ``'tuf gaming f17 fx706hf'``. Non-string values
        (for example NaN or None for a missing name) are accepted as well.

    Returns
    -------
    pd.Series
        Two positional values ``[series, model_number]``:

        * ``series`` - the first run of word characters, ignoring leading
          whitespace;
        * ``model_number`` - everything after that token, stripped; an empty
          string when nothing follows.

        When no word token can be found, or ``name`` is not a string, the
        input is returned unchanged as ``series`` with an empty
        ``model_number``.
    """
    # re.match raises TypeError on non-strings, so pass missing values through.
    if not isinstance(name, str):
        return pd.Series([name, ''])

    # Tolerate leading whitespace: re.match is anchored at position 0, so
    # without the \s* prefix a padded name would never match.
    match = re.match(r'\s*(\w+)', name)
    if match:
        series = match.group(1)
        model_number = name[match.end():].strip()
        return pd.Series([series, model_number])
    return pd.Series([name, ''])

# Run the extraction on every model name; the two returned values per row
# become the 'Series' and 'Model_Number' columns.
# NOTE(review): the df.columns output printed before this cell already lists a
# 'Series' column, so this assignment replaces its contents - confirm that the
# previous values are not needed.
series_and_number = df['Model_Name'].apply(extract_series_model)
df[['Series', 'Model_Number']] = series_and_number

# Preview the first ten rows to verify the split
preview_columns = ['Model_Name', 'Series', 'Model_Number']
extracted_components_step = df[preview_columns].head(10)
extracted_components_step


Unnamed: 0,Model_Name,Series,Model_Number
0,one 14 z8415,one,14 z8415
1,15fc0026au,15fc0026au,
2,15sfq5007tu,15sfq5007tu,
3,predator helios neo 16 phn1671,predator,helios neo 16 phn1671
4,tuf gaming f17 fx706hf,tuf,gaming f17 fx706hf
5,15sfq5112tu,15sfq5112tu,
6,tuf gaming f15 fx506hf,tuf,gaming f15 fx506hf
7,15sfq5330tu,15sfq5330tu,
8,galaxy book 2 np750xedkc1in 156,galaxy,book 2 np750xedkc1in 156
9,vivobook 15 x1502za,vivobook,15 x1502za


In [8]:
# Step 3: trim surrounding whitespace from the extracted model numbers
model_numbers = df['Model_Number']
df['Model_Number'] = model_numbers.str.strip()

# Spot-check ten random rows of the two derived columns
derived_columns = ['Series', 'Model_Number']
df[derived_columns].sample(10)


Unnamed: 0,Series,Model_Number
1970,vivobook,pro 15 oled m3500qc
1260,vivobook,15x k3504vab
1604,legion,pro 5 16arx8
2574,surface,4
1298,galaxy,book 3 pro 360 np960qfg
1631,envy,x360 15ew0041tu
908,thinkpad,e15
479,ideapad,15alc05
163,15sgy0501au,
1219,250,g8


In [9]:
# Move 'Series' and 'Model_Number' so they directly follow 'Model_Name'
columns = df.columns.tolist()
model_name_index = columns.index('Model_Name')
for offset, moved_column in enumerate(['Series', 'Model_Number'], start=1):
    # Take the column out of its current slot, then re-insert it at the
    # position counted from 'Model_Name'.
    columns.remove(moved_column)
    columns.insert(model_name_index + offset, moved_column)
df = df[columns]

In [10]:
# Spot-check a single randomly chosen row
df.sample()

Unnamed: 0,Name,Brand,Model_Name,Series,Model_Number,Price,Price_Range,Spec Score,Processor,Processor_Brand,Core Configuration,Clock-speed,Operating System,OS Type,RAM Type,Graphics Processor,Graphics_Brand,Display Size (Inches),Resolution Width,Resolution Height,PPI,Aspect Ratio,Capacity,Weight(kg),Touchscreen,Screen_Protection,Colour(s),Outlier_Flag,Original_Brand
2276,Dell G15-5530 (GN5530D83M6001ORB1) Laptop (Cor...,Dell,g155530,g155530,,79880.0,Medium,67.0,Intel Core i5-13450HX (13th Gen),Intel,Deca Core,4.7,Windows 11 Home Basic,Windows,DDR5,NVIDIA GeForce RTX 3050,NVIDIA,15.6,1920,1080,141.21,1.78,16 GB,2.65,No,No,Grey,0,Dell


In [11]:
# Look up the rows whose extracted Series equals "14sef1001tu" and show their name-related columns
df[df['Series']=="14sef1001tu"][['Name','Model_Name','Series','Model_Number']]

Unnamed: 0,Name,Model_Name,Series,Model_Number
1355,HP 14s-ef1001tu (510L4PA) Laptop (Core i5 11th...,14sef1001tu,14sef1001tu,


In [12]:
# Check the column order of df
df.columns

Index(['Name', 'Brand', 'Model_Name', 'Series', 'Model_Number', 'Price',
       'Price_Range', 'Spec Score', 'Processor', 'Processor_Brand',
       'Core Configuration', 'Clock-speed', 'Operating System', 'OS Type',
       'RAM Type', 'Graphics Processor', 'Graphics_Brand',
       'Display Size (Inches)', 'Resolution Width', 'Resolution Height', 'PPI',
       'Aspect Ratio', 'Capacity', 'Weight(kg)', 'Touchscreen',
       'Screen_Protection', 'Colour(s)', 'Outlier_Flag', 'Original_Brand'],
      dtype='object')

In [17]:
# Keep an independent copy of df under the name `data`.
# NOTE(review): `data` is not referenced by any other cell visible in this notebook - confirm it is still needed.
data = df.copy()

In [15]:
# Build df_cleaned from df without the 'Model_Number' and 'Spec Score' columns
columns_to_remove = ['Model_Number', 'Spec Score']
df_cleaned = df.drop(columns=columns_to_remove)

# Show what is left
df_cleaned.columns

Index(['Name', 'Brand', 'Model_Name', 'Series', 'Price', 'Price_Range',
       'Processor', 'Processor_Brand', 'Core Configuration', 'Clock-speed',
       'Operating System', 'OS Type', 'RAM Type', 'Graphics Processor',
       'Graphics_Brand', 'Display Size (Inches)', 'Resolution Width',
       'Resolution Height', 'PPI', 'Aspect Ratio', 'Capacity', 'Weight(kg)',
       'Touchscreen', 'Screen_Protection', 'Colour(s)', 'Outlier_Flag',
       'Original_Brand'],
      dtype='object')

In [18]:
# Count missing values per column
df_cleaned.isnull().sum()

Name                     0
Brand                    0
Model_Name               0
Series                   0
Price                    0
Price_Range              0
Processor                0
Processor_Brand          0
Core Configuration       0
Clock-speed              0
Operating System         0
OS Type                  0
RAM Type                 0
Graphics Processor       0
Graphics_Brand           0
Display Size (Inches)    0
Resolution Width         0
Resolution Height        0
PPI                      0
Aspect Ratio             0
Capacity                 0
Weight(kg)               0
Touchscreen              0
Screen_Protection        0
Colour(s)                0
Outlier_Flag             0
Original_Brand           0
dtype: int64

In [20]:
# List the columns of df_cleaned
df_cleaned.columns

Index(['Name', 'Brand', 'Model_Name', 'Series', 'Price', 'Price_Range',
       'Processor', 'Processor_Brand', 'Core Configuration', 'Clock-speed',
       'Operating System', 'OS Type', 'RAM Type', 'Graphics Processor',
       'Graphics_Brand', 'Display Size (Inches)', 'Resolution Width',
       'Resolution Height', 'PPI', 'Aspect Ratio', 'Capacity', 'Weight(kg)',
       'Touchscreen', 'Screen_Protection', 'Colour(s)', 'Outlier_Flag',
       'Original_Brand'],
      dtype='object')

In [28]:
# List the distinct values of the 'Capacity' column
df_cleaned['Capacity'].unique()

array(['16 GB', '8 GB', '4 GB', '2 GB', '32 GB', '64 GB', '18 GB',
       '36 GB', '12 GB', '48 GB'], dtype=object)

In [29]:
# Convert 'Capacity' strings such as '16 GB' into the integer column
# 'Ram_Capacity(GB)' and drop the text column.
# The guard keeps the cell re-runnable: after the first execution 'Capacity' no
# longer exists, and an unguarded second run would fail with KeyError.
if 'Capacity' in df_cleaned.columns:
    ram_capacity_gb = (
        df_cleaned['Capacity']
        .str.replace(' GB', '', regex=False)  # literal suffix, not a pattern
        .astype(int)
    )
    df_cleaned = (
        df_cleaned
        .drop(columns=['Capacity'])
        .assign(**{'Ram_Capacity(GB)': ram_capacity_gb})
    )

# Confirm the changes
df_cleaned[['Ram_Capacity(GB)']].head()


Unnamed: 0,Ram_Capacity(GB)
0,16
1,8
2,8
3,16
4,8


In [31]:
# Move 'Ram_Capacity(GB)' to the slot right after 'RAM Type'
columns = list(df_cleaned.columns)
ram_index = columns.index('RAM Type') + 1
columns.remove('Ram_Capacity(GB)')
columns.insert(ram_index, 'Ram_Capacity(GB)')
df_cleaned = df_cleaned[columns]

# Show the first rows with the new column order
df_cleaned.head()


Unnamed: 0,Name,Brand,Model_Name,Series,Price,Price_Range,Processor,Processor_Brand,Core Configuration,Clock-speed,Operating System,OS Type,RAM Type,Ram_Capacity(GB),Graphics Processor,Graphics_Brand,Display Size (Inches),Resolution Width,Resolution Height,PPI,Aspect Ratio,Weight(kg),Touchscreen,Screen_Protection,Colour(s),Outlier_Flag,Original_Brand
0,Acer One 14 Z8-415 (UN.599SI.020) Laptop (Core...,Acer,one 14 z8415,one,34990.0,Medium-Low,Intel Core i5-1155G7 (11th Gen),Intel,Quad Core,2.5,Windows 11 Home Basic,Windows,DDR4,16,Intel Iris Xe,Intel,14.0,1920,1080,157.35,1.78,1.49,No,Yes,Silver,0,Acer
1,HP 15-fc0026AU (7L030PA) Laptop (AMD Quad Core...,HP,15fc0026au,15fc0026au,36990.0,Medium-Low,AMD Quad Core Ryzen 3 - 7320U,AMD,Quad Core,2.4,Windows 11 Home Basic,Windows,LPDDR5,8,AMD Radeon,AMD,15.6,1920,1080,141.21,1.78,1.75,No,Yes,Silver,0,HP
2,HP 15s-fq5007TU (67V50PA) Laptop (Core i3 12th...,HP,15sfq5007tu,15sfq5007tu,37166.0,Medium-Low,Intel Core i3-1215U (12th Gen),Intel,Hexa Core,4.2,Windows 11 Home Basic,Windows,DDR4,8,Intel UHD,Intel,15.6,1920,1080,141.21,1.78,1.69,No,Yes,Silver,0,HP
3,Acer Predator Helios Neo 16 PHN16-71 (NH.QLTSI...,Acer,predator helios neo 16 phn1671,predator,112980.0,Medium-High,Intel Core i7-13700HX (13th Gen),Intel,Octa Core,2.1,Windows 11 Home Basic,Windows,DDR5,16,NVIDIA GeForce RTX 4050,NVIDIA,16.0,1920,1200,141.51,1.6,2.6,No,No,Black,0,Acer
4,Asus TUF Gaming F17 FX706HF-HX018W Laptop (Cor...,Asus,tuf gaming f17 fx706hf,tuf,57990.0,Medium-Low,Intel Core i5-11400H (11th Gen),Intel,Hexa Core,2.7,Windows 11 Home Basic,Windows,DDR4,8,NVIDIA GeForce RTX 2050,NVIDIA,17.3,1920,1080,127.34,1.78,2.6,No,Yes,Black,0,Asus


In [32]:
# List the columns of df_cleaned
df_cleaned.columns

Index(['Name', 'Brand', 'Model_Name', 'Series', 'Price', 'Price_Range',
       'Processor', 'Processor_Brand', 'Core Configuration', 'Clock-speed',
       'Operating System', 'OS Type', 'RAM Type', 'Ram_Capacity(GB)',
       'Graphics Processor', 'Graphics_Brand', 'Display Size (Inches)',
       'Resolution Width', 'Resolution Height', 'PPI', 'Aspect Ratio',
       'Weight(kg)', 'Touchscreen', 'Screen_Protection', 'Colour(s)',
       'Outlier_Flag', 'Original_Brand'],
      dtype='object')

In [36]:
# Drop the free-text columns 'Name', 'Operating System' and 'Graphics Processor'
columns_to_remove = ['Name', 'Operating System', 'Graphics Processor']
df_cleaned = df_cleaned.drop(columns=columns_to_remove)

# Show what is left
df_cleaned.columns


Index(['Brand', 'Model_Name', 'Series', 'Price', 'Price_Range', 'Processor',
       'Processor_Brand', 'Core Configuration', 'Clock-speed', 'OS Type',
       'RAM Type', 'Ram_Capacity(GB)', 'Graphics_Brand',
       'Display Size (Inches)', 'Resolution Width', 'Resolution Height', 'PPI',
       'Aspect Ratio', 'Weight(kg)', 'Touchscreen', 'Screen_Protection',
       'Colour(s)', 'Outlier_Flag', 'Original_Brand'],
      dtype='object')

In [37]:
# Define the new price range names (old label -> new label)
price_range_mapping = {
    'Low': 'Budget',
    'Medium-Low': 'Economy',
    'Medium': 'Mid-Range',
    'Medium-High': 'Premium',
    'High': 'Luxury'
}

# Update the 'Price_Range' column with new names.
# Series.replace leaves values that are not keys of the mapping untouched,
# whereas Series.map turns them into NaN. With map, re-running this cell (when
# the column already holds the new names) would silently blank the whole
# column; with replace a re-run is a no-op.
df_cleaned['Price_Range'] = df_cleaned['Price_Range'].replace(price_range_mapping)

# Confirm the changes
price_range_distribution_updated = df_cleaned['Price_Range'].value_counts()

price_range_distribution_updated


Price_Range
Economy      1026
Mid-Range     750
Luxury        440
Premium       292
Budget        161
Name: count, dtype: int64

In [38]:
# List the columns of df_cleaned
df_cleaned.columns

Index(['Brand', 'Model_Name', 'Series', 'Price', 'Price_Range', 'Processor',
       'Processor_Brand', 'Core Configuration', 'Clock-speed', 'OS Type',
       'RAM Type', 'Ram_Capacity(GB)', 'Graphics_Brand',
       'Display Size (Inches)', 'Resolution Width', 'Resolution Height', 'PPI',
       'Aspect Ratio', 'Weight(kg)', 'Touchscreen', 'Screen_Protection',
       'Colour(s)', 'Outlier_Flag', 'Original_Brand'],
      dtype='object')

In [43]:
# Collect the rows whose 'Clock-speed' is below 2, keeping a few identifying columns
is_low_clock = df_cleaned['Clock-speed'] < 2
subset_columns = ['Brand', 'Model_Name', 'Price', 'Processor', 'Clock-speed']
low_clock_speed_df = df_cleaned[is_low_clock][subset_columns]

# (rows, columns) of the subset
low_clock_speed_df.shape

(435, 5)

Let's create the Utility column based on the criteria discussed earlier. I'll add this new column to the dataframe and place it after the Price_Range column. The criteria will be as follows:


Gaming: High-end processors (i7, Ryzen 7, etc.), high clock speeds, high core count, dedicated graphics cards (NVIDIA, AMD), high RAM capacity (16GB or more), higher price range.

Business: Mid to high-end processors, moderate to high clock speeds, adequate core count (quad-core or higher), integrated or mid-range graphics, moderate to high RAM capacity (8GB or more), higher price range.

Personal: Mid to low-end processors, lower clock speeds, lower core count, integrated graphics, moderate RAM capacity (4GB or more), lower price range.

Ultrabook: Lightweight, high portability, moderate to high performance, often used for business or personal use on the go.

Workstation: High-end processors, very high RAM capacity (32GB or more), professional graphics cards (NVIDIA Quadro, AMD FirePro), used for specialized tasks like video editing, CAD, etc.

In [47]:
# Rule-based labelling of what a laptop is meant to be used for
def determine_utility(row):
    """Return the utility label for one laptop row.

    The rules are tried from top to bottom and the first one that matches
    decides the label; a row that matches none of them is 'General Use'.

    Parameters
    ----------
    row : mapping-like (e.g. one DataFrame row)
        Must provide 'Processor', 'Clock-speed', 'Ram_Capacity(GB)',
        'Price_Range', 'Weight(kg)' and 'Graphics_Brand'.
        'Core Configuration' is looked up only when the other Business
        conditions already hold.

    Returns
    -------
    str
        'Gaming', 'Business', 'Personal', 'Ultrabook', 'Workstation' or
        'General Use'.
    """
    cpu_name = row['Processor']
    clock = row['Clock-speed']
    ram_gb = row['Ram_Capacity(GB)']
    price_tier = row['Price_Range']
    weight_kg = row['Weight(kg)']
    gpu_brand = row['Graphics_Brand']

    # Gaming: high-end CPU family, fast clock, plenty of RAM, NVIDIA/AMD graphics
    high_end_families = ('i7', 'Ryzen 7', 'i9', 'Ryzen 9')
    if (any(family in cpu_name for family in high_end_families)
            and clock >= 3.0
            and ram_gb >= 16
            and gpu_brand in ('NVIDIA', 'AMD')):
        return 'Gaming'

    # Business: upper price tiers with a quad-core configuration.
    # NOTE(review): only the literal text 'Quad Core' qualifies, while the
    # notebook's written criteria say "quad-core or higher" - confirm whether
    # larger core counts should match as well.
    if (clock >= 2.0
            and ram_gb >= 8
            and price_tier in ('Premium', 'Luxury')
            and 'Quad Core' in row['Core Configuration']):
        return 'Business'

    # Personal: slow clock, little RAM, lower price tiers
    if clock < 2.0 and ram_gb < 8 and price_tier in ('Budget', 'Economy'):
        return 'Personal'

    # Ultrabook: light machines in the middle price tiers
    if weight_kg < 1.5 and price_tier in ('Mid-Range', 'Premium') and ram_gb >= 8:
        return 'Ultrabook'

    # Workstation: very large RAM with a professional graphics brand.
    # NOTE(review): the rows displayed in this notebook show Graphics_Brand
    # values like 'NVIDIA', 'AMD' and 'Intel'; if 'NVIDIA Quadro' / 'AMD FirePro'
    # never occur in that column this rule cannot fire - verify against the data.
    if ram_gb >= 32 and gpu_brand in ('NVIDIA Quadro', 'AMD FirePro'):
        return 'Workstation'

    return 'General Use'

# Label every laptop by running determine_utility on each row
df_cleaned['Utility'] = df_cleaned.apply(determine_utility, axis=1)

# Move 'Utility' to the slot right after 'Price_Range'
columns = list(df_cleaned.columns)
price_range_index = columns.index('Price_Range') + 1
columns.remove('Utility')
columns.insert(price_range_index, 'Utility')
df_cleaned = df_cleaned[columns]

df_cleaned.head()


Unnamed: 0,Brand,Model_Name,Series,Price,Price_Range,Utility,Processor,Processor_Brand,Core Configuration,Clock-speed,OS Type,RAM Type,Ram_Capacity(GB),Graphics_Brand,Display Size (Inches),Resolution Width,Resolution Height,PPI,Aspect Ratio,Weight(kg),Touchscreen,Screen_Protection,Colour(s),Outlier_Flag,Original_Brand
0,Acer,one 14 z8415,one,34990.0,Economy,General Use,Intel Core i5-1155G7 (11th Gen),Intel,Quad Core,2.5,Windows,DDR4,16,Intel,14.0,1920,1080,157.35,1.78,1.49,No,Yes,Silver,0,Acer
1,HP,15fc0026au,15fc0026au,36990.0,Economy,General Use,AMD Quad Core Ryzen 3 - 7320U,AMD,Quad Core,2.4,Windows,LPDDR5,8,AMD,15.6,1920,1080,141.21,1.78,1.75,No,Yes,Silver,0,HP
2,HP,15sfq5007tu,15sfq5007tu,37166.0,Economy,General Use,Intel Core i3-1215U (12th Gen),Intel,Hexa Core,4.2,Windows,DDR4,8,Intel,15.6,1920,1080,141.21,1.78,1.69,No,Yes,Silver,0,HP
3,Acer,predator helios neo 16 phn1671,predator,112980.0,Premium,General Use,Intel Core i7-13700HX (13th Gen),Intel,Octa Core,2.1,Windows,DDR5,16,NVIDIA,16.0,1920,1200,141.51,1.6,2.6,No,No,Black,0,Acer
4,Asus,tuf gaming f17 fx706hf,tuf,57990.0,Economy,General Use,Intel Core i5-11400H (11th Gen),Intel,Hexa Core,2.7,Windows,DDR4,8,NVIDIA,17.3,1920,1080,127.34,1.78,2.6,No,Yes,Black,0,Asus


In [48]:
# Relabel the fallback category: 'General Use' becomes 'Everyday Use'
utility_labels = df_cleaned['Utility']
df_cleaned['Utility'] = utility_labels.replace({'General Use': 'Everyday Use'})

# Show how many laptops fall into each category
df_cleaned['Utility'].value_counts()


Utility
Everyday Use    1955
Gaming           373
Ultrabook        240
Personal          67
Business          34
Name: count, dtype: int64

In [49]:
# Define a function to calculate the performance score
def calculate_performance_score(row):
    """Compute a simple additive performance score for one laptop row.

    The score is the sum of three components:

    * processor tier - 4 for i9 / Ryzen 9, 3 for i7 / Ryzen 7, 2 for
      i5 / Ryzen 5, 1 for i3 / Ryzen 3, and 0 when none of these family names
      occurs in the processor text;
    * ``row['Clock-speed'] / 2.0``;
    * ``row['Ram_Capacity(GB)'] / 4.0``.

    Parameters
    ----------
    row : mapping-like (e.g. one DataFrame row)
        Must provide 'Processor' (str), 'Clock-speed' and 'Ram_Capacity(GB)'
        (numbers).

    Returns
    -------
    float
        The unrounded score.
    """
    processor = row['Processor']

    # Checked from the highest tier down; the first family found wins.
    # i9 / Ryzen 9 are included so that these processors, which
    # determine_utility treats as high-end, no longer fall through to 0 and
    # score below an i3.
    # NOTE(review): other families (anything without one of these substrings)
    # still score 0 - confirm that is acceptable for the data set.
    processor_tiers = (
        (4, ('i9', 'Ryzen 9')),
        (3, ('i7', 'Ryzen 7')),
        (2, ('i5', 'Ryzen 5')),
        (1, ('i3', 'Ryzen 3')),
    )
    processor_score = 0
    for tier_score, family_names in processor_tiers:
        if any(family in processor for family in family_names):
            processor_score = tier_score
            break

    clock_speed_score = row['Clock-speed'] / 2.0  # Scale clock speed with a divisor of 2
    ram_score = row['Ram_Capacity(GB)'] / 4.0  # Scale RAM capacity with a divisor of 4

    return processor_score + clock_speed_score + ram_score

# Score every laptop by running calculate_performance_score on each row
df_cleaned['Spec_Score'] = df_cleaned.apply(calculate_performance_score, axis=1)

# Move 'Spec_Score' to the slot right after 'Brand'
columns = list(df_cleaned.columns)
brand_index = columns.index('Brand') + 1
columns.remove('Spec_Score')
columns.insert(brand_index, 'Spec_Score')
df_cleaned = df_cleaned[columns]
df_cleaned.head()


Unnamed: 0,Brand,Spec_Score,Model_Name,Series,Price,Price_Range,Utility,Processor,Processor_Brand,Core Configuration,Clock-speed,OS Type,RAM Type,Ram_Capacity(GB),Graphics_Brand,Display Size (Inches),Resolution Width,Resolution Height,PPI,Aspect Ratio,Weight(kg),Touchscreen,Screen_Protection,Colour(s),Outlier_Flag,Original_Brand
0,Acer,7.25,one 14 z8415,one,34990.0,Economy,Everyday Use,Intel Core i5-1155G7 (11th Gen),Intel,Quad Core,2.5,Windows,DDR4,16,Intel,14.0,1920,1080,157.35,1.78,1.49,No,Yes,Silver,0,Acer
1,HP,4.2,15fc0026au,15fc0026au,36990.0,Economy,Everyday Use,AMD Quad Core Ryzen 3 - 7320U,AMD,Quad Core,2.4,Windows,LPDDR5,8,AMD,15.6,1920,1080,141.21,1.78,1.75,No,Yes,Silver,0,HP
2,HP,5.1,15sfq5007tu,15sfq5007tu,37166.0,Economy,Everyday Use,Intel Core i3-1215U (12th Gen),Intel,Hexa Core,4.2,Windows,DDR4,8,Intel,15.6,1920,1080,141.21,1.78,1.69,No,Yes,Silver,0,HP
3,Acer,8.05,predator helios neo 16 phn1671,predator,112980.0,Premium,Everyday Use,Intel Core i7-13700HX (13th Gen),Intel,Octa Core,2.1,Windows,DDR5,16,NVIDIA,16.0,1920,1200,141.51,1.6,2.6,No,No,Black,0,Acer
4,Asus,5.35,tuf gaming f17 fx706hf,tuf,57990.0,Economy,Everyday Use,Intel Core i5-11400H (11th Gen),Intel,Hexa Core,2.7,Windows,DDR4,8,NVIDIA,17.3,1920,1080,127.34,1.78,2.6,No,Yes,Black,0,Asus


In [51]:
# Keep a single decimal place in 'Spec_Score'
spec_scores = df_cleaned['Spec_Score']
df_cleaned['Spec_Score'] = spec_scores.round(decimals=1)

df_cleaned.head()


Unnamed: 0,Brand,Spec_Score,Model_Name,Series,Price,Price_Range,Utility,Processor,Processor_Brand,Core Configuration,Clock-speed,OS Type,RAM Type,Ram_Capacity(GB),Graphics_Brand,Display Size (Inches),Resolution Width,Resolution Height,PPI,Aspect Ratio,Weight(kg),Touchscreen,Screen_Protection,Colour(s),Outlier_Flag,Original_Brand
0,Acer,7.2,one 14 z8415,one,34990.0,Economy,Everyday Use,Intel Core i5-1155G7 (11th Gen),Intel,Quad Core,2.5,Windows,DDR4,16,Intel,14.0,1920,1080,157.35,1.78,1.49,No,Yes,Silver,0,Acer
1,HP,4.2,15fc0026au,15fc0026au,36990.0,Economy,Everyday Use,AMD Quad Core Ryzen 3 - 7320U,AMD,Quad Core,2.4,Windows,LPDDR5,8,AMD,15.6,1920,1080,141.21,1.78,1.75,No,Yes,Silver,0,HP
2,HP,5.1,15sfq5007tu,15sfq5007tu,37166.0,Economy,Everyday Use,Intel Core i3-1215U (12th Gen),Intel,Hexa Core,4.2,Windows,DDR4,8,Intel,15.6,1920,1080,141.21,1.78,1.69,No,Yes,Silver,0,HP
3,Acer,8.0,predator helios neo 16 phn1671,predator,112980.0,Premium,Everyday Use,Intel Core i7-13700HX (13th Gen),Intel,Octa Core,2.1,Windows,DDR5,16,NVIDIA,16.0,1920,1200,141.51,1.6,2.6,No,No,Black,0,Acer
4,Asus,5.4,tuf gaming f17 fx706hf,tuf,57990.0,Economy,Everyday Use,Intel Core i5-11400H (11th Gen),Intel,Hexa Core,2.7,Windows,DDR4,8,NVIDIA,17.3,1920,1080,127.34,1.78,2.6,No,Yes,Black,0,Asus


In [53]:
# The free-text 'Processor' column is no longer kept in df_cleaned
columns_to_remove = ['Processor']
df_cleaned = df_cleaned.drop(columns=columns_to_remove)

In [54]:
# List the columns of df_cleaned
df_cleaned.columns

Index(['Brand', 'Spec_Score', 'Model_Name', 'Series', 'Price', 'Price_Range',
       'Utility', 'Processor_Brand', 'Core Configuration', 'Clock-speed',
       'OS Type', 'RAM Type', 'Ram_Capacity(GB)', 'Graphics_Brand',
       'Display Size (Inches)', 'Resolution Width', 'Resolution Height', 'PPI',
       'Aspect Ratio', 'Weight(kg)', 'Touchscreen', 'Screen_Protection',
       'Colour(s)', 'Outlier_Flag', 'Original_Brand'],
      dtype='object')