In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

In [2]:
data_path = Path.home() / "datasets" / "tabular_practice"

laptops = pd.read_csv(data_path / "laptops.csv", encoding="Latin-1")

In [3]:
laptops.head()

Unnamed: 0,Manufacturer,Model Name,Category,Screen Size,Screen,CPU,RAM,Storage,GPU,Operating System,Operating System Version,Weight,Price (Euros)
0,Apple,MacBook Pro,Ultrabook,"13.3""",IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,,1.37kg,133969
1,Apple,Macbook Air,Ultrabook,"13.3""",1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,,1.34kg,89894
2,HP,250 G6,Notebook,"15.6""",Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,,1.86kg,57500
3,Apple,MacBook Pro,Ultrabook,"15.4""",IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,,1.83kg,253745
4,Apple,MacBook Pro,Ultrabook,"13.3""",IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,,1.37kg,180360


In [4]:
laptops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 13 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Manufacturer              1303 non-null   object
 1   Model Name                1303 non-null   object
 2   Category                  1303 non-null   object
 3   Screen Size               1303 non-null   object
 4   Screen                    1303 non-null   object
 5   CPU                       1303 non-null   object
 6   RAM                       1303 non-null   object
 7    Storage                  1303 non-null   object
 8   GPU                       1303 non-null   object
 9   Operating System          1303 non-null   object
 10  Operating System Version  1133 non-null   object
 11  Weight                    1303 non-null   object
 12  Price (Euros)             1303 non-null   object
dtypes: object(13)
memory usage: 132.5+ KB


First, we change the column names to values we can work with more easily:

* Strip leading/trailing whitespace
* Remove "(", ")"
* "Operating System" -> "os"
* All lower
* " " -> "_"

In [5]:
def sanitize(name):
    name = name.strip().lower()
    replaces = [
        ("(", ""),
        (")", ""),
        ("operating system", "os"),
        (" ", "_"),
    ]
    for fr, to in replaces:
        name = name.replace(fr, to)
    return name

new_columns = [sanitize(name) for name in laptops.columns]
laptops.columns = new_columns
laptops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   manufacturer  1303 non-null   object
 1   model_name    1303 non-null   object
 2   category      1303 non-null   object
 3   screen_size   1303 non-null   object
 4   screen        1303 non-null   object
 5   cpu           1303 non-null   object
 6   ram           1303 non-null   object
 7   storage       1303 non-null   object
 8   gpu           1303 non-null   object
 9   os            1303 non-null   object
 10  os_version    1133 non-null   object
 11  weight        1303 non-null   object
 12  price_euros   1303 non-null   object
dtypes: object(13)
memory usage: 132.5+ KB


Next, we convert columns to numeric types:

* "screen_size" -> "screen_size_inches"
* "ram" -> "ram_gb"
* "weight" -> "weight_kg"
* "price_euros": "," becomes "."

In [6]:
# Check whether all "screen_size" values end on '"'
laptops["screen_size"].str.split('"').str[-1].value_counts()

screen_size
    1303
Name: count, dtype: int64

In [7]:
laptops["screen_size"] = laptops["screen_size"].str.rstrip('"').astype(float)
laptops.rename({"screen_size": "screen_sizes_inches"}, axis=1, inplace=True)
laptops.head()

Unnamed: 0,manufacturer,model_name,category,screen_sizes_inches,screen,cpu,ram,storage,gpu,os,os_version,weight,price_euros
0,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,,1.37kg,133969
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,,1.34kg,89894
2,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,,1.86kg,57500
3,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,,1.83kg,253745
4,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,,1.37kg,180360


In [8]:
laptops["ram"].unique()

array(['8GB', '16GB', '4GB', '2GB', '12GB', '6GB', '32GB', '24GB', '64GB'],
      dtype=object)

In [9]:
laptops["ram"] = laptops["ram"].str.replace("GB", "").astype(int)
laptops.rename({"ram": "ram_gb"}, axis=1, inplace=True)
laptops.head()

Unnamed: 0,manufacturer,model_name,category,screen_sizes_inches,screen,cpu,ram_gb,storage,gpu,os,os_version,weight,price_euros
0,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,,1.37kg,133969
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,,1.34kg,89894
2,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8,256GB SSD,Intel HD Graphics 620,No OS,,1.86kg,57500
3,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16,512GB SSD,AMD Radeon Pro 455,macOS,,1.83kg,253745
4,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,,1.37kg,180360


In [10]:
# Check whether all "weight" values end on 'kg'
laptops["weight"].str.split("k").str[-1].value_counts()

weight
g     1302
gs       1
Name: count, dtype: int64

In [11]:
# In fact, "weight" values end on "kg" or "kgs"
laptops["weight"] = laptops["weight"].str.split("k").str[0].astype(float)
laptops.rename({"weight": "weight_kg"}, axis=1, inplace=True)
laptops.head()

Unnamed: 0,manufacturer,model_name,category,screen_sizes_inches,screen,cpu,ram_gb,storage,gpu,os,os_version,weight_kg,price_euros
0,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,,1.37,133969
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,,1.34,89894
2,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8,256GB SSD,Intel HD Graphics 620,No OS,,1.86,57500
3,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16,512GB SSD,AMD Radeon Pro 455,macOS,,1.83,253745
4,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,,1.37,180360


In [12]:
laptops["price_euros"] = laptops["price_euros"].str.replace(",", ".").astype(float)
laptops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 13 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   manufacturer         1303 non-null   object 
 1   model_name           1303 non-null   object 
 2   category             1303 non-null   object 
 3   screen_sizes_inches  1303 non-null   float64
 4   screen               1303 non-null   object 
 5   cpu                  1303 non-null   object 
 6   ram_gb               1303 non-null   int64  
 7   storage              1303 non-null   object 
 8   gpu                  1303 non-null   object 
 9   os                   1303 non-null   object 
 10  os_version           1133 non-null   object 
 11  weight_kg            1303 non-null   float64
 12  price_euros          1303 non-null   float64
dtypes: float64(3), int64(1), object(9)
memory usage: 132.5+ KB


We also enhance the table by "cpu_manufacturer" and "gpu_manufacturer".

In [13]:
laptops["gpu"].unique()

array(['Intel Iris Plus Graphics 640', 'Intel HD Graphics 6000',
       'Intel HD Graphics 620', 'AMD Radeon Pro 455',
       'Intel Iris Plus Graphics 650', 'AMD Radeon R5',
       'Intel Iris Pro Graphics', 'Nvidia GeForce MX150',
       'Intel UHD Graphics 620', 'Intel HD Graphics 520',
       'AMD Radeon Pro 555', 'AMD Radeon R5 M430',
       'Intel HD Graphics 615', 'AMD Radeon Pro 560',
       'Nvidia GeForce 940MX', 'Intel HD Graphics 400',
       'Nvidia GeForce GTX 1050', 'AMD Radeon R2', 'AMD Radeon 530',
       'Nvidia GeForce 930MX', 'Intel HD Graphics',
       'Intel HD Graphics 500', 'Nvidia GeForce 930MX ',
       'Nvidia GeForce GTX 1060', 'Nvidia GeForce 150MX',
       'Intel Iris Graphics 540', 'AMD Radeon RX 580',
       'Nvidia GeForce 920MX', 'AMD Radeon R4 Graphics', 'AMD Radeon 520',
       'Nvidia GeForce GTX 1070', 'Nvidia GeForce GTX 1050 Ti',
       'Nvidia GeForce MX130', 'AMD R4 Graphics',
       'Nvidia GeForce GTX 940MX', 'AMD Radeon RX 560',
       'Nvid

In [14]:
laptops["cpu"].unique()

array(['Intel Core i5 2.3GHz', 'Intel Core i5 1.8GHz',
       'Intel Core i5 7200U 2.5GHz', 'Intel Core i7 2.7GHz',
       'Intel Core i5 3.1GHz', 'AMD A9-Series 9420 3GHz',
       'Intel Core i7 2.2GHz', 'Intel Core i7 8550U 1.8GHz',
       'Intel Core i5 8250U 1.6GHz', 'Intel Core i3 6006U 2GHz',
       'Intel Core i7 2.8GHz', 'Intel Core M m3 1.2GHz',
       'Intel Core i7 7500U 2.7GHz', 'Intel Core i7 2.9GHz',
       'Intel Core i3 7100U 2.4GHz', 'Intel Atom x5-Z8350 1.44GHz',
       'Intel Core i5 7300HQ 2.5GHz', 'AMD E-Series E2-9000e 1.5GHz',
       'Intel Core i5 1.6GHz', 'Intel Core i7 8650U 1.9GHz',
       'Intel Atom x5-Z8300 1.44GHz', 'AMD E-Series E2-6110 1.5GHz',
       'AMD A6-Series 9220 2.5GHz',
       'Intel Celeron Dual Core N3350 1.1GHz',
       'Intel Core i3 7130U 2.7GHz', 'Intel Core i7 7700HQ 2.8GHz',
       'Intel Core i5 2.0GHz', 'AMD Ryzen 1700 3GHz',
       'Intel Pentium Quad Core N4200 1.1GHz',
       'Intel Atom x5-Z8550 1.44GHz',
       'Intel Celeron Du

In [15]:
for name in ["cpu", "gpu"]:
    laptops[name + "_manufacturer"] = laptops[name].str.split().str[0]
laptops.head()

Unnamed: 0,manufacturer,model_name,category,screen_sizes_inches,screen,cpu,ram_gb,storage,gpu,os,os_version,weight_kg,price_euros,cpu_manufacturer,gpu_manufacturer
0,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,,1.37,1339.69,Intel,Intel
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,,1.34,898.94,Intel,Intel
2,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8,256GB SSD,Intel HD Graphics 620,No OS,,1.86,575.0,Intel,Intel
3,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16,512GB SSD,AMD Radeon Pro 455,macOS,,1.83,2537.45,Intel,AMD
4,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,,1.37,1803.6,Intel,Intel


In [16]:
laptops["cpu_manufacturer"].value_counts()

cpu_manufacturer
Intel      1240
AMD          62
Samsung       1
Name: count, dtype: int64

In [17]:
laptops["gpu_manufacturer"].value_counts()

gpu_manufacturer
Intel     722
Nvidia    400
AMD       180
ARM         1
Name: count, dtype: int64

In general, we want to check every column which looks categorical on whether it contains differently spelled values.

In [18]:
laptops["category"].unique()

array(['Ultrabook', 'Notebook', 'Netbook', 'Gaming', '2 in 1 Convertible',
       'Workstation'], dtype=object)

In [19]:
laptops["manufacturer"].unique()

array(['Apple', 'HP', 'Acer', 'Asus', 'Dell', 'Lenovo', 'Chuwi', 'MSI',
       'Microsoft', 'Toshiba', 'Huawei', 'Xiaomi', 'Vero', 'Razer',
       'Mediacom', 'Samsung', 'Google', 'Fujitsu', 'LG'], dtype=object)

In [20]:
laptops["os"].unique()

array(['macOS', 'No OS', 'Windows', 'Mac OS', 'Linux', 'Android',
       'Chrome OS'], dtype=object)

In the "os" column, we need to map "Mac OS" to "macOS".

**Note**: If several values have to be changed, we could use `map`.

In [22]:
# Map "Mac OS" to "macOS"
laptops.loc[laptops["os"] == "Mac OS", "os"] = "macOS"
laptops["os"].unique()

array(['macOS', 'No OS', 'Windows', 'Linux', 'Android', 'Chrome OS'],
      dtype=object)

Finally, we want to look at missing values. The fewer missing values, the better.

In [23]:
laptops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   manufacturer         1303 non-null   object 
 1   model_name           1303 non-null   object 
 2   category             1303 non-null   object 
 3   screen_sizes_inches  1303 non-null   float64
 4   screen               1303 non-null   object 
 5   cpu                  1303 non-null   object 
 6   ram_gb               1303 non-null   int64  
 7   storage              1303 non-null   object 
 8   gpu                  1303 non-null   object 
 9   os                   1303 non-null   object 
 10  os_version           1133 non-null   object 
 11  weight_kg            1303 non-null   float64
 12  price_euros          1303 non-null   float64
 13  cpu_manufacturer     1303 non-null   object 
 14  gpu_manufacturer     1303 non-null   object 
dtypes: float64(3), int64(1), object(11)
me

The "os_version" column has 170 missing values, all other columns are complete. Let us check which OSes have missing values.

In [24]:
laptops.loc[laptops["os_version"].isnull(), "os"].value_counts()

os
No OS        66
Linux        62
Chrome OS    27
macOS        13
Android       2
Name: count, dtype: int64

In [26]:
# If "os" is "No OS", the version should also be "No OS"
laptops.loc[laptops["os"] == "No OS", "os_version"] = "No OS"
laptops.loc[laptops["os_version"].isnull(), "os"].value_counts()

os
Linux        62
Chrome OS    27
macOS        13
Android       2
Name: count, dtype: int64

In [27]:
# What about "macOS"?
laptops.loc[laptops["os"] == "macOS", "os_version"].value_counts(dropna=False)

os_version
NaN    13
X       8
Name: count, dtype: int64

In [28]:
# If "os" is "macOS", the version should be "X"
laptops.loc[laptops["os"] == "macOS", "os_version"] = "X"
laptops.loc[laptops["os_version"].isnull(), "os"].value_counts()

os
Linux        62
Chrome OS    27
Android       2
Name: count, dtype: int64

In [29]:
# What about "Linux"?
laptops.loc[laptops["os"] == "Linux", "os_version"].value_counts(dropna=False)

os_version
NaN    62
Name: count, dtype: int64

Let us extract the screen resolution as well

In [30]:
laptops["screen"].unique()

array(['IPS Panel Retina Display 2560x1600', '1440x900',
       'Full HD 1920x1080', 'IPS Panel Retina Display 2880x1800',
       '1366x768', 'IPS Panel Full HD 1920x1080',
       'IPS Panel Retina Display 2304x1440',
       'IPS Panel Full HD / Touchscreen 1920x1080',
       'Full HD / Touchscreen 1920x1080',
       'Touchscreen / Quad HD+ 3200x1800',
       'IPS Panel Touchscreen 1920x1200', 'Touchscreen 2256x1504',
       'Quad HD+ / Touchscreen 3200x1800', 'IPS Panel 1366x768',
       'IPS Panel 4K Ultra HD / Touchscreen 3840x2160',
       'IPS Panel Full HD 2160x1440',
       '4K Ultra HD / Touchscreen 3840x2160', 'Touchscreen 2560x1440',
       '1600x900', 'IPS Panel 4K Ultra HD 3840x2160',
       '4K Ultra HD 3840x2160', 'Touchscreen 1366x768',
       'IPS Panel Full HD 1366x768', 'IPS Panel 2560x1440',
       'IPS Panel Full HD 2560x1440',
       'IPS Panel Retina Display 2736x1824', 'Touchscreen 2400x1600',
       '2560x1440', 'IPS Panel Quad HD+ 2560x1440',
       'IPS Panel 

In [32]:
laptops["screen"].str.split().str[-1].value_counts(dropna=False)

screen
1920x1080    841
1366x768     308
3840x2160     43
3200x1800     27
2560x1440     23
1600x900      23
2560x1600      6
2304x1440      6
2256x1504      6
1920x1200      5
1440x900       4
2880x1800      4
2400x1600      4
2160x1440      2
2736x1824      1
Name: count, dtype: int64

In [33]:
laptops["screen_resolution"] = laptops["screen"].str.split().str[-1]
laptops.head()

Unnamed: 0,manufacturer,model_name,category,screen_sizes_inches,screen,cpu,ram_gb,storage,gpu,os,os_version,weight_kg,price_euros,cpu_manufacturer,gpu_manufacturer,screen_resolution
0,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,X,1.37,1339.69,Intel,Intel,2560x1600
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,X,1.34,898.94,Intel,Intel,1440x900
2,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8,256GB SSD,Intel HD Graphics 620,No OS,No OS,1.86,575.0,Intel,Intel,1920x1080
3,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16,512GB SSD,AMD Radeon Pro 455,macOS,X,1.83,2537.45,Intel,AMD,2880x1800
4,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,X,1.37,1803.6,Intel,Intel,2560x1600


Finally, let us extract the CPU speed from "cpu" and the storage size from "storage". This would be more elegant with a regular expression.

In [34]:
laptops["cpu"].str.split().str[-1].value_counts(dropna=False)

cpu
2.5GHz     290
2.7GHz     165
2.8GHz     165
1.6GHz     133
2.3GHz      86
1.8GHz      78
2.6GHz      76
2GHz        67
1.1GHz      53
2.4GHz      52
2.9GHz      21
3GHz        19
2.0GHz      19
1.2GHz      15
1.44GHz     12
2.2GHz      11
1.5GHz      10
1.3GHz       6
3.6GHz       5
0.9GHz       4
3.1GHz       3
2.1GHz       3
2.50GHz      3
1.9GHz       2
1.60GHz      1
3.2GHz       1
1.0GHz       1
1.92GHz      1
2.70GHz      1
Name: count, dtype: int64

In [35]:
laptops["cpu_speed_ghz"] = laptops["cpu"].str.split().str[-1].str.replace("GHz", "").astype(float)
laptops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 17 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   manufacturer         1303 non-null   object 
 1   model_name           1303 non-null   object 
 2   category             1303 non-null   object 
 3   screen_sizes_inches  1303 non-null   float64
 4   screen               1303 non-null   object 
 5   cpu                  1303 non-null   object 
 6   ram_gb               1303 non-null   int64  
 7   storage              1303 non-null   object 
 8   gpu                  1303 non-null   object 
 9   os                   1303 non-null   object 
 10  os_version           1212 non-null   object 
 11  weight_kg            1303 non-null   float64
 12  price_euros          1303 non-null   float64
 13  cpu_manufacturer     1303 non-null   object 
 14  gpu_manufacturer     1303 non-null   object 
 15  screen_resolution    1303 non-null   o

In [36]:
laptops["storage"].str.split().str[0].value_counts(dropna=False)

storage
256GB    508
1TB      250
128GB    177
512GB    140
500GB    132
32GB      45
64GB      17
2TB       16
16GB      10
1GB        5
240GB      1
8GB        1
508GB      1
Name: count, dtype: int64

In [37]:
laptops["storage_size_gb"] = laptops["storage"].str.split().str[0].str.replace("1TB", "1024GB").str.replace("2TB", "2048GB").str.replace("GB", "").astype(int)
laptops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 18 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   manufacturer         1303 non-null   object 
 1   model_name           1303 non-null   object 
 2   category             1303 non-null   object 
 3   screen_sizes_inches  1303 non-null   float64
 4   screen               1303 non-null   object 
 5   cpu                  1303 non-null   object 
 6   ram_gb               1303 non-null   int64  
 7   storage              1303 non-null   object 
 8   gpu                  1303 non-null   object 
 9   os                   1303 non-null   object 
 10  os_version           1212 non-null   object 
 11  weight_kg            1303 non-null   float64
 12  price_euros          1303 non-null   float64
 13  cpu_manufacturer     1303 non-null   object 
 14  gpu_manufacturer     1303 non-null   object 
 15  screen_resolution    1303 non-null   o

In [39]:
laptops["storage_size_gb"].value_counts()

storage_size_gb
256     508
1024    250
128     177
512     140
500     132
32       45
64       17
2048     16
16       10
1         5
240       1
8         1
508       1
Name: count, dtype: int64

In [40]:
laptops.to_csv(data_path / "laptops_cleaned.csv", index=False)