In [20]:
import pandas as pd

In [21]:
# Loading the laptop price dataset into a DataFrame.
# 'laptop_price.csv' is a relative path, so it is resolved against the current working directory.
df_laptops = pd.read_csv('laptop_price.csv')

In [22]:
# Previewing the first 3 rows to check which columns were loaded
df_laptops.head(3)

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0


## 1. Creating a Condition Column from more than 2 choices using np.select()

In [23]:
import numpy as np

In [24]:
# Creating an array based on multiple price tiers (more than 2 choices)

# Creating the "conditions" and "values" variables.
# np.select() checks the conditions in order and, for each row, uses the value
# paired with the FIRST condition that is True, so conditions[i] goes with values[i].
conditions = [
    df_laptops['Price_euros'] > 3000,                                          # -> 'Too Expensive'
    (df_laptops['Price_euros'] > 2000) & (df_laptops['Price_euros'] <= 3000),  # -> 'Expensive'
    (df_laptops['Price_euros'] > 800) & (df_laptops['Price_euros'] <= 2000),   # -> 'Afforable'
    df_laptops['Price_euros'] <= 800                                           # -> 'Cheap'
]

# This list contains all the values that are going to be assigned,
# in the same order as the conditions created above.
# NOTE: 'Afforable' is a misspelling of 'Affordable'; the label is kept as-is
# because it is the value that ends up stored in the dataframe.
values = ['Too Expensive', 'Expensive', 'Afforable', 'Cheap']

# We're going to decide whether a laptop is too expensive, expensive, affordable or cheap based on the price:
#   1st condition: price > 3000 euros           -> too expensive
#   2nd condition: 2000 < price <= 3000 euros   -> expensive
#   3rd condition: 800 < price <= 2000 euros    -> affordable
#   4th condition: price <= 800 euros           -> cheap

In [25]:
# Storing the selected labels in a new 'Price_tiers' column.
# General form: np.select(<conditions>, <values I want to assign>, default=<fallback value>)
# Rows for which none of the conditions is True receive the `default` value ('Cheap' here).
df_laptops['Price_tiers'] = np.select(
    condlist=conditions,
    choicelist=values,
    default='Cheap',
)

In [26]:
# Showing the dataframe to check the new 'Price_tiers' column (last column)
df_laptops

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros,Price_tiers
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69,Afforable
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94,Afforable
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.00,Cheap
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45,Expensive
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.60,Afforable
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,1316,Lenovo,Yoga 500-14ISK,2 in 1 Convertible,14.0,IPS Panel Full HD / Touchscreen 1920x1080,Intel Core i7 6500U 2.5GHz,4GB,128GB SSD,Intel HD Graphics 520,Windows 10,1.8kg,638.00,Cheap
1299,1317,Lenovo,Yoga 900-13ISK,2 in 1 Convertible,13.3,IPS Panel Quad HD+ / Touchscreen 3200x1800,Intel Core i7 6500U 2.5GHz,16GB,512GB SSD,Intel HD Graphics 520,Windows 10,1.3kg,1499.00,Afforable
1300,1318,Lenovo,IdeaPad 100S-14IBR,Notebook,14.0,1366x768,Intel Celeron Dual Core N3050 1.6GHz,2GB,64GB Flash Storage,Intel HD Graphics,Windows 10,1.5kg,229.00,Cheap
1301,1319,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,Notebook,15.6,1366x768,Intel Core i7 6500U 2.5GHz,6GB,1TB HDD,AMD Radeon R5 M330,Windows 10,2.19kg,764.00,Cheap


In [15]:
# As we can see in the last column of the dataframe, np.select() assigned a price tier to each
# laptop according to its price in euros.

# For example, the 1st laptop costs 1339.69 euros, which is between 800 and 2000 euros; that matches
# the 3rd condition, whose label is 'Afforable', and the last column shows 'Afforable', so it's correct.

# The 4th laptop costs 2537.45 euros, which is between 2000 and 3000 euros; that matches the 2nd
# condition, whose label is 'Expensive', and the last column shows 'Expensive', so it's correct.

In [27]:
# Counting how many laptops fall into each tier of the "Price_tiers" column
df_laptops['Price_tiers'].value_counts()

Price_tiers
Afforable        655
Cheap            511
Expensive        118
Too Expensive     19
Name: count, dtype: int64

In [30]:
# And now we can see that most of the laptops fall into the 'Afforable' (655) and 'Cheap' (511) tiers.

In [31]:
# Creating an array based on multiple screen size tiers (more than 2 choices)
# Too Big: > 16, Big: > 14 and <= 16, Small: > 12 and <= 14, Too Small: <= 12

# Creating the "conditions" and "values" variables
# (conditions[i] is paired with values[i]; sizes are read from the 'Inches' column)
inches = df_laptops['Inches']

conditions = [
    inches.gt(16),
    inches.gt(14) & inches.le(16),
    inches.gt(12) & inches.le(14),
    inches.le(12),
]

values = ['Too Big', 'Big', 'Small', 'Too Small']

In [32]:
# Storing the selected labels in a new 'Screen_size' column.
# Rows for which none of the conditions is True receive the `default` value ('Too Small' here).
df_laptops['Screen_size'] = np.select(
    condlist=conditions,
    choicelist=values,
    default='Too Small',
)

In [33]:
# Showing the dataframe to check the new 'Screen_size' column (last column)
df_laptops

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros,Price_tiers,Screen_size
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69,Afforable,Small
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94,Afforable,Small
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.00,Cheap,Big
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45,Expensive,Big
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.60,Afforable,Small
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,1316,Lenovo,Yoga 500-14ISK,2 in 1 Convertible,14.0,IPS Panel Full HD / Touchscreen 1920x1080,Intel Core i7 6500U 2.5GHz,4GB,128GB SSD,Intel HD Graphics 520,Windows 10,1.8kg,638.00,Cheap,Small
1299,1317,Lenovo,Yoga 900-13ISK,2 in 1 Convertible,13.3,IPS Panel Quad HD+ / Touchscreen 3200x1800,Intel Core i7 6500U 2.5GHz,16GB,512GB SSD,Intel HD Graphics 520,Windows 10,1.3kg,1499.00,Afforable,Small
1300,1318,Lenovo,IdeaPad 100S-14IBR,Notebook,14.0,1366x768,Intel Celeron Dual Core N3050 1.6GHz,2GB,64GB Flash Storage,Intel HD Graphics,Windows 10,1.5kg,229.00,Cheap,Small
1301,1319,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,Notebook,15.6,1366x768,Intel Core i7 6500U 2.5GHz,6GB,1TB HDD,AMD Radeon R5 M330,Windows 10,2.19kg,764.00,Cheap,Big


In [None]:
# The last column, 'Screen_size', now holds the size tier assigned to each laptop,
# so we can see that the screens were classified according to their size in inches.

In [34]:
# Counting how many laptops fall into each tier of the "Screen_size" column
df_laptops['Screen_size'].value_counts()

Screen_size
Big          674
Small        419
Too Big      166
Too Small     44
Name: count, dtype: int64

In [35]:
# And now we can see that the majority of the laptops (674) are considered 'Big'.