## Pandas: Condition-based Column Creation

In [27]:
import numpy as np
import pandas as pd

In [28]:
# Load the laptop price dataset into `df`.
# "latin-1" maps each byte to the same character that "unicode-escape" did,
# but does not additionally interpret backslash sequences (e.g. "\n", "\u00e9")
# that may appear inside the data. "unicode-escape" is the codec for Python
# string-literal contents, not a file text encoding.
# NOTE(review): an explicit encoding is presumably needed because the file is
# not valid UTF-8 — confirm against the dataset.
df = pd.read_csv("datasets/laptop_price.csv", encoding="latin-1")
df

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.00
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.60
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,1316,Lenovo,Yoga 500-14ISK,2 in 1 Convertible,14.0,IPS Panel Full HD / Touchscreen 1920x1080,Intel Core i7 6500U 2.5GHz,4GB,128GB SSD,Intel HD Graphics 520,Windows 10,1.8kg,638.00
1299,1317,Lenovo,Yoga 900-13ISK,2 in 1 Convertible,13.3,IPS Panel Quad HD+ / Touchscreen 3200x1800,Intel Core i7 6500U 2.5GHz,16GB,512GB SSD,Intel HD Graphics 520,Windows 10,1.3kg,1499.00
1300,1318,Lenovo,IdeaPad 100S-14IBR,Notebook,14.0,1366x768,Intel Celeron Dual Core N3050 1.6GHz,2GB,64GB Flash Storage,Intel HD Graphics,Windows 10,1.5kg,229.00
1301,1319,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,Notebook,15.6,1366x768,Intel Core i7 6500U 2.5GHz,6GB,1TB HDD,AMD Radeon R5 M330,Windows 10,2.19kg,764.00


### **Two choices: Using `numpy.where()`**

In [29]:
# Two-way split on price: rows strictly above 2000 EUR become 'Premium',
# every other row (including exactly 2000) becomes 'Budget'.
price_tiers = np.where(
    df["Price_euros"].gt(2000),
    'Premium',
    'Budget',
)
price_tiers

array(['Budget', 'Budget', 'Budget', ..., 'Budget', 'Budget', 'Budget'],
      shape=(1303,), dtype='<U7')

In [30]:
# Store the tier labels on the frame as the "Price_range" column
df.loc[:, "Price_range"] = price_tiers

In [31]:
# Show the new label column beside the price it was derived from
df.loc[:, ["laptop_ID", "Product", "Price_euros", "Price_range"]]

Unnamed: 0,laptop_ID,Product,Price_euros,Price_range
0,1,MacBook Pro,1339.69,Budget
1,2,Macbook Air,898.94,Budget
2,3,250 G6,575.00,Budget
3,4,MacBook Pro,2537.45,Premium
4,5,MacBook Pro,1803.60,Budget
...,...,...,...,...
1298,1316,Yoga 500-14ISK,638.00,Budget
1299,1317,Yoga 900-13ISK,1499.00,Budget
1300,1318,IdeaPad 100S-14IBR,229.00,Budget
1301,1319,15-AC110nv (i7-6500U/6GB/1TB/Radeon,764.00,Budget


In [32]:
# Two-way split on the "Inches" column: strictly above 15 is "Large",
# everything else is "Small".
classify_screensizes = np.where(
    df["Inches"].gt(15),
    "Large",
    "Small",
)
classify_screensizes

array(['Small', 'Small', 'Large', ..., 'Small', 'Large', 'Large'],
      shape=(1303,), dtype='<U5')

In [33]:
# Store the labels as the "ScreenSize_Type" column, then show it beside "Inches"
df.loc[:, "ScreenSize_Type"] = classify_screensizes
df.loc[:, ["laptop_ID", "Product", "Inches", "ScreenSize_Type"]]

Unnamed: 0,laptop_ID,Product,Inches,ScreenSize_Type
0,1,MacBook Pro,13.3,Small
1,2,Macbook Air,13.3,Small
2,3,250 G6,15.6,Large
3,4,MacBook Pro,15.4,Large
4,5,MacBook Pro,13.3,Small
...,...,...,...,...
1298,1316,Yoga 500-14ISK,14.0,Small
1299,1317,Yoga 900-13ISK,13.3,Small
1300,1318,IdeaPad 100S-14IBR,14.0,Small
1301,1319,15-AC110nv (i7-6500U/6GB/1TB/Radeon,15.6,Large


In [34]:
# Peek at the first five rows, now including both derived columns
df.head(n=5)

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros,Price_range,ScreenSize_Type
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69,Budget,Small
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94,Budget,Small
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0,Budget,Large
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45,Premium,Large
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6,Budget,Small


In [35]:
# How many laptops fall under each screen-size label
df.value_counts(subset="ScreenSize_Type")

ScreenSize_Type
Large    835
Small    468
Name: count, dtype: int64

### **N choices: Using `numpy.select()`**

In [57]:
# Fresh copy of the dataset for the np.select() examples, without the columns
# added to `df` above.
# "latin-1" maps each byte to the same character that "unicode-escape" did,
# but does not additionally interpret backslash sequences inside the data;
# "unicode-escape" is the codec for Python string-literal contents, not a
# file text encoding.
df2 = pd.read_csv("datasets/laptop_price.csv", encoding="latin-1")

In [58]:
# Step 1: Create condition and value variables

# One boolean mask per price band, listed from the highest band down.
# between(..., inclusive="right") means: lower < price <= upper.
conditions = [
    df2["Price_euros"].gt(3000),
    df2["Price_euros"].between(2000, 3000, inclusive="right"),
    df2["Price_euros"].between(800, 2000, inclusive="right"),
    df2["Price_euros"].le(800),
]

# Labels, paired by position with the masks in `conditions`.
values = ["Too Expensive", "Expensive", "Affordable", "Cheap"]

In [59]:
# Step 2: Set it to a new column
# np.select picks, per row, the label of the first matching condition; rows
# matching no condition get the `default` label "N/A".
# A distinct variable name is used so this cell does not overwrite the
# two-tier `price_tiers` array built in the np.where() section; re-running
# that section's assignment cell would otherwise write these labels into
# df["Price_range"].
price_tier_labels = np.select(conditions, values, default="N/A")

df2["Price_tiers"] = price_tier_labels

In [60]:
# Show the tier label beside the price it was derived from
df2.loc[:, ["laptop_ID", "Product", "Price_euros", "Price_tiers"]]

Unnamed: 0,laptop_ID,Product,Price_euros,Price_tiers
0,1,MacBook Pro,1339.69,Affordable
1,2,Macbook Air,898.94,Affordable
2,3,250 G6,575.00,Cheap
3,4,MacBook Pro,2537.45,Expensive
4,5,MacBook Pro,1803.60,Affordable
...,...,...,...,...
1298,1316,Yoga 500-14ISK,638.00,Cheap
1299,1317,Yoga 900-13ISK,1499.00,Affordable
1300,1318,IdeaPad 100S-14IBR,229.00,Cheap
1301,1319,15-AC110nv (i7-6500U/6GB/1TB/Radeon,764.00,Cheap


In [61]:
# How many laptops fall into each price tier
df2.value_counts(subset='Price_tiers')

Price_tiers
Affordable       655
Cheap            511
Expensive        118
Too Expensive     19
Name: count, dtype: int64

In [62]:
# Exercise: four-way screen-size labels from the "Inches" column

# One boolean mask per size band, listed from the largest band down.
# between(..., inclusive="right") means: lower < inches <= upper.
conditions_2 = [
    df2["Inches"].gt(16),
    df2["Inches"].between(14, 16, inclusive="right"),
    df2["Inches"].between(12, 14, inclusive="right"),
    df2["Inches"].le(12),
]

# Labels, paired by position with the masks in `conditions_2`.
values_2 = [
    "Too Big",
    "Big",
    "Small",
    "Too Small",
]

# Rows matching none of the masks are labelled "N/A".
df2["screen_size"] = np.select(conditions_2, values_2, default="N/A")
df2

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros,Price_tiers,screen_size
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69,Affordable,Small
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94,Affordable,Small
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.00,Cheap,Big
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45,Expensive,Big
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.60,Affordable,Small
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,1316,Lenovo,Yoga 500-14ISK,2 in 1 Convertible,14.0,IPS Panel Full HD / Touchscreen 1920x1080,Intel Core i7 6500U 2.5GHz,4GB,128GB SSD,Intel HD Graphics 520,Windows 10,1.8kg,638.00,Cheap,Small
1299,1317,Lenovo,Yoga 900-13ISK,2 in 1 Convertible,13.3,IPS Panel Quad HD+ / Touchscreen 3200x1800,Intel Core i7 6500U 2.5GHz,16GB,512GB SSD,Intel HD Graphics 520,Windows 10,1.3kg,1499.00,Affordable,Small
1300,1318,Lenovo,IdeaPad 100S-14IBR,Notebook,14.0,1366x768,Intel Celeron Dual Core N3050 1.6GHz,2GB,64GB Flash Storage,Intel HD Graphics,Windows 10,1.5kg,229.00,Cheap,Small
1301,1319,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,Notebook,15.6,1366x768,Intel Core i7 6500U 2.5GHz,6GB,1TB HDD,AMD Radeon R5 M330,Windows 10,2.19kg,764.00,Cheap,Big


In [63]:
# How many laptops fall under each screen-size label
df2.value_counts(subset="screen_size")

screen_size
Big          674
Small        419
Too Big      166
Too Small     44
Name: count, dtype: int64