In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.SettingWithCopyWarning)

In [2]:
data = pd.read_csv('../Crawl data/Data/Products',index_col=0)
data.head()

Unnamed: 0,Title,Link,Brand,Price,Review,Total Review,Compatible Devices,Color,Number of Keys,Connectivity Technology,Rank
0,"Computer Keyboard Wired,7-Color Rainbow LED Ba...",https://www.amazon.com/sspa/click?ie=UTF8&spc=...,Acebaff,$21.95,4.3 out of 5,450 global ratings,"Windows 11/10/8/7/XP/VISTA, Mac OS, Chrome OS...",Black,117.0,USB,#112 in Computer Keyboards
1,"MARVO Large Print Backlit Keyboard, WK712 Wire...",https://www.amazon.com/sspa/click?ie=UTF8&spc=...,MARVO,$28.99,4.5 out of 5,146 global ratings,"Laptop, PC, Smart TV",Black,109.0,USB-A,#277 in Computer Keyboards
2,X9 Wired Ergonomic Keyboard with Cushioned Wri...,https://www.amazon.com/X9-Performance-Ergonomi...,X9 Performance,$49.99,4.4 out of 5,"1,415 global ratings","Laptop, PC, Lenovo, Dell, Linux, HP, Micros...",Black,110.0,USB-A,#69 in Computer Keyboards
3,"Perixx PERIBOARD-106M, Wired Performance Full-...",https://www.amazon.com/Perixx-PERIBOARD-106M-P...,Perixx,$39.99,4.6 out of 5,"1,442 global ratings",PC,Beige,104.0,Wired,#316 in Computer Keyboards
4,Logitech MK120 Wired Keyboard and Mouse Combo ...,https://www.amazon.com/Logitech-Keyboard-Windo...,Logitech,Page 1 of 1,4.6 out of 5,"21,152 global ratings","Laptop, Personal Computer",Black,,Usb,#9 in Computer Keyboards


In [3]:
#see some information of data
data.describe()

Unnamed: 0,Number of Keys
count,285.0
mean,96.378947
std,24.338941
min,17.0
25%,104.0
50%,104.0
75%,106.0
max,117.0


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 311 entries, 0 to 310
Data columns (total 11 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Title                    311 non-null    object 
 1   Link                     311 non-null    object 
 2   Brand                    311 non-null    object 
 3   Price                    311 non-null    object 
 4   Review                   311 non-null    object 
 5   Total Review             311 non-null    object 
 6   Compatible Devices       311 non-null    object 
 7   Color                    306 non-null    object 
 8   Number of Keys           285 non-null    float64
 9   Connectivity Technology  311 non-null    object 
 10  Rank                     311 non-null    object 
dtypes: float64(1), object(10)
memory usage: 29.2+ KB


#### To do list to cleaning before analysting data
1. Extract Numbers
* Price Column: Extract numeric values from the price column and "Page 1 of 1" value to null.
* Review Column: Extract numeric values from the review column.
* Total Review Column: Extract numeric values from the total review column.
* Rank Column: Extract numeric values from the rank column.
2. Change Data Types
* Price Column: Convert the price column data type to float.
* Review Column: Convert the review column data type to float.
* Total Review Column: Convert the total review column data type to float.
* Rank Column: Convert the rank column data type to integer.
3. Handle Null and Duplicated Values
* Check for Null Values: Identify null values in all columns.
* Replace Null Values: Replace null values with the mean value of their respective columns.
* Check for Duplicates: Identify any duplicate rows in the dataset.
* Remove Duplicates: Remove duplicate rows to ensure data integrity.

In [5]:
#Extract Numbers
data["Review"] = data["Review"].str.split(" ").str[0].astype(float)
data["Total Review"] = data["Total Review"].str.split(" ").str[0].str.replace(",","").astype(int)
data["Rank"] = data["Rank"].str.strip().str.split(" ").str[0].str.replace("#","").str.replace (",","").astype(int)

In [6]:
data["Price"] = data["Price"].replace("Page 1 of 1",np.nan)
data["Price"] = data["Price"].str.replace("$","").astype(float)

In [7]:
data.head()

Unnamed: 0,Title,Link,Brand,Price,Review,Total Review,Compatible Devices,Color,Number of Keys,Connectivity Technology,Rank
0,"Computer Keyboard Wired,7-Color Rainbow LED Ba...",https://www.amazon.com/sspa/click?ie=UTF8&spc=...,Acebaff,21.95,4.3,450,"Windows 11/10/8/7/XP/VISTA, Mac OS, Chrome OS...",Black,117.0,USB,112
1,"MARVO Large Print Backlit Keyboard, WK712 Wire...",https://www.amazon.com/sspa/click?ie=UTF8&spc=...,MARVO,28.99,4.5,146,"Laptop, PC, Smart TV",Black,109.0,USB-A,277
2,X9 Wired Ergonomic Keyboard with Cushioned Wri...,https://www.amazon.com/X9-Performance-Ergonomi...,X9 Performance,49.99,4.4,1415,"Laptop, PC, Lenovo, Dell, Linux, HP, Micros...",Black,110.0,USB-A,69
3,"Perixx PERIBOARD-106M, Wired Performance Full-...",https://www.amazon.com/Perixx-PERIBOARD-106M-P...,Perixx,39.99,4.6,1442,PC,Beige,104.0,Wired,316
4,Logitech MK120 Wired Keyboard and Mouse Combo ...,https://www.amazon.com/Logitech-Keyboard-Windo...,Logitech,,4.6,21152,"Laptop, Personal Computer",Black,,Usb,9


In [8]:
data.isnull().sum()

Title                       0
Link                        0
Brand                       0
Price                      59
Review                      0
Total Review                0
Compatible Devices          0
Color                       5
Number of Keys             26
Connectivity Technology     0
Rank                        0
dtype: int64

In [9]:
mean_val = data["Price"].mean()
mean_val

np.float64(49.04496031746031)

In [10]:
data["Price"] = data["Price"].replace(np.nan,mean_val)

In [11]:
data["Price"].isna().sum()

np.int64(0)

In [12]:
mean_val_keys = data["Number of Keys"].mean()
mean_val_keys

np.float64(96.37894736842105)

In [13]:
data["Number of Keys"] = data["Number of Keys"].replace(np.nan,mean_val_keys).astype(int)

In [14]:
data["Number of Keys"].isna().sum()

np.int64(0)

In [15]:
data.duplicated().sum()

np.int64(1)

In [16]:
data.drop_duplicates(inplace=True)

In [17]:
data.duplicated().sum()

np.int64(0)

In [18]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 310 entries, 0 to 310
Data columns (total 11 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Title                    310 non-null    object 
 1   Link                     310 non-null    object 
 2   Brand                    310 non-null    object 
 3   Price                    310 non-null    float64
 4   Review                   310 non-null    float64
 5   Total Review             310 non-null    int64  
 6   Compatible Devices       310 non-null    object 
 7   Color                    305 non-null    object 
 8   Number of Keys           310 non-null    int64  
 9   Connectivity Technology  310 non-null    object 
 10  Rank                     310 non-null    int64  
dtypes: float64(2), int64(3), object(6)
memory usage: 29.1+ KB


In [19]:
data["Connectivity Technology"].unique()

array([' USB ', ' USB-A ', ' Wired ', ' Usb ', ' Wireless, Bluetooth ',
       ' wired ', ' Bluetooth ', ' wireless ', ' USB-A, USB 2.0 ',
       ' USB only (Non-Bluetooth) ', ' Bluetooth, USB-C ', ' USB-C ',
       ' RF, USB ', ' USB, USB-A ', ' Micro USB ', ' USB Wireless ',
       ' Bluetooth, USB ', ' Wireless ', ' RF, Unifying Receiver ',
       ' Bluetooth, 2.4G USB ', ' USB-A, USB-C '], dtype=object)

In [20]:
data["Number of Keys"].unique()

array([117, 109, 110, 104,  96,  19, 101, 105,  79, 111,  78,  61, 108,
        17, 114, 112, 102, 106,  84])

In [21]:
data["Compatible Devices"].unique()

array([' Windows 11/10/8/7/XP/VISTA, Mac OS, Chrome OS, Linux ',
       ' Laptop, PC, Smart TV ',
       '   Laptop, PC, Lenovo, Dell, Linux, HP, Microsof Surface, Acer, Asus, Samsung, Chromebook, Alienware, Razer, LG   See more  ',
       ' PC ', ' Laptop, Personal Computer ',
       ' iPhone, Apple Mac Pro / Mini, Macbook Pro / Air, iMac, iPad ',
       ' Laptop ', ' Laptop, Gaming Console, PC ', ' Laptop, PC ',
       ' Compatible on Windows, macOS, Linux and Chrome OS ',
       ' Personal Computer ', ' Laptop, PC, Tablet, Smartphone ',
       ' Laptop, PC, Windows, Mac ', ' Gaming Console ',
       '   Windows, Xbox, PlayStation, and Mac OS X*. USB port required *Not all software features supported on Mac OS   See more  ',
       ' computers and devices with USB connectivity ',
       ' PC, Tablet, Smartphone ', ' Laptop, PC, Tablet ',
       ' Laptop, PC, Gaming Console, Smart TV ',
       ' Computers, Laptop, PC, Desktop ',
       ' Laptop, Gaming Console, Smart TV, PC ',
       

In [22]:
data["Brand"].unique()

array([' Acebaff ', ' MARVO ', ' X9 Performance ', ' Perixx ',
       ' Logitech ', ' Macally ', ' HP ', ' EVGA ', ' Amazon Basics ',
       ' SABLUTE ', ' Microsoft ', ' Azio ', ' SteelSeries ', ' Razer ',
       ' Cherry ', ' ROCCAT ', ' LIZRROT ', ' MOWUX ', ' KOPJIPPOM ',
       ' Atelus ', ' Soueto ', ' Arteck ', ' TopMate ', ' HeengYanson ',
       ' JOYACCESS ', ' Lenovo ', ' Fosmon ', ' MANHATTAN ', ' FOPETT ',
       ' Nulea '], dtype=object)

### Exploratory Data Analysis

1. **Calculate Average, Min, Max, Mean Price of Each Brand**
   - **Objective**: Determine the pricing statistics for each brand.
   - **Method**: Use groupby operations to aggregate statistics for each brand.

In [23]:
data["Brand"] = data["Brand"].str.strip()
data["Brand"].unique()

array(['Acebaff', 'MARVO', 'X9 Performance', 'Perixx', 'Logitech',
       'Macally', 'HP', 'EVGA', 'Amazon Basics', 'SABLUTE', 'Microsoft',
       'Azio', 'SteelSeries', 'Razer', 'Cherry', 'ROCCAT', 'LIZRROT',
       'MOWUX', 'KOPJIPPOM', 'Atelus', 'Soueto', 'Arteck', 'TopMate',
       'HeengYanson', 'JOYACCESS', 'Lenovo', 'Fosmon', 'MANHATTAN',
       'FOPETT', 'Nulea'], dtype=object)

In [24]:
brand_data = data.groupby("Brand")["Price"].agg(["min","max","count","mean"]).reset_index()
brand_data["mean"] = round(brand_data["mean"],2)
brand_data

Unnamed: 0,Brand,min,max,count,mean
0,Acebaff,20.99,21.95,20,21.52
1,Amazon Basics,16.82,19.54,19,18.25
2,Arteck,35.99,54.99,4,50.24
3,Atelus,19.99,23.99,2,21.99
4,Azio,97.42,97.42,10,97.42
5,Cherry,84.27,84.27,10,84.27
6,EVGA,59.99,59.99,9,59.99
7,FOPETT,37.99,37.99,1,37.99
8,Fosmon,29.98,29.98,1,29.98
9,HP,49.04496,49.04496,10,49.04


2. **Calculate Average, Min, Max, Mean Review of Each Brand**
   - **Objective**: Analyze review statistics for each brand.
   - **Method**: Group data by brand and compute the required statistics for reviews.

In [25]:
review_data = data.groupby("Brand")["Review"].agg(["min","max","count","mean"]).reset_index()
review_data["mean"] = review_data["mean"].round(2)
review_data

Unnamed: 0,Brand,min,max,count,mean
0,Acebaff,4.3,4.3,20,4.3
1,Amazon Basics,4.0,4.1,19,4.05
2,Arteck,4.4,4.5,4,4.43
3,Atelus,4.5,4.5,2,4.5
4,Azio,4.5,4.5,10,4.5
5,Cherry,4.5,4.5,10,4.5
6,EVGA,4.4,4.4,9,4.4
7,FOPETT,4.3,4.3,1,4.3
8,Fosmon,4.1,4.1,1,4.1
9,HP,4.4,4.4,10,4.4


3. **Calculate Average, Min, Max, Mean Rank of Each Brand**
   - **Objective**: Evaluate ranking statistics for each brand.
   - **Method**: Similar to the previous tasks, use groupby to aggregate rank statistics.

In [26]:
rank_data = data.groupby("Brand")["Rank"].agg(["min","max","count","mean"]).reset_index()
rank_data["mean"] = rank_data["mean"].round(2)
rank_data

Unnamed: 0,Brand,min,max,count,mean
0,Acebaff,112,1497,20,735.25
1,Amazon Basics,1596,2215,19,1889.21
2,Arteck,55,62,4,60.25
3,Atelus,128,225,2,176.5
4,Azio,700,729,10,726.1
5,Cherry,248,248,10,248.0
6,EVGA,1571,1571,9,1571.0
7,FOPETT,422,422,1,422.0
8,Fosmon,28,28,1,28.0
9,HP,3,3,10,3.0


4. **Calculate Mean, Max, Min, Average of Each Brand’s Total Reviews (Units Sold)**
<br> Because total reviews show the number of people who writing reviews -> considers columns as units sold at least
   - **Objective**: Analyze sales data for each brand.
   - **Method**: Aggregate total reviews (units sold) by brand and compute the required statistics.

In [27]:
sold_data = data.groupby("Brand")["Total Review"].agg(["min","max","count","mean"]).reset_index()
sold_data["mean"] = sold_data["mean"].round(2)
sold_data

Unnamed: 0,Brand,min,max,count,mean
0,Acebaff,277,451,20,372.5
1,Amazon Basics,800,4995,19,3007.89
2,Arteck,790,2272,4,1161.0
3,Atelus,907,907,2,907.0
4,Azio,846,846,10,846.0
5,Cherry,88,89,10,88.7
6,EVGA,2425,2425,9,2425.0
7,FOPETT,2905,2905,1,2905.0
8,Fosmon,11975,11975,1,11975.0
9,HP,3700,3701,10,3700.7


5. **Count Number of Products of Each Brand and Each Connectivity Technology**
   - **Objective**: Determine product counts for brands and connectivity technologies.
   - **Method**: Use value counts or groupby to tally the number of products.

In [28]:
data.columns

Index(['Title', 'Link', 'Brand', 'Price', 'Review', 'Total Review',
       'Compatible Devices', 'Color', 'Number of Keys',
       'Connectivity Technology', 'Rank'],
      dtype='object')

In [29]:
data["Connectivity Technology"].unique()

array([' USB ', ' USB-A ', ' Wired ', ' Usb ', ' Wireless, Bluetooth ',
       ' wired ', ' Bluetooth ', ' wireless ', ' USB-A, USB 2.0 ',
       ' USB only (Non-Bluetooth) ', ' Bluetooth, USB-C ', ' USB-C ',
       ' RF, USB ', ' USB, USB-A ', ' Micro USB ', ' USB Wireless ',
       ' Bluetooth, USB ', ' Wireless ', ' RF, Unifying Receiver ',
       ' Bluetooth, 2.4G USB ', ' USB-A, USB-C '], dtype=object)

In [30]:
connectivity = data[["Brand",'Review', 'Total Review','Connectivity Technology']]
connectivity

Unnamed: 0,Brand,Review,Total Review,Connectivity Technology
0,Acebaff,4.3,450,USB
1,MARVO,4.5,146,USB-A
2,X9 Performance,4.4,1415,USB-A
3,Perixx,4.6,1442,Wired
4,Logitech,4.6,21152,Usb
...,...,...,...,...
306,Logitech,4.4,958,"RF, USB"
307,Arteck,4.4,791,USB Wireless
308,Acebaff,4.3,277,USB
309,Soueto,5.0,1,"USB-A, USB-C"


In [31]:
def add_binary_columns(df,col_name,keywords):
    for keyword in keywords:
        df[keyword.capitalize()] = df[col_name].apply(lambda x: 1 if keyword.lower() in x.lower() else 0)
    return df

In [32]:
keywords = ["usb","wireless","bluetooth","rf"]
df = add_binary_columns(connectivity,'Connectivity Technology',keywords=keywords)
df

Unnamed: 0,Brand,Review,Total Review,Connectivity Technology,Usb,Wireless,Bluetooth,Rf
0,Acebaff,4.3,450,USB,1,0,0,0
1,MARVO,4.5,146,USB-A,1,0,0,0
2,X9 Performance,4.4,1415,USB-A,1,0,0,0
3,Perixx,4.6,1442,Wired,0,0,0,0
4,Logitech,4.6,21152,Usb,1,0,0,0
...,...,...,...,...,...,...,...,...
306,Logitech,4.4,958,"RF, USB",1,0,0,1
307,Arteck,4.4,791,USB Wireless,1,1,0,0
308,Acebaff,4.3,277,USB,1,0,0,0
309,Soueto,5.0,1,"USB-A, USB-C",1,0,0,0


In [33]:
brand_connect_tech = df.groupby("Brand")[["Usb","Wireless","Bluetooth","Rf"]].sum().reset_index()
brand_connect_tech

Unnamed: 0,Brand,Usb,Wireless,Bluetooth,Rf
0,Acebaff,20,0,0,0
1,Amazon Basics,19,0,19,0
2,Arteck,4,3,0,0
3,Atelus,2,0,0,0
4,Azio,0,0,0,0
5,Cherry,10,0,0,0
6,EVGA,9,0,0,0
7,FOPETT,1,0,0,0
8,Fosmon,0,1,0,0
9,HP,0,10,0,0


In [34]:
data_tech = df[["Usb","Wireless","Bluetooth","Rf"]].sum().reset_index()
data_tech.columns = ["Category", "Count"]
data_tech

Unnamed: 0,Category,Count
0,Usb,217
1,Wireless,34
2,Bluetooth,68
3,Rf,11


6. **Analyze Keyboard Data**
   - **Add Layout Column**: Categorize keyboards by layout type based on key count.
   - **Find Most Common Layout**: Identify the most frequent keyboard layout.
   - **Sum Layouts by Brand**: Aggregate layout counts for each brand.
   - **Identify Most Popular Color**: Determine the most frequent color.


In [35]:
def keyboard_layout(keyboard_num):
    layout = ""
    if keyboard_num >= 104:
        layout += "Full Size"
    elif keyboard_num >= 87:
        layout += "Tenkeyless"
    elif keyboard_num >= 84:
        layout += "75%"
    elif keyboard_num >= 64:
        layout += "60%"
    else:
        layout += "40%"
    return layout

data["Layout"] = data["Number of Keys"].apply(keyboard_layout)
data

Unnamed: 0,Title,Link,Brand,Price,Review,Total Review,Compatible Devices,Color,Number of Keys,Connectivity Technology,Rank,Layout
0,"Computer Keyboard Wired,7-Color Rainbow LED Ba...",https://www.amazon.com/sspa/click?ie=UTF8&spc=...,Acebaff,21.95000,4.3,450,"Windows 11/10/8/7/XP/VISTA, Mac OS, Chrome OS...",Black,117,USB,112,Full Size
1,"MARVO Large Print Backlit Keyboard, WK712 Wire...",https://www.amazon.com/sspa/click?ie=UTF8&spc=...,MARVO,28.99000,4.5,146,"Laptop, PC, Smart TV",Black,109,USB-A,277,Full Size
2,X9 Wired Ergonomic Keyboard with Cushioned Wri...,https://www.amazon.com/X9-Performance-Ergonomi...,X9 Performance,49.99000,4.4,1415,"Laptop, PC, Lenovo, Dell, Linux, HP, Micros...",Black,110,USB-A,69,Full Size
3,"Perixx PERIBOARD-106M, Wired Performance Full-...",https://www.amazon.com/Perixx-PERIBOARD-106M-P...,Perixx,39.99000,4.6,1442,PC,Beige,104,Wired,316,Full Size
4,Logitech MK120 Wired Keyboard and Mouse Combo ...,https://www.amazon.com/Logitech-Keyboard-Windo...,Logitech,49.04496,4.6,21152,"Laptop, Personal Computer",Black,96,Usb,9,Tenkeyless
...,...,...,...,...,...,...,...,...,...,...,...,...
306,Logitech K350 Keyboard - Wireless Connectivity...,https://www.amazon.com/Logitech-K350-Keyboard-...,Logitech,75.19000,4.4,958,PC,Black,17,"RF, USB",320,40%
307,Arteck Split Ergonomic Keyboard with Cushioned...,https://www.amazon.com/Arteck-Ergonomic-Keyboa...,Arteck,54.99000,4.4,791,"Computers, Laptop, PC, Desktop",Black,102,USB Wireless,62,Tenkeyless
308,"Gaming Keyboard with Large Print Keys, 7-Color...",https://www.amazon.com/sspa/click?ie=UTF8&spc=...,Acebaff,20.99000,4.3,277,"Windows 11/10/8/7/XP/VISTA, Mac OS, Chrome OS...",Black,104,USB,1497,Full Size
309,"RGB Wireless Keyboard with 7-Color Backlit, Co...",https://www.amazon.com/sspa/click?ie=UTF8&spc=...,Soueto,39.99000,5.0,1,"Laptop, Gaming Console, PC, Smart TV",Black,102,"USB-A, USB-C",540,Tenkeyless


In [36]:
layout_data = data[["Brand","Layout","Color","Price","Rank"]]
layout_data

Unnamed: 0,Brand,Layout,Color,Price,Rank
0,Acebaff,Full Size,Black,21.95000,112
1,MARVO,Full Size,Black,28.99000,277
2,X9 Performance,Full Size,Black,49.99000,69
3,Perixx,Full Size,Beige,39.99000,316
4,Logitech,Tenkeyless,Black,49.04496,9
...,...,...,...,...,...
306,Logitech,40%,Black,75.19000,320
307,Arteck,Tenkeyless,Black,54.99000,62
308,Acebaff,Full Size,Black,20.99000,1497
309,Soueto,Tenkeyless,Black,39.99000,540


In [37]:
df_key=pd.pivot_table(data = layout_data,columns="Layout",index="Brand",aggfunc="size",fill_value=0)
df_key["Total"] = df_key["40%"]+df_key["60%"]+df_key["75%"]+df_key["Full Size"]+df_key["Tenkeyless"]
df_key.sort_values(by="Total",ascending=False)

Layout,40%,60%,75%,Full Size,Tenkeyless,Total
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Logitech,10,9,1,45,22,87
Acebaff,0,0,0,20,0,20
Perixx,0,0,0,19,0,19
Amazon Basics,0,0,0,9,10,19
SABLUTE,0,0,0,14,0,14
X9 Performance,0,0,0,11,0,11
Macally,10,0,0,0,0,10
Azio,0,0,0,10,0,10
Cherry,0,0,0,10,0,10
SteelSeries,10,0,0,0,0,10


In [38]:
df_color = layout_data["Color"].value_counts().reset_index()
df_color

Unnamed: 0,Color,count
0,Black,171
1,black,25
2,Graphite,18
3,Matte Black,10
4,Bronze W/MX2A Brown,10
5,Black and Silver,10
6,White,10
7,TTC Brown,10
8,silver,10
9,Space Grey,9


7. **Separate Compatible Devices Column to Find Most Popular Devices**
    - **Objective**: Analyze which devices are most popular.
    - **Method**: Split the compatible devices column and count occurrences of each device type.

In [39]:
data["Compatible Devices"].unique()

array([' Windows 11/10/8/7/XP/VISTA, Mac OS, Chrome OS, Linux ',
       ' Laptop, PC, Smart TV ',
       '   Laptop, PC, Lenovo, Dell, Linux, HP, Microsof Surface, Acer, Asus, Samsung, Chromebook, Alienware, Razer, LG   See more  ',
       ' PC ', ' Laptop, Personal Computer ',
       ' iPhone, Apple Mac Pro / Mini, Macbook Pro / Air, iMac, iPad ',
       ' Laptop ', ' Laptop, Gaming Console, PC ', ' Laptop, PC ',
       ' Compatible on Windows, macOS, Linux and Chrome OS ',
       ' Personal Computer ', ' Laptop, PC, Tablet, Smartphone ',
       ' Laptop, PC, Windows, Mac ', ' Gaming Console ',
       '   Windows, Xbox, PlayStation, and Mac OS X*. USB port required *Not all software features supported on Mac OS   See more  ',
       ' computers and devices with USB connectivity ',
       ' PC, Tablet, Smartphone ', ' Laptop, PC, Tablet ',
       ' Laptop, PC, Gaming Console, Smart TV ',
       ' Computers, Laptop, PC, Desktop ',
       ' Laptop, Gaming Console, Smart TV, PC ',
       

In [40]:
data_devices = data[["Brand","Compatible Devices"]]

In [41]:
keywords = ["windows","mac","laptop","pc","smart tv","gaming console"]
df = add_binary_columns(data_devices,"Compatible Devices",keywords)
df

Unnamed: 0,Brand,Compatible Devices,Windows,Mac,Laptop,Pc,Smart tv,Gaming console
0,Acebaff,"Windows 11/10/8/7/XP/VISTA, Mac OS, Chrome OS...",1,1,0,0,0,0
1,MARVO,"Laptop, PC, Smart TV",0,0,1,1,1,0
2,X9 Performance,"Laptop, PC, Lenovo, Dell, Linux, HP, Micros...",0,0,1,1,0,0
3,Perixx,PC,0,0,0,1,0,0
4,Logitech,"Laptop, Personal Computer",0,0,1,0,0,0
...,...,...,...,...,...,...,...,...
306,Logitech,PC,0,0,0,1,0,0
307,Arteck,"Computers, Laptop, PC, Desktop",0,0,1,1,0,0
308,Acebaff,"Windows 11/10/8/7/XP/VISTA, Mac OS, Chrome OS...",1,1,0,0,0,0
309,Soueto,"Laptop, Gaming Console, PC, Smart TV",0,0,1,1,1,1


In [42]:
df_brand_device = df.groupby("Brand")[["Windows","Mac","Laptop","Pc","Smart tv","Gaming console"]].sum().reset_index()
df_brand_device

Unnamed: 0,Brand,Windows,Mac,Laptop,Pc,Smart tv,Gaming console
0,Acebaff,20,20,0,0,0,0
1,Amazon Basics,0,0,0,0,0,0
2,Arteck,0,0,4,4,0,0
3,Atelus,0,0,2,2,0,0
4,Azio,0,0,10,10,0,0
5,Cherry,0,0,0,10,0,0
6,EVGA,0,0,9,9,0,0
7,FOPETT,0,0,1,0,0,0
8,Fosmon,0,0,0,0,0,1
9,HP,0,0,10,10,0,0


In [43]:
new_df = df_brand_device[["Windows","Mac","Laptop","Pc","Smart tv","Gaming console"]].sum().reset_index()
new_df.columns = ["Devices","Count"]
new_df

Unnamed: 0,Devices,Count
0,Windows,53
1,Mac,63
2,Laptop,146
3,Pc,178
4,Smart tv,19
5,Gaming console,53
