## Examining and preprocessing the racquet data set

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re

In [2]:
racquets = pd.read_csv("../data/raw/racquet_features_raw.csv", index_col = 0)

In [3]:
racquets.info()

<class 'pandas.core.frame.DataFrame'>
Index: 397 entries, 0 to 0
Data columns (total 37 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   racquet_img      397 non-null    object 
 1   racquet_name     397 non-null    object 
 2   racquet_rating   343 non-null    float64
 3   racquet_price    397 non-null    float64
 4   racquet_desc     397 non-null    object 
 5   Head Size        365 non-null    object 
 6   Length           365 non-null    object 
 7   Strung Weight    307 non-null    object 
 8   Balance          309 non-null    object 
 9   Swingweight      309 non-null    float64
 10  Stiffness        308 non-null    object 
 11  Beam Width       309 non-null    object 
 12  Composition      364 non-null    object 
 13  Power Level      309 non-null    object 
 14  Stroke Style     309 non-null    object 
 15  Swing Speed      309 non-null    object 
 16  Racquet Colors   353 non-null    object 
 17  Grip Type        309 no

In [4]:
racquets.describe()

Unnamed: 0,racquet_rating,racquet_price,Swingweight,Balance:,Swingweight:,Stiffness:,Beam Width:,Composition:,Power Level:,Stroke Style:,Swing Speed:,Racquet Colors:,Grip Type:,String Pattern:,String Tension:
count,343.0,397.0,309.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mean,4.730612,198.241033,317.0,,,,,,,,,,,,
std,0.471989,87.496613,11.11364,,,,,,,,,,,,
min,1.0,13.95,270.0,,,,,,,,,,,,
25%,4.6,134.0,310.0,,,,,,,,,,,,
50%,4.9,199.0,318.0,,,,,,,,,,,,
75%,5.0,269.0,325.0,,,,,,,,,,,,
max,5.0,579.0,345.0,,,,,,,,,,,,


In [5]:
racquets.shape

(397, 37)

In [6]:
racquets.columns

Index(['racquet_img', 'racquet_name', 'racquet_rating', 'racquet_price',
       'racquet_desc', 'Head Size', 'Length', 'Strung Weight', 'Balance',
       'Swingweight', 'Stiffness', 'Beam Width', 'Composition', 'Power Level',
       'Stroke Style', 'Swing Speed', 'Racquet Colors', 'Grip Type',
       'String Pattern', 'String Tension', 'Balance:', 'Swingweight:',
       'Stiffness:', 'Beam Width:', 'Composition:', 'Power Level:',
       'Stroke Style:', 'Swing Speed:', 'Racquet Colors:', 'Grip Type:',
       'String Pattern:', 'String Tension:', 'Age', 'Weight', 'Height',
       'Other', 'Strung  Weight'],
      dtype='object')

In [7]:
racquets = racquets.reset_index().drop(columns = "index")

In [8]:
racquets

Unnamed: 0,racquet_img,racquet_name,racquet_rating,racquet_price,racquet_desc,Head Size,Length,Strung Weight,Balance,Swingweight,...,Swing Speed:,Racquet Colors:,Grip Type:,String Pattern:,String Tension:,Age,Weight,Height,Other,Strung Weight.1
0,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive 2025,4.8,289.00,The Pure Drive is popular for a reason. Boast...,100 in² / 645.16 cm²,27in / 68.58cm,11.2oz / 318g,12.99in / 32.99cm / 4 pts HL,317.0,...,,,,,,,,,,
1,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive 98 2025,4.5,299.00,Originally launched in 2019 under the VS moni...,98 in² / 632.26 cm²,27in / 68.58cm,11.4oz / 323g,13.18in / 33.48cm / 3 pts HL,326.0,...,,,,,,,,,,
2,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive 98 2-Pack 2025,5.0,579.00,This product is for 2 Pure Drive 98 racquets....,98 in² / 632.26 cm²,27in / 68.58cm,11.4oz / 323g,13.18in / 33.48cm / 3 pts HL,323.0,...,,,,,,,,,,
3,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive Plus 2025,5.0,289.00,Babolat adds another chapter to one of the ga...,100 in² / 645.16 cm²,27.5in / 69.85cm,11.2oz / 318g,13in / 33.02cm / 6 pts HL,325.0,...,,,,,,,,,,
4,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive Team 2025,5.0,269.00,The Pure Drive Team 2025 is defined by its us...,100 in² / 645.16 cm²,27in / 68.58cm,10.6oz / 301g,12.85in / 32.64cm / 5 pts HL,308.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
392,https://img.tennis-warehouse.com/watermark/rs....,Solinco Blackout 300 XTD,4.8,229.99,"With the Blackout 300 XTD, Solinco takes the ...",100 in² / 645.16 cm²,27.5in / 69.85cm,11.3oz / 320g,12.8in / 32.51cm / 8 pts HL,328.0,...,,,,,,,,,,
393,https://img.tennis-warehouse.com/watermark/rs....,Solinco Blackout 300 XTD+,5.0,229.99,"With the Blackout 300 XTD+, Solinco gives adv...",100 in² / 645.16 cm²,28in / 71.12cm,11.3oz / 320g,12.8in / 32.51cm / 10 pts HL,333.0,...,,,,,,,,,,
394,https://img.tennis-warehouse.com/watermark/rs....,Lacoste L23,4.5,199.00,Introducing the Lascoste L23! Following on th...,100 in² / 645.16 cm²,27in / 68.58cm,11.1oz / 315g,12.9in / 32.77cm / 5 pts HL,318.0,...,,,,,,,,,,
395,https://img.tennis-warehouse.com/watermark/rs....,Lacoste L23L,5.0,199.00,Lacoste makes impressive updates to the L23L ...,100 in² / 645.16 cm²,27in / 68.58cm,10.2oz / 289g,13.4in / 34.04cm / 1 pts HL,310.0,...,,,,,,,,,Stiffness: 68,


In [9]:
racquets.racquet_name[0].split(" ")

['Babolat', 'Pure', 'Drive', '2025']

In [10]:
racquets["racquet_brand"] = racquets["racquet_name"].apply(lambda x: x.split(" ")[0])

In [11]:
racquets["racquet_brand"].unique()

array(['Babolat', 'Wilson', 'Head', 'Yonex', 'Prince', 'Ektelon',
       'Tecnifibre', 'Dunlop', 'Volkl', 'ProKennex', 'Solinco', 'Lacoste'],
      dtype=object)

In [12]:
_new_column_order = ["racquet_brand"] + [col for col in racquets.columns if col != "racquet_brand"]


In [13]:
racquets = racquets[_new_column_order]; racquets.columns

Index(['racquet_brand', 'racquet_img', 'racquet_name', 'racquet_rating',
       'racquet_price', 'racquet_desc', 'Head Size', 'Length', 'Strung Weight',
       'Balance', 'Swingweight', 'Stiffness', 'Beam Width', 'Composition',
       'Power Level', 'Stroke Style', 'Swing Speed', 'Racquet Colors',
       'Grip Type', 'String Pattern', 'String Tension', 'Balance:',
       'Swingweight:', 'Stiffness:', 'Beam Width:', 'Composition:',
       'Power Level:', 'Stroke Style:', 'Swing Speed:', 'Racquet Colors:',
       'Grip Type:', 'String Pattern:', 'String Tension:', 'Age', 'Weight',
       'Height', 'Other', 'Strung  Weight'],
      dtype='object')

In [14]:
junior_racquets = racquets[racquets["racquet_name"].str.contains("Junior")]

In [15]:
junior_racquets

Unnamed: 0,racquet_brand,racquet_img,racquet_name,racquet_rating,racquet_price,racquet_desc,Head Size,Length,Strung Weight,Balance,...,Swing Speed:,Racquet Colors:,Grip Type:,String Pattern:,String Tension:,Age,Weight,Height,Other,Strung Weight.1
42,Babolat,https://img.tennis-warehouse.com/watermark/rs....,"Babolat Pure Drive 26"" Junior",,129.00,Introducing the 2025 version of the Pure Driv...,100 in² / 645.16 cm²,26 in / 66.04 cm,,,...,,,,,,11+,,,,
43,Babolat,https://img.tennis-warehouse.com/watermark/rs....,"Babolat Pure Drive 26"" Junior Light Blue",5.0,129.00,"Offered in a light blue cosmetic, the Pure D...",100 in² / 645.16 cm²,26 in / 66.04 cm,,,...,,,,,,11+,,,,
44,Babolat,https://img.tennis-warehouse.com/watermark/rs....,"Babolat Pure Aero 25"" 2023 Junior",3.0,139.00,"Engineered for spin, power and precision, thi...",100 in² / 645.16 cm²,25 in / 63.50 cm,,,...,,,,,,9-10,,,,
45,Babolat,https://img.tennis-warehouse.com/watermark/rs....,"Babolat Pure Drive 25"" Junior",,129.00,Introducing the 2025 version of the Pure Driv...,98 in² / 632.26 cm²,25 in / 63.50 cm,,,...,,,,,,9-10,,,,
46,Babolat,https://img.tennis-warehouse.com/watermark/rs....,"Babolat Pure Drive 25"" Junior Light Blue",,129.00,"Offered in a light blue cosmetic, the Pure Dr...",98 in² / 632.26 cm²,25 in / 63.50 cm,,,...,,,,,,9-10,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
300,Tecnifibre,https://img.tennis-warehouse.com/watermark/rs....,"Tecnifibre TFight Tour 25"" Junior",,109.00,"With the TFight Tour 25"" Junior Racquet, Tecn...",100 in² / 645.16 cm²,25 in / 63.50 cm,,,...,,,,,,8-10,,,,
320,Dunlop,https://img.tennis-warehouse.com/watermark/rs....,"Dunlop FX Junior 21""",,24.99,For the junior player that is looking to add ...,,,,,...,,,,,,,,,,
321,Dunlop,https://img.tennis-warehouse.com/watermark/rs....,"Dunlop FX Junior 25""",,24.99,For the junior player that is looking to add ...,,,,,...,,,,,,,,,,
322,Dunlop,https://img.tennis-warehouse.com/watermark/rs....,"Dunlop FX Junior 23""",,24.99,For the junior player that is looking to add ...,,,,,...,,,,,,,,,,


In [16]:
racquets[racquets["racquet_brand"] == "ProKennex"]

Unnamed: 0,racquet_brand,racquet_img,racquet_name,racquet_rating,racquet_price,racquet_desc,Head Size,Length,Strung Weight,Balance,...,Swing Speed:,Racquet Colors:,Grip Type:,String Pattern:,String Tension:,Age,Weight,Height,Other,Strung Weight.1
359,ProKennex,https://img.tennis-warehouse.com/watermark/rs....,ProKennex Black Ace Pro,5.0,249.0,"With roots that stretch back to the '80s, the...",97 in² / 625.81 cm²,27in / 68.58cm,11.4oz / 323g,13in / 33.02cm / 4 pts HL,...,,,,,,,,,,
360,ProKennex,https://img.tennis-warehouse.com/watermark/rs....,ProKennex Black Ace (315),4.5,249.0,ProKennex adds another chapter to the heavies...,100 in² / 645.16 cm²,27in / 68.58cm,11.7oz / 332g,12.59in / 31.98cm / 7 pts HL,...,,,,,,,,,,
361,ProKennex,https://img.tennis-warehouse.com/watermark/rs....,ProKennex Black Ace (300),5.0,249.0,"With roots that go back 40 years, the Black A...",100 in² / 645.16 cm²,27in / 68.58cm,11.2oz / 318g,12.99in / 32.99cm / 4 pts HL,...,,,,,,,,,,
362,ProKennex,https://img.tennis-warehouse.com/watermark/rs....,ProKennex Black Ace (285),5.0,249.0,ProKennex adds another chapter to the lightes...,100 in² / 645.16 cm²,27in / 68.58cm,10.6oz / 301g,13.18in / 33.48cm / 3 pts HL,...,,,,,,,,,,
363,ProKennex,https://img.tennis-warehouse.com/watermark/rs....,ProKennex Black Ace 105,4.7,249.0,"With the Black Ace 105, ProKennex updates the...",105 in² / 677.42 cm²,27.25in / 69.22cm,11.2oz / 318g,13in / 33.02cm / 5 pts HL,...,,,,,,,,,,
364,ProKennex,https://img.tennis-warehouse.com/watermark/rs....,ProKennex Black Ace 300,4.9,199.0,With roots that extend back 40 years to the o...,100 in² / 645.16 cm²,27in / 68.58cm,11.2oz / 318g,12.99in / 32.99cm / 4 pts HL,...,,,,,,,,,,
365,ProKennex,https://img.tennis-warehouse.com/watermark/rs....,ProKennex Black Ace 315,5.0,199.0,With roots that extend back 40 years to the o...,,,,,...,,,,,,,,,,
366,ProKennex,https://img.tennis-warehouse.com/watermark/rs....,ProKennex Ki Q+ Tour,5.0,169.95,"For 2021, ProKennex updates the Ki Q+ Tour wi...",98 in² / 632.26 cm²,27in / 68.58cm,11.2oz / 318g,12.79in / 32.49cm / 6 pts HL,...,,,,,,,,,,
367,ProKennex,https://img.tennis-warehouse.com/watermark/rs....,ProKennex Ki Q+ Tour Pro 315,3.3,169.95,ProKennex updates the Ki Q+ Tour Pro 315 with...,98 in² / 632.26 cm²,27in / 68.58cm,11.7oz / 332g,12.59in / 31.98cm / 7 pts HL,...,,,,,,,,,,
368,ProKennex,https://img.tennis-warehouse.com/watermark/rs....,ProKennex Ki Q+ 5 Pro,5.0,179.95,Updated with a new cosmetic along with a more...,100 in² / 645.16 cm²,27in / 68.58cm,11.7oz / 332g,12.59in / 31.98cm / 7 pts HL,...,,,,,,,,,,


In [17]:
racquets_no_junior = racquets[~racquets["racquet_name"].str.contains("Junior")]

In [18]:
racquets_no_junior

Unnamed: 0,racquet_brand,racquet_img,racquet_name,racquet_rating,racquet_price,racquet_desc,Head Size,Length,Strung Weight,Balance,...,Swing Speed:,Racquet Colors:,Grip Type:,String Pattern:,String Tension:,Age,Weight,Height,Other,Strung Weight.1
0,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive 2025,4.8,289.00,The Pure Drive is popular for a reason. Boast...,100 in² / 645.16 cm²,27in / 68.58cm,11.2oz / 318g,12.99in / 32.99cm / 4 pts HL,...,,,,,,,,,,
1,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive 98 2025,4.5,299.00,Originally launched in 2019 under the VS moni...,98 in² / 632.26 cm²,27in / 68.58cm,11.4oz / 323g,13.18in / 33.48cm / 3 pts HL,...,,,,,,,,,,
2,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive 98 2-Pack 2025,5.0,579.00,This product is for 2 Pure Drive 98 racquets....,98 in² / 632.26 cm²,27in / 68.58cm,11.4oz / 323g,13.18in / 33.48cm / 3 pts HL,...,,,,,,,,,,
3,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive Plus 2025,5.0,289.00,Babolat adds another chapter to one of the ga...,100 in² / 645.16 cm²,27.5in / 69.85cm,11.2oz / 318g,13in / 33.02cm / 6 pts HL,...,,,,,,,,,,
4,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive Team 2025,5.0,269.00,The Pure Drive Team 2025 is defined by its us...,100 in² / 645.16 cm²,27in / 68.58cm,10.6oz / 301g,12.85in / 32.64cm / 5 pts HL,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
392,Solinco,https://img.tennis-warehouse.com/watermark/rs....,Solinco Blackout 300 XTD,4.8,229.99,"With the Blackout 300 XTD, Solinco takes the ...",100 in² / 645.16 cm²,27.5in / 69.85cm,11.3oz / 320g,12.8in / 32.51cm / 8 pts HL,...,,,,,,,,,,
393,Solinco,https://img.tennis-warehouse.com/watermark/rs....,Solinco Blackout 300 XTD+,5.0,229.99,"With the Blackout 300 XTD+, Solinco gives adv...",100 in² / 645.16 cm²,28in / 71.12cm,11.3oz / 320g,12.8in / 32.51cm / 10 pts HL,...,,,,,,,,,,
394,Lacoste,https://img.tennis-warehouse.com/watermark/rs....,Lacoste L23,4.5,199.00,Introducing the Lascoste L23! Following on th...,100 in² / 645.16 cm²,27in / 68.58cm,11.1oz / 315g,12.9in / 32.77cm / 5 pts HL,...,,,,,,,,,,
395,Lacoste,https://img.tennis-warehouse.com/watermark/rs....,Lacoste L23L,5.0,199.00,Lacoste makes impressive updates to the L23L ...,100 in² / 645.16 cm²,27in / 68.58cm,10.2oz / 289g,13.4in / 34.04cm / 1 pts HL,...,,,,,,,,,Stiffness: 68,


In [19]:
racquets.shape[0] - junior_racquets.shape[0]

324

In [20]:
racquets_no_junior[racquets_no_junior["Head Size"].isna() == True]

Unnamed: 0,racquet_brand,racquet_img,racquet_name,racquet_rating,racquet_price,racquet_desc,Head Size,Length,Strung Weight,Balance,...,Swing Speed:,Racquet Colors:,Grip Type:,String Pattern:,String Tension:,Age,Weight,Height,Other,Strung Weight.1
36,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat EVO Drive Lite W,5.0,129.0,Pre-strung for extra savings!Introducing the ...,,,,,...,,,,,,,,,,
39,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Boost Wimbledon,4.0,129.0,This racquet comes pre-strung for added conve...,,,,,...,,,,,,,,,,
124,Head,https://img.tennis-warehouse.com/watermark/rs....,Head Speed Pro,4.8,164.0,"Endorsed by Novak Djokovic, the Head Speed Pr...",,,,,...,,,,,,,,,,
125,Head,https://img.tennis-warehouse.com/watermark/rs....,Head Speed MP,4.6,164.0,The latest update to the Speed MP doubles dow...,,,,,...,,,,,,,,,,
136,Head,https://img.tennis-warehouse.com/watermark/rs....,Head IG Boom XCeed,4.0,109.0,Introducing the Head IG Boom XCeed Racquet! W...,,,,,...,,,,,,,,,,
152,Head,https://img.tennis-warehouse.com/watermark/rs....,Head Radical MP 2021,4.9,164.0,Head adds another chapter to one of their mos...,,,,,...,,,,,,,,,,
161,Head,https://img.tennis-warehouse.com/watermark/rs....,Head Extreme Elite 2024,5.0,174.0,"With the Extreme Elite 2024, Head puts the ea...",,,,,...,,,,,,,,,,
194,Yonex,https://img.tennis-warehouse.com/watermark/rs....,Yonex EZONE 115 (2025),5.0,230.0,Introducing the EZONE 115 (2025)! New to the ...,,,,,...,,,,,,,,,,
210,Yonex,https://img.tennis-warehouse.com/watermark/rs....,Yonex VCORE Feel Sand Beige,5.0,140.0,Pre-strung for extra savings.With the VCORE F...,,,,,...,,,,,,,,,,
211,Yonex,https://img.tennis-warehouse.com/watermark/rs....,Yonex VCORE Game Sand Beige,5.0,165.0,Pre-strung for extra savings!Introducing the ...,,,,,...,,,,,,,,,,


In [21]:
racquets[(racquets["racquet_brand"] == "Tecnifibre") & (racquets["racquet_name"].str.contains("TF40"))]

Unnamed: 0,racquet_brand,racquet_img,racquet_name,racquet_rating,racquet_price,racquet_desc,Head Size,Length,Strung Weight,Balance,...,Swing Speed:,Racquet Colors:,Grip Type:,String Pattern:,String Tension:,Age,Weight,Height,Other,Strung Weight.1
290,Tecnifibre,https://img.tennis-warehouse.com/watermark/rs....,Tecnifibre TF40 315g (16x19),5.0,279.0,"Updated with a new cosmetic, the TF40 (315g) ...",98 in² / 632.26 cm²,27in / 68.58cm,11.7oz / 332g,12.51in / 31.78cm / 8 pts HL,...,,,,,,,,,,
291,Tecnifibre,https://img.tennis-warehouse.com/watermark/rs....,Tecnifibre TF40 305g (16x19),4.8,279.0,Introducing the second generation of the TF40...,98 in² / 632.26 cm²,27in / 68.58cm,11.3oz / 320g,13.07in / 33.2cm / 3 pts HL,...,,,,,,,,,,
292,Tecnifibre,https://img.tennis-warehouse.com/watermark/rs....,Tecnifibre TF40 305g (18x20),4.4,279.0,"Boasting a new cosmetic for 2024, the TF40 (3...",98 in² / 632.26 cm²,27in / 68.58cm,11.4oz / 323g,13.12in / 33.32cm / 3 pts HL,...,,,,,,,,,,
293,Tecnifibre,https://img.tennis-warehouse.com/watermark/rs....,Tecnifibre TF40 290g (16x19),3.0,249.0,Introducing the TF40 290g (16x19)! With this ...,98 in² / 632.26 cm²,27in / 68.58cm,10.7oz / 303g,13.07in / 33.2cm / 3 pts HL,...,,,,,,,,,,
294,Tecnifibre,https://img.tennis-warehouse.com/watermark/rs....,Tecnifibre TF40 305 (18x20),4.7,119.0,Tecnifibre adds another chapter to the TF40 3...,98 in² / 632.26 cm²,27in / 68.58cm,11.4oz / 323g,13.12in / 33.32cm / 3 pts HL,...,,,,,,,,,,


In [22]:
racquets_no_junior["String Tension"].isna().sum() / racquets_no_junior.shape[0]

np.float64(0.05555555555555555)

In [23]:
columns_to_drop = []

for column in racquets_no_junior.columns:
    if racquets_no_junior[column].isna().sum() == 0:
        pass
    elif racquets_no_junior[column].isna().sum() / racquets_no_junior.shape[0] > 0.95:
        columns_to_drop.append(column)
        print(f"{column} is {(racquets_no_junior[column].isna().sum()/racquets_no_junior.shape[0]).round(2)*100}% NAs.\
              \n Drop this column.")
    else:
        print(f"{column} is {(racquets_no_junior[column].isna().sum()/racquets_no_junior.shape[0]).round(2)*100}% NAs")

print(f"Drop the following columns: {columns_to_drop}")

racquet_rating is 2.0% NAs
Head Size is 5.0% NAs
Length is 5.0% NAs
Strung Weight is 5.0% NAs
Balance is 5.0% NAs
Swingweight is 5.0% NAs
Stiffness is 5.0% NAs
Beam Width is 5.0% NAs
Composition is 5.0% NAs
Power Level is 5.0% NAs
Stroke Style is 5.0% NAs
Swing Speed is 5.0% NAs
Racquet Colors is 5.0% NAs
Grip Type is 5.0% NAs
String Pattern is 5.0% NAs
String Tension is 6.0% NAs
Balance: is 100.0% NAs.              
 Drop this column.
Swingweight: is 100.0% NAs.              
 Drop this column.
Stiffness: is 100.0% NAs.              
 Drop this column.
Beam Width: is 100.0% NAs.              
 Drop this column.
Composition: is 100.0% NAs.              
 Drop this column.
Power Level: is 100.0% NAs.              
 Drop this column.
Stroke Style: is 100.0% NAs.              
 Drop this column.
Swing Speed: is 100.0% NAs.              
 Drop this column.
Racquet Colors: is 100.0% NAs.              
 Drop this column.
Grip Type: is 100.0% NAs.              
 Drop this column.
String Patte

In [24]:
no_junior_no_na_racquets = racquets_no_junior.drop(columns = columns_to_drop)

In [25]:
no_junior_no_na_racquets

Unnamed: 0,racquet_brand,racquet_img,racquet_name,racquet_rating,racquet_price,racquet_desc,Head Size,Length,Strung Weight,Balance,...,Stiffness,Beam Width,Composition,Power Level,Stroke Style,Swing Speed,Racquet Colors,Grip Type,String Pattern,String Tension
0,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive 2025,4.8,289.00,The Pure Drive is popular for a reason. Boast...,100 in² / 645.16 cm²,27in / 68.58cm,11.2oz / 318g,12.99in / 32.99cm / 4 pts HL,...,69,23mm / 26mm / 23mm,Graphite,Low-Medium,Medium-Full,Medium-Fast,Blue,Babolat Syntec Pro,16 Mains / 19 CrossesMains skip,46-55 pounds
1,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive 98 2025,4.5,299.00,Originally launched in 2019 under the VS moni...,98 in² / 632.26 cm²,27in / 68.58cm,11.4oz / 323g,13.18in / 33.48cm / 3 pts HL,...,69,21mm / 23mm / 21mm,Graphite,Low-Medium,Medium-Full,Medium-Fast,Blue,Babolat Syntec Pro,16 Mains / 20 Crosses\n\n\nMains skip,46-55 pounds
2,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive 98 2-Pack 2025,5.0,579.00,This product is for 2 Pure Drive 98 racquets....,98 in² / 632.26 cm²,27in / 68.58cm,11.4oz / 323g,13.18in / 33.48cm / 3 pts HL,...,69,21mm / 23mm / 21mm,Graphite,Low-Medium,Medium-Full,Medium-Fast,Blue,Babolat Syntec Pro,16 Mains / 20 Crosses\n\n\nMains skip,46-55 pounds
3,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive Plus 2025,5.0,289.00,Babolat adds another chapter to one of the ga...,100 in² / 645.16 cm²,27.5in / 69.85cm,11.2oz / 318g,13in / 33.02cm / 6 pts HL,...,69,23mm / 26mm / 23mm,Graphite,Low-Medium,Medium-Full,Medium-Fast,Blue,Babolat Syntec Pro,16 Mains / 19 CrossesMains skip,46-55 pounds
4,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive Team 2025,5.0,269.00,The Pure Drive Team 2025 is defined by its us...,100 in² / 645.16 cm²,27in / 68.58cm,10.6oz / 301g,12.85in / 32.64cm / 5 pts HL,...,69,23mm / 26mm / 23mm,Graphite,Low-Medium,Medium-Full,Medium-Fast,Blue,Babolat Syntec Pro,16 Mains / 19 Crosses\n\n\nMains skip,44-53 pounds
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
392,Solinco,https://img.tennis-warehouse.com/watermark/rs....,Solinco Blackout 300 XTD,4.8,229.99,"With the Blackout 300 XTD, Solinco takes the ...",100 in² / 645.16 cm²,27.5in / 69.85cm,11.3oz / 320g,12.8in / 32.51cm / 8 pts HL,...,70,23.5mm / 26mm / 23mm,40T Carbon/Graphite,Low-Medium,Medium-Full,Medium-Fast,Black,Solinco Synthetic,16 Mains / 19 Crosses\n\n\nMains skip,50-60 pounds
393,Solinco,https://img.tennis-warehouse.com/watermark/rs....,Solinco Blackout 300 XTD+,5.0,229.99,"With the Blackout 300 XTD+, Solinco gives adv...",100 in² / 645.16 cm²,28in / 71.12cm,11.3oz / 320g,12.8in / 32.51cm / 10 pts HL,...,66,23.5mm / 26mm / 23mm,40T Carbon/Graphite,Low-Medium,Medium-Full,Medium-Fast,Black,Solinco Synthetic,16 Mains / 19 CrossesMains skip,50-60 pounds
394,Lacoste,https://img.tennis-warehouse.com/watermark/rs....,Lacoste L23,4.5,199.00,Introducing the Lascoste L23! Following on th...,100 in² / 645.16 cm²,27in / 68.58cm,11.1oz / 315g,12.9in / 32.77cm / 5 pts HL,...,69,23mm / 25mm / 23mm,Graphite,Low-Medium,Medium-Full,Medium-Fast,Green,Lacoste Synthetic,16 Mains / 19 Crosses\n\n\nMains skip,51-55 pounds
395,Lacoste,https://img.tennis-warehouse.com/watermark/rs....,Lacoste L23L,5.0,199.00,Lacoste makes impressive updates to the L23L ...,100 in² / 645.16 cm²,27in / 68.58cm,10.2oz / 289g,13.4in / 34.04cm / 1 pts HL,...,,23mm / 25mm / 23mm,Graphite,Low-Medium,Medium-Full,Medium-Fast,Green,Lacoste Synthetic,16 Mains / 19 Crosses\n\n\nMains skip,51-55 pounds


In [26]:
float(no_junior_no_na_racquets["Head Size"].str.replace("in²", "").str.replace("cm²", "").str.split("/")[0][0])


100.0

In [27]:
str_cleaned_racquets = no_junior_no_na_racquets.copy()

In [28]:
# str_cleaned_racquets["racquet_head_size_sq_in"] = str_cleaned_racquets["Head Size"].str.split("/").str[0]\
#     .str.replace("in²", "").str.replace("in", "").str.replace("sq", "").str.replace(".", "").astype(float)
    
#Rewrote the above line using regex with help from ChatGPT
str_cleaned_racquets["racquet_head_size_sq_in"] = (
    str_cleaned_racquets["Head Size"]\
    .str.extract(r"(\d+\.?\d*)\s*(?:in²|in|sq\s*in)")\
    .astype(float)
)

In [31]:
type(
    str_cleaned_racquets["Head Size"]\
    .str.extract(r"(\d+\.?\d*)\s*(?:in²|in|sq\s*in)")\
    .astype(float)
)

pandas.core.frame.DataFrame

In [29]:
str_cleaned_racquets[["Head Size","racquet_head_size_sq_in"]]



Unnamed: 0,Head Size,racquet_head_size_sq_in
0,100 in² / 645.16 cm²,100.0
1,98 in² / 632.26 cm²,98.0
2,98 in² / 632.26 cm²,98.0
3,100 in² / 645.16 cm²,100.0
4,100 in² / 645.16 cm²,100.0
...,...,...
392,100 in² / 645.16 cm²,100.0
393,100 in² / 645.16 cm²,100.0
394,100 in² / 645.16 cm²,100.0
395,100 in² / 645.16 cm²,100.0


In [32]:
# Testing reusability of regex code
str_cleaned_racquets["racquet_length_in"] = (
    str_cleaned_racquets["Length"]\
    .str.extract(r"(\d+\.?\d*)\s*(?:in²|in|sq\s*in)")\
    .astype(float)
)

In [33]:
str_cleaned_racquets["racquet_length_in"]

0      27.0
1      27.0
2      27.0
3      27.5
4      27.0
       ... 
392    27.5
393    28.0
394    27.0
395    27.0
396     NaN
Name: racquet_length_in, Length: 324, dtype: float64

In [None]:
str_cleaned_racquets

In [None]:
# Throws an error because of NAs
for row in str_cleaned_racquets["Beam Width"]:
    row.replace("mm", "")
    print(row)

In [None]:
#.str.replace("/", "").str.replace("mm", "").str.replace("  ", ",")

### Columns to process into quantitative features:
- Head Size - done
- Length - done
- Strung Weight - done
- Balance - done
    - Created two columns: racquet_balance_in and racquet_balance_HH_HL
- Stiffness - done
- Beam width - done
- String Pattern - done
- String Tension - done


In [None]:
str_columns = []

for col in str_cleaned_racquets.columns:
    if str_cleaned_racquets[col].dtype == "object" and "racquet_" not in col:
        str_columns.append(col)
    else:
        pass
    
str_columns 

#Keep Composition, Power Level, Stroke Style, Swing Speed, Racquet Colors, Grip Type

#Drop Head Size, Length, Strung Weight, Balance, Beam Width, String Pattern, String Tension

In [45]:
str_cleaned_racquets["racquet_strung_weight_oz"] = (
    str_cleaned_racquets["Strung Weight"]\
    .str.extract(r"(\d+\.?\d*)\s*")\
    .astype(float)
)

In [46]:
str_cleaned_racquets["racquet_strung_weight_oz"]

0      11.2
1      11.4
2      11.4
3      11.2
4      10.6
       ... 
392    11.3
393    11.3
394    11.1
395    10.2
396     NaN
Name: racquet_strung_weight_oz, Length: 324, dtype: float64

In [52]:
units = "oz | ounces"
regex_str = fr"(\d+\.?\d*)\s*(?:{units})"

str_cleaned_racquets["Strung Weight"]\
    .str.extract(regex_str)\
        .astype(float)

Unnamed: 0,0
0,11.2
1,11.4
2,11.4
3,11.2
4,10.6
...,...
392,11.3
393,11.3
394,11.1
395,10.2


In [None]:
    str_cleaned_racquets["Length"]\
    .str.extract(r"(\d+\.?\d*)\s*(?:in²|in|sq\s*in)")\
    .astype(float)

In [None]:
str_cleaned_racquets

In [None]:
str_cleaned_racquets["racquet_balance_in"] = (
    str_cleaned_racquets["Balance"]\
    .str.extract(r"(\d+(?:\.\d+)?)\s*in\b").
    astype(float)
)

In [53]:
#Made with help from ChatGPT

# Extract number and label separately
extracted = str_cleaned_racquets["Balance"].str.extract(
    r'(\d+(?:\.\d+)?)\s*(?:pts\s*)?(HL|HH|EB)\b'
)

# Rename columns for clarity
extracted.columns = ['value', 'label']

# Convert number to float
extracted['value'] = extracted['value'].astype(float)

# Apply sign logic: HL → positive, HH → negative, EB → 0
def apply_balance_sign(row):
    if row['label'] == 'HL':
        return row['value']
    elif row['label'] == 'HH':
        return -row['value']
    elif row['label'] == 'EB':
        return 0.0
    return None  # In case label is missing

# Assign the final calculated value to new column
str_cleaned_racquets['racquet_balance_HH_HL'] = extracted.apply(apply_balance_sign, axis=1)

In [63]:
type(extracted.iloc[0])

pandas.core.series.Series

In [57]:
str_cleaned_racquets["racquet_balance_HH_HL"]

0       4.0
1       3.0
2       3.0
3       6.0
4       5.0
       ... 
392     8.0
393    10.0
394     5.0
395     1.0
396     NaN
Name: racquet_balance_HH_HL, Length: 324, dtype: float64

In [38]:
str_cleaned_racquets["racquet_balance_in"]

0      12.99
1      13.18
2      13.18
3      13.00
4      12.85
       ...  
392    12.80
393    12.80
394    12.90
395    13.40
396      NaN
Name: racquet_balance_in, Length: 324, dtype: float64

In [None]:
str_cleaned_racquets['Stiffness'] = str_cleaned_racquets['Stiffness'].replace('N/A (very low)', np.nan)

str_cleaned_racquets["Stiffness"] = str_cleaned_racquets["Stiffness"].astype(float)

str_cleaned_racquets.rename(columns = {"Stiffness":"racquet_stiffness"}, inplace = True)

str_cleaned_racquets["racquet_stiffness"]


In [None]:
# Testing ChatGPT's code

str_cleaned_racquets["Beam Width"][0]

In [None]:
# Testing ChatGPT's code

(23 + 26 + 23)/3

In [None]:
# Testing ChatGPT's code
test_part = str_cleaned_racquets["Beam Width"][0].split("/")

test_number = [float(part.strip().replace("mm", "")) for part in test_part]

sum(test_number) / len(test_number)

In [None]:
# Since, in general, wider beams relate to stiffer and more powerful frames, 
# we can use the average beam measurement as a proxy for the multiple beam width measurements. 

# Written with help from ChatGPT
def average_beam_width(value):
    if isinstance(value, str):
        parts = value.split('/')
        numbers = []
        for part in parts:
            cleaned = part.strip().replace('mm', '')
            if cleaned:  # only proceed if not empty
                try:
                    numbers.append(float(cleaned))
                except ValueError:
                    # skip parts that can't be converted, or log here
                    pass
        if numbers:
            return sum(numbers) / len(numbers)
        else:
            return float('nan')  # no valid numbers found
    else:
        return float('nan')

# Apply the function to create a new column
str_cleaned_racquets["racquet_avg_beam_width"] = str_cleaned_racquets["Beam Width"].apply(average_beam_width)

str_cleaned_racquets[["Beam Width", "racquet_avg_beam_width"]]

In [81]:
# Written with help from ChatGPT

def extract_mains_crosses(value):
    mains = np.nan
    crosses = np.nan
    
    if isinstance(value, str) and value.strip():
        
        # Try to extract mains
        mains_match = re.search(r'(\d+)\s*Mains', value, re.IGNORECASE)
        crosses_match = re.search(r'(\d+)\s*Crosses', value, re.IGNORECASE)
        
        if mains_match:
            mains = float(mains_match.group(1))
        if crosses_match:
            crosses = float(crosses_match.group(1))
    
    return pd.Series([mains, crosses])

# Apply function and create two new columns
str_cleaned_racquets[['racquet_mains', 'racquet_crosses']] = str_cleaned_racquets['String Pattern'].apply(extract_mains_crosses)

str_cleaned_racquets[["String Pattern", "racquet_mains", "racquet_crosses"]]

Unnamed: 0,String Pattern,racquet_mains,racquet_crosses
0,16 Mains / 19 CrossesMains skip,16.0,19.0
1,16 Mains / 20 Crosses\n\n\nMains skip,16.0,20.0
2,16 Mains / 20 Crosses\n\n\nMains skip,16.0,20.0
3,16 Mains / 19 CrossesMains skip,16.0,19.0
4,16 Mains / 19 Crosses\n\n\nMains skip,16.0,19.0
...,...,...,...
392,16 Mains / 19 Crosses\n\n\nMains skip,16.0,19.0
393,16 Mains / 19 CrossesMains skip,16.0,19.0
394,16 Mains / 19 Crosses\n\n\nMains skip,16.0,19.0
395,16 Mains / 19 Crosses\n\n\nMains skip,16.0,19.0


In [80]:
str_cleaned_racquets["String Pattern"][0]

m_match = re.search(r'(\d+)\s*Mains', 
                    str_cleaned_racquets["String Pattern"][0], 
                    re.IGNORECASE)

m_match.group(1)

'16'

In [None]:
# Written with help from ChatGPT

def extract_tension_bounds(value):
    lower = np.nan
    upper = np.nan
    
    if isinstance(value, str) and value.strip():
        # Use regex to extract two numbers separated by '-'
        match = re.search(r'(\d+)\s*-\s*(\d+)', value)
        if match:
            lower = float(match.group(1))
            upper = float(match.group(2))
    
    return pd.Series([lower, upper])

# Apply function to create new columns
str_cleaned_racquets[['racquet_tension_lower', 'racquet_tension_upper']] = str_cleaned_racquets['String Tension'].apply(extract_tension_bounds)

str_cleaned_racquets[["String Tension", "racquet_tension_lower", "racquet_tension_upper"]]

In [None]:
str_cleaned_racquets

In [83]:
trimmed_racquets = str_cleaned_racquets.copy()

In [84]:
trimmed_racquets

Unnamed: 0,racquet_brand,racquet_img,racquet_name,racquet_rating,racquet_price,racquet_desc,Head Size,Length,Strung Weight,Balance,...,Grip Type,String Pattern,String Tension,racquet_head_size_sq_in,racquet_length_in,racquet_strung_weight_oz,racquet_balance_in,racquet_balance_HH_HL,racquet_mains,racquet_crosses
0,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive 2025,4.8,289.00,The Pure Drive is popular for a reason. Boast...,100 in² / 645.16 cm²,27in / 68.58cm,11.2oz / 318g,12.99in / 32.99cm / 4 pts HL,...,Babolat Syntec Pro,16 Mains / 19 CrossesMains skip,46-55 pounds,100.0,27.0,11.2,12.99,4.0,16.0,19.0
1,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive 98 2025,4.5,299.00,Originally launched in 2019 under the VS moni...,98 in² / 632.26 cm²,27in / 68.58cm,11.4oz / 323g,13.18in / 33.48cm / 3 pts HL,...,Babolat Syntec Pro,16 Mains / 20 Crosses\n\n\nMains skip,46-55 pounds,98.0,27.0,11.4,13.18,3.0,16.0,20.0
2,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive 98 2-Pack 2025,5.0,579.00,This product is for 2 Pure Drive 98 racquets....,98 in² / 632.26 cm²,27in / 68.58cm,11.4oz / 323g,13.18in / 33.48cm / 3 pts HL,...,Babolat Syntec Pro,16 Mains / 20 Crosses\n\n\nMains skip,46-55 pounds,98.0,27.0,11.4,13.18,3.0,16.0,20.0
3,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive Plus 2025,5.0,289.00,Babolat adds another chapter to one of the ga...,100 in² / 645.16 cm²,27.5in / 69.85cm,11.2oz / 318g,13in / 33.02cm / 6 pts HL,...,Babolat Syntec Pro,16 Mains / 19 CrossesMains skip,46-55 pounds,100.0,27.5,11.2,13.00,6.0,16.0,19.0
4,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive Team 2025,5.0,269.00,The Pure Drive Team 2025 is defined by its us...,100 in² / 645.16 cm²,27in / 68.58cm,10.6oz / 301g,12.85in / 32.64cm / 5 pts HL,...,Babolat Syntec Pro,16 Mains / 19 Crosses\n\n\nMains skip,44-53 pounds,100.0,27.0,10.6,12.85,5.0,16.0,19.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
392,Solinco,https://img.tennis-warehouse.com/watermark/rs....,Solinco Blackout 300 XTD,4.8,229.99,"With the Blackout 300 XTD, Solinco takes the ...",100 in² / 645.16 cm²,27.5in / 69.85cm,11.3oz / 320g,12.8in / 32.51cm / 8 pts HL,...,Solinco Synthetic,16 Mains / 19 Crosses\n\n\nMains skip,50-60 pounds,100.0,27.5,11.3,12.80,8.0,16.0,19.0
393,Solinco,https://img.tennis-warehouse.com/watermark/rs....,Solinco Blackout 300 XTD+,5.0,229.99,"With the Blackout 300 XTD+, Solinco gives adv...",100 in² / 645.16 cm²,28in / 71.12cm,11.3oz / 320g,12.8in / 32.51cm / 10 pts HL,...,Solinco Synthetic,16 Mains / 19 CrossesMains skip,50-60 pounds,100.0,28.0,11.3,12.80,10.0,16.0,19.0
394,Lacoste,https://img.tennis-warehouse.com/watermark/rs....,Lacoste L23,4.5,199.00,Introducing the Lascoste L23! Following on th...,100 in² / 645.16 cm²,27in / 68.58cm,11.1oz / 315g,12.9in / 32.77cm / 5 pts HL,...,Lacoste Synthetic,16 Mains / 19 Crosses\n\n\nMains skip,51-55 pounds,100.0,27.0,11.1,12.90,5.0,16.0,19.0
395,Lacoste,https://img.tennis-warehouse.com/watermark/rs....,Lacoste L23L,5.0,199.00,Lacoste makes impressive updates to the L23L ...,100 in² / 645.16 cm²,27in / 68.58cm,10.2oz / 289g,13.4in / 34.04cm / 1 pts HL,...,Lacoste Synthetic,16 Mains / 19 Crosses\n\n\nMains skip,51-55 pounds,100.0,27.0,10.2,13.40,1.0,16.0,19.0


In [85]:
#Drop Head Size, Length, Strung Weight, Balance, Beam Width, String Pattern, String Tension

trimmed_racquets.drop(columns = ["Head Size", "Length", "Strung Weight", "Balance", "Beam Width", "String Pattern", "String Tension"], inplace = True)

trimmed_racquets

Unnamed: 0,racquet_brand,racquet_img,racquet_name,racquet_rating,racquet_price,racquet_desc,Swingweight,Stiffness,Composition,Power Level,...,Swing Speed,Racquet Colors,Grip Type,racquet_head_size_sq_in,racquet_length_in,racquet_strung_weight_oz,racquet_balance_in,racquet_balance_HH_HL,racquet_mains,racquet_crosses
0,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive 2025,4.8,289.00,The Pure Drive is popular for a reason. Boast...,317.0,69,Graphite,Low-Medium,...,Medium-Fast,Blue,Babolat Syntec Pro,100.0,27.0,11.2,12.99,4.0,16.0,19.0
1,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive 98 2025,4.5,299.00,Originally launched in 2019 under the VS moni...,326.0,69,Graphite,Low-Medium,...,Medium-Fast,Blue,Babolat Syntec Pro,98.0,27.0,11.4,13.18,3.0,16.0,20.0
2,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive 98 2-Pack 2025,5.0,579.00,This product is for 2 Pure Drive 98 racquets....,323.0,69,Graphite,Low-Medium,...,Medium-Fast,Blue,Babolat Syntec Pro,98.0,27.0,11.4,13.18,3.0,16.0,20.0
3,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive Plus 2025,5.0,289.00,Babolat adds another chapter to one of the ga...,325.0,69,Graphite,Low-Medium,...,Medium-Fast,Blue,Babolat Syntec Pro,100.0,27.5,11.2,13.00,6.0,16.0,19.0
4,Babolat,https://img.tennis-warehouse.com/watermark/rs....,Babolat Pure Drive Team 2025,5.0,269.00,The Pure Drive Team 2025 is defined by its us...,308.0,69,Graphite,Low-Medium,...,Medium-Fast,Blue,Babolat Syntec Pro,100.0,27.0,10.6,12.85,5.0,16.0,19.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
392,Solinco,https://img.tennis-warehouse.com/watermark/rs....,Solinco Blackout 300 XTD,4.8,229.99,"With the Blackout 300 XTD, Solinco takes the ...",328.0,70,40T Carbon/Graphite,Low-Medium,...,Medium-Fast,Black,Solinco Synthetic,100.0,27.5,11.3,12.80,8.0,16.0,19.0
393,Solinco,https://img.tennis-warehouse.com/watermark/rs....,Solinco Blackout 300 XTD+,5.0,229.99,"With the Blackout 300 XTD+, Solinco gives adv...",333.0,66,40T Carbon/Graphite,Low-Medium,...,Medium-Fast,Black,Solinco Synthetic,100.0,28.0,11.3,12.80,10.0,16.0,19.0
394,Lacoste,https://img.tennis-warehouse.com/watermark/rs....,Lacoste L23,4.5,199.00,Introducing the Lascoste L23! Following on th...,318.0,69,Graphite,Low-Medium,...,Medium-Fast,Green,Lacoste Synthetic,100.0,27.0,11.1,12.90,5.0,16.0,19.0
395,Lacoste,https://img.tennis-warehouse.com/watermark/rs....,Lacoste L23L,5.0,199.00,Lacoste makes impressive updates to the L23L ...,310.0,,Graphite,Low-Medium,...,Medium-Fast,Green,Lacoste Synthetic,100.0,27.0,10.2,13.40,1.0,16.0,19.0


In [None]:
# Standardize column names
trimmed_racquets.rename(columns = {"Swingweight":"racquet_swingweight", 
                                   "Composition":"racquet_composition", 
                                   "Power Level":"racquet_power",
                                   "Stroke Style":"racquet_stroke_style", 
                                   "Swing Speed": "racquet_swing_speed", 
                                   "Racquet Colors": "racquet_colors",
                                   "Grip Type":"racquet_grip_type"}, inplace = True)

trimmed_racquets.columns

In [None]:
text_aggregation_df = trimmed_racquets.copy()

In [None]:
cols_to_drop = []

for col in text_aggregation_df.columns:
    if text_aggregation_df[col].dtype != "object":
        print(f"{col} is not a string. Added to cols_to_drop.")
        cols_to_drop.append(col)
    else:
        print(f"{col} is a string. Keep in df.")
        
cols_to_drop

In [None]:
text_aggregation_df.drop(columns = cols_to_drop, inplace = True)

text_aggregation_df.columns

In [None]:
#We don't need the racquet_brand or racquet_img columns either

text_aggregation_df.drop(columns = ["racquet_brand", "racquet_img"], inplace = True)

In [None]:
text_aggregation_df["combined_text"] = " "

In [None]:
print(text_aggregation_df[0:5])

In [None]:
def combine_text(row):
    fields = [
        f"Name: {row['racquet_name']}",
        f"Composition: {row['racquet_composition']}",
        f"Power: {row['racquet_power']}",
        f"Stroke Style: {row['racquet_stroke_style']}",
        f"Swing Speed: {row['racquet_swing_speed']}",
        f"Colors: {row['racquet_colors']}",
        f"Grip Type: {row['racquet_grip_type']}",
        f"Description: {row['racquet_desc']}"
    ]
    return "; ".join([str(f) for f in fields if pd.notnull(f)])

text_aggregation_df["combined_text"] = text_aggregation_df.apply(combine_text, axis=1)


In [None]:
text_aggregation_df["combined_text"] = text_aggregation_df["combined_text"].str.replace("\n", " ")

In [None]:
text_aggregation_df["combined_text"][0]