In [6]:
import requests
import pandas as pd
import re
from bs4 import BeautifulSoup

url = 'https://www.bestbuy.com/site/mobile-cell-phones/all-cell-phones/pcmcat1625163553254.c?id=pcmcat1625163553254'
headers = {'User-Agent': 'Mozilla/5.0'}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')

links = soup.find_all('h4', class_='sku-title')
phones = soup.find_all('div', class_='c-ratings-reviews')

data = []
for link, phone in zip(links, phones):
    title = link.get_text(strip=True)
    href = link.find('a')['href']
    reviews_info = phone.find('p').get_text(strip=True)

    rating_match = re.search(r'Rating (\d+\.\d+)', reviews_info)
    rating = rating_match.group(1) if rating_match else None

    reviews_match = re.search(r'(\d+) reviews?$', reviews_info)
    reviews = reviews_match.group(1) if reviews_match else None

    brand = title.split('-')[0].strip()
    product_title = '-'.join(title.split('-')[1:]).strip()

    data.append({'Brand': brand, 'Title': product_title, 'Link': href, 'Rating': rating, 'Reviews': reviews})

df = pd.DataFrame(data)

# Save the DataFrame to a CSV file
df.to_csv('mobile_phones_data.csv', index=False)


In [7]:
df

Unnamed: 0,Brand,Title,Link,Rating,Reviews
0,Tracfone,Samsung Galaxy A12 32GB Prepaid - Black,/site/tracfone-samsung-galaxy-a12-32gb-prepaid...,4.6,41
1,Motorola,Moto G 5G 2023 128GB (Unlocked) - Ink Blue,/site/motorola-moto-g-5g-2023-128gb-unlocked-i...,3.7,11
2,Samsung,Galaxy S22 Ultra 128GB - Phantom Black (T-Mobile),/site/samsung-galaxy-s22-ultra-128gb-phantom-b...,4.7,54
3,Google,Pixel 6a 128GB (Unlocked) - Charcoal,/site/google-pixel-6a-128gb-unlocked-charcoal/...,4.4,1316
4,Google,Pixel 7 128GB (Unlocked) - Obsidian,/site/google-pixel-7-128gb-unlocked-obsidian/6...,4.6,1378
5,Lively™,Jitterbug Smart3 Smartphone for Seniors - Black,/site/lively-jitterbug-smart3-smartphone-for-s...,4.2,925
6,Apple,Pre-Owned iPhone XR 64GB (Unlocked) - Black,/site/apple-pre-owned-iphone-xr-64gb-unlocked-...,4.2,1002
7,Apple,Pre-Owned iPhone SE (2020) 64GB (Unlocked) - B...,/site/apple-pre-owned-iphone-se-2020-64gb-unlo...,4.1,220
8,Apple,Pre-Owned iPhone 11 64GB (Unlocked) - Black,/site/apple-pre-owned-iphone-11-64gb-unlocked-...,4.1,308
9,Google,Pixel 7a 5G 128GB (Unlocked) - Charcoal,/site/google-pixel-7a-5g-128gb-unlocked-charco...,4.6,198


In [8]:
# Summary statistics
print(df.describe())


         Brand                                    Title  \
count       18                                       18   
unique       7                                       18   
top     Google  Samsung Galaxy A12 32GB Prepaid - Black   
freq         5                                        1   

                                                     Link Rating Reviews  
count                                                  18     18      18  
unique                                                 18     10      18  
top     /site/tracfone-samsung-galaxy-a12-32gb-prepaid...    4.6      41  
freq                                                    1      5       1  


In [None]:
import matplotlib.pyplot as plt

# Plot the distribution of ratings
plt.figure(figsize=(8, 6))
plt.hist(df['Rating'], bins=10, edgecolor='black')
plt.xlabel('Rating')
plt.ylabel('Frequency')
plt.title('Distribution of Ratings')
plt.show()