<a href="https://colab.research.google.com/github/ryan-saloma/bike-market-summit/blob/main/Bike_Market_Summit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
%pip install -U googlemaps

import googlemaps as gm
import requests
import pandas as pd
import plotly
import plotly.express as px

# Google Maps client; its key is reused for the geocoding request and the
# client itself for the Places search further down.
# NOTE(review): 'XXXXXXXXXX' looks like a redacted placeholder — supply a real
# API key before running and keep it out of version control.
gmaps = gm.Client(key='XXXXXXXXXX')

# First get location of Summit, NJ
def get_coordinates_from_address(address, api_key, timeout=10):
    """Geocode an address with the Google Geocoding API.

    Args:
        address: Free-form address string to look up.
        api_key: API key sent as the ``key`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A ``(lat, lng)`` tuple taken from the first geocoding result, or
        ``None`` when the request fails, the server answers with a non-200
        status, or the response contains no results. A short message is
        printed in each failure case.
    """
    url = 'https://maps.googleapis.com/maps/api/geocode/json'
    params = {
        'address': address,
        'key': api_key,
    }

    try:
        # Without a timeout, requests can block forever on a stalled connection.
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Report network failures the same way as HTTP errors: print, return None.
        print(f"Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    results = response.json().get('results')
    if not results:
        print("No results found for the given address.")
        return None

    location = results[0]['geometry']['location']
    return location['lat'], location['lng']

# Geocode the store address; the resulting coordinates are the center of the
# competitor search below.
summit_trek_coords = get_coordinates_from_address(
    '348 Springfield Ave, Summit, NJ 07901',
    gmaps.key,
)

# How far would someone travel for a bike?
# According to a bike forum, some people are willing to drive 2+ hours
# source: https://www.bikeforums.net/road-cycling/947577-how-far-you-willing-drive-buy-new-bike.html
#
# According to one study, people typically don't travel more than 20 min
# for routine purchases. This suggests the market for bikes and the market for
# bike accessories are different, as people will travel further for expensive
# purchases and unique experiences (classes, etc.)
#
# source: https://blog.accessdevelopment.com/research-how-far-will-consumers-travel-to-make-routine-purchases
#
# TODO: work the other direction and figure out how large an area you'd need to
# reach to meet sales goals

# Search Google Places for bicycle stores around the geocoded coordinates.
bike_stores = gmaps.places(
    query='bike store',
    location=summit_trek_coords,
    radius=10000,
    type='bicycle_store',
)

# The 'results' entry of the response is a list with one dict per store
results = bike_stores['results']

# NOTE(review): keys_of_interest is defined here but not used anywhere below
keys_of_interest = ['name', 'rating', 'user_ratings_total', 'geometry']
stores_df = pd.DataFrame(results)
stores_df

# Build a tidy table of competitors: name, position and rating information
store_locations = stores_df['geometry'].map(lambda geometry: geometry['location'])
coords_competition = pd.DataFrame({
    'Name': stores_df['name'],
    'Latitude': store_locations.map(lambda location: location['lat']),
    'Longitude': store_locations.map(lambda location: location['lng']),
    'Rating': stores_df['rating'],
    'Number of Ratings': stores_df['user_ratings_total'],
})
coords_competition

# Drop every store that is missing a value in any column
coords_competition.dropna(inplace=True)

# Two-stop color scale: lowest rating in orange, highest in red
color_scale = [(0, 'orange'), (1, 'red')]

# Marker color encodes the rating, marker size the number of ratings
fig = px.scatter_mapbox(
    coords_competition,
    lat="Latitude",
    lon="Longitude",
    hover_name="Name",
    hover_data=["Name", "Rating"],
    color="Rating",
    color_continuous_scale=color_scale,
    size="Number of Ratings",
    zoom=8,
    height=800,
    width=800,
)

fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()
fig.write_html("competition_map.html")


## Market Analysis

Sources:
- https://www.businessnewsdaily.com/15751-conduct-market-analysis.html
- https://www.sba.gov/business-guide/plan-your-business/market-research-competitive-analysis

- Who are my potential customers?
- What are my customers' buying habits?
- How large is my target market?
- How much are customers willing to pay for my product?
- Who are my main competitors?
- What are my competitors' strengths and weaknesses?

To-Do List
- List products sold at Trek Store, list products made by Trek, disjoint set
- Get reviews from competitors, create word cloud

In [99]:
%pip install selenium
%pip install bs4

def extract_links_from_page(driver):
    """Return the ``href`` of every anchor on the page loaded in *driver*.

    Args:
        driver: Selenium WebDriver positioned on the page to scan.

    Returns:
        A list with the ``href`` value of each ``<a>`` element, in the order
        ``find_elements`` returned them. Anchors whose ``href`` is missing
        or empty are skipped.
    """
    anchors = driver.find_elements(By.TAG_NAME, 'a')
    # Read each attribute exactly once; get_attribute has to query the
    # browser, so asking twice per anchor doubles the work.
    hrefs = (anchor.get_attribute('href') for anchor in anchors)
    return [href for href in hrefs if href]

def get_product_name_from_page(driver):
    """Collect the product title(s) shown on the page loaded in *driver*.

    Args:
        driver: Selenium WebDriver positioned on a product page.

    Returns:
        A list with the text of every element whose class is
        ``buying-zone__title``. The list is empty (and a message is printed)
        when no such element exists or when Selenium fails to query the page.
    """
    # Local import keeps the function usable regardless of cell/import order.
    from selenium.common.exceptions import WebDriverException

    try:
        # find_elements (plural) returns a list. find_element returns a single
        # WebElement, which is not iterable, so looping over it always failed.
        product_elements = driver.find_elements(By.CLASS_NAME, 'buying-zone__title')
        product_names = [element.text for element in product_elements]
    except WebDriverException:
        # Only Selenium failures are treated as "nothing found"; programming
        # errors are no longer hidden by a bare except.
        print("No product names found on this page.")
        return []

    if not product_names:
        print("No product names found on this page.")
    return product_names

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Headless Chrome session. The '--headless' argument is sufficient on its own,
# so the deprecated `headless` attribute is not set as well.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
# Pass the options by keyword: the first positional parameter of
# webdriver.Chrome has not always been `options` across Selenium releases.
driver = webdriver.Chrome(options=chrome_options)
driver.get('https://www.trekbikes.com/us/en_US/bikes/c/B100/')
all_links = []

# Walk the paginated listing, collecting every link on each page.
# This doesn't always work as expected
# Expect ~2200 links
# NOTE(review): after clicking "Next" the loop extracts links again right away,
# without waiting for the new page to render — possibly why results vary.
while True:
    # Extract links from the current page
    all_links.extend(extract_links_from_page(driver))

    try:
        # Wait (up to 10 s) for the "Next" button to be clickable
        next_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, 'search-page-next'))
        )

        # get_attribute returns None when the element has no class attribute
        button_classes = next_button.get_attribute('class') or ''
        if 'disabled' in button_classes:
            print('Next button is disabled')
            break
        next_button.click()
    except WebDriverException:
        # Covers the wait timing out as well as click failures. Unlike a bare
        # except, it lets KeyboardInterrupt and programming errors propagate.
        print('No next button found')
        break

unique_links = set(all_links)
# De-duplicate in first-seen order: set iteration order is arbitrary, which
# would make positions in links_series (and the CSV) differ between runs.
links_series = pd.Series(list(dict.fromkeys(all_links)))
links_series.to_csv('links_to_bikes.csv')


# Name: <h1 qaid="product-name" class="buying-zone__title"></h1>
# Price: <span qaid="actual-price" class="actual-price"></span>
# Description: <p qaid="product-positioning-statement"></p>
# Rating: <div aria-hidden="true" class="pr-snippet-rating-decimal">0.0</div>
# <a href=""" class="pr-snippet-write-review-link pr-underline"
# rel="nofollow">Be the first to write a review!</a>
# Model No.: <span qaid="product-sku" class="buying-zone__code text-xsmall-xstrong">Model 5280576</span>

# <div class="pdp-product-attributes-container" render-stock-status="true">

# Colors: <div class="attribute-color"> ...
# <label data-v-6c68cf30="" qaid="color-swatch-label-blue"
# for="swatches-swatch-blue" title="blue"><span data-v-6c68cf30=""
# class="sr-only">blue swatch</span>

# Size: <button data-v-717a25be="" data-v-c852d3ce=""
# data-v-af68ddea="" type="button" qaid="product-attribute-button-bike_sizeframe_47"
# class="product-attribute-btn button button--secondary button--sm selected unavailable"
# attribute="[object Object],[object Object],[object Object],[object Object],[object Object],
# [object Object],[object Object],[object Object]"><!----> <span data-v-717a25be="">
# 47 </span></button>

# Visit each collected link and record the product title(s) found there.
from selenium.common.exceptions import WebDriverException

product_list = []
# NOTE(review): the first 74 links are skipped; the reason is not recorded
# here — confirm the offset is still wanted.
reduced_series = links_series.iloc[74:]
for url in reduced_series:
    try:
        driver.get(url)
        product = get_product_name_from_page(driver)
    except WebDriverException as e:
        # Selenium raises WebDriverException subclasses, never
        # requests.RequestException, so this is what a failed navigation
        # looks like. Record it and carry on with the next URL.
        print(f"Request failed for URL {url}: {e}")
        product_list.append("Request failed")
        continue

    # Exactly one entry per URL keeps product_list aligned with reduced_series
    product_list.append(product if product else "No title found")

# Print the extracted product titles
for product in product_list:
    print(product)

tmp = pd.Series(product_list)
# NOTE(review): this filtered view is not assigned, so it does not change what
# is written to Bikes.csv below — the placeholder rows are still saved.
tmp[tmp != 'No title found']
tmp.to_csv('Bikes.csv')
