In [55]:
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [56]:
# Root of the IBM site; the scraping cells prefix it onto the relative hrefs they find.
base_url = 'https://www.ibm.com'

In [73]:
# Fetch the IBM Cloud products page that the category and service bands are parsed from.
# A timeout keeps a stalled connection from hanging the kernel indefinitely.
res = requests.get(f'{base_url}/cloud/products', timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page into an empty result.
res.raise_for_status()
base_soup = BeautifulSoup(res.text, 'html.parser')

In [74]:
# Collect every `ibm-band` div that carries an id, then pair adjacent bands as
# (band 0, band 1), (band 2, band 3), ... The scraping loop unpacks each pair as
# (category band, services band). `zip` stops at the shorter slice, so a trailing
# unpaired band is dropped.
band_soups = base_soup.find_all('div', {'class': 'ibm-band'}, id=True)
cat_services_mapping_soups = list(zip(band_soups[::2], band_soups[1::2]))

In [88]:
def _absolute_url(href):
    """Return `href` unchanged if it already starts with 'http', else prefix it with `base_url`."""
    return href if href.startswith('http') else f"{base_url}{href}"


# One dict per scraped service; converted to a DataFrame at the end of the cell.
ibm_services = []

for cat_soup, services_soup in cat_services_mapping_soups:
    # Guard each lookup: a band without a heading or paragraph would otherwise
    # raise AttributeError on None and abort the whole scrape.
    cat_name_soup = cat_soup.find('h2')
    cat_name = cat_name_soup.text.strip() if cat_name_soup is not None else ''
    cat_desc_soup = cat_soup.find('p')
    cat_description = cat_desc_soup.text.strip() if cat_desc_soup is not None else ''
    cat_link_soup = cat_desc_soup.find('a') if cat_desc_soup is not None else None

    # Reset for every category so one without a link never inherits the link of
    # the previous category (or hits a NameError on the very first category).
    cat_link = None
    if cat_link_soup is not None:
        cat_href = cat_link_soup.get('href')
        if cat_href:
            cat_link = _absolute_url(cat_href)
        # Cut the description off where the link text begins, so the link text
        # is not part of the stored description. Skip when the link text is
        # empty: index('') is 0 and would wipe the whole description.
        link_text = cat_link_soup.text.strip()
        if link_text:
            cat_description = cat_description[:cat_description.index(link_text)]

    print('Getting service metadata from IBM category: ', cat_name)

    for card_soup in services_soup.find_all('div', {'class': 'ibm-card'}):
        service_link_soup = card_soup.find('a')
        if service_link_soup is None or not service_link_soup.get('href'):
            # Card without a usable link: there is no service page to fetch.
            continue
        service_name = service_link_soup.text.strip()

        href = service_link_soup['href']
        if href.startswith('http'):
            # Log services whose link is already absolute.
            print(href)
        service_link = _absolute_url(href)

        try:
            service_res = requests.get(service_link, timeout=30)
        except requests.RequestException as err:
            # Best-effort scrape: report the failure and move on to the next card.
            print('Skipping', service_link, '-', err)
            continue
        service_page_soup = BeautifulSoup(service_res.text, 'html.parser')

        try:
            service_short_desc = service_page_soup.find('meta', {'name': 'description'})['content'].strip()
        except (TypeError, KeyError, AttributeError):
            # No description meta tag (None is not subscriptable) or no usable
            # `content` attribute: the service is skipped entirely.
            continue

        try:
            # Keep only the last blank-line-separated chunk of the overview section.
            service_long_desc = service_page_soup.find(
                'section', {'class': 'product-overview-summary-section'}
            ).text.strip().split('\n\n')[-1]
        except AttributeError:
            # Page has no overview section: fall back to the short description.
            service_long_desc = service_short_desc

        ibm_services.append({
            'category_name': cat_name,
            'category_description': cat_description,
            'category_link': cat_link,
            'name': service_name,
            'short_description': service_short_desc,
            'long_description': service_long_desc,
            'uri': service_link
        })

ibm_df = pd.DataFrame(ibm_services)

Getting service metadata from IBM category:  Compute
Getting service metadata from IBM category:  Network
Getting service metadata from IBM category:  Storage
Getting service metadata from IBM category:  Management
https://console.bluemix.net/catalog/services/availability-monitoring
https://console.bluemix.net/catalog/services/cloud-automation-manager
https://console.bluemix.net/catalog/services/workload-scheduler
https://console.bluemix.net/docs/services/Monitoring-with-Sysdig/
Getting service metadata from IBM category:  Security
Getting service metadata from IBM category:  Databases
Getting service metadata from IBM category:  Analytics
Getting service metadata from IBM category:  AI
https://www.ibm.com/watson/ibm-salesforce/
https://www.ibm.com/cloud/watson-knowledge-studio
Getting service metadata from IBM category:  IoT
https://console.bluemix.net/catalog/services/weather-company-data
Getting service metadata from IBM category:  Mobile
Getting service metadata from IBM category: 

In [89]:
# Preview the first rows of the scraped services table.
ibm_df.head()

Unnamed: 0,category_description,category_link,category_name,long_description,name,short_description,uri
0,"From bare metal servers to serverless compute,...",https://www.ibm.com/cloud/compute,Compute,Bare metal servers provide customers sole acc...,Bare metal servers,"Bare metal servers are dedicated, IBM high-per...",https://www.ibm.com/cloud/bare-metal-servers
1,"From bare metal servers to serverless compute,...",https://www.ibm.com/cloud/compute,Compute,"When you create a virtual server, you can choo...",Cloud Virtual Servers,IBM Cloud Virtual Servers are public and dedic...,https://www.ibm.com/cloud/virtual-servers
2,"From bare metal servers to serverless compute,...",https://www.ibm.com/cloud/compute,Compute,You know your mission-critical storage needs ...,Mass Storage Servers,Build your own SAN/NAS environments with the c...,https://www.ibm.com/cloud/bare-metal-servers/m...
3,"From bare metal servers to serverless compute,...",https://www.ibm.com/cloud/compute,Compute,IBM bare metal servers are certified by SAP f...,SAP-Certified Infrastructure,IBM IaaS for SAP offers some of the largest an...,https://www.ibm.com/cloud/sap/certified-infras...
4,"From bare metal servers to serverless compute,...",https://www.ibm.com/cloud/compute,Compute,Store and distribute container images in a fu...,Container Registry,IBM Container Registry enables you to store an...,https://www.ibm.com/cloud/container-registry


In [4]:
from enum import Enum
class Cloud(Enum):
    """Cloud providers; each member's value is the provider name as an upper-case string."""
    AWS = 'AWS'
    AZURE = 'AZURE'
    DIGITALOCEAN = 'DIGITALOCEAN'
    GOOGLE = 'GOOGLE'
    IBM = 'IBM'
    ORACLE = 'ORACLE'
    

In [9]:
# Iterating an Enum class yields its members in definition order.
list(Cloud)

[<Cloud.AWS: 'AWS'>,
 <Cloud.AZURE: 'AZURE'>,
 <Cloud.DIGITALOCEAN: 'DIGITALOCEAN'>,
 <Cloud.GOOGLE: 'GOOGLE'>,
 <Cloud.IBM: 'IBM'>,
 <Cloud.ORACLE: 'ORACLE'>]