In [1]:
import requests
from bs4 import BeautifulSoup
import pickle
import re

In [2]:
# Scrapes transcript data 
def extract_claims_text(url):
    response = requests.get(url)
    if response.status_code != 200:
        print("Failed to retrieve data from:", url)
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    claims_elements = soup.find_all('div', {'class': 'claim-text'})
    claims_texts = [claim_element.get_text() for claim_element in claims_elements]
    combined_text = ' '.join(claims_texts)
    
    # Use regex to split claims based on the numbered pattern
    claims = re.split(r'\b\d+\.\s+', combined_text)
    
    # Remove empty strings from the list
    claims = [claim.strip() for claim in claims if claim.strip()]
    
    return claims
    #return claims_texts

In [3]:
patent_urls = [
    'https://patents.google.com/patent/GB2478972A/en?q=(phone)&oq=phone',
    'https://patents.google.com/patent/US9634864B2/en?oq=US9634864B2',
    'https://patents.google.com/patent/US9980046B2/en?oq=US9980046B2'
]

In [4]:
all_claims = {}
for i, url in enumerate(patent_urls, start=1):
    claims = extract_claims_text(url)
    all_claims[i] = claims

In [5]:
all_claims

{1: ['CLAIMS1. A wireless telephone apparatus comprising: a handset; an onioff-hook switch; a wireless communications module for establishing first and second cellular telephone calls via a base station; and means for generating an explicit call transfer command for sending to the base station in response to activation of the on-hook switch when the first and second wireless calls are established through the apparatus.',
  'The apparatus of claim 1, ftirther comprising a body having a cradle for the handset, wherein the onloff hook switch operates in response to placing the handset in the cradle.',
  'The apparatus of claim 1, 2 or 3, ftirther comprising: call receiving means for receiving a first call from a calling party; call initiating means for entering a call initiation mode, in response to activation of a first predetermined button, for initiating a second call; and transfer means for putting the first call on hold, initiating the second call, and toggling, in response to activa

In [6]:
with open('all_claims.pkl', 'wb') as f:
    pickle.dump(all_claims, f)