In [14]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import quote

def extract_all_syndrome_names(html):
    """Collect the text of every ``<td class="pad-right">`` cell in a parsed page.

    Args:
        html: An already-parsed document exposing ``find_all`` (the fetch helper
            in this notebook passes a ``BeautifulSoup`` object). No parsing is
            done here.

    Returns:
        list: One entry per matching cell, each obtained with
        ``get_text(strip=True)``, in the order ``find_all`` yields the cells.
        Nothing is filtered or de-duplicated.
    """
    cell_texts = []
    for cell in html.find_all('td', class_='pad-right'):
        cell_texts.append(cell.get_text(strip=True))
    return cell_texts


def fetch_and_parse_html(ucsc_coords: str, assembly="GRCh38", timeout=30):
    """Fetch the ClinVar search page for a coordinate string and extract its cell texts.

    The coordinate string is percent-encoded and sent as the ``term`` query
    parameter of ``https://www.ncbi.nlm.nih.gov/clinvar/``. The final URL is
    printed before the request is made.

    Args:
        ucsc_coords: Search term, e.g. ``"chr1:10636604-10796646"``.
        assembly: Accepted for backward compatibility; it is not used when
            building the request.
            NOTE(review): presumably intended to select the genome build --
            confirm how ClinVar expects that to be expressed before wiring it in.
        timeout: Seconds handed to ``requests.get`` so a stalled connection
            raises instead of blocking the kernel indefinitely.

    Returns:
        On HTTP 200, the list returned by ``extract_all_syndrome_names`` for the
        parsed page. For any other status, the string
        ``"Failed to retrieve content. Status code: <code>"``.

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.get`` (connection errors, timeouts, ...).
    """
    encoded_coords = quote(ucsc_coords)

    # Browser-like headers, mirroring the request this was modelled on.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:136.0) Gecko/20100101 Firefox/136.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-User": "?1",
        "Sec-GPC": "1"
    }

    url = f"https://www.ncbi.nlm.nih.gov/clinvar/?term={encoded_coords}"
    print(url)

    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        # Keep the historical contract: report the status as a string rather than raising.
        return f"Failed to retrieve content. Status code: {response.status_code}"

    soup = BeautifulSoup(response.text, 'html.parser')
    return extract_all_syndrome_names(soup)



In [15]:
# Query ClinVar for one chr1 interval using the default arguments and show the result.
# fetch_and_parse_html returns a list of cell texts on HTTP 200 and an error-message
# string otherwise, so `data` may be either type here.
data = fetch_and_parse_html("chr1:10636604-10796646")
print(data)

https://www.ncbi.nlm.nih.gov/clinvar/?term=chr1%3A10636604-10796646
['See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'Peroxisome biogenesis disorder, complementation group K', 'not provided', 'Peroxisome biogenesis disorder, complementation group K', 'Peroxisome biogenesis disorder, complementation group K+1more', 'Peroxisome biogenesis disorder, complementation group K', 'Peroxisome biogenesis disorder, complementation group K+1more', 'not provided+1more', 'Peroxisome biogenesis disorder, complementation group K+1more', 'Peroxisome biogenesis disorder, complementation group K', 'Peroxisome biogenesis disorder, complementation group K', 'Inborn genetic diseases', 'Peroxisome biogenesis disorder 13A (Zellweger)+1more', 'Inborn genetic diseases', 'Peroxisome biogenesis disorder, complementation group K', 'not provided+3 more', 'Pe

In [None]:
def extract_all_syndrome_names(html):
    """Collect the text of every ``<td class="pad-right">`` cell in a parsed page.

    This re-defines the function of the same name from an earlier cell; once
    this cell runs, this definition is the one in effect.

    Args:
        html: An already-parsed document exposing ``find_all`` (presumably a
            ``BeautifulSoup`` object -- confirm against the caller). No parsing
            is done here.

    Returns:
        list: One entry per matching cell, each obtained with
        ``get_text(strip=True)``, in the order ``find_all`` yields the cells.
        Nothing is filtered or de-duplicated.
    """
    cell_texts = []
    for cell in html.find_all('td', class_='pad-right'):
        cell_texts.append(cell.get_text(strip=True))
    return cell_texts

# Call the function to extract all syndrome names.
# NOTE(review): `html_content` is not assigned in any cell visible here -- presumably it
# is a parsed page left in the kernel by an earlier or deleted cell; confirm before
# relying on a fresh Restart & Run All.
syndrome_names = extract_all_syndrome_names(html_content)

# Print the result
print(syndrome_names)

['See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'See cases', 'not provided', 'Chromosome 1p36 deletion syndrome', 'not provided', 'not provided', 'not provided', 'Harel-Yoon syndrome', 'See cases', 'not provided', 'See cases', 'See cases', 'Neurodevelopmental disorder', 'not provided', 'Chromosome 1p36 deletion syndrome', 'Chromosome 1p36 deletion syndrome', 'See cases', 'See cases', 'See cases', 'See cases']


In [8]:
from collections import Counter

# Tally how many times each extracted cell text occurs.
# `syndrome_names` comes from the extraction cell above; the tally is over the raw
# strings, so any placeholder text in that list is counted like any other value.
count_syndromes = Counter(syndrome_names)
print(count_syndromes)

Counter({'See cases': 52, 'not provided': 6, 'Chromosome 1p36 deletion syndrome': 3, 'Harel-Yoon syndrome': 1, 'Neurodevelopmental disorder': 1})


In [None]:
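# NOTE(review): the line below is not Python -- it looks like a JavaScript console dump of
# the query object behind a ClinVar search, apparently kept for reference (confirm its source).
# Object { term: '("single nucleotide variant"[Type of variation]) AND (1[CHR] AND (1:112618928[CHRPOS37] AND 112525883:2000000000[CHRPOS37]))', assembly: "GCF_000001405.25" }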

In [None]:
import requests

def search_clinvar(term, timeout=30):
    """POST a ClinVar search form for ``term`` and return the response body.

    The form fields replicate a captured browser submission. ``requests``
    form-encodes the ``data`` dict itself, so every key and value below is
    written in its decoded form (``p$a``, not ``p%24a``); pre-encoded text
    would be encoded a second time on the wire.

    Args:
        term: Search term, sent both as ``term`` and as the
            ``EntrezSystem2.PEntrez.DbConnector.Term`` field.
        timeout: Seconds handed to ``requests.post`` so a stalled connection
            raises instead of blocking the kernel indefinitely.

    Returns:
        ``response.text`` on HTTP 200, otherwise the string
        ``"Error: <status code>"``.
        NOTE(review): the form asks for ``SendTo=File`` / ``FileFormat=tabular``,
        so the body is presumably a tabular export -- confirm.

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.post`` (connection errors, timeouts, ...).
    """
    url = "https://www.ncbi.nlm.nih.gov/clinvar"

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:136.0) Gecko/20100101 Firefox/136.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Content-Type": "application/x-www-form-urlencoded",
        "Upgrade-Insecure-Requests": "1",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "same-origin",
        "Sec-Fetch-User": "?1",
        "Sec-GPC": "1",
        "Priority": "u=0, i"
    }

    data = {
        "term": term,
        "EntrezSystem2.PEntrez.clinVar.Entrez_PageController.PreviousPageName": "results",
        # Decoded form of the captured "filters%3D17890"; requests encodes it when sending.
        "EntrezSystem2.PEntrez.clinVar.clinVar_Facets.FacetsUrlFrag": "filters=17890",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Facets.FacetSubmitted": "false",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.sPresentation": "tabular",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.sSort": "Position",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.SendTo": "File",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.FFormat": "tabular",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.FSort": "Position",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.FileFormat": "tabular",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.LastPresentation": "tabular",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.Presentation": "tabular",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.PageSize": "100",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.LastPageSize": "100",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.Sort": "Position",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.LastSort": "Position",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.FileSort": "Position",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.Format": "",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.LastFormat": "",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.PrevPageSize": "100",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.PrevPresentation": "tabular",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.PrevSort": "Position",
        "CollectionStartIndex": "1",
        # NOTE(review): "2" here and "20" for LastQueryKey below look like values captured
        # from one particular browser session -- confirm whether the server depends on them.
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Clinvar_Entrez_ResultsController.ResultCount": "2",
        "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Clinvar_Entrez_ResultsController.RunLastQuery": "",
        "EntrezSystem2.PEntrez.DbConnector.Db": "clinvar",
        "EntrezSystem2.PEntrez.DbConnector.LastDb": "clinvar",
        "EntrezSystem2.PEntrez.DbConnector.Term": term,
        "EntrezSystem2.PEntrez.DbConnector.LastTabCmd": "",
        "EntrezSystem2.PEntrez.DbConnector.LastQueryKey": "20",
        "EntrezSystem2.PEntrez.DbConnector.IdsFromResult": "",
        "EntrezSystem2.PEntrez.DbConnector.LastIdsFromResult": "",
        "EntrezSystem2.PEntrez.DbConnector.LinkName": "",
        "EntrezSystem2.PEntrez.DbConnector.LinkReadableName": "",
        "EntrezSystem2.PEntrez.DbConnector.LinkSrcDb": "",
        "EntrezSystem2.PEntrez.DbConnector.Cmd": "file",
        "EntrezSystem2.PEntrez.DbConnector.TabCmd": "",
        "EntrezSystem2.PEntrez.DbConnector.QueryKey": "",
        # Field names are "p$a" / "p$l" / "p$st"; the captured "%24" was the URL-encoded "$"
        # and would have been sent as "p%2524a" etc. after requests encoded it again.
        "p$a": "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.SendToSubmit",
        "p$l": "EntrezSystem2",
        "p$st": "clinvar"
    }

    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.post(url, headers=headers, data=data, timeout=timeout)

    if response.status_code != 200:
        # Keep the historical contract: report the status as a string rather than raising.
        return f"Error: {response.status_code}"

    return response.text

# Example usage: run the search for one chr1 interval and dump whatever came back.
# `result` is the raw response body on HTTP 200, otherwise an "Error: <status>" string.
search_term = "chr1:112525883-112618928"
result = search_clinvar(search_term)
print(result)
