# Scrape the connectivity information from BAMS

This script queries the BAMS website (https://bams2.bams1.org/search/connections/outputs/ and https://bams2.bams1.org/search/connections/inputs/) to load the tables with connectivity information for all basal-ganglia-related brain regions. The downloaded tables are stored in the folder `Data/csvs/bams2_connectivity`.

NB: It takes about 20 minutes to finish

In [3]:
from bs4 import BeautifulSoup
import requests
import re
import csv

# define helper methods

def write_stuff(url, replace_str, numbers, output_file):
    """Download every page of a paginated URL and concatenate them in a file.

    Args:
        url: URL template; it must contain ``replace_str`` at the position
            where the page number belongs.
        replace_str: Placeholder substring of ``url`` that is substituted
            with each page number in turn.
        numbers: Number of pages to fetch; pages ``1..numbers`` (inclusive)
            are requested in order.
        output_file: Path of the file receiving the raw response bodies,
            back to back. Existing content is discarded, also when
            ``numbers`` is 0.
    """
    print("Getting", url)
    # 'wb' truncates the file once; all pages are then written through the
    # same handle instead of reopening the file for every page.
    with open(output_file, 'wb') as file:
        for i in range(1, numbers + 1):
            print("."*i)
            # Substitute into the untouched template on every iteration.
            # Overwriting `url` itself would consume the placeholder on the
            # first pass, so all later requests would fetch page 1 again.
            page_url = url.replace(replace_str, str(i))
            res = requests.get(page_url)
            file.write(res.content)

def write_to_csv(input_html, output_csv):
    """Convert the tables of several concatenated HTML pages into one CSV.

    ``input_html`` is expected to hold one or more HTML documents written
    back to back; they are separated again by splitting on ``<!DOCTYPE``.
    The first ``<table>`` of every document is read. The ``<th>`` cells of
    the very first table row encountered become the CSV header, and every
    row that has ``<td>`` cells becomes a data row.

    Args:
        input_html: Path of the file with the concatenated HTML documents.
        output_csv: Path of the CSV file to create (overwritten if present).
    """
    with open(input_html, 'r') as file:
        # Drop the empty chunk that precedes the first "<!DOCTYPE".
        pages = [page for page in file.read().split("<!DOCTYPE") if page]

    add_header = True
    output_rows = []
    for page in pages:
        # Name the parser explicitly: without it bs4 picks whichever parser
        # happens to be installed and emits a warning.
        soup = BeautifulSoup(page, "html.parser")
        table = soup.find("table")
        if table is None:
            # A page without a table (e.g. an error page) has no rows to
            # contribute; skip it instead of failing on None.
            continue

        for table_row in table.find_all('tr'):
            if add_header:
                # Only the first row of the first table supplies the header.
                output_rows.append([
                    header.text.replace("\n", "").strip()
                    for header in table_row.find_all('th')
                ])
                add_header = False

            output_row = []
            for column in table_row.find_all('td'):
                column_text = column.text.replace("\n", "").strip()
                # Collapse runs of spaces left over from the HTML layout.
                output_row.append(re.sub(' +', ' ', column_text))
            if output_row:
                output_rows.append(output_row)

    # newline='' is required by the csv module; without it extra blank
    # lines appear between rows on platforms that translate "\n".
    with open(output_csv, 'w', newline='') as csvfile:
        csv.writer(csvfile).writerows(output_rows)

    print("Tables from ", input_html, "converted to csv in", output_csv)


In [6]:
## Run the methods
#
# For every basal ganglia region: ask BAMS how many result pages exist for
# its input and output connections, download all pages into a temporary HTML
# file and convert the tables in that file into one CSV per region/direction.

import os

root = "../Data/BAMS_database"
temp_html = root + "/temp.html"
csv_dir = "../Data/csvs/bams2_connectivity"
base_url = "https://bams2.bams1.org/search/connections"

# Placeholder used in the paged URL template. The very same string must be
# handed to write_stuff, otherwise the page number is never substituted and
# the literal placeholder is sent to the server.
page_placeholder = "{number}"

# The target folders must exist before any file is opened inside them.
os.makedirs(root, exist_ok=True)
os.makedirs(csv_dir, exist_ok=True)

# Only the basal ganglia subset is scraped here. The complete region list,
# and the regex that splits it, are in the exploratory cell further down.
areas_bams = ["Striatum", "Striatum dorsal region", "Caudoputamen", "Striatum ventral region", "Nucleus accumbens", "Fundus of the striatum", "Striatum caudal (amygdalar) region", "Striatum medial (septal) region", "Pallidum dorsal region", "Globus pallidus", "Globus pallidus lateral segment", "Globus pallidus medial segment", "Pallidum ventral region", "Substantia innominata", "Subthalamic nucleus", "Substantia nigra"]
print("Areas of BAMS:", areas_bams)

for area in areas_bams:
    print("Looking at area:", area)
    area_query = area.replace(" ", "%20")
    area_slug = area.lower().replace(" ", "_")

    for direction in ("inputs", "outputs"):
        first_page = requests.get(base_url + "/" + direction + "/?query=%s" % area).content

        # str(bytes) puts the whole document on one line (newlines become the
        # two characters "\n"), so the greedy ".*" runs to the LAST
        # class="page" link; presumably that link holds the highest page
        # number.
        # NOTE(review): a region whose results fit on one page may have no
        # such link and would then be skipped entirely - confirm with BAMS.
        page_match = re.search(r"<a.*class\=\"page\">(\d*)<", str(first_page))
        if not page_match:
            continue

        print("Area has number of " + direction + ":", page_match.group(1))
        no_pages = int(page_match.group(1))

        paged_url = base_url + "/" + direction + "/?page=" + page_placeholder + "&query=" + area_query
        write_stuff(paged_url, page_placeholder, no_pages, temp_html)
        if direction == "outputs":
            print("Done fetching html outputs")

        output_file = csv_dir + "/bams_%s_%s.csv" % (area_slug, direction)
        write_to_csv(temp_html, output_file)

# Delete temp file (it only exists if at least one region had results)
if os.path.exists(temp_html):
    os.remove(temp_html)

Areas of BAMS: ['Striatum', 'Striatum dorsal region', 'Caudoputamen', 'Striatum ventral region', 'Nucleus accumbens', 'Fundus of the striatum', 'Striatum caudal (amygdalar) region', 'Striatum medial (septal) region', 'Pallidum dorsal region', 'Globus pallidus', 'Globus pallidus lateral segment', 'Globus pallidus medial segment', 'Pallidum ventral region', 'Substantia innominata', 'Subthalamic nucleus', 'Substantia nigra']
Looking at area: Striatum
Area has number of inputs: 11
Getting https://bams2.bams1.org/search/connections/inputs/?page={number}&query=Striatum
.
..
...
....
.....
......
.......
........
.........
..........
...........
Tables from  ../Data/BAMS_database/temp.html converted to csv in ../Data/csvs/bams2_connectivity/bams_striatum_inputs.csv
Area has number of outputs: 2
Getting https://bams2.bams1.org/search/connections/outputs/?page={number}&query=Striatum
.
..
Done fetching html outputs
Tables from  ../Data/BAMS_database/temp.html converted to csv in ../Data/csvs/ba

..
...
....
.....
......
.......
........
.........
..........
...........
Done fetching html outputs
Tables from  ../Data/BAMS_database/temp.html converted to csv in ../Data/csvs/bams2_connectivity/bams_substantia_nigra_outputs.csv


In [5]:
import re

# Space-separated dump of the BAMS region names. Every region name starts
# with a capital letter and contains no other capitals, which is what the
# splitting regex below relies on.
areas_bams_ugly = (
    "Striatum "
    "Striatum dorsal region "
    "Caudoputamen "
    "Striatum ventral region "
    "Nucleus accumbens "
    "Fundus of the striatum "
    "Olfactory tubercle "
    "Olfactory tubercle molecular layer "
    "Olfactory tubercle pyramidal layer "
    "Olfactory tubercle polymorph layer "
    "Islands of calleja "
    "Major island of calleja "
    "Striatum caudal (amygdalar) region "
    "Anterior amygdaloid area "
    "Central nucleus of amygdala "
    "Central nucleus of amygdala medial part "
    "Central nucleus of amygdala lateral part "
    "Central nucleus of amygdala capsular part "
    "Medial nucleus of the amygdala "
    "Medial nucleus of the amygdala anterodorsal part "
    "Medial nucleus of the amygdala anteroventral part "
    "Medial nucleus of the amygdala posterodorsal part "
    "Medial nucleus of the amygdala posteroventral part "
    "Bed nucleus of the accessory olfactory part "
    "Intercalated nuclei of the amygdala "
    "Striatum medial (septal) region "
    "Pallidum "
    "Pallidum rostral region "
    "Bed nuclei of the stria terminalis "
    "Bed nucleus of the anterior commissure "
    "Bed nucleus of the stria medularis "
    "Pallidum dorsal region "
    "Globus pallidus "
    "Globus pallidus lateral segment "
    "Globus pallidus medial segment "
    "Pallidum ventral region "
    "Substantia innominata "
    "Magnocellular preoptic nucleus "
    "Medial septal complex "
    "Medial septal nucleus "
    "Nucleus of the diagonal band "
    "Triangular nucleus of the septum"
)

# Cut at every non-word character that is directly followed by a capital
# letter, i.e. at the boundary between two region names.
areas_bams = re.split(r"\W(?=[A-Z])", areas_bams_ugly)
# Comma-separated form of the same list.
areas_bams_pretty = ",".join(areas_bams)

print("Areas of BAMS:")
for name in areas_bams:
    print(name)

Areas of BAMS:
Striatum
Striatum dorsal region
Caudoputamen
Striatum ventral region
Nucleus accumbens
Fundus of the striatum
Olfactory tubercle
Olfactory tubercle molecular layer
Olfactory tubercle pyramidal layer
Olfactory tubercle polymorph layer
Islands of calleja
Major island of calleja
Striatum caudal (amygdalar) region
Anterior amygdaloid area
Central nucleus of amygdala
Central nucleus of amygdala medial part
Central nucleus of amygdala lateral part
Central nucleus of amygdala capsular part
Medial nucleus of the amygdala
Medial nucleus of the amygdala anterodorsal part
Medial nucleus of the amygdala anteroventral part
Medial nucleus of the amygdala posterodorsal part
Medial nucleus of the amygdala posteroventral part
Bed nucleus of the accessory olfactory part
Intercalated nuclei of the amygdala
Striatum medial (septal) region
Pallidum
Pallidum rostral region
Bed nuclei of the stria terminalis
Bed nucleus of the anterior commissure
Bed nucleus of the stria medularis
Pallidum dor