# Webscrape

In [9]:
from bs4 import BeautifulSoup
import requests

def fetch_html_tables(url):
    "returns a list of tables in the html of url"
    page = requests.get(url)
    bs = BeautifulSoup(page.content)
    tables = bs.find_all('table')
    return tables


tables = fetch_html_tables('https://www.norskfamilie.no/barometre/bsu/')
table_html = tables[0]

#printing top
print(str(table_html)[:1000])

<table class="table table-striped table-hover barometer">
<thead>
<tr>
<th> </th>
<th>Bank</th>
<th>Produkt</th>
<th> </th>
<th>Krever produktpakke</th>
<th>Rente</th>
</tr>
</thead>
<tbody>
<tr>
<td>1</td>
<td class="bank">SpareBank 1 Ringerike Hadeland</td>
<td>LOfavør BSU</td>
<td>
<button class="popover_info btn btn-none" data-html="true" data-placement="left" data-toggle="tooltip" title="For å ha BSU-konto forutsettes det at kunden har lønnskonto med lønnsinngang i banken." type="button">
<i class="fa fa-info-circle"></i>
</button>
</td>
<td><i class="fa fa-check"></i></td>
<td>3,60</td>
</tr>
<tr>
<td>2</td>
<td class="bank">SpareBank 1 Hallingdal Valdres</td>
<td>LO Favør BSU</td>
<td>
<button class="popover_info btn btn-none" data-html="true" data-placement="left" data-toggle="tooltip" title="Fra 2021 er det kun de som ikke eier bolig som får skattefradrag når de sparer i BSU." type="button">
<i class="fa fa-info-circle"></i>
</button>
</td>
<td><i class="fa fa-check"></i></td>

In [10]:
def html_to_table(html):
    "Returns the table defined in html as a list"
    #defining the table:
    table=[]
    #iterating over all rows
    for row in html.find_all('tr'):
        r=[]
        #finding all cells in each row:
        cells=row.find_all('td')
        
        #if no cells are found, look for headings
        if len(cells)==0:
            cells=row.find_all('th')
            
        #iterate over cells:
        for cell in cells:
            cell=format(cell)
            r.append(cell)
        
        #append the row to t:
        table.append(r)
    return table

def format(cell):
    "Returns a string after converting bs4 object cell to clean text"
    if cell.content is None:
        s=cell.text
    elif len(cell.content)==0:
        return ''
    else:
        s=' '.join([str(c) for c in cell.content])
        
    #here you can add additional characters/strings you want to 
    #remove, change punctuations or format the string in other
    #ways:
    s=s.replace('\xa0','')
    s=s.replace('\n','')
    return s

table=html_to_table(table_html)

#printing top
print(str(table)[:1000])

[['', 'Bank', 'Produkt', '', 'Krever produktpakke', 'Rente'], ['1', 'SpareBank 1 Ringerike Hadeland', 'LOfavør BSU', '', '', '3,60'], ['2', 'SpareBank 1 Hallingdal Valdres', 'LO Favør BSU', '', '', '3,50'], ['3', 'Orkla Sparebank', 'BSU - Boligsparing for unge', '', '', '3,40'], ['4', 'Høland og Setskog Sparebank', 'BSU', '', '', '3,40'], ['5', 'Skue Sparebank', 'BSU', '', '', '3,40'], ['6', 'SpareBank 1 Sørøst-Norge', 'LOfavør BSU', '', '', '3,35'], ['7', 'SpareBank 1 SMN', 'LOfavør BSU', '', '', '3,35'], ['8', 'SpareBank 1 Ringerike Hadeland', 'BSU', '', '', '3,35'], ['9', 'SpareBank 1 Modum', 'BSU LO Favør', '', '', '3,35'], ['10', 'SpareBank 1 Østlandet', 'LOfavør BSU til fylte 34 år', '', '', '3,30'], ['11', 'Drangedal Sparebank', 'BSU', '', '', '3,30'], ['12', 'Odal Sparebank', 'BSU', '', '', '3,30'], ['13', 'SpareBank 1 Gudbrandsdal', 'LOfavør Sparekonto BSU', '', '', '3,30'], ['14', 'Hjelmeland Sparebank', 'BSU', '', '', '3,25'], ['15', 'Tolga-Os Sparebank', 'Boligsparing for u

In [11]:
';'.join(table[0])

';Bank;Produkt;;Krever produktpakke;Rente'

In [12]:
def save_data(file_name,table):
    "Saves table to file_name"
    f=open(file_name,'w')
    for row in table:
        f.write(';'.join(row)+'\n')
    f.close()
    
save_data('BSU.csv',table)

In [14]:
import pandas as pd
pd.read_csv('BSU.csv', delimiter=';', encoding='latin1')

Unnamed: 0.1,Unnamed: 0,Bank,Produkt,Unnamed: 3,Krever produktpakke,Rente
0,1,SpareBank 1 Ringerike Hadeland,LOfavÃ¸r BSU,,,360
1,2,SpareBank 1 Hallingdal Valdres,LO FavÃ¸r BSU,,,350
2,3,Orkla Sparebank,BSU - Boligsparing for unge,,,340
3,4,HÃ¸land og Setskog Sparebank,BSU,,,340
4,5,Skue Sparebank,BSU,,,340
...,...,...,...,...,...,...
117,118,Sbanken ASA,BSU,,,252
118,119,Sunndal Sparebank,BSU (Boligsparing for ungdom),,,250
119,120,Soknedal Sparebank,BSU,,,250
120,121,KLP Banken AS,BSU (BoligÂ­Â­sÂ­paring for ungdom),,,240
