# Test Notebook

- Encode/Decode Cycle: https://stackoverflow.com/questions/15304229/convert-python-elementtree-to-string

In [1]:
import lxml
from lxml import etree, html
import numpy as np
import pandas as pd
import sys
import os

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from app.utils import clean_boilerplate

### Loading

In [2]:
DATA_DIR_PATH = "../data/"
LANG = ""

TRAIN_PATH_CSV = DATA_DIR_PATH + "train" + LANG + ".csv"
TEST_PATH_CSV = DATA_DIR_PATH + "test" + LANG + ".csv"

N = 0

In [7]:
%%time
train = pd.read_csv(TRAIN_PATH_CSV, nrows=1000)

CPU times: user 1.82 s, sys: 346 ms, total: 2.17 s
Wall time: 2.58 s


In [8]:
train.head(1)

Unnamed: 0,url,industry,industry_label,group,group_representative,html,text,source,country,group_representative_label,webtype
0,http://www.energy-net.de,8,Telecommunications,"gov, tech",8,"<!DOCTYPE html>\n<html dir=""ltr"" lang=""de-DE"">...",Energy Net Apple Reseller\n\nSpringe zum Inhal...,xing,DE,Telecommunications,html


In [5]:
train2 = train.head(10)

### create test html

In [7]:
old_html = train.iloc[N]["html"]
old_html = old_html.replace("\n", "")
old_html = old_html.replace("\r", "")
old_html = old_html.replace("\t", "")
with open("../app/old/test.html", "w+") as f:
    f.write(old_html)

In [8]:
html_length = train.apply(lambda row: len(row["html"]), axis=1)
print(np.mean(html_length))

121802.391


In [9]:
old_html[:100]

'<!DOCTYPE html><html dir="ltr" lang="de-DE"><head><meta charset="utf-8"><!-- TYPO3 Website made with'

## Test strange websites

In [None]:
import requests

l = ['http://www.ciphron.de', 
     'http://www.debas.de', 
     'http://www.m2g-consult.de',
     'http://www.roundsolutions.com']

d = {}
d2 = {}

for link in l:
    d[link] = requests.get(link).text
    d2[link] = requests.get(link).content
    
url = 'http://www.roundsolutions.com'
s = d[url]
clean = clean_boilerplate(s, url)

In [42]:
s = train3.iloc[0].html
parser = etree.XMLParser(encoding="utf-8", recover=True)
tree = etree.fromstring(s)

XMLSyntaxError: Start tag expected, '<' not found, line 1, column 1 (<string>, line 1)

In [59]:
train4 = train3.copy()
train4["html"] = train4.apply(lambda row: clean_boilerplate(row.html, row.url), axis=1)

In [16]:
%%time
import random
p = 0.2  # 1% of the lines
train2 = pd.read_csv(
         TRAIN_PATH_CSV,
         header=0, 
         skiprows=lambda i: i>0 and random.random() > p
)

CPU times: user 2min 46s, sys: 37.3 s, total: 3min 24s
Wall time: 4min 43s


In [60]:
train2["html"] = train2.apply(lambda row: clean_boilerplate(row.html, row.url), axis=1)

In [105]:
%%time
import random
p = 0.05  # 1% of the lines
train3 = pd.read_csv(
         TRAIN_PATH_CSV,
         header=0, 
         skiprows=lambda i: i>0 and random.random() > p
)

CPU times: user 43.4 s, sys: 2.71 s, total: 46.1 s
Wall time: 46 s


In [12]:
train3["html"] = train3.apply(lambda row: clean_html_boilerplate(row), axis=1)

http://www.intera.de
Website 'http://www.intera.de' couldn't be cleaned (Type: xml).
http://www.maschinen-stockert.de
Website 'http://www.maschinen-stockert.de' couldn't be cleaned (Type: xml).
http://www.arnold-verladesysteme.com
Website 'http://www.arnold-verladesysteme.com' couldn't be cleaned (Type: xml).
http://www.crediteuropebank.com
Website 'http://www.crediteuropebank.com' couldn't be cleaned (Type: xml).
http://www.fasek.com
Website 'http://www.fasek.com' couldn't be cleaned (Type: xml).
http://www.bvb-berlin.de
Website 'http://www.bvb-berlin.de' couldn't be cleaned (Type: xml).
http://www.bauder.de
Website 'http://www.bauder.de' couldn't be cleaned (Type: xml).
http://www.storzmedical.com
Website 'http://www.storzmedical.com' couldn't be cleaned (Type: xml).
http://www.debas.de
Website 'http://www.debas.de' couldn't be cleaned (Type: xml).
http://www.buedenbender-hausbau.de
Website 'http://www.buedenbender-hausbau.de' couldn't be cleaned (Type: xml).
http://www.k-m-werbemitt

## Only text between specific HTML Tags

In [188]:
# TODO: aktuell unused

def stringify_children(node):
    """Given a LXML tag, return contents as a string

       >>> html = "<p><strong>Sample sentence</strong> with tags.</p>"
       >>> node = lxml.html.fragment_fromstring(html)
       >>> extract_html_content(node)
       "<strong>Sample sentence</strong> with tags."
    """
    if node is None or (len(node) == 0 and not getattr(node, 'text', None)):
        return ""
    node.attrib.clear()
    opening_tag = len(node.tag) + 2
    closing_tag = -(len(node.tag) + 3)
    return lxml.html.tostring(node)[opening_tag:closing_tag]

In [None]:
tag_list = ['a', 'b', 'em', 'h1', 'h2', 'h3', 'i', 'li', 'p', 'strong', 'title']

In [202]:
cleared_html_string = remove_tags(old_html, tag_list, tagless_output_string=True)
print(cleared_html_string)

Energy Net Apple Reseller Springe zum Inhalt Home Unternehmen Über Energy Net Partner Referenzen Aktuelles Stellenangebote Soziales Engagement Lösungen Apple Enterprise Services Collaboration Publishing Print & Copy Training & Events Services SHOP Kontakt Online einkaufen oder bestellen. Besuchen Sie unseren Shop! Ihr Partner für Remote Working.Work Anything. Anywhere. Lösungen für Dein BusinessFinden Sie Produkte in unserem Online-Shop 360° Video-KonferenzlösungenKeep your business rolling!                     Online einkaufen oder bestellen. Besuchen Sie unseren Shop!                                     Ihr Partner für Remote Working.                                     Lösungen für Dein Business                                     360° Video-Konferenzlösungen                 ENERGY NET - IHR PARTNER FÜR DIGITALE LÖSUNGEN Energy Net ist ein Full-Service Anbieter für die Bereiche Informations- und Bürokommunikation und gehört seit 2018 zur Econocom Group, mit über 10.700 Mitarbeitern,

In [186]:
cleared_html_string[:1000]

'<html><title>Energy Net Apple Reseller</title><a>Springe zum Inhalt</a><a></a><a><i></i></a><li><a>Home</a></li><li><a>Unternehmen</a><li><a>Über Energy Net</a></li><li><a>Partner</a></li><li><a>Referenzen</a></li><li><a>Aktuelles</a></li><li><a>Stellenangebote</a></li><li><a>Soziales Engagement</a></li></li><li><a>Lösungen</a><li><a>Apple Enterprise Services</a></li><li><a>Collaboration</a></li><li><a>Publishing</a></li><li><a>Print &amp; Copy</a></li><li><a>Training &amp; Events</a></li></li><li><a>Services</a></li><li><a>SHOP</a></li><li><a>Kontakt</a></li><i></i><a><a>Online einkaufen oder bestellen. Besuchen Sie unseren Shop!</a></a><a></a>Ihr Partner für Remote Working.Work Anything. Anywhere.<a><a>Lösungen für Dein BusinessFinden Sie Produkte in unserem Online-Shop</a></a><a><a>360° Video-KonferenzlösungenKeep your business rolling!</a></a><li>                    Online einkaufen oder bestellen. Besuchen Sie unseren Shop!                </li><li><li>                    Ihr Part

In [179]:
for tag in cleared_html.iter():
    if not len(tag):
        #print(tag.tag, "|", tag.text)
        if type(tag.text) != str:
            print(stringify_children(tag.text))

title | Energy Net Apple Reseller
a | Springe zum Inhalt
a | None

i | None

a | Home
a | Unternehmen
a | Über Energy Net
a | Partner
a | Referenzen
a | Aktuelles
a | Stellenangebote
a | Soziales Engagement
a | Lösungen
a | Apple Enterprise Services
a | Collaboration
a | Publishing
a | Print & Copy
a | Training & Events
a | Services
a | SHOP
a | Kontakt
i | None

a | Online einkaufen oder bestellen. Besuchen Sie unseren Shop!
a | None

a | Lösungen für Dein BusinessFinden Sie Produkte in unserem Online-Shop
a | 360° Video-KonferenzlösungenKeep your business rolling!
li |                     Online einkaufen oder bestellen. Besuchen Sie unseren Shop!                
li | None

li |                     Ihr Partner für Remote Working.                
li |                     Lösungen für Dein Business                
li |                     360° Video-Konferenzlösungen                
a | None

a | None

h1 | ENERGY NET - IHR PARTNER FÜR DIGITALE LÖSUNGEN
p | Energy Net ist ein Full-Serv

In [124]:
import numpy as np

html_file = old_html[:100]
token_list = ['a', 'b', 'em', 'h1', 'h2', 'h3', 'i', 'li', 'p', 'strong', 'title']

tree = html.fromstring(html_file)
unique_tags = list(np.unique([element.tag for element in tree.iter()]))
unique_tags = [element for element in unique_tags if element not in token_list]

In [125]:
unique_tags

['body', 'div', 'head', 'html', 'span']

In [126]:
etree.strip_tags(tree, unique_tags)
etree.tostring(tree)

b'<html><title>Energy Net Apple Reseller</title><a>Springe zum Inhalt</a></html>'

In [127]:
result_html = stringify_children(tree)

In [128]:
result_html.decode("utf-8")

'<title>Energy Net Apple Reseller</title><a>Springe zum Inhalt</a>'

In [89]:
old_html[:1000]

'<html><head><title>Energy Net Apple Reseller</title></head><body><div><a><span>Springe zum Inhalt</span></a><header><div><div><div><a></a></div><div><div><div><div><div><a><i></i></a></div></div></div><div><nav><ul><li><a><span>Home</span></a></li><li><a><span>Unternehmen</span></a><ul><li><a><span>Über Energy Net</span></a></li><li><a><span>Partner</span></a></li><li><a><span>Referenzen</span></a></li><li><a><span>Aktuelles</span></a></li><li><a><span>Stellenangebote</span></a></li><li><a><span>Soziales Engagement</span></a></li></ul></li><li><a><span>Lösungen</span></a><ul><li><a><span>Apple Enterprise Services</span></a></li><li><a><span>Collaboration</span></a></li><li><a><span>Publishing</span></a></li><li><a><span>Print &amp; Copy</span></a></li><li><a><span>Training &amp; Events</span></a></li></ul></li><li><a><span>Services</span></a></li><li><a><span>SHOP</span></a></li><li><a><span>Kontakt</span></a></li></ul></nav></div></div></div></div></div><div><span><i></i></span><div>

In [77]:
for tag in tree.iter():
    if not len(tag):
        print(tag.tag, "|", tag.text)
        if type(tag.text) != str:
            print(stringify_children(tag.text))

title | Energy Net Apple Reseller
span | Springe zum Inhalt
a | None
<class 'NoneType'>


AttributeError: 'NoneType' object has no attribute 'text'

## Only keep specific HTML Tags

In [31]:
from lxml.html.clean import Cleaner
from lxml import html, etree


html_file = etree.fromstring(old_html, parser=etree.HTMLParser(remove_comments=True))

tags = [element.tag for element in html_file.iter()]

In [None]:
özel_tags = ["title", "h1", "h2", "h3", "b", "strong", "em", "i", "p", "a", "li"]

TODO:
- alles entfernen außer özel tags
    -  das vielleicht in cleaner?
    - mindestens die in `allow_tags`
        - https://lxml.de/api/lxml.html.clean.Cleaner-class.html
- paper 2020 hashemi weiterlesen
- weitere verfahren angucken
- clf pipeline aufsetzen

In [113]:
f = html.fromstring(old_html)

In [129]:
# get all tags

from lxml import etree
from lxml import html
import numpy as np

html_file = etree.fromstring(old_html, parser=etree.HTMLParser(remove_comments=True))

tags = [element.tag for element in html_file.iter()]
#list(np.unique(tags))

In [131]:
print(old_html)

<!doctype html><html class="no-js" lang=""><head><meta charset="utf-8"><meta http-equiv="x-ua-compatible" content="ie=edge"><meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no"><link rel="shortcut icon" type="image/x-icon" href="https://www.plersch.de/wp-content/themes/plersch/favicon.ico"><link rel="shortcut icon" type="image/png" href="https://www.plersch.de/wp-content/themes/plersch/favicon.png"><!-- This site is optimized with the Yoast SEO plugin v7.1 - https://yoast.com/wordpress/plugins/seo/ --><title>Start | Plersch Edelstahltechnik GmbH</title><link rel="canonical" href="https://www.plersch.de/" /><meta property="og:locale" content="de_DE" /><meta property="og:type" content="website" /><meta property="og:title" content="Start | Plersch Edelstahltechnik GmbH" /><meta property="og:description" content="Dies ist eine Beispiel-Seite. Sie unterscheidet sich von Beiträgen, da sie stets an der selben Stelle bleibt und (bei den meisten Themes) in der N

In [116]:
from lxml import etree
from lxml import html

f = html.fromstring(old_html)

# for r in f.xpath('//r'):
#     r.tail = 'DELETED' + r.tail if r.tail else 'DELETED'

etree.strip_elements(f, 'meta', with_tail=True)

print(etree.tostring(f,pretty_print=True)[:100])

b'<html class="no-js" lang="">\n  <head><link rel="shortcut icon" type="image/x-icon" href="https://www'


In [102]:
string = old_html

parser = etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8')
parsed_xml = etree.fromstring(string.encode('utf-8'), parser=parser)
string = etree.tostring(parsed_xml)
string = string.decode("utf-8")

TypeError: Type 'NoneType' cannot be serialized.

## Clean HTML

### lxml

In [84]:
from lxml.html.clean import Cleaner
from lxml import html, etree


def clean_html_boilerplate(row):
    """ Cleans boilerplate HTML tags from HTML."""
    
    string = row.html
    url = row.url
    
    cleaner = Cleaner(page_structure=False, meta=True, style=True, kill_tags=["img"])
    clean = string
    
    try:
        clean = cleaner.clean_html(string)
    except:
        try:
            parser = etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8')
            parsed_xml = etree.fromstring(string.encode('utf-8'), parser=parser)
            string = etree.tostring(parsed_xml)
            string = string.decode("utf-8")
            clean = cleaner.clean_html(string)
        except:
            print(f"Website '{url}' couldn't be cleaned.")
    
    clean = clean.replace("\n", "")
    clean = clean.replace("\r", "")
    clean = clean.replace("\t", "")
    
    return clean

In [47]:
%%time
train["html"] = train.apply(lambda row: clean_html_boilerplate(row), axis=1)

CPU times: user 3min 22s, sys: 350 ms, total: 3min 22s
Wall time: 3min 22s


In [48]:
html_length = train.apply(lambda row: len(row["html"]), axis=1)
print(np.mean(html_length))

38466.64927477841


In [56]:
a = 119055
b = 38466

b/a-1

-0.6769056318508253

In [57]:
26 / 30

0.8666666666666667

In [7]:
cleaned_html = clean_html_boilerplate(old_html)
len(old_html), len(cleaned_html)

(27263, 13341)

### VERWORFEN: Tidylib (Python Wrapper)

In [30]:
from tidylib import Tidy
tidy = Tidy()
document, errors = tidy.tidy_document(cleaned_html, options={'alt-text': 'baz'})
document = document.replace("\n", "")
document = document.replace("\r", "")
document = document.replace("\t", "")

## real tests

In [15]:
cleaned_html = clean_html_boilerplate(old_html)

In [16]:
len(old_html), len(cleaned_html)

(27263, 13341)

In [106]:
with open("../app/old/test.html", "r") as f:
    testhtml = f.read()
    
    
html_doc = html.fromstring(soup_str)
html_doc_s = etree.tostring(html_doc, method="xml").decode("utf-8") 

In [113]:
print(html_doc_s[:1000])

<html class="no-js" lang="">
 <head>
  <title>
   Start | Plersch Edelstahltechnik GmbH
  </title>
 </head>
 <body class="home page-template-default page page-id-2">
  <aside class="pr-metaMenu">
   <nav>
    <ul class="pr-metaMenu__wrap" id="menu-meta-menue-kleingedrucktes-header">
     <li class="pr-metaMenu__item-wrap">
      <a class="pr-metaMenu__item" href="https://www.plersch.de/impressum/">
       Impressum
      </a>
     </li>
     <li class="pr-metaMenu__item-wrap">
      <a class="pr-metaMenu__item" href="https://www.plersch.de/kontakt/">
       Kontakt
      </a>
     </li>
     <li class="pr-metaMenu__item-wrap">
      <a class="pr-metaMenu__item" href="https://www.plersch.de/datenschutz/">
       Datenschutz
      </a>
     </li>
     <li class="pr-metaMenu__item-wrap">
      <a class="pr-metaMenu__item" href="https://www.plersch.de/agb/">
       AGB
      </a>
     </li>
    </ul>
   </nav>
  </aside>
  <header class="pr-siteHead">
   <div class="pr-siteHead__wrap">
   

In [99]:
from bs4 import BeautifulSoup

soup = BeautifulSoup(cleaned_html)
soup_str = soup.prettify()
with open("../app/old/etest.html", "w+") as f:
    f.write(soup_str)

In [100]:
from lxml.html import html_to_xhtml
from lxml import etree

html_doc = html.fromstring(soup_str)
html_doc_s = etree.tostring(html_doc).decode("utf-8") 
print(len(html_doc_s))
print(html_doc_s[:1000])

with open("../app/old/etest.html", "w+") as f:
    f.write(html_doc_s)

16658
<html class="no-js" lang="">
 <head>
  <title>
   Start | Plersch Edelstahltechnik GmbH
  </title>
 </head>
 <body class="home page-template-default page page-id-2">
  <aside class="pr-metaMenu">
   <nav>
    <ul class="pr-metaMenu__wrap" id="menu-meta-menue-kleingedrucktes-header">
     <li class="pr-metaMenu__item-wrap">
      <a class="pr-metaMenu__item" href="https://www.plersch.de/impressum/">
       Impressum
      </a>
     </li>
     <li class="pr-metaMenu__item-wrap">
      <a class="pr-metaMenu__item" href="https://www.plersch.de/kontakt/">
       Kontakt
      </a>
     </li>
     <li class="pr-metaMenu__item-wrap">
      <a class="pr-metaMenu__item" href="https://www.plersch.de/datenschutz/">
       Datenschutz
      </a>
     </li>
     <li class="pr-metaMenu__item-wrap">
      <a class="pr-metaMenu__item" href="https://www.plersch.de/agb/">
       AGB
      </a>
     </li>
    </ul>
   </nav>
  </aside>
  <header class="pr-siteHead">
   <div class="pr-siteHead__wrap

In [101]:
html_to_xhtml(html_doc)

html_doc_s = etree.tostring(html_doc).decode("utf-8") 
print(len(html_doc_s))
print(html_doc_s[:1000])

with open("../app/old/etest.xhtml", "w+") as f:
    f.write(html_doc_s)

18620
<html:html xmlns:html="http://www.w3.org/1999/xhtml" class="no-js" lang="">
 <html:head>
  <html:title>
   Start | Plersch Edelstahltechnik GmbH
  </html:title>
 </html:head>
 <html:body class="home page-template-default page page-id-2">
  <html:aside class="pr-metaMenu">
   <html:nav>
    <html:ul class="pr-metaMenu__wrap" id="menu-meta-menue-kleingedrucktes-header">
     <html:li class="pr-metaMenu__item-wrap">
      <html:a class="pr-metaMenu__item" href="https://www.plersch.de/impressum/">
       Impressum
      </html:a>
     </html:li>
     <html:li class="pr-metaMenu__item-wrap">
      <html:a class="pr-metaMenu__item" href="https://www.plersch.de/kontakt/">
       Kontakt
      </html:a>
     </html:li>
     <html:li class="pr-metaMenu__item-wrap">
      <html:a class="pr-metaMenu__item" href="https://www.plersch.de/datenschutz/">
       Datenschutz
      </html:a>
     </html:li>
     <html:li class="pr-metaMenu__item-wrap">
      <html:a class="pr-metaMenu__item" href="

In [9]:
train2["html"] = train2.apply(lambda row: clean_html_boilerplate(row["html"]), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [10]:
train.head(3)

Unnamed: 0,url,industry,industry_label,group,group_representative,html,text,source,country,group_representative_label
0,http://www.weldmont.de,56,Mining & Metals,man,55,<html> <head> ...,Weldmont\n\nHU\nDE\nEN\nSK\nHU\nDE\nEN\nSK\n\n...,xing,DE,Apparel & Fashion
1,http://www.rvs.at,41,Banking,fin,43,"<html lang=""de""><head> <title>Raiff...",Raiffeisenverband Salzburg Privatkunden\n\nSki...,xing,DE,Higher Education
2,http://www.unicon.at,11,Management Consulting,"corp, consulting",11,"<html lang=""de""><head><title>Unternehmensberat...","Unternehmensberatung, Coaching & Seminare | UN...",linkedin,DE,Motion Pictures and Film


In [44]:
gr = sorted(list(train.group_representative_label.unique()))
i = sorted(list(train.industry_label.unique()))

for element in gr:
    if element in i:
        print(element)

Hospitality
Logistics and Supply Chain
Management Consulting
Renewables & Environment


In [102]:
gr

['Apparel & Fashion',
 'Business Supplies and Equipment',
 'Civic & Social Organization',
 'Cosmetics',
 'Education Management',
 'Higher Education',
 'Hospitality',
 'Investment Banking',
 'Law Practice',
 'Logistics and Supply Chain',
 'Management Consulting',
 'Maritime',
 'Media Production',
 'Motion Pictures and Film',
 'Museums and Institutions',
 'Package/Freight Delivery',
 'Photography',
 'Renewables & Environment',
 'Research',
 'Wireless',
 'Writing and Editing']

In [11]:
train2.head(3)

Unnamed: 0,url,industry,industry_label,group,group_representative,html,text,source,country,group_representative_label
0,http://www.weldmont.de,56,Mining & Metals,man,55,<html> <head> ...,Weldmont\n\nHU\nDE\nEN\nSK\nHU\nDE\nEN\nSK\n\n...,xing,DE,Apparel & Fashion
1,http://www.rvs.at,41,Banking,fin,43,"<html lang=""de""><head> <title>Raiff...",Raiffeisenverband Salzburg Privatkunden\n\nSki...,xing,DE,Higher Education
2,http://www.unicon.at,11,Management Consulting,"corp, consulting",11,"<html lang=""de""><head><title>Unternehmensberat...","Unternehmensberatung, Coaching & Seminare | UN...",linkedin,DE,Motion Pictures and Film


In [14]:
with open("../app/old/test.html", "r") as f:
    old_html = f.read()

In [15]:
old_html[:100]

'<!DOCTYPE html><html>    <head>        <!-- Global site tag (gtag.js) - Google Analytics -->        '

In [17]:
cleaned_html = clean_html_boilerplate(old_html)

In [18]:
cleaned_html[:20]

'<html>    <head>    '

In [19]:
cleaned_html[-20:]

'      </body></html>'

In [20]:
from lxml import etree
from lxml import html
import numpy as np

html_file = html.fromstring(cleaned_html)

tags = [element.tag for element in html_file.iter()]
list(np.unique(tags))

['a',
 'body',
 'br',
 'div',
 'h1',
 'h2',
 'head',
 'html',
 'i',
 'li',
 'p',
 'span',
 'svg',
 'title',
 'ul']

In [21]:
# If you want to visit all of the descendants
for element in html_file.iter():
    print(element.tag)
    
    
# Or, if you want to have a list of all the descendents
all_elements = list(html_file.iter())
#print([element.tag for element in all_elements])

html
head
title
body
div
div
div
div
div
div
div
a
div
div
div
i
i
div
div
div
svg
div
a
div
a
div
a
div
a
div
div
a
svg
div
a
svg
div
a
svg
div
a
svg
div
div
div
div
div
a
svg
div
a
svg
div
a
svg
div
a
svg
div
div
svg
div
a
div
a
div
a
div
a
div
ul
li
a
li
a
div
div
div
a
div
div
div
div
h1
h2
div
svg
div
p
div
a
div
div
div
div
h1
h2
div
svg
div
p
div
div
div
div
div
div
span
br
br
br
div
span
br
br
br
div
div
div
ul
li
a
li
a
li
a
li
a
li
a
li
a
div
div
div
ul
li
a
li
a
li
a
li
a
li
a
div
div
div
div
ul
li
a
li
a
div
div
div
a
svg
div
a
svg
div
a
svg
div
a
svg
div
a
div
a
