Skip to content

Commit

Permalink
Merge ea11614 into d4e78f0
Browse files Browse the repository at this point in the history
  • Loading branch information
mgrgic committed Nov 3, 2016
2 parents d4e78f0 + ea11614 commit 6235119
Show file tree
Hide file tree
Showing 2 changed files with 196 additions and 41 deletions.
81 changes: 59 additions & 22 deletions composition/html_content_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,25 +273,32 @@ func ParseHeadFragment(fragment *StringFragment, headPropertyMap map[string]stri
return z.Err()
}
break forloop

case tt == html.StartTagToken || tt == html.SelfClosingTagToken:

if string(tag) == "meta" {
if(processMetaTag(string(tag), attrs, headPropertyMap)) {
headBuff.Write(raw)
}
continue
}
if string(tag) == "title" {
if(headPropertyMap["title"] == "") {
headPropertyMap["title"] = "title"
headBuff.Write(raw)
} else if (tt != html.SelfClosingTagToken) {
skipCompleteTag(z, "title")
continue
}
} else {
headBuff.Write(raw)
}
switch {
case string(tag) == "meta":
if (processMetaTag(string(tag), attrs, headPropertyMap)) {
headBuff.Write(raw)
}
continue forloop
case string(tag) == "link":
if (processLinkTag(attrs, headPropertyMap)) {
headBuff.Write(raw)
}
continue forloop
case string(tag) == "title":
if (headPropertyMap["title"] == "") {
headPropertyMap["title"] = "title"
headBuff.Write(raw)
} else if (tt != html.SelfClosingTagToken) {
skipCompleteTag(z, "title")
}
continue forloop
default:
headBuff.Write(raw)
}

default:
headBuff.Write(raw)
}
Expand Down Expand Up @@ -328,23 +335,21 @@ func skipCompleteTag(z *html.Tokenizer, tagName string) error {
return nil
}



func processMetaTag(tagName string, attrs []html.Attribute, metaMap map[string]string) bool {
if (len(attrs) == 0) {
return true
}

key := tagName
value := ""
// TODO: check explicitly for attrName "http-equiv" || "name" || "charset"?

// e.g.: <meta charset="utf-8">
// e.g.: <meta charset="utf-8"> => key = meta_charset; val = utf-8
if (len(attrs) == 1) {
key = tagName + "_" + attrs[0].Key
value = attrs[0].Val
}

// e.g.: <meta name="content-language" content="de"> => key = meta_name_content-language; val = content_de
if (len(attrs) > 1) {
key = tagName + "_" + attrs[0].Key + "_" + attrs[0].Val
value = attrs[1].Key + "_" + attrs[1].Val
Expand All @@ -353,11 +358,43 @@ func processMetaTag(tagName string, attrs []html.Attribute, metaMap map[string]s
if (metaMap[key] == "") {
metaMap[key] = value
return true

}
return false
}

// processLinkTag reports whether a <link> tag should be kept in the head.
//
// Only canonical links are tracked: the first <link rel="canonical"> with a
// non-empty href stores that href in metaMap under the key "canonical".
// Any later canonical link is rejected (false) so that the head ends up with
// at most one canonical definition. All other link tags, including tags
// without attributes, are always accepted (true).
//
// e.g.: <link rel="canonical" href="/baumarkt/suche"> => key = canonical; val = /baumarkt/suche
func processLinkTag(attrs []html.Attribute, metaMap map[string]string) bool {
	const canonical = "canonical"

	isCanonical := false
	href := ""
	for i := range attrs {
		switch attrs[i].Key {
		case "rel":
			if attrs[i].Val == canonical {
				isCanonical = true
			}
		case "href":
			href = attrs[i].Val
		}
	}

	// Non-canonical links (and attribute-less tags) are never filtered.
	if !isCanonical {
		return true
	}

	// A canonical URL has already been recorded: drop this duplicate.
	if metaMap[canonical] != "" {
		return false
	}

	// Remember the canonical URL only when the tag actually carries one.
	if href != "" {
		metaMap[canonical] = href
	}
	return true
}

func parseMetaJson(z *html.Tokenizer, c *MemoryContent) error {
tt := z.Next()
if tt != html.TextToken {
Expand Down
156 changes: 137 additions & 19 deletions composition/html_content_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,12 @@ var productUiGeneratedHtml = `<!DOCTYPE html>
<head>
<meta charset="utf-8">
<title>navigationservice</title>
<!-- START Include legacy styles - emulate integration -->
<!-- END Include legacy styles -->
<link rel="stylesheet" href="/navigationservice/stylesheets/main-ffc9b54a22.css">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="canonical" href="/baumarkt/suche">
<script>
// Define global SCRIPTS variable and
// global loadScript() method to loading scripts
Expand Down Expand Up @@ -64,29 +61,18 @@ var productUiGeneratedHtml = `<!DOCTYPE html>
TYPO3 is copyright 1998-2016 of Kasper Skaarhoj. Extensions are copyright of their respective owners.
Information and contribution at http://typo3.org/
-->
<base href="/">
<meta name="generator" content="TYPO3 CMS">
<meta name="content-language" content="de">
<link rel="stylesheet" type="text/css" href="typo3temp/compressor/merged-d0ed097d2e70237fa36186d357e1268f-4e221af468cdd1d3a44789532134127c.css?1476243484" media="all">
<script src="typo3temp/compressor/merged-f6a1f7cc0a094340acf2489928881fc7-956c525da07a115d310e68d089faa490.js?1476243484" type="text/javascript"></script>
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta http-equiv="cleartype" content="on">
<meta name="mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black">
<link rel="canonical" href="/navigationservice">
<link rel="stylesheet" href="typo3conf/ext/bra_projectfiles_toom/Resources/Public/website/css/main.css">
<link rel="shortcut icon" href="favicon.ico" type="image/ico" />
Expand Down Expand Up @@ -295,7 +281,7 @@ func Test_HtmlContentParser_LoadEmptyContent(t *testing.T) {
a.Nil(c.Tail())
}

func Test_HtmlContentParser_parseHead_withMultipleMetaTags_and_Titles(t *testing.T) {
func Test_HtmlContentParser_parseHead_withMultipleMetaTags_and_Titles_and_Canonicals(t *testing.T) {
a := assert.New(t)

parser := &HtmlContentParser{}
Expand All @@ -306,8 +292,7 @@ func Test_HtmlContentParser_parseHead_withMultipleMetaTags_and_Titles(t *testing
err := parser.parseHead(z, c)
a.NoError(err)

//eqFragment(t, "<xx/><foo>xxx</foo><bar>xxx</bar>", c.Head())
//a.True(strings.Contains(string(c.Head()), "navigationservice"))
containsFragment(t, "<title>navigationservice</title>", c.Head())
}


Expand Down Expand Up @@ -633,6 +618,20 @@ func eqFragment(t *testing.T, expected string, f Fragment) {
}
}

// containsFragment reports a test error unless the content of f contains the
// string contained.
//
// Spaces and newlines are removed from both the fragment content and the
// expected substring before comparing, so formatting differences do not
// matter. A nil fragment, or a fragment that is not a StringFragment, is
// reported as a test error instead of causing a panic.
func containsFragment(t *testing.T, contained string, f Fragment) {
	if f == nil {
		t.Error("Fragment is nil, but expected:", contained)
		return
	}
	sf, ok := f.(StringFragment)
	if !ok {
		t.Errorf("Fragment has unexpected type %T, but expected it to contain: %s", f, contained)
		return
	}

	// Normalize both sides identically; otherwise an expected substring that
	// itself contains a space or newline could never match.
	stripper := strings.NewReplacer(" ", "", "\n", "")
	haystack := stripper.Replace(string(sf))
	needle := stripper.Replace(contained)

	if !strings.Contains(haystack, needle) {
		t.Error("Fragment does not contain: \nexpected: ", contained, "\nactual: ", sf)
	}
}


func Test_ParseHeadFragment_Filter_Title(t *testing.T) {
a := assert.New(t)
Expand Down Expand Up @@ -826,6 +825,125 @@ func Test_ParseHeadFragment_Filter_Meta_Tag(t *testing.T) {
a.Equal(expectedParsedHead, resultString)
}

// Test_ParseHeadFragment_Filter_Link_Canonical_Tag checks that a canonical
// <link> tag is removed from a head fragment when the property map already
// holds a "canonical" entry, while all other head content is kept.
func Test_ParseHeadFragment_Filter_Link_Canonical_Tag(t *testing.T) {
	a := assert.New(t)

	// GIVEN
	originalHeadString := `<meta charset="utf-8">
<link rel="canonical" href="/navigationservice">
<title>navigationservice</title>
<!-- START Include jquery lib - add to SCRIPTS again after last JS from legacy system is removed -->
<!-- END Include jquery lib -->
<link rel="stylesheet" href="/navigationservice/stylesheets/main-93174ed18d.css">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="blub" content="width=device-width, initial-scale=1.0">
<script>
// Define global SCRIPTS variable and
// global loadScript() method to loading scripts
// async but in order.
// Each module register it's javascript by calling
// this method:
//
// loadScript('/navigationservice/components/molecules/teaser/teaser.js');
//
SCRIPTS = ['/navigationservice/javascripts/main-e566a7bb73.js'];
isLegacy = function() {
return typeof Object.assign === 'function' ? false : true;
};
loadScript = function(script, legacyOnly) {
for(var i=0; i < SCRIPTS.length; i++) if(SCRIPTS[i] === script) return false;
if((legacyOnly && isLegacy()) || (!legacyOnly)) {
SCRIPTS.push(script);
}
};
</script>
<!-- fonts.com - Async Font Loading -->`

	expectedParsedHead := `<meta charset="utf-8">
<title>navigationservice</title>
<!-- START Include jquery lib - add to SCRIPTS again after last JS from legacy system is removed -->
<!-- END Include jquery lib -->
<link rel="stylesheet" href="/navigationservice/stylesheets/main-93174ed18d.css">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="blub" content="width=device-width, initial-scale=1.0">
<script>
// Define global SCRIPTS variable and
// global loadScript() method to loading scripts
// async but in order.
// Each module register it's javascript by calling
// this method:
//
// loadScript('/navigationservice/components/molecules/teaser/teaser.js');
//
SCRIPTS = ['/navigationservice/javascripts/main-e566a7bb73.js'];
isLegacy = function() {
return typeof Object.assign === 'function' ? false : true;
};
loadScript = function(script, legacyOnly) {
for(var i=0; i < SCRIPTS.length; i++) if(SCRIPTS[i] === script) return false;
if((legacyOnly && isLegacy()) || (!legacyOnly)) {
SCRIPTS.push(script);
}
};
</script>
<!-- fonts.com - Async Font Loading -->`

	// A canonical URL is already known, so the fragment's own canonical link
	// is expected to be filtered out.
	headMetaPropertyMap := make(map[string]string)
	headMetaPropertyMap["canonical"] = "/baumarkt/suche"

	headFragment := StringFragment(originalHeadString)
	// WHEN
	err := ParseHeadFragment(&headFragment, headMetaPropertyMap)

	// THEN
	// A parse failure must fail the test instead of being silently dropped.
	a.NoError(err)

	expectedParsedHead = removeTabsAndNewLines(expectedParsedHead)
	resultString := removeTabsAndNewLines(string(headFragment))

	a.Equal(expectedParsedHead, resultString)
}

// Test_ParseHeadFragment_Filter_Link_Canonical_Tag_without_existing_Map checks
// that, starting from an empty property map, the first canonical <link> tag of
// a head fragment is kept and a second canonical <link> tag is removed. Other
// link tags, including one without attributes, are left untouched.
func Test_ParseHeadFragment_Filter_Link_Canonical_Tag_without_existing_Map(t *testing.T) {
	// GIVEN
	a := assert.New(t)

	originalHeadString := `
<link rel="stylesheet" href="/searchservice/stylesheets/main-36b9f2e88a.css">
<link />
<link rel="canonical" href="/baumarkt/bauen-renovieren/suche">
<meta charset="utf-8" />
<link rel="canonical"
href="/navigationservice">
<foo bar=""/>
<title>navigationservice</title>
`

	expectedParsedHead := `
<link rel="stylesheet" href="/searchservice/stylesheets/main-36b9f2e88a.css">
<link />
<link rel="canonical" href="/baumarkt/bauen-renovieren/suche">
<meta charset="utf-8" />
<foo bar=""/>
<title>navigationservice</title>
`

	headMetaPropertyMap := make(map[string]string)

	headFragment := StringFragment(originalHeadString)
	// WHEN
	err := ParseHeadFragment(&headFragment, headMetaPropertyMap)

	// THEN
	// A parse failure must fail the test instead of being silently dropped.
	a.NoError(err)

	expectedParsedHead = removeTabsAndNewLines(expectedParsedHead)
	resultString := removeTabsAndNewLines(string(headFragment))

	a.Equal(expectedParsedHead, resultString)
}

func removeTabsAndNewLines(stringToProcess string) string{
stringToProcess = strings.Replace(stringToProcess, "\n", "", -1)
stringToProcess = strings.Replace(stringToProcess, "\t", "", -1)
Expand Down

0 comments on commit 6235119

Please sign in to comment.