diff --git a/docs/development/search.rst b/docs/development/search.rst index eea0568e6e3..5210eac0de7 100644 --- a/docs/development/search.rst +++ b/docs/development/search.rst @@ -98,7 +98,7 @@ As per requirements of `django-elasticsearch-dsl`_, it is stored in the The fields and ES Datatypes are specified in the `PageDocument`. The indexable data is taken from `processed_json` property of `HTMLFile`. This property provides python dictionary with - document data like `title`, `headers`, `content` etc. + document data like `title`, `sections`, `path` etc. .. _Elasticsearch: https://www.elastic.co/products/elasticsearch diff --git a/media/css/readthedocs-doc-embed.css b/media/css/readthedocs-doc-embed.css index 761e395a495..0e2e518740f 100644 --- a/media/css/readthedocs-doc-embed.css +++ b/media/css/readthedocs-doc-embed.css @@ -286,3 +286,8 @@ div.ethical-footer { font-size: 14px; line-height: 20px; } + +/* Margin between the search results */ +.rtd_search_hits_spacing { + margin: 10px 0; +} diff --git a/readthedocs/core/static-src/core/js/doc-embed/search.js b/readthedocs/core/static-src/core/js/doc-embed/search.js index 79ef3c01c77..82d250ab6cf 100644 --- a/readthedocs/core/static-src/core/js/doc-embed/search.js +++ b/readthedocs/core/static-src/core/js/doc-embed/search.js @@ -4,6 +4,8 @@ var rtddata = require('./rtd-data'); var xss = require('xss/lib/index'); +var MAX_RESULT_PER_SECTION = 3; +var MAX_SUBSTRING_LIMIT = 100; /* @@ -35,14 +37,24 @@ function attach_elastic_search_query(data) { for (var i = 0; i < hit_list.length; i += 1) { var doc = hit_list[i]; var highlight = doc.highlight; + var inner_hits = doc.inner_hits || []; var list_item = $('
  • '); + var title = doc.title; + // if highlighted title is present, + // use that. + if (highlight) { + if (highlight.title) { + title = xss(highlight.title[0]); + } + } + // Creating the result from elements - var link = doc.link + DOCUMENTATION_OPTIONS.FILE_SUFFIX + - '?highlight=' + $.urlencode(query); + var link = doc.link + DOCUMENTATION_OPTIONS.FILE_SUFFIX + "?highlight=" + $.urlencode(query); var item = $('', {'href': link}); - item.html(doc.title); + item.html(title); + item.find('em').addClass('highlighted'); list_item.append(item); // If the document is from subproject, add extra information @@ -53,19 +65,115 @@ function attach_elastic_search_query(data) { list_item.append(extra); } - // Show highlighted texts - if (highlight.content) { - for (var index = 0; index < highlight.content.length; index += 1) { - if (index < 3) { - // Show up to 3 results for search - var content = highlight.content[index]; - var content_text = xss(content); - var contents = $('
    '); - - contents.html("..." + content_text + "..."); - contents.find('em').addClass('highlighted'); - list_item.append(contents); + for (var j = 0; j < inner_hits.length; j += 1) { + + var contents = $('
    '); + + var section_template = '' + + '
    ' + + '' + + '<%= section_subtitle %>' + + '' + + '
    ' + + '<% for (var i = 0; i < section_content.length; ++i) { %>' + + '
    ' + + '<%= section_content[i] %>' + + '
    ' + + '<% } %>'; + + var domain_template = '' + + '
    ' + + '' + + '<%= domain_subtitle %>' + + '' + + '
    ' + + '' + + '<%= domain_content %>' + + ''; + + // if the result is page section + if(inner_hits[j].type === "sections") { + + var section = inner_hits[j]; + var section_subtitle = section._source.title; + var section_subtitle_link = link + "#" + section._source.id; + var section_content = [section._source.content.substring(0, MAX_SUBSTRING_LIMIT) + " ..."]; + + if (section.highlight) { + if (section.highlight["sections.title"]) { + section_subtitle = xss(section.highlight["sections.title"][0]); + } + + if (section.highlight["sections.content"]) { + var content = section.highlight["sections.content"]; + section_content = []; + for ( + var k = 0; + k < content.length && k < MAX_RESULT_PER_SECTION; + k += 1 + ) { + section_content.push("... " + xss(content[k]) + " ..."); + } + } } + + contents.append( + $u.template( + section_template, + { + section_subtitle_link: section_subtitle_link, + section_subtitle: section_subtitle, + section_content: section_content + } + ) + ); + } + + // if the result is a sphinx domain object + if (inner_hits[j].type === "domains") { + + var domain = inner_hits[j]; + var domain_subtitle = domain._source.role_name; + var domain_subtitle_link = link + "#" + domain._source.anchor; + var domain_content = ""; + var domain_name = domain._source.name; + + if ( + typeof domain._source.display_name === "string" && + domain._source.display_name.length >= 1 + ) { + domain_subtitle = "(" + domain._source.role_name + ") " + domain._source.display_name; + } + + if (domain.highlight) { + if (domain.highlight["domains.name"]) { + // domain_content = type_display -- name + domain_name = xss(domain.highlight["domains.name"][0]); + } + } + + // domain_content = type_display -- name -- in doc_display + domain_content = domain._source.type_display + " -- " + domain_name + " -- in " + domain._source.doc_display; + + contents.append( + $u.template( + domain_template, + { + domain_subtitle_link: domain_subtitle_link, + domain_subtitle: domain_subtitle, + domain_content: domain_content + } + ) + ); + } + + contents.find('em').addClass('highlighted'); + list_item.append(contents); + + // Create some spacing between the results. + // Also, don't add this spacing in the last hit. + if (j !== inner_hits.length - 1) { + list_item.append($("
    ")); } } diff --git a/readthedocs/core/static/core/js/readthedocs-doc-embed.js b/readthedocs/core/static/core/js/readthedocs-doc-embed.js index c2f910fb29e..25c63e3baf8 100644 --- a/readthedocs/core/static/core/js/readthedocs-doc-embed.js +++ b/readthedocs/core/static/core/js/readthedocs-doc-embed.js @@ -1 +1 @@ -!function o(a,s,l){function d(t,e){if(!s[t]){if(!a[t]){var i="function"==typeof require&&require;if(!e&&i)return i(t,!0);if(c)return c(t,!0);var r=new Error("Cannot find module '"+t+"'");throw r.code="MODULE_NOT_FOUND",r}var n=s[t]={exports:{}};a[t][0].call(n.exports,function(e){return d(a[t][1][e]||e)},n,n.exports,o,a,s,l)}return s[t].exports}for(var c="function"==typeof require&&require,e=0;e
    "),i("table.docutils.footnote").wrap("
    "),i("table.docutils.citation").wrap("
    "),i(".wy-menu-vertical ul").not(".simple").siblings("a").each(function(){var t=i(this);expand=i(''),expand.on("click",function(e){return r.toggleCurrent(t),e.stopPropagation(),!1}),t.prepend(expand)})},reset:function(){var e=encodeURI(window.location.hash)||"#";try{var t=$(".wy-menu-vertical"),i=t.find('[href="'+e+'"]');if(0===i.length){var r=$('.document [id="'+e.substring(1)+'"]').closest("div.section");0===(i=t.find('[href="#'+r.attr("id")+'"]')).length&&(i=t.find('[href="#"]'))}0this.docHeight||(this.navBar.scrollTop(i),this.winPosition=e)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",function(){this.linkScroll=!1})},toggleCurrent:function(e){var t=e.closest("li");t.siblings("li.current").removeClass("current"),t.siblings().find("li.current").removeClass("current"),t.find("> ul li.current").removeClass("current"),t.toggleClass("current")}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:t.exports.ThemeNav,StickyNav:t.exports.ThemeNav}),function(){for(var o=0,e=["ms","moz","webkit","o"],t=0;t/g,c=/"/g,p=/"/g,f=/&#([a-zA-Z0-9]*);?/gim,h=/:?/gim,g=/&newline;?/gim,m=/((j\s*a\s*v\s*a|v\s*b|l\s*i\s*v\s*e)\s*s\s*c\s*r\s*i\s*p\s*t\s*|m\s*o\s*c\s*h\s*a)\:/gi,v=/e\s*x\s*p\s*r\s*e\s*s\s*s\s*i\s*o\s*n\s*\(.*/gi,w=/u\s*r\s*l\s*\(.*/gi;function b(e){return e.replace(c,""")}function y(e){return e.replace(p,'"')}function _(e){return e.replace(f,function(e,t){return"x"===t[0]||"X"===t[0]?String.fromCharCode(parseInt(t.substr(1),16)):String.fromCharCode(parseInt(t,10))})}function x(e){return e.replace(h,":").replace(g," ")}function k(e){for(var t="",i=0,r=e.length;i/g;i.whiteList={a:["target","href","title"],abbr:["title"],address:[],area:["shape","coords","href","alt"],article:[],aside:[],audio:["autoplay","controls","loop","preload","src"],b:[],bdi:["dir"],bdo:["dir"],big:[],blockquote:["cite"],br:[],caption:[],center:[],cite:[],code:[],col:["align","valign","span","width"],colgroup:["align","valign","span","width"],dd:[],del:["datetime"],details:["open"],div:[],dl:[],dt:[],em:[],font:["color","size","face"],footer:[],h1:[],h2:[],h3:[],h4:[],h5:[],h6:[],header:[],hr:[],i:[],img:["src","alt","title","width","height"],ins:["datetime"],li:[],mark:[],nav:[],ol:[],p:[],pre:[],s:[],section:[],small:[],span:[],sub:[],sup:[],strong:[],table:["width","border","align","valign"],tbody:["align","valign"],td:["width","rowspan","colspan","align","valign"],tfoot:["align","valign"],th:["width","rowspan","colspan","align","valign"],thead:["align","valign"],tr:["rowspan","align","valign"],tt:[],u:[],ul:[],video:["autoplay","controls","loop","preload","src","height","width"]},i.getDefaultWhiteList=o,i.onTag=function(e,t,i){},i.onIgnoreTag=function(e,t,i){},i.onTagAttr=function(e,t,i){},i.onIgnoreTagAttr=function(e,t,i){},i.safeAttrValue=function(e,t,i,r){if(i=T(i),"href"===t||"src"===t){if("#"===(i=u.trim(i)))return"#";if("http://"!==i.substr(0,7)&&"https://"!==i.substr(0,8)&&"mailto:"!==i.substr(0,7)&&"tel:"!==i.substr(0,4)&&"#"!==i[0]&&"/"!==i[0])return""}else if("background"===t){if(m.lastIndex=0,m.test(i))return""}else if("style"===t){if(v.lastIndex=0,v.test(i))return"";if(w.lastIndex=0,w.test(i)&&(m.lastIndex=0,m.test(i)))return"";!1!==r&&(i=(r=r||a).process(i))}return 
i=E(i)},i.escapeHtml=s,i.escapeQuote=b,i.unescapeQuote=y,i.escapeHtmlEntities=_,i.escapeDangerHtml5Entities=x,i.clearNonPrintableCharacter=k,i.friendlyAttrValue=T,i.escapeAttrValue=E,i.onIgnoreTagStripAll=function(){return""},i.StripTagBody=function(a,s){"function"!=typeof s&&(s=function(){});var l=!Array.isArray(a),d=[],c=!1;return{onIgnoreTag:function(e,t,i){if(o=e,l||-1!==u.indexOf(a,o)){if(i.isClosing){var r="[/removed]",n=i.position+r.length;return d.push([!1!==c?c:i.position,n]),c=!1,r}return c||(c=i.position),"[removed]"}return s(e,t,i);var o},remove:function(t){var i="",r=0;return u.forEach(d,function(e){i+=t.slice(r,e[0]),r=e[1]}),i+=t.slice(r)}}},i.stripCommentTag=function(e){return e.replace(S,"")},i.stripBlankChar=function(e){var t=e.split("");return(t=t.filter(function(e){var t=e.charCodeAt(0);return!(127===t||t<=31&&10!==t&&13!==t)})).join("")},i.cssFilter=a,i.getDefaultCSSWhiteList=n},{"./util":5,cssfilter:10}],3:[function(e,t,i){var r=e("./default"),n=e("./parser"),o=e("./xss");for(var a in(i=t.exports=function(e,t){return new o(t).process(e)}).FilterXSS=o,r)i[a]=r[a];for(var a in n)i[a]=n[a];"undefined"!=typeof window&&(window.filterXSS=t.exports)},{"./default":2,"./parser":4,"./xss":6}],4:[function(e,t,i){var c=e("./util");function p(e){var t=c.spaceIndex(e);if(-1===t)var i=e.slice(1,-1);else i=e.slice(1,t+1);return"/"===(i=c.trim(i).toLowerCase()).slice(0,1)&&(i=i.slice(1)),"/"===i.slice(-1)&&(i=i.slice(0,-1)),i}var u=/[^a-zA-Z0-9_:\.\-]/gim;function f(e,t){for(;t"===u){r+=i(e.slice(n,o)),c=p(d=e.slice(o,s+1)),r+=t(o,r.length,c,d,"";var s=function(e){var t=b.spaceIndex(e);if(-1===t)return{html:"",closing:"/"===e[e.length-2]};var i="/"===(e=b.trim(e.slice(t+1,-1)))[e.length-1];return i&&(e=b.trim(e.slice(0,-1))),{html:e,closing:i}}(i),l=c[n],d=w(s.html,function(e,t){var i,r=-1!==b.indexOf(l,e);return y(i=f(n,e,t,r))?r?(t=g(n,e,t,v))?e+'="'+t+'"':e:y(i=h(n,e,t,r))?void 0:i:i});i="<"+n;return d&&(i+=" "+d),s.closing&&(i+=" /"),i+=">"}return y(o=p(n,i,a))?m(i):o},m);return i&&(r=i.remove(r)),r},t.exports=s},{"./default":2,"./parser":4,"./util":5,cssfilter:10}],7:[function(e,t,i){var r,n;r=this,n=function(){var T=!0;function a(i){function e(e){var t=i.match(e);return t&&1t[1][i])return 1;if(t[0][i]!==t[1][i])return-1;if(0===i)return 0}}function o(e,t,i){var r=s;"string"==typeof t&&(i=t,t=void 0),void 0===t&&(t=!1),i&&(r=a(i));var n=""+r.version;for(var o in e)if(e.hasOwnProperty(o)&&r[o]){if("string"!=typeof e[o])throw new Error("Browser version in the minVersion map should be a string: "+o+": "+String(e));return l([n,e[o]])<0}return t}return s.test=function(e){for(var t=0;t'),a=r.link+DOCUMENTATION_OPTIONS.FILE_SUFFIX+"?highlight="+$.urlencode(h),s=$("",{href:a});if(s.html(r.title),o.append(s),r.project!==g){var l=" (from project "+r.project+")",d=$("",{text:l});o.append(d)}if(n.content)for(var c=0;c');f.html("..."+p+"..."),f.find("em").addClass("highlighted"),o.append(f)}Search.output.append(o),o.slideDown(5)}t.length?Search.status.text(_("Search finished, found %s page(s) matching the search query.").replace("%s",t.length)):(Search.query_fallback(h),console.log("Read the Docs search failed. 
Falling back to Sphinx search."))}).fail(function(e){Search.query_fallback(h)}).always(function(){$("#search-progress").empty(),Search.stopPulse(),Search.title.text(_("Search Results")),Search.status.fadeIn(500)}),$.ajax({url:e.href,crossDomain:!0,xhrFields:{withCredentials:!0},complete:function(e,t){return"success"!==t||void 0===e.responseJSON||0===e.responseJSON.count?r.reject():r.resolve(e.responseJSON)}}).fail(function(e,t,i){return r.reject()})}}$(document).ready(function(){"undefined"!=typeof Search&&Search.init()})}(r.get())}}},{"./../../../../../../bower_components/xss/lib/index":3,"./rtd-data":16}],18:[function(n,e,t){var o=n("./rtd-data");e.exports={init:function(){var e=o.get();if($(document).on("click","[data-toggle='rst-current-version']",function(){var e=$("[data-toggle='rst-versions']").hasClass("shift-up")?"was_open":"was_closed";"undefined"!=typeof ga?ga("rtfd.send","event","Flyout","Click",e):"undefined"!=typeof _gaq&&_gaq.push(["rtfd._setAccount","UA-17997319-1"],["rtfd._trackEvent","Flyout","Click",e])}),void 0===window.SphinxRtdTheme){var t=n("./../../../../../../bower_components/sphinx-rtd-theme/js/theme.js").ThemeNav;if($(document).ready(function(){setTimeout(function(){t.navBar||t.enable()},1e3)}),e.is_rtd_like_theme()&&!$("div.wy-side-scroll:first").length){console.log("Applying theme sidebar fix...");var i=$("nav.wy-nav-side:first"),r=$("
    ").addClass("wy-side-scroll");i.children().detach().appendTo(r),r.prependTo(i),t.navBar=r}}}}},{"./../../../../../../bower_components/sphinx-rtd-theme/js/theme.js":1,"./rtd-data":16}],19:[function(e,t,i){var u,p=e("./constants"),f=e("./rtd-data"),r=e("bowser"),h="#ethical-ad-placement";function g(){var e,t,i="rtd-"+(Math.random()+1).toString(36).substring(4),r=p.PROMO_TYPES.LEFTNAV,n=p.DEFAULT_PROMO_PRIORITY,o=null;return u.is_mkdocs_builder()&&u.is_rtd_like_theme()?(o="nav.wy-nav-side",e="ethical-rtd ethical-dark-theme"):u.is_rtd_like_theme()?(o="nav.wy-nav-side > div.wy-side-scroll",e="ethical-rtd ethical-dark-theme"):u.is_alabaster_like_theme()&&(o="div.sphinxsidebar > div.sphinxsidebarwrapper",e="ethical-alabaster"),o?($("
    ").attr("id",i).addClass(e).appendTo(o),(!(t=$("#"+i).offset())||t.top>$(window).height())&&(n=p.LOW_PROMO_PRIORITY),{div_id:i,display_type:r,priority:n}):null}function m(){var e,t,i="rtd-"+(Math.random()+1).toString(36).substring(4),r=p.PROMO_TYPES.FOOTER,n=p.DEFAULT_PROMO_PRIORITY,o=null;return u.is_rtd_like_theme()?(o=$("
    ").insertAfter("footer hr"),e="ethical-rtd"):u.is_alabaster_like_theme()&&(o="div.bodywrapper .body",e="ethical-alabaster"),o?($("
    ").attr("id",i).addClass(e).appendTo(o),(!(t=$("#"+i).offset())||t.top<$(window).height())&&(n=p.LOW_PROMO_PRIORITY),{div_id:i,display_type:r,priority:n}):null}function v(){var e="rtd-"+(Math.random()+1).toString(36).substring(4),t=p.PROMO_TYPES.FIXED_FOOTER,i=p.DEFAULT_PROMO_PRIORITY;return r&&r.mobile&&(i=p.MAXIMUM_PROMO_PRIORITY),$("
    ").attr("id",e).appendTo("body"),{div_id:e,display_type:t,priority:i}}function w(e){this.id=e.id,this.div_id=e.div_id||"",this.html=e.html||"",this.display_type=e.display_type||"",this.view_tracking_url=e.view_url,this.click_handler=function(){"undefined"!=typeof ga?ga("rtfd.send","event","Promo","Click",e.id):"undefined"!=typeof _gaq&&_gaq.push(["rtfd._setAccount","UA-17997319-1"],["rtfd._trackEvent","Promo","Click",e.id])}}w.prototype.display=function(){var e="#"+this.div_id,t=this.view_tracking_url;$(e).html(this.html),$(e).find('a[href*="/sustainability/click/"]').on("click",this.click_handler);var i=function(){$.inViewport($(e),-3)&&($("").attr("src",t).css("display","none").appendTo(e),$(window).off(".rtdinview"),$(".wy-side-scroll").off(".rtdinview"))};$(window).on("DOMContentLoaded.rtdinview load.rtdinview scroll.rtdinview resize.rtdinview",i),$(".wy-side-scroll").on("scroll.rtdinview",i),$(".ethical-close").on("click",function(){return $(e).hide(),!1}),this.post_promo_display()},w.prototype.disable=function(){$("#"+this.div_id).hide()},w.prototype.post_promo_display=function(){this.display_type===p.PROMO_TYPES.FOOTER&&($("
    ").insertAfter("#"+this.div_id),$("
    ").insertBefore("#"+this.div_id+".ethical-alabaster .ethical-footer"))},t.exports={Promo:w,init:function(){var e,t,i,r,n,o={format:"jsonp"},a=[],s=[],l=[],d=[m,g,v];if(u=f.get(),r="rtd-"+(Math.random()+1).toString(36).substring(4),n=p.PROMO_TYPES.LEFTNAV,i=u.is_rtd_like_theme()?"ethical-rtd ethical-dark-theme":"ethical-alabaster",t=0<$(h).length?($("
    ").attr("id",r).addClass(i).appendTo(h),{div_id:r,display_type:n}):null)a.push(t.div_id),s.push(t.display_type),l.push(t.priority||p.DEFAULT_PROMO_PRIORITY);else{if(!u.show_promo())return;for(var c=0;c").attr("id","rtd-detection").attr("class","ethical-rtd").html(" ").appendTo("body"),0===$("#rtd-detection").height()&&(i=!0),$("#rtd-detection").remove(),i)&&(console.log("---------------------------------------------------------------------------------------"),console.log("Read the Docs hosts documentation for tens of thousands of open source projects."),console.log("We fund our development (we are open source) and operations through advertising."),console.log("We promise to:"),console.log(" - never let advertisers run 3rd party JavaScript"),console.log(" - never sell user data to advertisers or other 3rd parties"),console.log(" - only show advertisements of interest to developers"),console.log("Read more about our approach to advertising here: https://docs.readthedocs.io/en/latest/ethical-advertising.html"),console.log("%cPlease allow our Ethical Ads or go ad-free:","font-size: 2em"),console.log("https://docs.readthedocs.io/en/latest/advertising/ad-blocking.html"),console.log("--------------------------------------------------------------------------------------"),e=g(),t=null,e&&e.div_id&&(t=$("#"+e.div_id).attr("class","keep-us-sustainable"),$("
    ").text("Support Read the Docs!").appendTo(t),$("
    ").html('Please help keep us sustainable by allowing our Ethical Ads in your ad blocker or go ad-free by subscribing.').appendTo(t),$("
    ").text("Thank you! ❤️").appendTo(t)))}})}}},{"./constants":14,"./rtd-data":16,bowser:7}],20:[function(e,t,i){var o=e("./rtd-data");t.exports={init:function(e){var t=o.get();if(!e.is_highest){var i=window.location.pathname.replace(t.version,e.slug),r=$('
    Note
    You are not reading the most recent version of this documentation. is the latest version available.
    ');r.find("a").attr("href",i).text(e.slug);var n=$("div.body");n.length||(n=$("div.document")),n.prepend(r)}}}},{"./rtd-data":16}],21:[function(e,t,i){var r=e("./doc-embed/sponsorship"),n=e("./doc-embed/footer.js"),o=(e("./doc-embed/rtd-data"),e("./doc-embed/sphinx")),a=e("./doc-embed/search");$.extend(e("verge")),$(document).ready(function(){n.init(),o.init(),a.init(),r.init()})},{"./doc-embed/footer.js":15,"./doc-embed/rtd-data":16,"./doc-embed/search":17,"./doc-embed/sphinx":18,"./doc-embed/sponsorship":19,verge:13}]},{},[21]); \ No newline at end of file +!function o(s,a,l){function d(t,e){if(!a[t]){if(!s[t]){var i="function"==typeof require&&require;if(!e&&i)return i(t,!0);if(c)return c(t,!0);var n=new Error("Cannot find module '"+t+"'");throw n.code="MODULE_NOT_FOUND",n}var r=a[t]={exports:{}};s[t][0].call(r.exports,function(e){return d(s[t][1][e]||e)},r,r.exports,o,s,a,l)}return a[t].exports}for(var c="function"==typeof require&&require,e=0;e
    "),i("table.docutils.footnote").wrap("
    "),i("table.docutils.citation").wrap("
    "),i(".wy-menu-vertical ul").not(".simple").siblings("a").each(function(){var t=i(this);expand=i(''),expand.on("click",function(e){return n.toggleCurrent(t),e.stopPropagation(),!1}),t.prepend(expand)})},reset:function(){var e=encodeURI(window.location.hash)||"#";try{var t=$(".wy-menu-vertical"),i=t.find('[href="'+e+'"]');if(0===i.length){var n=$('.document [id="'+e.substring(1)+'"]').closest("div.section");0===(i=t.find('[href="#'+n.attr("id")+'"]')).length&&(i=t.find('[href="#"]'))}0this.docHeight||(this.navBar.scrollTop(i),this.winPosition=e)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",function(){this.linkScroll=!1})},toggleCurrent:function(e){var t=e.closest("li");t.siblings("li.current").removeClass("current"),t.siblings().find("li.current").removeClass("current"),t.find("> ul li.current").removeClass("current"),t.toggleClass("current")}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:t.exports.ThemeNav,StickyNav:t.exports.ThemeNav}),function(){for(var o=0,e=["ms","moz","webkit","o"],t=0;t/g,u=/"/g,p=/"/g,h=/&#([a-zA-Z0-9]*);?/gim,f=/:?/gim,g=/&newline;?/gim,m=/((j\s*a\s*v\s*a|v\s*b|l\s*i\s*v\s*e)\s*s\s*c\s*r\s*i\s*p\s*t\s*|m\s*o\s*c\s*h\s*a)\:/gi,v=/e\s*x\s*p\s*r\s*e\s*s\s*s\s*i\s*o\s*n\s*\(.*/gi,w=/u\s*r\s*l\s*\(.*/gi;function b(e){return e.replace(u,""")}function _(e){return e.replace(p,'"')}function y(e){return e.replace(h,function(e,t){return"x"===t[0]||"X"===t[0]?String.fromCharCode(parseInt(t.substr(1),16)):String.fromCharCode(parseInt(t,10))})}function x(e){return e.replace(f,":").replace(g," ")}function k(e){for(var t="",i=0,n=e.length;i/g;i.whiteList={a:["target","href","title"],abbr:["title"],address:[],area:["shape","coords","href","alt"],article:[],aside:[],audio:["autoplay","controls","loop","preload","src"],b:[],bdi:["dir"],bdo:["dir"],big:[],blockquote:["cite"],br:[],caption:[],center:[],cite:[],code:[],col:["align","valign","span","width"],colgroup:["align","valign","span","width"],dd:[],del:["datetime"],details:["open"],div:[],dl:[],dt:[],em:[],font:["color","size","face"],footer:[],h1:[],h2:[],h3:[],h4:[],h5:[],h6:[],header:[],hr:[],i:[],img:["src","alt","title","width","height"],ins:["datetime"],li:[],mark:[],nav:[],ol:[],p:[],pre:[],s:[],section:[],small:[],span:[],sub:[],sup:[],strong:[],table:["width","border","align","valign"],tbody:["align","valign"],td:["width","rowspan","colspan","align","valign"],tfoot:["align","valign"],th:["width","rowspan","colspan","align","valign"],thead:["align","valign"],tr:["rowspan","align","valign"],tt:[],u:[],ul:[],video:["autoplay","controls","loop","preload","src","height","width"]},i.getDefaultWhiteList=o,i.onTag=function(e,t,i){},i.onIgnoreTag=function(e,t,i){},i.onTagAttr=function(e,t,i){},i.onIgnoreTagAttr=function(e,t,i){},i.safeAttrValue=function(e,t,i,n){if(i=T(i),"href"===t||"src"===t){if("#"===(i=c.trim(i)))return"#";if("http://"!==i.substr(0,7)&&"https://"!==i.substr(0,8)&&"mailto:"!==i.substr(0,7)&&"tel:"!==i.substr(0,4)&&"#"!==i[0]&&"/"!==i[0])return""}else if("background"===t){if(m.lastIndex=0,m.test(i))return""}else if("style"===t){if(v.lastIndex=0,v.test(i))return"";if(w.lastIndex=0,w.test(i)&&(m.lastIndex=0,m.test(i)))return"";!1!==n&&(i=(n=n||s).process(i))}return 
i=E(i)},i.escapeHtml=a,i.escapeQuote=b,i.unescapeQuote=_,i.escapeHtmlEntities=y,i.escapeDangerHtml5Entities=x,i.clearNonPrintableCharacter=k,i.friendlyAttrValue=T,i.escapeAttrValue=E,i.onIgnoreTagStripAll=function(){return""},i.StripTagBody=function(o,s){"function"!=typeof s&&(s=function(){});var a=!Array.isArray(o),l=[],d=!1;return{onIgnoreTag:function(e,t,i){if(function(e){return a||-1!==c.indexOf(o,e)}(e)){if(i.isClosing){var n="[/removed]",r=i.position+n.length;return l.push([!1!==d?d:i.position,r]),d=!1,n}return d||(d=i.position),"[removed]"}return s(e,t,i)},remove:function(t){var i="",n=0;return c.forEach(l,function(e){i+=t.slice(n,e[0]),n=e[1]}),i+=t.slice(n)}}},i.stripCommentTag=function(e){return e.replace(A,"")},i.stripBlankChar=function(e){var t=e.split("");return(t=t.filter(function(e){var t=e.charCodeAt(0);return!(127===t||t<=31&&10!==t&&13!==t)})).join("")},i.cssFilter=s,i.getDefaultCSSWhiteList=r},{"./util":5,cssfilter:10}],3:[function(e,t,i){var n=e("./default"),r=e("./parser"),o=e("./xss");for(var s in(i=t.exports=function(e,t){return new o(t).process(e)}).FilterXSS=o,n)i[s]=n[s];for(var s in r)i[s]=r[s];"undefined"!=typeof window&&(window.filterXSS=t.exports)},{"./default":2,"./parser":4,"./xss":6}],4:[function(e,t,i){var c=e("./util");function p(e){var t=c.spaceIndex(e);if(-1===t)var i=e.slice(1,-1);else i=e.slice(1,t+1);return"/"===(i=c.trim(i).toLowerCase()).slice(0,1)&&(i=i.slice(1)),"/"===i.slice(-1)&&(i=i.slice(0,-1)),i}var u=/[^a-zA-Z0-9_:\.\-]/gim;function h(e,t){for(;t"===u){n+=i(e.slice(r,o)),c=p(d=e.slice(o,a+1)),n+=t(o,n.length,c,d,"";var a=function(e){var t=b.spaceIndex(e);if(-1===t)return{html:"",closing:"/"===e[e.length-2]};var i="/"===(e=b.trim(e.slice(t+1,-1)))[e.length-1];return i&&(e=b.trim(e.slice(0,-1))),{html:e,closing:i}}(i),l=c[r],d=w(a.html,function(e,t){var i,n=-1!==b.indexOf(l,e);return _(i=h(r,e,t,n))?n?(t=g(r,e,t,v))?e+'="'+t+'"':e:_(i=f(r,e,t,n))?void 0:i:i});i="<"+r;return d&&(i+=" "+d),a.closing&&(i+=" /"),i+=">"}return _(o=p(r,i,s))?m(i):o},m);return i&&(n=i.remove(n)),n},t.exports=a},{"./default":2,"./parser":4,"./util":5,cssfilter:10}],7:[function(e,t,i){var n,r;n=this,r=function(){var T=!0;function s(i){function e(e){var t=i.match(e);return t&&1t[1][i])return 1;if(t[0][i]!==t[1][i])return-1;if(0===i)return 0}}function o(e,t,i){var n=a;"string"==typeof t&&(i=t,t=void 0),void 0===t&&(t=!1),i&&(n=s(i));var r=""+n.version;for(var o in e)if(e.hasOwnProperty(o)&&n[o]){if("string"!=typeof e[o])throw new Error("Browser version in the minVersion map should be a string: "+o+": "+String(e));return E([r,e[o]])<0}return t}return a.test=function(e){for(var t=0;t'),a=n.title;!r||r.title&&(a=O(r.title[0]));var l=n.link+DOCUMENTATION_OPTIONS.FILE_SUFFIX+"?highlight="+$.urlencode(A),d=$("",{href:l});if(d.html(a),d.find("em").addClass("highlighted"),s.append(d),n.project!==S){var c=" (from project "+n.project+")",u=$("",{text:c});s.append(u)}for(var p=0;p');if("sections"===o[p].type){var f=o[p],g=f._source.title,m=l+"#"+f._source.id,v=[f._source.content.substring(0,C)+" ..."];if(f.highlight&&(f.highlight["sections.title"]&&(g=O(f.highlight["sections.title"][0])),f.highlight["sections.content"])){var w=f.highlight["sections.content"];v=[];for(var b=0;b<%= section_subtitle %>
    <% for (var i = 0; i < section_content.length; ++i) { %>
    <%= section_content[i] %>
    <% } %>',{section_subtitle_link:m,section_subtitle:g,section_content:v}))}if("domains"===o[p].type){var y,x=o[p],k=x._source.role_name,T=l+"#"+x._source.anchor,E=x._source.name;"string"==typeof x._source.display_name&&1<=x._source.display_name.length&&(k="("+x._source.role_name+") "+x._source.display_name),!x.highlight||x.highlight["domains.name"]&&(E=O(x.highlight["domains.name"][0])),y=x._source.type_display+" -- "+E+" -- in "+x._source.doc_display,h.append($u.template('<%= domain_content %>',{domain_subtitle_link:T,domain_subtitle:k,domain_content:y}))}h.find("em").addClass("highlighted"),s.append(h),p!==o.length-1&&s.append($("
    "))}Search.output.append(s),s.slideDown(5)}t.length?Search.status.text(_("Search finished, found %s page(s) matching the search query.").replace("%s",t.length)):(Search.query_fallback(A),console.log("Read the Docs search failed. Falling back to Sphinx search."))}).fail(function(e){Search.query_fallback(A)}).always(function(){$("#search-progress").empty(),Search.stopPulse(),Search.title.text(_("Search Results")),Search.status.fadeIn(500)}),$.ajax({url:e.href,crossDomain:!0,xhrFields:{withCredentials:!0},complete:function(e,t){return"success"!==t||void 0===e.responseJSON||0===e.responseJSON.count?n.reject():n.resolve(e.responseJSON)}}).fail(function(e,t,i){return n.reject()})}}$(document).ready(function(){"undefined"!=typeof Search&&Search.init()})}(n.get())}}},{"./../../../../../../bower_components/xss/lib/index":3,"./rtd-data":16}],18:[function(r,e,t){var o=r("./rtd-data");e.exports={init:function(){var e=o.get();if($(document).on("click","[data-toggle='rst-current-version']",function(){var e=$("[data-toggle='rst-versions']").hasClass("shift-up")?"was_open":"was_closed";"undefined"!=typeof ga?ga("rtfd.send","event","Flyout","Click",e):"undefined"!=typeof _gaq&&_gaq.push(["rtfd._setAccount","UA-17997319-1"],["rtfd._trackEvent","Flyout","Click",e])}),void 0===window.SphinxRtdTheme){var t=r("./../../../../../../bower_components/sphinx-rtd-theme/js/theme.js").ThemeNav;if($(document).ready(function(){setTimeout(function(){t.navBar||t.enable()},1e3)}),e.is_rtd_like_theme()&&!$("div.wy-side-scroll:first").length){console.log("Applying theme sidebar fix...");var i=$("nav.wy-nav-side:first"),n=$("
    ").addClass("wy-side-scroll");i.children().detach().appendTo(n),n.prependTo(i),t.navBar=n}}}}},{"./../../../../../../bower_components/sphinx-rtd-theme/js/theme.js":1,"./rtd-data":16}],19:[function(e,t,i){var l,d=e("./constants"),c=e("./rtd-data"),n=e("bowser"),u="#ethical-ad-placement";function p(){var e,t,i="rtd-"+(Math.random()+1).toString(36).substring(4),n=d.PROMO_TYPES.LEFTNAV,r=d.DEFAULT_PROMO_PRIORITY,o=null;return l.is_mkdocs_builder()&&l.is_rtd_like_theme()?(o="nav.wy-nav-side",e="ethical-rtd ethical-dark-theme"):l.is_rtd_like_theme()?(o="nav.wy-nav-side > div.wy-side-scroll",e="ethical-rtd ethical-dark-theme"):l.is_alabaster_like_theme()&&(o="div.sphinxsidebar > div.sphinxsidebarwrapper",e="ethical-alabaster"),o?($("
    ").attr("id",i).addClass(e).appendTo(o),(!(t=$("#"+i).offset())||t.top>$(window).height())&&(r=d.LOW_PROMO_PRIORITY),{div_id:i,display_type:n,priority:r}):null}function h(){var e,t,i="rtd-"+(Math.random()+1).toString(36).substring(4),n=d.PROMO_TYPES.FOOTER,r=d.DEFAULT_PROMO_PRIORITY,o=null;return l.is_rtd_like_theme()?(o=$("
    ").insertAfter("footer hr"),e="ethical-rtd"):l.is_alabaster_like_theme()&&(o="div.bodywrapper .body",e="ethical-alabaster"),o?($("
    ").attr("id",i).addClass(e).appendTo(o),(!(t=$("#"+i).offset())||t.top<$(window).height())&&(r=d.LOW_PROMO_PRIORITY),{div_id:i,display_type:n,priority:r}):null}function f(){var e="rtd-"+(Math.random()+1).toString(36).substring(4),t=d.PROMO_TYPES.FIXED_FOOTER,i=d.DEFAULT_PROMO_PRIORITY;return n&&n.mobile&&(i=d.MAXIMUM_PROMO_PRIORITY),$("
    ").attr("id",e).appendTo("body"),{div_id:e,display_type:t,priority:i}}function g(e){this.id=e.id,this.div_id=e.div_id||"",this.html=e.html||"",this.display_type=e.display_type||"",this.view_tracking_url=e.view_url,this.click_handler=function(){"undefined"!=typeof ga?ga("rtfd.send","event","Promo","Click",e.id):"undefined"!=typeof _gaq&&_gaq.push(["rtfd._setAccount","UA-17997319-1"],["rtfd._trackEvent","Promo","Click",e.id])}}g.prototype.display=function(){var e="#"+this.div_id,t=this.view_tracking_url;$(e).html(this.html),$(e).find('a[href*="/sustainability/click/"]').on("click",this.click_handler);function i(){$.inViewport($(e),-3)&&($("").attr("src",t).css("display","none").appendTo(e),$(window).off(".rtdinview"),$(".wy-side-scroll").off(".rtdinview"))}$(window).on("DOMContentLoaded.rtdinview load.rtdinview scroll.rtdinview resize.rtdinview",i),$(".wy-side-scroll").on("scroll.rtdinview",i),$(".ethical-close").on("click",function(){return $(e).hide(),!1}),this.post_promo_display()},g.prototype.disable=function(){$("#"+this.div_id).hide()},g.prototype.post_promo_display=function(){this.display_type===d.PROMO_TYPES.FOOTER&&($("
    ").insertAfter("#"+this.div_id),$("
    ").insertBefore("#"+this.div_id+".ethical-alabaster .ethical-footer"))},t.exports={Promo:g,init:function(){var e,t,i={format:"jsonp"},n=[],r=[],o=[],s=[h,p,f];if(l=c.get(),t=function(){var e,t="rtd-"+(Math.random()+1).toString(36).substring(4),i=d.PROMO_TYPES.LEFTNAV;return e=l.is_rtd_like_theme()?"ethical-rtd ethical-dark-theme":"ethical-alabaster",0<$(u).length?($("
    ").attr("id",t).addClass(e).appendTo(u),{div_id:t,display_type:i}):null}())n.push(t.div_id),r.push(t.display_type),o.push(t.priority||d.DEFAULT_PROMO_PRIORITY);else{if(!l.show_promo())return;for(var a=0;a").attr("id","rtd-detection").attr("class","ethical-rtd").html(" ").appendTo("body"),0===$("#rtd-detection").height()&&(e=!0),$("#rtd-detection").remove(),e}()&&(console.log("---------------------------------------------------------------------------------------"),console.log("Read the Docs hosts documentation for tens of thousands of open source projects."),console.log("We fund our development (we are open source) and operations through advertising."),console.log("We promise to:"),console.log(" - never let advertisers run 3rd party JavaScript"),console.log(" - never sell user data to advertisers or other 3rd parties"),console.log(" - only show advertisements of interest to developers"),console.log("Read more about our approach to advertising here: https://docs.readthedocs.io/en/latest/ethical-advertising.html"),console.log("%cPlease allow our Ethical Ads or go ad-free:","font-size: 2em"),console.log("https://docs.readthedocs.io/en/latest/advertising/ad-blocking.html"),console.log("--------------------------------------------------------------------------------------"),function(){var e=p(),t=null;e&&e.div_id&&(t=$("#"+e.div_id).attr("class","keep-us-sustainable"),$("
    ").text("Support Read the Docs!").appendTo(t),$("
    ").html('Please help keep us sustainable by allowing our Ethical Ads in your ad blocker or go ad-free by subscribing.').appendTo(t),$("
    ").text("Thank you! ❤️").appendTo(t))}())}})}}},{"./constants":14,"./rtd-data":16,bowser:7}],20:[function(e,t,i){var o=e("./rtd-data");t.exports={init:function(e){var t=o.get();if(!e.is_highest){var i=window.location.pathname.replace(t.version,e.slug),n=$('
    Note
    You are not reading the most recent version of this documentation. is the latest version available.
    ');n.find("a").attr("href",i).text(e.slug);var r=$("div.body");r.length||(r=$("div.document")),r.prepend(n)}}}},{"./rtd-data":16}],21:[function(e,t,i){var n=e("./doc-embed/sponsorship"),r=e("./doc-embed/footer.js"),o=(e("./doc-embed/rtd-data"),e("./doc-embed/sphinx")),s=e("./doc-embed/search");$.extend(e("verge")),$(document).ready(function(){r.init(),o.init(),s.init(),n.init()})},{"./doc-embed/footer.js":15,"./doc-embed/rtd-data":16,"./doc-embed/search":17,"./doc-embed/sphinx":18,"./doc-embed/sponsorship":19,verge:13}]},{},[21]); \ No newline at end of file diff --git a/readthedocs/core/templatetags/core_tags.py b/readthedocs/core/templatetags/core_tags.py index c9a5d47e9d8..4a1d1587442 100644 --- a/readthedocs/core/templatetags/core_tags.py +++ b/readthedocs/core/templatetags/core_tags.py @@ -109,6 +109,14 @@ def key(d, key_name): return d[key_name] +@register.filter +def get_key_or_none(d, key_name): + try: + return d[key_name] + except KeyError: + return None + + @register.simple_tag def readthedocs_version(): return __version__ diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index 0c7447ab2c4..866dbbcd4db 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -1236,8 +1236,6 @@ def get_processed_json(self): file_path, ) return { - 'headers': [], - 'content': '', 'path': file_path, 'title': '', 'sections': [], diff --git a/readthedocs/projects/static/projects/js/tools.js b/readthedocs/projects/static/projects/js/tools.js index ec4ff8a1f40..5e60509d765 100644 --- a/readthedocs/projects/static/projects/js/tools.js +++ b/readthedocs/projects/static/projects/js/tools.js @@ -1 +1 @@ -require=function o(i,a,l){function c(t,e){if(!a[t]){if(!i[t]){var n="function"==typeof require&&require;if(!e&&n)return n(t,!0);if(u)return u(t,!0);var r=new Error("Cannot find module '"+t+"'");throw r.code="MODULE_NOT_FOUND",r}var s=a[t]={exports:{}};i[t][0].call(s.exports,function(e){return c(i[t][1][e]||e)},s,s.exports,o,i,a,l)}return a[t].exports}for(var u="function"==typeof require&&require,e=0;e'),i("body").append(t));var n=e.insertContent(t);i(n).show(),t.show(),i(document).click(function(e){i(e.target).closest("#embed-container").length||(i(n).remove(),t.remove())})}function s(e){var s=this;s.config=e||{},void 0===s.config.api_host&&(s.config.api_host="https://readthedocs.org"),s.help=o.observable(null),s.error=o.observable(null),s.project=o.observable(s.config.project),s.file=o.observable(null),s.sections=o.observableArray(),o.computed(function(){var e=s.file();(s.sections.removeAll(),e)&&(s.help("Loading..."),s.error(null),s.section(null),new r.Embed(s.config).page(s.project(),"latest",s.file(),function(e){s.sections.removeAll(),s.help(null),s.error(null);var t,n=[];for(t in e.sections){var r=e.sections[t];i.each(r,function(e,t){n.push({title:e,id:e})})}s.sections(n)},function(e){s.help(null),s.error("There was a problem retrieving data from the API")}))}),s.has_sections=o.computed(function(){return 0'),i("body").append(t));var n=e.insertContent(t);i(n).show(),t.show(),i(document).click(function(e){i(e.target).closest("#embed-container").length||(i(n).remove(),t.remove())})}function s(e){var s=this;s.config=e||{},void 0===s.config.api_host&&(s.config.api_host="https://readthedocs.org"),s.help=o.observable(null),s.error=o.observable(null),s.project=o.observable(s.config.project),s.file=o.observable(null),s.sections=o.observableArray(),o.computed(function(){var 
e=s.file();(s.sections.removeAll(),e)&&(s.help("Loading..."),s.error(null),s.section(null),new r.Embed(s.config).page(s.project(),"latest",s.file(),function(e){s.sections.removeAll(),s.help(null),s.error(null);var t,n=[];for(t in e.sections){var r=e.sections[t];i.each(r,function(e,t){n.push({title:e,id:e})})}s.sections(n)},function(e){s.help(null),s.error("There was a problem retrieving data from the API")}))}),s.has_sections=o.computed(function(){return 0') + self.assertTrue(data['sections'][1]['content'].startswith( + 'You can use Slumber' + )) + self.assertEqual(data['title'], 'Read the Docs Public API') + self.assertTrue(len(data['sections']) > 0, 'There are many sections for the processed file') + + # There should be no new line character present + for section in data['sections']: + self.assertFalse('\n' in section['content']) diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py index 57c8efb9186..3ccd6d18190 100644 --- a/readthedocs/search/api.py +++ b/readthedocs/search/api.py @@ -1,4 +1,6 @@ +import itertools import logging +from operator import attrgetter from pprint import pformat from rest_framework import generics, serializers @@ -6,7 +8,7 @@ from rest_framework.pagination import PageNumberPagination from readthedocs.search.faceted_search import PageSearch -from readthedocs.search.utils import get_project_list_or_404 +from readthedocs.search import utils log = logging.getLogger(__name__) @@ -25,6 +27,7 @@ class PageSearchSerializer(serializers.Serializer): path = serializers.CharField() link = serializers.SerializerMethodField() highlight = serializers.SerializerMethodField() + inner_hits = serializers.SerializerMethodField() def get_link(self, obj): projects_url = self.context.get('projects_url') @@ -35,14 +38,37 @@ def get_link(self, obj): def get_highlight(self, obj): highlight = getattr(obj.meta, 'highlight', None) if highlight: - if hasattr(highlight, 'content'): - # Change results to turn newlines in highlight into periods - # https://github.com/rtfd/readthedocs.org/issues/5168 - highlight.content = [result.replace('\n', '. 
') for result in highlight.content] - ret = highlight.to_dict() - log.debug('API Search highlight: %s', pformat(ret)) + ret = utils._remove_newlines_from_dict(highlight.to_dict()) + log.debug('API Search highlight [Page title]: %s', pformat(ret)) return ret + def get_inner_hits(self, obj): + inner_hits = getattr(obj.meta, 'inner_hits', None) + if inner_hits: + sections = inner_hits.sections or [] + domains = inner_hits.domains or [] + all_results = itertools.chain(sections, domains) + + sorted_results = [ + { + 'type': hit._nested.field, + '_source': hit._source.to_dict(), + 'highlight': self._get_inner_hits_highlights(hit), + } + for hit in sorted(all_results, key=attrgetter('_score'), reverse=True) + ] + + return sorted_results + + def _get_inner_hits_highlights(self, hit): + """Removes new lines from highlight and log it.""" + highlight_dict = utils._remove_newlines_from_dict( + hit.highlight.to_dict() + ) + + log.debug('API Search highlight: %s', pformat(highlight_dict)) + return highlight_dict + class PageSearchAPIView(generics.ListAPIView): @@ -112,7 +138,7 @@ def get_all_projects(self): """ project_slug = self.request.query_params.get('project') version_slug = self.request.query_params.get('version') - all_projects = get_project_list_or_404( + all_projects = utils.get_project_list_or_404( project_slug=project_slug, user=self.request.user, version_slug=version_slug, ) return all_projects diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py index 5e9d950f272..a09a78aed80 100644 --- a/readthedocs/search/documents.py +++ b/readthedocs/search/documents.py @@ -6,7 +6,6 @@ from elasticsearch import Elasticsearch from readthedocs.projects.models import HTMLFile, Project -from readthedocs.sphinx_domains.models import SphinxDomain project_conf = settings.ES_INDEXES['project'] @@ -17,10 +16,6 @@ page_index = Index(page_conf['name']) page_index.settings(**page_conf['settings']) -domain_conf = settings.ES_INDEXES['domain'] -domain_index = Index(domain_conf['name']) -domain_index.settings(**domain_conf['settings']) - log = logging.getLogger(__name__) @@ -35,46 +30,6 @@ def update(self, *args, **kwargs): super().update(*args, **kwargs) -@domain_index.doc_type -class SphinxDomainDocument(RTDDocTypeMixin, DocType): - project = fields.KeywordField(attr='project.slug') - version = fields.KeywordField(attr='version.slug') - role_name = fields.KeywordField(attr='role_name') - - # For linking to the URL - doc_name = fields.KeywordField(attr='doc_name') - anchor = fields.KeywordField(attr='anchor') - - # For showing in the search result - type_display = fields.TextField(attr='type_display') - doc_display = fields.TextField(attr='doc_display') - - # Simple analyzer breaks on `.`, - # otherwise search results are too strict for this use case - name = fields.TextField(attr='name', analyzer='simple') - display_name = fields.TextField(attr='display_name', analyzer='simple') - - modified_model_field = 'modified' - - class Meta: - model = SphinxDomain - fields = ('commit', 'build') - ignore_signals = True - - def get_queryset(self): - """Overwrite default queryset to filter certain files to index.""" - queryset = super().get_queryset() - - excluded_types = [ - {'domain': 'std', 'type': 'doc'}, - {'domain': 'std', 'type': 'label'}, - ] - - for exclude in excluded_types: - queryset = queryset.exclude(**exclude) - return queryset - - @project_index.doc_type class ProjectDocument(RTDDocTypeMixin, DocType): @@ -120,8 +75,32 @@ class PageDocument(RTDDocTypeMixin, DocType): # Searchable 
content title = fields.TextField(attr='processed_json.title') - headers = fields.TextField(attr='processed_json.headers') - content = fields.TextField(attr='processed_json.content') + sections = fields.NestedField( + attr='processed_json.sections', + properties={ + 'id': fields.KeywordField(), + 'title': fields.TextField(), + 'content': fields.TextField(), + } + ) + domains = fields.NestedField( + properties={ + 'role_name': fields.KeywordField(), + + # For linking to the URL + 'doc_name': fields.KeywordField(), + 'anchor': fields.KeywordField(), + + # For showing in the search result + 'type_display': fields.TextField(), + 'doc_display': fields.TextField(), + + # Simple analyzer breaks on `.`, + # otherwise search results are too strict for this use case + 'name': fields.TextField(analyzer='simple'), + 'display_name': fields.TextField(analyzer='simple'), + } + ) modified_model_field = 'modified_date' @@ -130,6 +109,28 @@ class Meta: fields = ('commit', 'build') ignore_signals = True + def prepare_domains(self, html_file): + """Prepares and returns the values for domains field.""" + domains_qs = html_file.sphinx_domains.exclude( + domain='std', + type__in=['doc', 'label'] + ).iterator() + + all_domains = [ + { + 'role_name': domain.role_name, + 'doc_name': domain.doc_name, + 'anchor': domain.anchor, + 'type_display': domain.type_display, + 'doc_display': domain.doc_display, + 'name': domain.name, + 'display_name': domain.display_name if domain.display_name != '-' else '', + } + for domain in domains_qs + ] + + return all_domains + @classmethod def faceted_search( cls, query, user, projects_list=None, versions_list=None, diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py index 673a4fe6948..3653974a8b8 100644 --- a/readthedocs/search/faceted_search.py +++ b/readthedocs/search/faceted_search.py @@ -2,7 +2,8 @@ from elasticsearch import Elasticsearch from elasticsearch_dsl import FacetedSearch, TermsFacet -from elasticsearch_dsl.query import Bool, SimpleQueryString +from elasticsearch_dsl.faceted_search import NestedFacet +from elasticsearch_dsl.query import Bool, SimpleQueryString, Nested, Match from django.conf import settings @@ -10,7 +11,6 @@ from readthedocs.search.documents import ( PageDocument, ProjectDocument, - SphinxDomainDocument, ) @@ -92,11 +92,26 @@ class ProjectSearchBase(RTDFacetedSearch): class PageSearchBase(RTDFacetedSearch): facets = { 'project': TermsFacet(field='project'), - 'version': TermsFacet(field='version') + 'version': TermsFacet(field='version'), + 'role_name': NestedFacet( + 'domains', + TermsFacet(field='domains.role_name') + ), } doc_types = [PageDocument] index = PageDocument._doc_type.index - fields = ['title^10', 'headers^5', 'content'] + + _outer_fields = ['title^4'] + _section_fields = ['sections.title^3', 'sections.content'] + _domain_fields = [ + 'domains.type_display', + 'domains.name^2', + 'domains.display_name', + ] + fields = _outer_fields + + # need to search for both 'and' and 'or' operations + # the score of and should be higher as it satisfies both or and and operators = ['and', 'or'] def count(self): @@ -109,17 +124,77 @@ def count(self): s = s.execute() return s.hits.total + def query(self, search, query): + """Manipulates query to support nested query.""" + search = search.highlight_options(encoder='html', number_of_fragments=1) + + # match query for the title (of the page) field. 
+ match_title_query = Match(title=query) + + # nested query for search in sections + sections_nested_query = self.generate_nested_query( + query=query, + path='sections', + fields=self._section_fields, + inner_hits={ + 'highlight': { + 'encoder': 'html', + 'number_of_fragments': 1, + 'fields': { + 'sections.title': {}, + 'sections.content': {}, + } + } + } + ) + + # nested query for search in domains + domains_nested_query = self.generate_nested_query( + query=query, + path='domains', + fields=self._domain_fields, + inner_hits={ + 'highlight': { + 'encoder': 'html', + 'number_of_fragments': 1, + 'fields': { + 'domains.type_display': {}, + 'domains.name': {}, + 'domains.display_name': {}, + } + } + } + ) + + final_query = Bool(should=[ + match_title_query, + sections_nested_query, + domains_nested_query, + ]) + + search = search.query(final_query) + return search -class DomainSearchBase(RTDFacetedSearch): - facets = { - 'project': TermsFacet(field='project'), - 'version': TermsFacet(field='version'), - 'role_name': TermsFacet(field='role_name'), - } - doc_types = [SphinxDomainDocument] - index = SphinxDomainDocument._doc_type.index - fields = ('display_name^5', 'name^3', 'project^3', 'type_display') - operators = ['and'] + def generate_nested_query(self, query, path, fields, inner_hits): + """Generate a nested query with passed parameters.""" + queries = [] + + for operator in self.operators: + query_string = SimpleQueryString( + query=query, + fields=fields, + default_operator=operator + ) + queries.append(query_string) + + bool_query = Bool(should=queries) + + nested_query = Nested( + path=path, + inner_hits=inner_hits, + query=bool_query + ) + return nested_query class PageSearch(SettingsOverrideObject): @@ -142,39 +217,3 @@ class ProjectSearch(SettingsOverrideObject): """ _default_class = ProjectSearchBase - - -class DomainSearch(SettingsOverrideObject): - - """ - Allow this class to be overridden based on CLASS_OVERRIDES setting. - - This is primary used on the .com to adjust how we filter our search queries - """ - - _default_class = DomainSearchBase - - -class AllSearch(RTDFacetedSearch): - - """ - Simplfy for testing. - - It has some UI/UX problems that need to be addressed. 
- """ - - facets = { - 'project': TermsFacet(field='project'), - 'version': TermsFacet(field='version'), - 'language': TermsFacet(field='language'), - 'role_name': TermsFacet(field='role_name'), - # Need to improve UX here for exposing to users - # 'index': TermsFacet(field='_index'), - } - doc_types = [SphinxDomainDocument, PageDocument, ProjectDocument] - index = [SphinxDomainDocument._doc_type.index, - PageDocument._doc_type.index, - ProjectDocument._doc_type.index] - fields = ('title^10', 'headers^5', 'content', 'name^20', - 'slug^5', 'description', 'display_name^5') - operators = ['and'] diff --git a/readthedocs/search/management/commands/reindex_elasticsearch.py b/readthedocs/search/management/commands/reindex_elasticsearch.py index 32f9a4c8534..7c0ea6982cf 100644 --- a/readthedocs/search/management/commands/reindex_elasticsearch.py +++ b/readthedocs/search/management/commands/reindex_elasticsearch.py @@ -10,7 +10,6 @@ from ...tasks import (index_objects_to_es, switch_es_index, create_new_es_index, index_missing_objects) -from ...utils import get_chunk log = logging.getLogger(__name__) @@ -19,17 +18,32 @@ class Command(BaseCommand): @staticmethod def _get_indexing_tasks(app_label, model_name, index_name, queryset, document_class): - total = queryset.count() - chunks = get_chunk(total, settings.ES_TASK_CHUNK_SIZE) - - for chunk in chunks: - data = { - 'app_label': app_label, - 'model_name': model_name, - 'document_class': document_class, - 'index_name': index_name, - 'chunk': chunk - } + chunk_size = settings.ES_TASK_CHUNK_SIZE + qs_iterator = queryset.only('pk').iterator() + is_iterator_empty = False + + data = { + 'app_label': app_label, + 'model_name': model_name, + 'document_class': document_class, + 'index_name': index_name, + } + + while not is_iterator_empty: + objects_id = [] + + try: + for _ in range(chunk_size): + pk = next(qs_iterator).pk + objects_id.append(pk) + + if pk % 5000 == 0: + log.info('Total: %s', pk) + + except StopIteration: + is_iterator_empty = True + + data['objects_id'] = objects_id yield index_objects_to_es.si(**data) def _run_reindex_tasks(self, models, queue): diff --git a/readthedocs/search/parse_json.py b/readthedocs/search/parse_json.py index a3593056204..b71e4d45d0d 100644 --- a/readthedocs/search/parse_json.py +++ b/readthedocs/search/parse_json.py @@ -10,17 +10,6 @@ log = logging.getLogger(__name__) -def process_headers(data, filename): - """Read headers from toc data.""" - headers = [] - if data.get('toc', False): - for element in PyQuery(data['toc'])('a'): - headers.append(recurse_while_none(element)) - if None in headers: - log.info('Unable to index file headers for: %s', filename) - return headers - - def generate_sections_from_pyquery(body): """Given a pyquery object, generate section dicts for each section.""" # Capture text inside h1 before the first h2 @@ -35,13 +24,14 @@ def generate_sections_from_pyquery(body): if next_p[0].tag == 'div' and 'class' in next_p[0].attrib: if 'section' in next_p[0].attrib['class']: break - h1_content += '\n%s\n' % next_p.html() + + h1_content += parse_content(next_p.text()) next_p = next_p.next() if h1_content: yield { 'id': h1_id, 'title': h1_title, - 'content': h1_content, + 'content': h1_content.replace('\n', '. 
'), } # Capture text inside h2's @@ -51,7 +41,10 @@ def generate_sections_from_pyquery(body): header = section_list.eq(num) title = header.text().replace('¶', '').strip() section_id = div.attr('id') - content = div.html() + + content = div.text() + content = parse_content(content) + yield { 'id': section_id, 'title': title, @@ -71,7 +64,6 @@ def process_file(fjson_filename): sections = [] path = '' title = '' - body_content = '' if 'current_page_name' in data: path = data['current_page_name'] @@ -80,7 +72,6 @@ def process_file(fjson_filename): if data.get('body'): body = PyQuery(data['body']) - body_content = body.text().replace('¶', '') sections.extend(generate_sections_from_pyquery(body)) else: log.info('Unable to index content for: %s', fjson_filename) @@ -93,24 +84,27 @@ def process_file(fjson_filename): log.info('Unable to index title for: %s', fjson_filename) return { - 'headers': process_headers(data, fjson_filename), - 'content': body_content, 'path': path, 'title': title, 'sections': sections, } -def recurse_while_none(element): +def parse_content(content): """ - Traverse the ``element`` until a non-None text is found. - - :param element: element to traverse until get a non-None text. - :type element: pyquery.PyQuery + Removes the starting text and ¶. - :returns: the first non-None value found - :rtype: str + It removes the starting text from the content + because it contains the title of that content, + which is redundant here. """ - if element.text is None: - return recurse_while_none(element.getchildren()[0]) - return element.text + content = content.replace('¶', '').strip() + + # removing the starting text of each + content = content.split('\n') + if len(content) > 1: # there were \n + content = content[1:] + + # converting newlines to ". " + content = '. 
'.join([text.strip().rstrip('.') for text in content]) + return content diff --git a/readthedocs/search/tests/conftest.py b/readthedocs/search/tests/conftest.py index 8fa30478501..5f29c596235 100644 --- a/readthedocs/search/tests/conftest.py +++ b/readthedocs/search/tests/conftest.py @@ -8,6 +8,8 @@ from readthedocs.projects.models import Project, HTMLFile from readthedocs.search.documents import PageDocument +from readthedocs.sphinx_domains.models import SphinxDomain + from .dummy_data import ALL_PROJECTS, PROJECT_DATA_FILES @@ -32,6 +34,28 @@ def all_projects(es_index, mock_processed_json, db, settings): file_name = file_basename + '.html' version = project.versions.all()[0] html_file = G(HTMLFile, project=project, version=version, name=file_name) + + # creating sphinx domain test objects + file_path = get_json_file_path(project.slug, file_basename) + if os.path.exists(file_path): + with open (file_path) as f: + data = json.load(f) + domains = data['domains'] + + for domain_data in domains: + domain_role_name = domain_data.pop('role_name') + domain, type_ = domain_role_name.split(':') + + G( + SphinxDomain, + project=project, + version=version, + html_file=html_file, + domain=domain, + type=type_, + **domain_data + ) + PageDocument().update(html_file) projects_list.append(project) @@ -46,12 +70,17 @@ def project(all_projects): return all_projects[0] +def get_json_file_path(project_slug, basename): + current_path = os.path.abspath(os.path.dirname(__file__)) + file_name = f'{basename}.json' + file_path = os.path.join(current_path, 'data', project_slug, file_name) + return file_path + + def get_dummy_processed_json(instance): project_slug = instance.project.slug basename = os.path.splitext(instance.name)[0] - file_name = basename + '.json' - current_path = os.path.abspath(os.path.dirname(__file__)) - file_path = os.path.join(current_path, "data", project_slug, file_name) + file_path = get_json_file_path(project_slug, basename) if os.path.exists(file_path): with open(file_path) as f: diff --git a/readthedocs/search/tests/data/docs/story.json b/readthedocs/search/tests/data/docs/story.json deleted file mode 100644 index 2c998bcec3b..00000000000 --- a/readthedocs/search/tests/data/docs/story.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "content": "ReadtheDocsPhilosophy\nRead the Docs is Open Source software. We have licensed the code base as MIT, which provides almost no restrictions on the use of the code.\nHowever, as a project there are things that we care about more than others. We built Read the Docs to support in the Open Source community. The code is open for people to contribute to, so that they may build features into https://readthedocs.org that they want.
    XSS exploit

    We also believe sharing the code openly is a valuable learning tool, especially for demonsrating how to collaborate and maintain an enormous website.\nOfficial website Support\nThe time of the core developers of Read the Docs is limited. We provide official developers support for the following things:\nLocal development on the Python code base\nUsage of https://readthedocs.org for Open Source projects\nBug fixes in the code base, as it applies to running it on https://readthedocs.org\nUnsupported\nThere are use cases that we don\u2019t support, because it doesn\u2019t further our goal of promoting in the Open Source Community.\nWe do not support:\nSpecific usage of Sphinx and Mkdocs, that don\u2019t affect our hosting\nCustom s of Read the Docs at your company\n of Read the Docs on other platforms\nAny issues outside of the Read the Docs Python Code\nRationale\nRead the Docs was founded to improve in the Open Source Community. We fully recognize and allow the code to be used for internal installs at companies, but we will not spend our time supporting it. Our time is limited, and we want to spend it on the mission that we set out to originally support.\nIf you feel strongly about installing Read the Docs internal to a company, we will happily link to third party resources on this topic. Please open an issue with a proposal if you want to take on this task.", - "headers": [ - "Unsupported", - "Rationale" - ], - "title": "Philosophy", - "sections": [ - { - "content": "\nRead the Docs is Open Source software.\nWe have licensed the code base as MIT,\nwhich provides almost no restrictions on the use of the code.\n\nHowever,\nas a project there are things that we care about more than others.\nWe built Read the Docs to support in the Open Source community.\nThe code is open for people to contribute to,\nso that they may build features into https://readthedocs.org that they want.\nWe also believe sharing the code openly is a valuable learning tool,\nespecially for demonsrating how to collaborate and maintain an enormous website.\n", - "id": "read-the-docs-open-source-philosophy", - "title": "Read the Docs Open Source Philosophy" - }, - { - "content": "\n

    Official Support\u00b6

    \n

    The time of the core developers of Read the Docs is limited.\nWe provide official support for the following things:

    \n\n", - "id": "official-support", - "title": "Official Support" - }, - { - "content": "\n

    Unsupported\u00b6

    \n

    There are use cases that we don\u2019t support,\nbecause it doesn\u2019t further our goal of promoting in the Open Source Community.

    \n

    We do not support:

    \n
      \n
    • Specific usage of Sphinx and Mkdocs, that don\u2019t affect our hosting
    • \n
    • Custom of Read the Docs at your company
    • \n
    • of Read the Docs on other platforms
    • \n
    • Any issues outside of the Read the Docs Python Code
    • \n
    \n", - "id": "unsupported", - "title": "Unsupported" - }, - { - "content": "\n

    Rationale\u00b6

    \n

    Read the Docs was founded to improve in the Open Source Community.\nWe fully recognize and allow the code to be used for internal installs at companies,\nbut we will not spend our time supporting it.\nOur time is limited,\nand we want to spend it on the mission that we set out to originally support.

    \n

    If you feel strongly about installing Read the Docs internal to a company,\nwe will happily link to third party resources on this topic.\nPlease open an issue with a proposal if you want to take on this task.

    \n", - "id": "rationale", - "title": "Rationale" - } - ], - "path": "open-source-philosophy" -} diff --git a/readthedocs/search/tests/data/docs/support.json b/readthedocs/search/tests/data/docs/support.json new file mode 100644 index 00000000000..265041504ad --- /dev/null +++ b/readthedocs/search/tests/data/docs/support.json @@ -0,0 +1,41 @@ +{ + "path": "support", + "title": "Support", + "sections": [ + { + "id": "usage-questions", + "title": "Usage Questions", + "content": "If you have questions about how to use Read the Docs, or have an issue that isn’t related to a bug, Stack Overflow is the best place to ask. Tag questions with read-the-docs so other folks can find them easily.. Good questions for Stack Overflow would be:. “What is the best way to structure the table of contents across a project?”. “How do I structure translations inside of my project for easiest contribution from users?”. “How do I use Sphinx to use SVG images in HTML output but PNG in PDF output?”" + }, + { + "id": "community-support", + "title": "Community Support", + "content": "Read the Docs is supported by community contributions and advertising. We hope to bring in enough money with our Gold and Ethical Ads programs to keep Read the Docs sustainable.. All people answering your questions are doing it with their own time, so please be kind and provide as much information as possible.. Bugs & Support Issues. You can file bug reports on our GitHub issue tracker, and they will be addressed as soon as possible. Support is a volunteer effort, and there is no guaranteed response time. If you need answers quickly, you can buy commercial support below.. Reporting Issues. When reporting a bug, please include as much information as possible that will help us solve this issue. This includes:. Project name. URL. Action taken. Expected result. Actual result. Specific Requests. If you need a specific request for your project or account, like more resources, change of the project’s slug or username. Send an email to support@readthedocs.org." + }, + { + "id": "commercial-support", + "title": "Commercial Support", + "content": "We offer commercial support for Read the Docs,

    <h3>XSS exploit</h3>

    commercial hosting, as well as consulting around all documentation systems. You can contact us at hello@readthedocs.com to learn more, or read more at https://readthedocs.com/services/#open-source-support." + } + ], + "domains": [ + { + "role_name": "http:post", + "doc_name": "api/v3.html", + "anchor": "post--api-v3-projects-(string-project_slug)-versions-(string-version_slug)-builds-", + "type_display": "post", + "doc_display": "API v3", + "name": "/api/v3/projects/(string:project_slug)/versions/(string:version_slug)/builds/", + "display_name": "" + }, + { + "role_name": "http:patch", + "doc_name": "api/v3.html", + "anchor": "patch--api-v3-projects-(string-project_slug)-version-(string-version_slug)-", + "type_display": "patch", + "doc_display": "API v3", + "name": "/api/v3/projects/(string:project_slug)/version/(string:version_slug)/", + "display_name": "" + } + ] +} diff --git a/readthedocs/search/tests/data/docs/wiping.json b/readthedocs/search/tests/data/docs/wiping.json index 50b3ee1658e..b5a269551d6 100644 --- a/readthedocs/search/tests/data/docs/wiping.json +++ b/readthedocs/search/tests/data/docs/wiping.json @@ -1,15 +1,49 @@ { - "content": "ReadtheDocsWiping a Build Environment\nSometimes it happen that your Builds start failing because the build environment where the is created is stale or broken. This could happen for a couple of different reasons like pip not upgrading a package properly or a corrupted cached Python package.\nIn any of these cases (and many others), the solution could be just wiping out the existing build environment files and allow Read the Docs to create a new fresh one.\nFollow these steps to wipe the build environment:\nGo to Versions\nClick on the Edit button of the version you want to wipe on the right side of the page\nGo to the bottom of the page and click the wipe link, next to the \u201cSave\u201d button\nNote\nBy wiping the build environment, all the rst, md, and code files associated with it will be removed but not the already built (HTML and PDF files). Your will still online after wiping the build environment.\nNow you can re-build the version with a fresh build environment!", - "headers": [ - "Wiping a Build Environment" - ], - "title": "Wiping a Build Environment", - "sections": [ - { - "content": "\nSometimes it happen that your Builds start failing because the build\nenvironment where the is created is stale or\nbroken. This could happen for a couple of different reasons like pip\nnot upgrading a package properly or a corrupted cached Python package.\n\nIn any of these cases (and many others), the solution could be just\nwiping out the existing build environment files and allow Read the\nDocs to create a new fresh one.\n\nFollow these steps to wipe the build environment:\n\n\n
  • Go to Versions
  • \n
  • Click on the Edit button of the version you want to wipe on the\nright side of the page
  • \n
  • Go to the bottom of the page and click the wipe link, next to\nthe \u201cSave\u201d button
  • \n\n\n\n

    Note

    \n

    By wiping the build environment, all the rst, md,\nand code files associated with it will be removed but not the\n already built (HTML and PDF files). Your\n will still online after wiping the build environment.

    \n\n\nNow you can re-build the version with a fresh build environment!\n", - "id": "wiping-a-build-environment", - "title": "Wiping a Build Environment" - } - ], - "path": "guides/wipe-environment" + "path": "guides/wipe-environment", + "title": "Wiping a Build Environment", + "sections": [ + { + "id": "wiping-a-build-environment", + "title": "Wiping a Build Environment", + "content": "Sometimes it happen that your Builds start failing because the build environment where the documentation is created is stale or broken. This could happen for a couple of different reasons like pip not upgrading a package properly or a corrupted cached Python package.In any of these cases (and many others), the solution could be just wiping out the existing build environment files and allow Read the Docs to create a new fresh one.Follow these steps to wipe the build environment:Click on the Edit button of the version you want to wipe on the right side of the page. Go to the bottom of the page and click the wipe link, next to the “Save” buttonBy wiping the documentation build environment, all the rst, md, and code files associated with it will be removed but not the documentation already built (HTML and PDF files). Your documentation will still online after wiping the build environment.Now you can re-build the version with a fresh build environment! This is a test line which contains the word 'Elasticsearch Query'." + } + ], + "domains": [ + { + "role_name": "http:get", + "doc_name": "api/v3.html", + "anchor": "get--api-v3-users-(str-username)", + "type_display": "get", + "doc_display": "API v3", + "name": "/api/v3/users/(str:username)", + "display_name": "" + }, + { + "role_name": "http:get", + "doc_name": "api/v3.html", + "anchor": "get--api-v3-projects-(string-project_slug)-versions-(string-version_slug)-", + "type_display": "get", + "doc_display": "API v3", + "name": "/api/v3/projects/(string:project_slug)/versions/(string:version_slug)/", + "display_name": "" + }, + { + "role_name": "http:get", + "doc_name": "api/v3.html", + "anchor": "get--api-v3-projects-(string-project_slug)-versions-", + "type_display": "get", + "doc_display": "API v3", + "name": "/api/v3/projects/(string:project_slug)/versions/", + "display_name": "" + }, + { + "role_name": "http:get", + "doc_name": "api/v3.html", + "anchor": "get--api-v3-projects-(string-project_slug)-", + "type_display": "get", + "doc_display": "API v3", + "name": "/api/v3/projects/(string:project_slug)/", + "display_name": "" + } + ] } diff --git a/readthedocs/search/tests/data/kuma/docker.json b/readthedocs/search/tests/data/kuma/docker.json index 16f4f1e7434..b91a0c420a3 100644 --- a/readthedocs/search/tests/data/kuma/docker.json +++ b/readthedocs/search/tests/data/kuma/docker.json @@ -1,25 +1,36 @@ { - "content": "kumadocker Docker is used for development and (soon) for deployment.\nDocker Images\nDocker images are used in development, usually with the local working files mounted in the images to set behaviour.\nImages are built by Jenkins, after tests pass, and are published to quay.io. We try to store the configuration in the environment, so that the published images can be used in deployments by setting environment variables to deployment-specific values.\nHere are some of the images used in the Kuma project:\nkuma\nThe kuma Docker image builds on the kuma_base image, installing a kuma branch and building the assets needed for running as a webservice. 
The environment can be customized for different deployments.\nThe image can be recreated locally with make build-kuma.\nThe image tagged latest is used by default for development. It can be created locally with make build-kuma VERSION=latest. The latest image is created from the master branch in Jenkins and published to quay.io.\nkuma_base\nThe kuma_base Docker image contains the OS and libraries (C, Python, and Node.js) that support the kuma project. The kuma image extends this by installing the kuma source and building assets needed for production.\nThe image can be recreated locally with make build-base.\nThe image tagged latest is used by default for development. It can be created localled with make build-base VERSION=latest. The latest image is created from the master branch in Jenkins and published to quay.io\nkumascript\nThe kumascript Docker image contains the kumascript rendering engine and support files. The environment can be customized for different deployments.\nThe image can be recreated locally with make build-kumascript.\nThe image tagged latest is used by default for development. It can be created locally with make build-kumascript KS_VERSION=latest. The latest image is created from the master branch in Jenkins and published to quay.io.\nintegration-tests\nThe integration-tests Docker image contains browser-based integration tests that check the functionality of a running Kuma deployment.\nThe image can be recreated locally with docker build -f docker/images/integration-tests/ ., but this is only necessary for image development. Most developer will follow the Client-side testing to develop and run these integration tests.\nThe image is built and used in Jenkins in the stage-integration-tests and prod-integration-tests pipelines, configured by scripts in the Jenkinsfiles folder. It is not published to quay.io.", - "headers": [ - "Docker", - "Docker Images", - "kuma", - "kuma_base", - "kumascript", - "integration-tests" - ], - "title": "Docker", - "sections": [ - { - "content": "\nDocker is used for development and (soon) for deployment.\n", - "id": "docker", - "title": "Docker" - }, - { - "content": "\n

    Docker Images\u00b6

    \n

    Docker images are used in development, usually with the local\nworking files mounted in the images to set behaviour.

    \n

    Images are built by Jenkins, after tests pass, and are\npublished to quay.io. We try to\nstore the configuration in the environment, so that the\npublished images can be used in deployments by setting\nenvironment variables to deployment-specific values.

    \n

    Here are some of the images used in the Kuma project:

    \n
    \n

    kuma\u00b6

    \n

    The kuma Docker image builds on the kuma_base image, installing a kuma branch\nand building the assets needed for running as a webservice. The environment\ncan be customized for different deployments.

    \n

    The image can be recreated locally with make build-kuma.

    \n

    The image tagged latest is used by default for development. It can be\ncreated locally with make build-kuma VERSION=latest. The latest\nimage is created from the master branch in Jenkins and published to\nquay.io.

    \n
    \n
    \n

    kuma_base\u00b6

    \n

    The kuma_base Docker image contains the OS and libraries (C, Python, and\nNode.js) that support the kuma project. The kuma image extends this by\ninstalling the kuma source and building assets needed for production.

    \n

    The image can be recreated locally with make build-base.

    \n

    The image tagged latest is used by default for development. It can be\ncreated localled with make build-base VERSION=latest. The \nlatest image is created from the master branch in Jenkins and published to\nquay.io

    \n
    \n
    \n

    kumascript\u00b6

    \n

    The kumascript Docker image contains the kumascript rendering engine and\nsupport files. The environment can be customized for different deployments.

    \n

    The image can be recreated locally with make build-kumascript.

    \n

    The image tagged latest is used by default for development. It can be\ncreated locally with make build-kumascript KS_VERSION=latest. The \nlatest image is created from the master branch in Jenkins and published to\nquay.io.

    \n
    \n
    \n

    integration-tests\u00b6

    \n

    The integration-tests Docker image contains browser-based integration tests\nthat check the functionality of a running Kuma deployment.

    \n

    The image can be recreated locally with\ndocker build -f docker/images/integration-tests/ ., but this is only\nnecessary for image development. Most developer will follow the\nClient-side testing to develop and run these integration tests.

    \n

    The image is built and used in Jenkins in the stage-integration-tests and\nprod-integration-tests pipelines, configured by scripts in the\nJenkinsfiles folder. It is not published to quay.io.

    \n
    \n", - "id": "docker-images", - "title": "Docker Images" - } - ], - "path": "docker" + "path": "docker", + "title": "Docker", + "sections": [ + { + "id": "docker", + "title": "Docker", + "content": "Docker is used for development and for deployment. This is a test line which contains the word 'Elasticsearch'." + }, + { + "id": "docker-images", + "title": "Docker Images", + "content": "Docker images are used in development, usually with the local working files mounted in the images to set behaviour.. Images are built by Jenkins, after tests pass, and are published to DockerHub. We try to store the configuration in the environment, so that the published images can be used in deployments by setting environment variables to deployment-specific values.. Here are some of the images used in the Kuma project:. kuma. The kuma Docker image builds on the kuma_base image, installing a kuma branch and building the assets needed for running as a webservice. The environment can be customized for different deployments.. The image can be recreated locally with make build-kuma.. The image tagged latest is used by default for development. It can be created locally with make build-kuma VERSION=latest. The official latest image is created from the master branch in Jenkins and published to DockerHub.. kuma_base. The kuma_base Docker image contains the OS and libraries (C, Python, and Node.js) that testSupport the kuma project. The kuma image extends this by installing the kuma source and building assets needed for production.. The image can be recreated locally with make build-base.. The image tagged latest is used by default for development. It can be created localled with make build-base VERSION=latest. The official latest image is created from the master branch in Jenkins and published to DockerHub. kumascript. The kumascript Docker image contains the kumascript rendering engine and testSupport files. The environment can be customized for different deployments.. The image can be recreated locally with make build-kumascript.. The image tagged latest is used by default for development. It can be created locally with make build-kumascript KS_VERSION=latest. The official latest image is created from the master branch in Jenkins and published to DockerHub.. integration-tests. The integration-tests Docker image contains browser-based integration tests that check the functionality of a running Kuma deployment.. The image can be recreated locally with docker build -f docker/images/integration-tests/ ., but this is only necessary for image development. Most developers will follow the Client-side testing to develop and run these integration tests.. The image is built and used in Jenkins in the stage-integration-tests and prod-integration-tests pipelines, configured by scripts in the Jenkinsfiles folder. It is not published to DockerHub." 
+ } + ], + "domains": [ + { + "role_name": "py:module", + "doc_name": "autoapi/notfound/utils/index.html", + "anchor": "module-notfound.utils", + "type_display": "module", + "doc_display": "notfound.utils", + "name": "notfound.utils", + "display_name": "" + }, + { + "role_name": "py:function", + "doc_name": "autoapi/notfound/utils/index.html", + "anchor": "notfound.utils.replace_uris", + "type_display": "function", + "doc_display": "notfound.utils", + "name": "notfound.utils.replace_uris", + "display_name": "" + } + ] } diff --git a/readthedocs/search/tests/data/kuma/documentation.json b/readthedocs/search/tests/data/kuma/documentation.json index 1dcefa8c2c3..3f0969bcf1c 100644 --- a/readthedocs/search/tests/data/kuma/documentation.json +++ b/readthedocs/search/tests/data/kuma/documentation.json @@ -1,21 +1,36 @@ { - "content": "kumadocumentation This documentation is generated and published at Read the Docs whenever the master branch is updated. GitHub can render our .rst documents as ReStructuredText, which is close enough to Sphinx for most code reviews, without features like links between documents.\nIt is occasionally necessary to generate the documentation locally. It is easiest to do this with a virtualenv on the host system, using only to regenerate the MDN Sphinx template. If you are not comfortable with that style of development, it can be done entirely in using -compose.\nGenerating documentation\nSphinx uses a Makefile in the docs subfolder to build documentation in several formats. MDN only uses the HTML format, and the generated document index is at docs/_build/html/index.html.\nTo generate the documentation in a virtualenv on the host machine, first install the requirements:\npip install -r requirements/docs.txt\nThen switch to the docs folder to use the Makefile:\ncd docs make html python -m webbrowser file://${PWD}/_build/html/index.html\nTo generate the documentation with :\n-compose run --rm --user $(id -u) web sh -c \"\\ virtualenv /tmp/.venvs/docs && \\ . /tmp/.venvs/docs/bin/activate && \\ pip install -r /app/requirements/docs.txt && \\ cd /app/docs && \\ make html\" python -m webbrowser file://${PWD}/docs/_build/html/index.html\nA virtualenv is required, to avoid a pip bug when changing the version of a system-installed package.", - "headers": [ - "Documentation", - "Generating documentation" - ], - "title": "Documentation", - "sections": [ - { - "content": "\nThis documentation is generated and published at\nRead the Docs whenever the master branch is updated.\n\nGitHub can render our .rst documents as ReStructuredText, which is\nclose enough to Sphinx for most code reviews, without features like links\nbetween documents.\n\nIt is occasionally necessary to generate the documentation locally. It is\neasiest to do this with a virtualenv on the host system, using only to\nregenerate the MDN Sphinx template. If you are not comfortable with that style\nof development, it can be done entirely in using -compose.\n", - "id": "documentation", - "title": "Documentation" - }, - { - "content": "\n

    Generating documentation\u00b6

    \n

    Sphinx uses a Makefile in the docs subfolder to build documentation in\nseveral formats. MDN only uses the HTML format, and the generated document\nindex is at docs/_build/html/index.html.

    \n

    To generate the documentation in a virtualenv on the host machine, first\ninstall the requirements:

    \n
    pip install -r requirements/docs.txt\n
    \n
    \n

    Then switch to the docs folder to use the Makefile:

    \n
    cd docs\nmake html\npython -m webbrowser file://${PWD}/_build/html/index.html\n
    \n
    \n

    To generate the documentation with :

    \n
    -compose run --rm --user $(id -u) web sh -c \"\\\n  virtualenv /tmp/.venvs/docs && \\\n  . /tmp/.venvs/docs/bin/activate && \\\n  pip install -r /app/requirements/docs.txt && \\\n  cd /app/docs && \\\n  make html\"\npython -m webbrowser file://${PWD}/docs/_build/html/index.html\n
    \n
    \n

    A virtualenv is required, to avoid a pip bug when changing the version\nof a system-installed package.

    \n", - "id": "generating-documentation", - "title": "Generating documentation" - } - ], - "path": "documentation" + "path": "testdocumentation", + "title": "TestDocumentation", + "sections": [ + { + "id": "TestDocumentation", + "title": "TestDocumentation", + "content": "This TestDocumentation is generated whenever the master branch is updated.GitHub can render our .rst documents as ReStructuredText, which is close enough to Sphinx for most code reviews, without features like links between documents.It is occasionally necessary to generate the TestDocumentation locally. It is easiest to do this with a virtualenv on the host system, using Docker only to regenerate the MDN Sphinx template. If you are not comfortable with that style of development, it can be done entirely in Docker using docker-compose." + }, + { + "id": "generating-TestDocumentation", + "title": "Generating TestDocumentation", + "content": "Sphinx uses a Makefile in the subfolder to build TestDocumentation in several formats. MDN only uses the HTML format. To generate the TestDocumentation in a virtualenv on the host machine, first install the requirements:. pip install -r requirements/test.txt. Then switch to the test folder to use the Makefile:. cd test make html python -m webbrowser file://${PWD}/_build/html/index.html. To generate the TestDocumentation with Docker:. docker-compose run --rm --user $(id -u) web sh -c \"\\ virtualenv /tmp/.venvs/test && \\ . /tmp/.venvs/test/bin/activate && \\ pip install -r /app/requirements/test.txt && \\ cd /app/test && \\ make html\" python -m webbrowser file://${PWD}/test/_build/html/index.html. A virtualenv is required, to avoid a pip bug when changing the version of a system-installed package." + } + ], + "domains": [ + { + "role_name": "py:module", + "doc_name": "autoapi/notfound/index.html", + "anchor": "module-notfound", + "type_display": "module", + "doc_display": "notfound", + "name": "notfound", + "display_name": "" + }, + { + "role_name": "py:data", + "doc_name": "autoapi/notfound/index.html", + "anchor": "notfound.version", + "type_display": "data", + "doc_display": "notfound", + "name": "notfound.version", + "display_name": "" + } + ] } diff --git a/readthedocs/search/tests/data/pipeline/installation.json b/readthedocs/search/tests/data/pipeline/installation.json index 40d21fed5d4..c6516015f34 100644 --- a/readthedocs/search/tests/data/pipeline/installation.json +++ b/readthedocs/search/tests/data/pipeline/installation.json @@ -1,33 +1,37 @@ { - "content": "PipelineInstallation Official Either check out Pipeline from GitHub or to pull a release off PyPI\npip install django-pipeline\nAdd \u2018pipeline\u2019 to your INSTALLED_APPS\nINSTALLED_APPS = ( 'pipeline', )\nUse a pipeline storage for STATICFILES_STORAGE\nSTATICFILES_STORAGE = 'pipeline.storage.PipelineCachedStorage'\nAdd the PipelineFinder to STATICFILES_FINDERS\nSTATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )\nNote\nYou need to use Django>=1.7 to be able to use this version of pipeline.\nUpgrading from 1.3\nTo upgrade from pipeline 1.3, you will need to follow these steps:\nUpdate templates to use the new syntax\n{# pipeline<1.4 #} {% load compressed %} {% compressed_js 'group' %} {% compressed_css 'group' %}\n{# pipeline>=1.4 #} {% load pipeline %} {% javascript 'group' %} {% stylesheet 'group' %}\nAdd the PipelineFinder to STATICFILES_FINDERS\nSTATICFILES_FINDERS = ( 
'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )\nUpgrading from 1.5\nTo upgrade from pipeline 1.5, you will need update all your PIPELINE_* settings and move them under the new PIPELINE setting. See Configuration.\nRecommendations\nPipeline\u2019s default CSS and JS compressor is Yuglify. Yuglify wraps UglifyJS and cssmin, applying the default YUI configurations to them. It can be downloaded from: https://github.com/yui/yuglify/.\nIf you do not install yuglify, make sure to disable the compressor in your settings.", - "headers": [ - "Installation", - "Upgrading from 1.3", - "Upgrading from 1.5", - "Recommendations" - ], - "title": "Installation", - "sections": [ - { - "content": "\n\n
  • Either check out Pipeline from GitHub or to pull a release off PyPI

    \n
    pip install django-pipeline\n
    \n
    \n
  • \n
  • Add \u2018pipeline\u2019 to your INSTALLED_APPS

    \n
    INSTALLED_APPS = (\n    'pipeline',\n)\n
    \n
    \n
  • \n
  • Use a pipeline storage for STATICFILES_STORAGE

    \n
    STATICFILES_STORAGE = 'pipeline.storage.PipelineCachedStorage'\n
    \n
    \n
  • \n
  • Add the PipelineFinder to STATICFILES_FINDERS

    \n
    STATICFILES_FINDERS = (\n    'django.contrib.staticfiles.finders.FileSystemFinder',\n    'django.contrib.staticfiles.finders.AppDirectoriesFinder',\n    'pipeline.finders.PipelineFinder',\n)\n
    \n
    \n
  • \n\n\n\n

    Note

    \n

    You need to use Django>=1.7 to be able to use this version of pipeline.

    \n\n", - "id": "installation", - "title": "Installation" - }, - { - "content": "\n

    Upgrading from 1.3\u00b6

    \n

    To upgrade from pipeline 1.3, you will need to follow these steps:

    \n
      \n
    1. Update templates to use the new syntax

      \n
      \n
      {# pipeline<1.4 #}\n{% load compressed %}\n{% compressed_js 'group' %}\n{% compressed_css 'group' %}\n
      \n
      \n
      {# pipeline>=1.4 #}\n{% load pipeline %}\n{% javascript 'group' %}\n{% stylesheet 'group' %}\n
      \n
      \n
      \n
    2. \n
    3. Add the PipelineFinder to STATICFILES_FINDERS

      \n
      STATICFILES_FINDERS = (\n    'django.contrib.staticfiles.finders.FileSystemFinder',\n    'django.contrib.staticfiles.finders.AppDirectoriesFinder',\n    'pipeline.finders.PipelineFinder',\n)\n
      \n
      \n
    4. \n
    \n", - "id": "upgrading-from-1-3", - "title": "Upgrading from 1.3" - }, - { - "content": "\n

    Upgrading from 1.5\u00b6

    \n

    To upgrade from pipeline 1.5, you will need update all your PIPELINE_*\nsettings and move them under the new PIPELINE setting.\nSee Configuration.

    \n", - "id": "upgrading-from-1-5", - "title": "Upgrading from 1.5" - }, - { - "content": "\n

    Recommendations\u00b6

    \n

    Pipeline\u2019s default CSS and JS compressor is Yuglify.\nYuglify wraps UglifyJS and cssmin, applying the default YUI configurations to them.\nIt can be downloaded from: https://github.com/yui/yuglify/.

    \n

    If you do not install yuglify, make sure to disable the compressor in your settings.

    \n", - "id": "recommendations", - "title": "Recommendations" - } - ], - "path": "installation" + "path": "installation", + "title": "Installation", + "sections": [ + { + "id": "installation", + "title": "Installation", + "content": "pip install django-pipeline. Add ‘pipeline’ to your INSTALLED_APPS. INSTALLED_APPS = ( 'pipeline', ). Use a pipeline storage for STATICFILES_STORAGE. STATICFILES_STORAGE = 'pipeline.storage.PipelineCachedStorage'. Add the PipelineFinder to STATICFILES_FINDERS. STATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )You need to use Django>=1.7 to be able to use this version of pipeline." + }, + { + "id": "upgrading-from-1-3", + "title": "Upgrading from 1.3", + "content": "To upgrade from pipeline 1.3, you will need to follow these steps:. Update templates to use the new syntax. {# pipeline<1.4 #} {% load compressed %} {% compressed_js 'group' %} {% compressed_css 'group' %}. {# pipeline>=1.4 #} {% load pipeline %} {% javascript 'group' %} {% stylesheet 'group' %}. Add the PipelineFinder to STATICFILES_FINDERS. STATICFILES_FINDERS = ( 'django.contrib.staticfiles.finders.FileSystemFinder', 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 'pipeline.finders.PipelineFinder', )" + }, + { + "id": "upgrading-from-1-5", + "title": "Upgrading from 1.5", + "content": "To upgrade from pipeline 1.5, you will need update all your PIPELINE_* settings and move them under the new PIPELINE setting. See Configuration. This is a test line containing 'Query' word" + }, + { + "id": "recommendations", + "title": "Recommendations", + "content": "Pipeline’s default CSS and JS compressor is Yuglify. Yuglify wraps UglifyJS and cssmin, applying the default YUI configurations to them. It can be downloaded from: https://github.com/yui/yuglify/.. If you do not install yuglify, make sure to disable the compressor in your settings." + } + ], + "domains": [ + { + "role_name": "std:confval", + "doc_name": "configuration.html", + "anchor": "confval-notfound_default_language", + "type_display": "confval", + "doc_display": "Configuration", + "name": "notfound_default_language", + "display_name": "" + } + ] } diff --git a/readthedocs/search/tests/data/pipeline/signals.json b/readthedocs/search/tests/data/pipeline/signals.json index 78349e5e3aa..97b1095d29f 100644 --- a/readthedocs/search/tests/data/pipeline/signals.json +++ b/readthedocs/search/tests/data/pipeline/signals.json @@ -1,27 +1,41 @@ { - "content": "pipelineSignals List of all signals sent by pipeline.\ncss_compressed\npipeline.signals.css_compressed\nWhenever a css package is compressed, this signal is sent after the compression.\nArguments sent with this signal :\nsender:\nThe Packager class that compressed the group.\npackage:\nThe package actually compressed.\njs_compressed\npipeline.signals.js_compressed\nWhenever a js package is compressed, this signal is sent after the compression.\nArguments sent with this signal :\nsender:\nThe Packager class that compressed the group.\npackage:\nThe package actually compressed.", - "headers": [ - "Signals", - "css_compressed", - "js_compressed" - ], - "title": "Signals", - "sections": [ - { - "content": "\nList of all signals sent by pipeline.\n", - "id": "signals", - "title": "Signals" - }, - { - "content": "\n

    css_compressed\u00b6

    \n

    pipeline.signals.css_compressed

    \n
    \n

    Whenever a css package is compressed, this signal is sent after the compression.

    \n

    Arguments sent with this signal :

    \n
    \n
    \n\n\n\n\n\n\n\n\n
    sender:The Packager class that compressed the group.
    package:The package actually compressed.
    \n
    \n
    \n", - "id": "css-compressed", - "title": "css_compressed" - }, - { - "content": "\n

    js_compressed\u00b6

    \n

    pipeline.signals.js_compressed

    \n
    \n

    Whenever a js package is compressed, this signal is sent after the compression.

    \n

    Arguments sent with this signal :

    \n
    \n
    \n\n\n\n\n\n\n\n\n
    sender:The Packager class that compressed the group.
    package:The package actually compressed.
    \n
    \n
    \n", - "id": "js-compressed", - "title": "js_compressed" - } - ], - "path": "signals" + "path": "signals", + "title": "Signals", + "sections": [ + { + "id": "signals", + "title": "Signals", + "content": "List of all signals sent by pipeline." + }, + { + "id": "css-compressed", + "title": "css_compressed", + "content": "pipeline.signals.css_compressed. Whenever a css package is compressed, this signal is sent after the compression.. Arguments sent with this signal :. sender:. The Packager class that compressed the group.. package:. The package actually compressed." + }, + { + "id": "js-compressed", + "title": "js_compressed", + "content": "pipeline.signals.js_compressed. Whenever a js package is compressed, this signal is sent after the compression.. Arguments sent with this signal :. sender:. The Packager class that compressed the group.. package:. The package actually compressed." + } + ], + "domains": [ + { + "role_name": "py:method", + "doc_name": "autoapi/notfound/extension/index.html", + "anchor": "notfound.extension.OrphanMetadataCollector.process_doc", + "type_display": "method", + "doc_display": "notfound.extension", + "name": "notfound.extension.OrphanMetadataCollector.process_doc", + "display_name": "" + }, + { + "role_name": "py:method", + "doc_name": "autoapi/notfound/extension/index.html", + "anchor": "notfound.extension.OrphanMetadataCollector.clear_doc", + "type_display": "method", + "doc_display": "notfound.extension", + "name": "notfound.extension.OrphanMetadataCollector.clear_doc", + "display_name": "" + } + ] } diff --git a/readthedocs/search/tests/dummy_data.py b/readthedocs/search/tests/dummy_data.py index ed1d5c7e2f6..8c1cc9e5951 100644 --- a/readthedocs/search/tests/dummy_data.py +++ b/readthedocs/search/tests/dummy_data.py @@ -1,7 +1,7 @@ PROJECT_DATA_FILES = { 'pipeline': ['installation', 'signals'], 'kuma': ['documentation', 'docker'], - 'docs': ['story', 'wiping'], + 'docs': ['support', 'wiping'], } ALL_PROJECTS = PROJECT_DATA_FILES.keys() diff --git a/readthedocs/search/tests/test_api.py b/readthedocs/search/tests/test_api.py index 42e8187a856..b490acf088b 100644 --- a/readthedocs/search/tests/test_api.py +++ b/readthedocs/search/tests/test_api.py @@ -1,17 +1,22 @@ +import re import pytest + from django.core.urlresolvers import reverse from django_dynamic_fixture import G - from readthedocs.builds.models import Version from readthedocs.projects.models import HTMLFile -from readthedocs.search.tests.utils import get_search_query_from_project_file +from readthedocs.search.tests.utils import ( + get_search_query_from_project_file, + SECTION_FIELDS, + DOMAIN_FIELDS, +) from readthedocs.search.documents import PageDocument @pytest.mark.django_db @pytest.mark.search -class TestDocumentSearch(object): +class TestDocumentSearch: @classmethod def setup_class(cls): @@ -20,40 +25,111 @@ def setup_class(cls): # installed cls.url = reverse('doc_search') - @pytest.mark.parametrize('data_type', ['content', 'headers', 'title']) @pytest.mark.parametrize('page_num', [0, 1]) - def test_search_works(self, api_client, project, data_type, page_num): - query = get_search_query_from_project_file(project_slug=project.slug, page_num=page_num, - data_type=data_type) + def test_search_works_with_title_query(self, api_client, project, page_num): + query = get_search_query_from_project_file( + project_slug=project.slug, + page_num=page_num, + data_type='title' + ) + + version = project.versions.all().first() + search_params = { + 'project': project.slug, + 'version': version.slug, + 'q': 
query + } + resp = api_client.get(self.url, search_params) + assert resp.status_code == 200 - version = project.versions.all()[0] - search_params = {'project': project.slug, 'version': version.slug, 'q': query} + data = resp.data['results'] + assert len(data) >= 1 + + # Matching first result + project_data = data[0] + assert project_data['project'] == project.slug + + # Check highlight return correct object of first result + title_highlight = project_data['highlight']['title'] + + assert len(title_highlight) == 1 + assert query.lower() in title_highlight[0].lower() + + @pytest.mark.parametrize('data_type', SECTION_FIELDS + DOMAIN_FIELDS) + @pytest.mark.parametrize('page_num', [0, 1]) + def test_search_works_with_sections_and_domains_query( + self, + api_client, + project, + page_num, + data_type + ): + query = get_search_query_from_project_file( + project_slug=project.slug, + page_num=page_num, + data_type=data_type + ) + version = project.versions.all().first() + search_params = { + 'project': project.slug, + 'version': version.slug, + 'q': query + } resp = api_client.get(self.url, search_params) assert resp.status_code == 200 data = resp.data['results'] - assert len(data) == 1 + assert len(data) >= 1 + + # Matching first result project_data = data[0] assert project_data['project'] == project.slug - # Check highlight return correct object - all_highlights = project_data['highlight'][data_type] - for highlight in all_highlights: + inner_hits = project_data['inner_hits'] + # since there was a nested query, + # inner_hits should not be empty + assert len(inner_hits) >= 1 + + inner_hit_0 = inner_hits[0] # first inner_hit + + expected_type = data_type.split('.')[0] # can be "sections" or "domains" + assert inner_hit_0['type'] == expected_type + + highlight = inner_hit_0['highlight'][data_type] + assert ( + len(highlight) == 1 + ), 'number_of_fragments is set to 1' + + # checking highlighting of results + highlighted_words = re.findall( # this gets all words inside tag + '(.*?)', + highlight[0] + ) + assert len(highlighted_words) > 0 + + for word in highlighted_words: # Make it lower because our search is case insensitive - assert query.lower() in highlight.lower() + assert word.lower() in query.lower() def test_doc_search_filter_by_project(self, api_client): - """Test Doc search result are filtered according to project""" - - # `Github` word is present both in `kuma` and `pipeline` files - # so search with this phrase but filter through `kuma` project - search_params = {'q': 'GitHub', 'project': 'kuma', 'version': 'latest'} + """Test Doc search results are filtered according to project""" + + # `documentation` word is present both in `kuma` and `docs` files + # and not in `pipeline`, so search with this phrase but filter through project + search_params = { + 'q': 'documentation', + 'project': 'docs', + 'version': 'latest' + } resp = api_client.get(self.url, search_params) assert resp.status_code == 200 data = resp.data['results'] - assert len(data) == 1 - assert data[0]['project'] == 'kuma' + assert len(data) == 2 # both pages of `docs` contains the word `documentation` + + # all results must be from same project + for res in data: + assert res['project'] == 'docs' def test_doc_search_filter_by_version(self, api_client, project): """Test Doc search result are filtered according to version""" @@ -70,7 +146,11 @@ def test_doc_search_filter_by_version(self, api_client, project): f.save() PageDocument().update(f) - search_params = {'q': query, 'project': project.slug, 'version': 
dummy_version.slug} + search_params = { + 'q': query, + 'project': project.slug, + 'version': dummy_version.slug + } resp = api_client.get(self.url, search_params) assert resp.status_code == 200 @@ -127,13 +207,20 @@ def test_doc_search_subprojects(self, api_client, all_projects): # Now search with subproject content but explicitly filter by the parent project query = get_search_query_from_project_file(project_slug=subproject.slug) - search_params = {'q': query, 'project': project.slug, 'version': version.slug} + search_params = { + 'q': query, + 'project': project.slug, + 'version': version.slug + } resp = api_client.get(self.url, search_params) assert resp.status_code == 200 data = resp.data['results'] - assert len(data) == 1 - assert data[0]['project'] == subproject.slug + assert len(data) >= 1 # there may be results from another projects + + # First result should be the subproject + first_result = data[0] + assert first_result['project'] == subproject.slug # Check the link is the subproject document link document_link = subproject.get_docs_url(version_slug=version.slug) - assert document_link in data[0]['link'] + assert document_link in first_result['link'] diff --git a/readthedocs/search/tests/test_faceted_search.py b/readthedocs/search/tests/test_faceted_search.py index b620f8453ca..e32deeda511 100644 --- a/readthedocs/search/tests/test_faceted_search.py +++ b/readthedocs/search/tests/test_faceted_search.py @@ -14,10 +14,10 @@ def test_search_exact_match(self, client, project, case): Making a query with quoted text like ``"foo bar"`` should match exactly ``foo bar`` or ``Foo Bar`` etc """ - # `Github` word is present both in `kuma` and `pipeline` files - # But the phrase Github can is available only in kuma docs. + # `Sphinx` word is present both in `kuma` and `docs` files + # But the phrase `Sphinx uses` is available only in kuma docs. # So search with this phrase to check - query_text = r'"GitHub can"' + query_text = r'"Sphinx uses"' cased_query = getattr(query_text, case) query = cased_query() @@ -26,7 +26,7 @@ def test_search_exact_match(self, client, project, case): assert len(results) == 1 assert results[0]['project'] == 'kuma' - assert results[0]['path'] == 'documentation' + assert results[0]['path'] == 'testdocumentation' def test_search_combined_result(self, client, project): """Check search result are combined of both `AND` and `OR` operator @@ -36,15 +36,15 @@ def test_search_combined_result(self, client, project): - Where both `Foo Bar` is present - Where `Foo` or `Bar` is present """ - query = 'Official Support' + query = 'Elasticsearch Query' page_search = PageDocument.faceted_search(query=query, user='') results = page_search.execute() assert len(results) == 3 result_paths = [r.path for r in results] - # ``open-source-philosophy`` page has both ``Official Support`` words - # ``docker`` page has ``Support`` word - # ``installation`` page has ``Official`` word - expected_paths = ['open-source-philosophy', 'docker', 'installation'] + # ``guides/wipe-environment`` page has both ``Elasticsearch Query`` words + # ``docker`` page has ``Elasticsearch`` word + # ``installation`` page has ``Query`` word. 
+ expected_paths = ['guides/wipe-environment', 'docker', 'installation'] assert result_paths == expected_paths diff --git a/readthedocs/search/tests/test_views.py b/readthedocs/search/tests/test_views.py index a3f95b36eac..930fd8a3411 100644 --- a/readthedocs/search/tests/test_views.py +++ b/readthedocs/search/tests/test_views.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +import re + import pytest from django.core.urlresolvers import reverse from django_dynamic_fixture import G @@ -8,65 +10,78 @@ from readthedocs.builds.constants import LATEST from readthedocs.builds.models import Version from readthedocs.projects.models import HTMLFile, Project -from readthedocs.search.tests.utils import get_search_query_from_project_file +from readthedocs.search.tests.utils import ( + get_search_query_from_project_file, + DATA_TYPES_VALUES, +) @pytest.mark.django_db @pytest.mark.search -class TestProjectSearch(object): +class TestProjectSearch: url = reverse('search') def _get_search_result(self, url, client, search_params): resp = client.get(url, search_params) assert resp.status_code == 200 - page = pq(resp.content) - result = page.find('.module-list-wrapper .module-item-title') - return result, page + results = resp.context['results'] + facets = resp.context['facets'] + + return results, facets def test_search_by_project_name(self, client, project, all_projects): - result, _ = self._get_search_result( - url=self.url, client=client, - search_params={'q': project.name}, + results, _ = self._get_search_result( + url=self.url, + client=client, + search_params={ 'q': project.name }, ) - assert len(result) == 1 - assert project.name.encode('utf-8') in result.text().encode('utf-8') - assert all_projects[1].name.encode('utf-8') not in result.text().encode('utf-8') + assert len(results) == 1 + assert project.name.encode('utf-8') in results[0].name.encode('utf-8') + for proj in all_projects[1:]: + assert proj.name.encode('utf-8') not in results[0].name.encode('utf-8') - def test_search_project_show_languages(self, client, project): - """Test that searching project should show all available languages.""" + def test_search_project_have_correct_language_facets(self, client, project): + """Test that searching project should have correct language facets in the results""" # Create a project in bn and add it as a translation G(Project, language='bn', name=project.name) - result, page = self._get_search_result( - url=self.url, client=client, - search_params={'q': project.name}, + results, facets = self._get_search_result( + url=self.url, + client=client, + search_params={ 'q': project.name }, ) - content = page.find('.navigable .language-list') + lang_facets = facets['language'] + lang_facets_str = [facet[0] for facet in lang_facets] # There should be 2 languages - assert len(content) == 2 - assert 'bn' in content.text() + assert len(lang_facets) == 2 + assert sorted(lang_facets_str) == sorted(['en', 'bn']) + for facet in lang_facets: + assert facet[2] == False # because none of the facets are applied def test_search_project_filter_language(self, client, project): """Test that searching project filtered according to language.""" # Create a project in bn and add it as a translation translate = G(Project, language='bn', name=project.name) - search_params = {'q': project.name, 'language': 'bn'} + search_params = { 'q': project.name, 'language': 'bn' } - result, page = self._get_search_result( - url=self.url, client=client, + results, facets = self._get_search_result( + url=self.url, + client=client, 
search_params=search_params, ) # There should be only 1 result - assert len(result) == 1 + assert len(results) == 1 + + lang_facets = facets['language'] + lang_facets_str = [facet[0] for facet in lang_facets] - content = page.find('.navigable .language-list') # There should be 2 languages because both `en` and `bn` should show there - assert len(content) == 2 - assert 'bn' in content.text() + assert len(lang_facets) == 2 + assert sorted(lang_facets_str) == sorted(['en', 'bn']) @pytest.mark.django_db @@ -78,39 +93,153 @@ def _get_search_result(self, url, client, search_params): resp = client.get(url, search_params) assert resp.status_code == 200 - page = pq(resp.content) - result = page.find('.module-list-wrapper .search-result-item') - return result, page + results = resp.context['results'] + facets = resp.context['facets'] - @pytest.mark.parametrize('data_type', ['content', 'headers', 'title']) - @pytest.mark.parametrize('page_num', [0, 1]) - def test_file_search(self, client, project, data_type, page_num): - query = get_search_query_from_project_file(project_slug=project.slug, page_num=page_num, - data_type=data_type) + return results, facets + + def _get_highlight(self, result, data_type): + # if query is from page title, + # highlighted title is present in 'result.meta.highlight.title' + if data_type == 'title': + highlight = result.meta.highlight.title + + # if result is not from page title, + # then results and highlighted results are present inside 'inner_hits' + else: + inner_hits = result.meta.inner_hits + assert len(inner_hits) >= 1 + + # checking first inner_hit + inner_hit_0 = inner_hits[0] + expected_type = data_type.split('.')[0] # can be either 'sections' or 'domains' + assert inner_hit_0['type'] == expected_type + highlight = inner_hit_0['highlight'][data_type] - result, _ = self._get_search_result(url=self.url, client=client, - search_params={'q': query, 'type': 'file'}) - assert len(result) == 1 - assert query in result.text() + return highlight + def _get_highlighted_words(self, string): + highlighted_words = re.findall( + '(.*?)', + string + ) + return highlighted_words + + @pytest.mark.parametrize('data_type', DATA_TYPES_VALUES) + @pytest.mark.parametrize('page_num', [0, 1]) + def test_file_search(self, client, project, data_type, page_num): + query = get_search_query_from_project_file( + project_slug=project.slug, + page_num=page_num, + data_type=data_type + ) + results, _ = self._get_search_result( + url=self.url, + client=client, + search_params={ 'q': query, 'type': 'file' } + ) + assert len(results) >= 1 + + # checking first result + result_0 = results[0] + highlight = self._get_highlight(result_0, data_type) + assert len(highlight) == 1 + + highlighted_words = self._get_highlighted_words(highlight[0]) + assert len(highlighted_words) >= 1 + for word in highlighted_words: + # Make it lower because our search is case insensitive + assert word.lower() in query.lower() + + def test_file_search_have_correct_role_name_facets(self, client): + """Test that searching files should result all role_names.""" + + # searching for '/api/v3/' to test that + # correct role_names are displayed + results, facets = self._get_search_result( + url=self.url, + client=client, + search_params={ 'q': '/api/v3/', 'type': 'file' } + ) + assert len(results) >= 1 + role_name_facets = facets['role_name'] + role_name_facets_str = [facet[0] for facet in role_name_facets] + expected_role_names = ['http:get', 'http:patch', 'http:post'] + assert sorted(expected_role_names) == 
sorted(role_name_facets_str) + for facet in role_name_facets: + assert facet[2] == False # because none of the facets are applied + + def test_file_search_filter_role_name(self, client): + """Test that searching files filtered according to role_names.""" + + search_params = { 'q': 'notfound', 'type': 'file' } + # searching without the filter + results, facets = self._get_search_result( + url=self.url, + client=client, + search_params=search_params + ) + assert len(results) >= 2 # there are > 1 results without the filter + role_name_facets = facets['role_name'] + for facet in role_name_facets: + assert facet[2] == False # because none of the facets are applied + + confval_facet = 'std:confval' + # checking if 'std:confval' facet is present in results + assert confval_facet in [facet[0] for facet in role_name_facets] + + # filtering with role_name=std:confval + search_params['role_name'] = confval_facet + new_results, new_facets = self._get_search_result( + url=self.url, + client=client, + search_params=search_params + ) + new_role_names_facets = new_facets['role_name'] + # there is only one result with role_name='std:confval' + # in `installation` page + assert len(new_results) == 1 + first_result = new_results[0] # first result + inner_hits = first_result.meta.inner_hits # inner_hits of first results + assert len(inner_hits) >= 1 + inner_hit_0 = inner_hits[0] # first inner_hit + assert inner_hit_0.type == 'domains' + assert inner_hit_0.source.role_name == confval_facet + + for facet in new_role_names_facets: + if facet[0] == confval_facet: + assert facet[2] == True # because 'std:confval' filter is active + else: + assert facet[2] == False + + @pytest.mark.parametrize('data_type', DATA_TYPES_VALUES) @pytest.mark.parametrize('case', ['upper', 'lower', 'title']) - def test_file_search_case_insensitive(self, client, project, case): + def test_file_search_case_insensitive(self, client, project, case, data_type): """ Check File search is case insensitive. - It tests with uppercase, lowercase and camelcase + It tests with uppercase, lowercase and camelcase. """ - query_text = get_search_query_from_project_file(project_slug=project.slug) - + query_text = get_search_query_from_project_file( + project_slug=project.slug, + data_type=data_type + ) cased_query = getattr(query_text, case) query = cased_query() - result, _ = self._get_search_result(url=self.url, client=client, - search_params={'q': query, 'type': 'file'}) + results, _ = self._get_search_result( + url=self.url, + client=client, + search_params={ 'q': query, 'type': 'file' } + ) + assert len(results) >= 1 - assert len(result) == 1 - # Check the actual text is in the result, not the cased one - assert query_text in result.text() + first_result = results[0] + highlight = self._get_highlight(first_result, data_type) + assert len(highlight) == 1 + highlighted_words = self._get_highlighted_words(highlight[0]) + for word in highlighted_words: + assert word.lower() in query.lower() def test_file_search_exact_match(self, client, project): """ @@ -120,64 +249,77 @@ def test_file_search_exact_match(self, client, project): ``foo bar`` phrase. """ - # `Github` word is present both in `kuma` and `pipeline` files - # But the phrase Github can is available only in kuma docs. + # `Sphinx` word is present both in `kuma` and `docs` files + # But the phrase `Sphinx uses` is present only in `kuma` docs. 
# So search with this phrase to check - query = r'"GitHub can"' - - result, _ = self._get_search_result(url=self.url, client=client, - search_params={'q': query, 'type': 'file'}) - - assert len(result) == 1 - - def test_file_search_show_projects(self, client, all_projects): - """Test that search result page shows list of projects while searching - for files.""" - - # `Github` word is present both in `kuma` and `pipeline` files + query = r'"Sphinx uses"' + results, _ = self._get_search_result( + url=self.url, + client=client, + search_params={ 'q': query, 'type': 'file' }) + + # there must be only 1 result + # because the phrase is present in + # only one project + assert len(results) == 1 + assert results[0].project == 'kuma' + assert results[0].path == 'testdocumentation' + + inner_hits = results[0].meta.inner_hits + assert len(inner_hits) == 1 + assert inner_hits[0].type == 'sections' + highlight = self._get_highlight(results[0], 'sections.content') + assert len(highlight) == 1 + highlighted_words = self._get_highlighted_words(highlight[0]) + for word in highlighted_words: + assert word.lower() in query.lower() + + def test_file_search_have_correct_project_facets(self, client, all_projects): + """Test that file search have correct project facets in results""" + + # `Sphinx` word is present both in `kuma` and `docs` files # so search with this phrase - result, page = self._get_search_result( - url=self.url, client=client, - search_params={'q': 'GitHub', 'type': 'file'}, + query = 'Sphinx' + results, facets = self._get_search_result( + url=self.url, + client=client, + search_params={ 'q': query, 'type': 'file' }, ) - # There should be 2 search result - assert len(result) == 2 - - # there should be 2 projects in the left side column - content = page.find('.navigable .project-list') - assert len(content) == 2 - text = content.text() + assert len(results) == 2 + project_facets = facets['project'] + project_facets_str = [facet[0] for facet in project_facets] + assert len(project_facets_str) == 2 # kuma and pipeline should be there - assert 'kuma' and 'pipeline' in text + assert sorted(project_facets_str) == sorted(['kuma', 'docs']) def test_file_search_filter_by_project(self, client): """Test that search result are filtered according to project.""" - # `Github` word is present both in `kuma` and `pipeline` files + # `Sphinx` word is present both in `kuma` and `docs` files # so search with this phrase but filter through `kuma` project - search_params = {'q': 'GitHub', 'type': 'file', 'project': 'kuma'} - result, page = self._get_search_result( - url=self.url, client=client, + search_params = { + 'q': 'Sphinx', + 'type': 'file', + 'project': 'kuma' + } + results, facets = self._get_search_result( + url=self.url, + client=client, search_params=search_params, ) + project_facets = facets['project'] + resulted_project_facets = [ facet[0] for facet in project_facets ] # There should be 1 search result as we have filtered - assert len(result) == 1 - content = page.find('.navigable .project-list') - + assert len(results) == 1 # kuma should should be there only - assert 'kuma' in result.text() - assert 'pipeline' not in result.text() + assert 'kuma' == results[0].project - # But there should be 2 projects in the left side column + # But there should be 2 projects in the project facets # as the query is present in both projects - content = page.find('.navigable .project-list') - if len(content) != 2: - pytest.xfail('failing because currently all projects are not showing in project list') - else: - assert 

     @pytest.mark.xfail(reason='Versions are not showing correctly! Fixme while rewrite!')
     def test_file_search_show_versions(self, client, all_projects, es_index, settings):
@@ -187,32 +329,24 @@ def test_file_search_show_versions(self, client, all_projects, es_index, setting
         project = all_projects[0]
         # Create some versions of the project
         versions = [G(Version, project=project) for _ in range(3)]
-
         query = get_search_query_from_project_file(project_slug=project.slug)
-
-        result, page = self._get_search_result(
-            url=self.url, client=client,
-            search_params={'q': query, 'type': 'file'},
+        results, facets = self._get_search_result(
+            url=self.url,
+            client=client,
+            search_params={ 'q': query, 'type': 'file' },
         )

-        # There should be only one result because by default
-        # only latest version result should be there
-        assert len(result) == 1
+        # Results can be from other projects also
+        assert len(results) >= 1

-        content = page.find('.navigable .version-list')
+        version_facets = facets['version']
+        version_facets_str = [facet[0] for facet in version_facets]

         # There should be total 4 versions
         # one is latest, and other 3 that we created above
-        assert len(content) == 4
+        assert len(version_facets) == 4

         project_versions = [v.slug for v in versions] + [LATEST]
-        content_versions = []
-        for element in content:
-            text = element.text_content()
-            # strip and split to keep the version slug only
-            slug = text.strip().split('\n')[0]
-            content_versions.append(slug)
-
-        assert sorted(project_versions) == sorted(content_versions)
+        assert sorted(project_versions) == sorted(version_facets_str)

     def test_file_search_subprojects(self, client, all_projects, es_index):
         """
@@ -228,10 +362,14 @@ def test_file_search_subprojects(self, client, all_projects, es_index):
         # Now search with subproject content but explicitly filter by the parent project
         query = get_search_query_from_project_file(project_slug=subproject.slug)
-        search_params = {'q': query, 'type': 'file', 'project': project.slug}
-        result, page = self._get_search_result(
-            url=self.url, client=client,
+        search_params = {
+            'q': query,
+            'type': 'file',
+            'project': project.slug,
+        }
+        results, _ = self._get_search_result(
+            url=self.url,
+            client=client,
             search_params=search_params,
         )
-
-        assert len(result) == 0
+        assert len(results) == 0
diff --git a/readthedocs/search/tests/test_xss.py b/readthedocs/search/tests/test_xss.py
index 7603c28d34c..59b365535c4 100644
--- a/readthedocs/search/tests/test_xss.py
+++ b/readthedocs/search/tests/test_xss.py
@@ -14,4 +14,19 @@ def test_facted_page_xss(self, client, project):
         expected = """
        &lt;h3&gt;XSS exploit&lt;/h3&gt;
        """.strip()
-        assert results[0].meta.highlight.content[0][:len(expected)] == expected
+
+        hits = results.hits.hits
+        assert len(hits) == 1  # there should be only one result
+
+        inner_hits = hits[0]['inner_hits']
+
+        domain_hits = inner_hits['domains']['hits']['hits']
+        assert len(domain_hits) == 0  # there shouldn't be any results from domains
+
+        section_hits = inner_hits['sections']['hits']['hits']
+        assert len(section_hits) == 1
+
+        section_content_highlight = section_hits[0]['highlight']['sections.content']
+        assert len(section_content_highlight) == 1
+
+        assert expected in section_content_highlight[0]
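
Note: the XSS test above walks the raw Elasticsearch response rather than a wrapped result object. A rough sketch of the nested `inner_hits` shape it traverses (field values are illustrative only):

    hit = {
        'inner_hits': {
            'domains': {
                'hits': {'hits': []},  # no domain results are expected for this query
            },
            'sections': {
                'hits': {
                    'hits': [
                        {
                            '_source': {'id': 'xss-exploit', 'title': 'XSS exploit', 'content': '...'},
                            'highlight': {
                                # highlighted fragments come back as a list of strings
                                'sections.content': ['... <em>&lt;h3&gt;XSS exploit&lt;/h3&gt;</em> ...'],
                            },
                        },
                    ],
                },
            },
        },
    }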
diff --git a/readthedocs/search/tests/utils.py b/readthedocs/search/tests/utils.py
index ee9bd8728df..1a2a433091f 100644
--- a/readthedocs/search/tests/utils.py
+++ b/readthedocs/search/tests/utils.py
@@ -1,7 +1,15 @@
 # -*- coding: utf-8 -*-
+
+import random
+
 from readthedocs.projects.models import HTMLFile

+SECTION_FIELDS = [ 'sections.title', 'sections.content' ]
+DOMAIN_FIELDS = [ 'domains.type_display', 'domains.name' ]
+DATA_TYPES_VALUES = ['title'] + SECTION_FIELDS + DOMAIN_FIELDS
+
+
 def get_search_query_from_project_file(project_slug, page_num=0, data_type='title'):
     """
     Return search query from the project's page file.
@@ -12,11 +20,64 @@ def get_search_query_from_project_file(project_slug, page_num=0, data_type='titl
     html_file = HTMLFile.objects.filter(project__slug=project_slug).order_by('id')[page_num]

     file_data = html_file.processed_json
-    query_data = file_data[data_type]
+    query_data = file_data[data_type.split('.')[0]]
+
+    if data_type == 'title':
+
+        # uses the first word of the page title as the query
+        query = query_data.split()[0]
+
+    elif data_type == 'sections.title':
+
+        # generates query from section title
+        query_data = query_data[0]['title'].split()
+        start = 0
+        end = random.randint(1, len(query_data))
+        query = query_data[start:end]
+        query = ' '.join(query)
+
+    elif data_type == 'sections.content':
+
+        # generates query from section content
+        query_data = query_data[0]['content'].split()
+        start = random.randint(0, 6)
+
+        # the query is built from 3 words to make sure
+        # that it does not contain only stop words
+        # such as 'is', 'and' or 'the'
+        end = start + 3
+
+        query = query_data[start:end]
+        query = ' '.join(query)
+
+    elif data_type == 'domains.type_display':
+
+        # uses the first word of domains.type_display as the query
+        query = query_data[0]['type_display'].split()[0]
+
+    elif data_type == 'domains.name':
+        # test data contains domains.name values,
+        # some of which contain '.', some contain '/'
+        # and others are plain words.
+        # Splitting on '.' and '/' is done
+        # to ensure that the query contains proper words.
+
+        # generates query from domains.name
+        if '.' in query_data[0]['name']:
+            query_data = query_data[0]['name'].split('.')
+            start = 0
+            end = random.randint(1, len(query_data))
+            query = '.'.join(query_data[start:end])
+
+        elif '/' in query_data[0]['name']:
+            query_data = query_data[0]['name']
-    if data_type in ['headers']:
-        # The data is in list. slice in order to get the text
-        query_data = query_data[0]
+            # filter out empty strings so that the query is not empty
+            query_data = [word for word in query_data.split('/') if word]
+            start = 0
+            end = random.randint(1, len(query_data))
+            query = '/'.join(query_data[start:end])
+        else:
+            query = query_data[0]['name'].split()[0]
-    query = query_data.split()[0]
     return query
diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py
index c2aa566e367..cf1f0fb73aa 100644
--- a/readthedocs/search/utils.py
+++ b/readthedocs/search/utils.py
@@ -94,14 +94,6 @@ def get_project_list_or_404(project_slug, user, version_slug=None):
     return project_list


-def get_chunk(total, chunk_size):
-    """Yield successive `chunk_size` chunks."""
-    # Based on https://stackoverflow.com/a/312464
-    # licensed under cc by-sa 3.0
-    for i in range(0, total, chunk_size):
-        yield (i, i + chunk_size)
-
-
 def _get_index(indices, index_name):
     """
     Get Index from all the indices.
@@ -159,3 +151,26 @@ def _indexing_helper(html_objs_qs, wipe=False):
             index_objects_to_es.delay(**kwargs)
         else:
             delete_objects_in_es.delay(**kwargs)
+
+
+def _remove_newlines_from_dict(highlight):
+    """
+    Recursively change results to turn newlines into periods.
+
+    See: https://github.com/rtfd/readthedocs.org/issues/5168
+    :param highlight: highlight dict whose contents are to be edited.
+    :type highlight: dict
+    :returns: dict with all the newlines changed to periods.
+    :rtype: dict
+    """
+    for k, v in highlight.items():
+        if isinstance(v, dict):
+            highlight[k] = _remove_newlines_from_dict(v)
+        else:
+            # Elasticsearch returns the contents of the
+            # highlighted field in a list.
+            if isinstance(v, list):
+                v_new_list = [res.replace('\n', '. ') for res in v]
+                highlight[k] = v_new_list
+
+    return highlight
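
Note: `_remove_newlines_from_dict` mutates the highlight mapping in place and also returns it, so callers can use either style. A minimal usage sketch (input values are made up):

    highlight = {
        'sections.title': ['Installing <em>Sphinx</em>'],
        'sections.content': ['First line\nSecond line'],
    }

    cleaned = _remove_newlines_from_dict(highlight)
    # every newline inside the highlighted fragments is replaced with '. ':
    # cleaned['sections.content'] == ['First line. Second line']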
diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py
index eac8117e9eb..af5f97446ff 100644
--- a/readthedocs/search/views.py
+++ b/readthedocs/search/views.py
@@ -1,6 +1,8 @@
 """Search views."""
 import collections
+import itertools
 import logging
+from operator import attrgetter
 from pprint import pformat

 from django.shortcuts import get_object_or_404, render
@@ -9,11 +11,10 @@
 from readthedocs.projects.models import Project
 from readthedocs.search.faceted_search import (
     ALL_FACETS,
-    AllSearch,
-    DomainSearch,
     PageSearch,
     ProjectSearch,
 )
+from readthedocs.search import utils

 log = logging.getLogger(__name__)
@@ -63,9 +64,7 @@ def elastic_search(request, project_slug=None):
         lambda: ProjectSearch, {
             'project': ProjectSearch,
-            'domain': DomainSearch,
             'file': PageSearch,
-            'all': AllSearch,
         }
     )
@@ -107,13 +106,36 @@ def elastic_search(request, project_slug=None):
                 facets[avail_facet].insert(0, (value, 0, True))

     if results:
+
+        # sorting inner_hits (if present)
         if user_input.type == 'file':
-            # Change results to turn newlines in highlight into periods
-            # https://github.com/rtfd/readthedocs.org/issues/5168
-            for result in results:
-                if hasattr(result.meta.highlight, 'content'):
-                    result.meta.highlight.content = [result.replace(
-                        '\n', '. ') for result in result.meta.highlight.content]
+
+            try:
+                for result in results:
+                    inner_hits = result.meta.inner_hits
+                    sections = inner_hits.sections or []
+                    domains = inner_hits.domains or []
+                    all_results = itertools.chain(sections, domains)
+
+                    sorted_results = [
+                        {
+                            'type': hit._nested.field,
+
+                            # `_source` is not used as the key here because
+                            # Django templates do not allow variable names
+                            # that start with an underscore
+                            'source': hit._source.to_dict(),
+
+                            'highlight': utils._remove_newlines_from_dict(
+                                hit.highlight.to_dict()
+                            ),
+                        }
+                        for hit in sorted(all_results, key=attrgetter('_score'), reverse=True)
+                    ]
+
+                    result.meta.inner_hits = sorted_results
+            except Exception:
+                log.exception('Error while sorting the results (inner_hits).')

     log.debug('Search results: %s', pformat(results.to_dict()))
     log.debug('Search facets: %s', pformat(results.facets.to_dict()))
diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py
index 456d0cf4573..3ae10d384f4 100644
--- a/readthedocs/settings/base.py
+++ b/readthedocs/settings/base.py
@@ -427,12 +427,6 @@ def USE_PROMOS(self):  # noqa
 ES_TASK_CHUNK_SIZE = 100

 ES_INDEXES = {
-    'domain': {
-        'name': 'domain_index',
-        'settings': {'number_of_shards': 2,
-                     'number_of_replicas': 0
-                     }
-    },
     'project': {
         'name': 'project_index',
         'settings': {'number_of_shards': 2,
@@ -444,9 +438,6 @@ def USE_PROMOS(self):  # noqa
         'settings': {
             'number_of_shards': 2,
             'number_of_replicas': 0,
-            "index": {
-                "sort.field": ["project", "version"]
-            }
         }
     },
 }
diff --git a/readthedocs/templates/search/elastic_search.html b/readthedocs/templates/search/elastic_search.html
index a7e33862a30..18528a3eda4 100644
--- a/readthedocs/templates/search/elastic_search.html
+++ b/readthedocs/templates/search/elastic_search.html
@@ -2,15 +2,17 @@
 {% load core_tags i18n static %}

-{% block title %}{% blocktrans with query=query|default:"" %}Search: {{ query }} {% endblocktrans %}{% endblock %}
+{% block title %}
+  {% blocktrans with query=query|default:"" %}
+    Search: {{ query }}
+  {% endblocktrans %}
+{% endblock %}

 {% block extra_links %}
-{{ super }}
-
-
-
+  {{ super }}
+
 {% endblock %}

 {% block project_editing %}
@@ -25,218 +27,226 @@
 {% endblock %}
diff --git a/requirements/pip.txt b/requirements/pip.txt
index cdedc12256f..0618345585c 100644
--- a/requirements/pip.txt
+++ b/requirements/pip.txt
@@ -59,7 +59,9 @@
 elasticsearch==6.4.0  # pyup: <7.0.0
 #   File "/home/travis/build/rtfd/readthedocs.org/.tox/py36/lib/python3.6/site-packages/django_elasticsearch_dsl/documents.py", line 8, in <module>
 #     from elasticsearch_dsl.document import DocTypeMeta as DSLDocTypeMeta
 # ImportError: cannot import name 'DocTypeMeta'
-elasticsearch-dsl==6.1.0  # pyup: ignore
+#
+# Commit 97e3f75 adds the NestedFacet
+git+https://github.com/elastic/elasticsearch-dsl-py@97e3f756a8cacd1c863d3ced3d17abcafbb0f85e#egg=elasticsearch-dsl==6.1.1
 django-elasticsearch-dsl==0.5.1
 pyquery==1.4.0
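
Note: the pinned elasticsearch-dsl commit is needed for `NestedFacet`, which lets a faceted search aggregate on fields stored inside nested documents (such as the `sections` and `domains` of the page index). A minimal sketch of the kind of declaration this enables; the class, index and field names below are illustrative, not the project's actual configuration:

    from elasticsearch_dsl import FacetedSearch, TermsFacet
    from elasticsearch_dsl.faceted_search import NestedFacet

    class ExamplePageSearch(FacetedSearch):
        index = 'page_index'
        fields = ['title', 'sections.content']
        facets = {
            'project': TermsFacet(field='project'),
            # NestedFacet wraps another facet so the aggregation runs
            # against the given nested path ('domains' here)
            'role_name': NestedFacet('domains', TermsFacet(field='domains.role_name')),
        }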