
Merge pull request #22 from gsauthof/speedupfetching

Speedup feed url fetching (reload-all)
2 parents ec11bac + 93d5041 · commit b25426b5808495b44abeb6e44e4b1540ca50606b · akrennmair committed on Mar 26, 2012
Showing with 105 additions and 24 deletions.
  1. +2 −1 include/controller.h
  2. +4 −0 include/rss_parser.h
  3. +23 −0 include/utils.h
  4. +30 −16 rss/parser.cpp
  5. +1 −1 rss/rsspp.h
  6. +43 −4 src/controller.cpp
  7. +2 −2 src/rss_parser.cpp
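
In short: reload_range now sorts the feeds it is about to reload by the (reversed) host part of their URLs and reuses a single libcurl easy handle, wrapped in a new RAII class curl_handle, for the whole range. Because consecutive fetches then tend to hit the same host on the same handle, libcurl can keep the HTTP connection open between them, which is where the speedup in reload-all comes from.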
3 include/controller.h
@@ -18,6 +18,7 @@ namespace newsbeuter {
class view;
+ class curl_handle;
class controller {
public:
@@ -27,7 +28,7 @@ namespace newsbeuter {
view * get_view() { return v; }
void run(int argc = 0, char * argv[] = NULL);
- void reload(unsigned int pos, unsigned int max = 0, bool unattended = false);
+ void reload(unsigned int pos, unsigned int max = 0, bool unattended = false, curl_handle *easyhandle = 0);
void reload_all(bool unattended = false);
void reload_indexes(const std::vector<int>& indexes, bool unattended = false);
4 include/rss_parser.h
@@ -18,6 +18,8 @@ namespace newsbeuter {
~rss_parser();
std::tr1::shared_ptr<rss_feed> parse();
bool check_and_update_lastmodified();
+
+ void set_easyhandle(curl_handle *h) { easyhandle = h; }
private:
void replace_newline_characters(std::string& str);
std::string render_xhtml_title(const std::string& title, const std::string& link);
@@ -55,6 +57,8 @@ namespace newsbeuter {
rsspp::feed f;
remote_api * api;
bool is_ttrss;
+
+ curl_handle *easyhandle;
};
}
23 include/utils.h
@@ -3,6 +3,7 @@
#include <vector>
#include <string>
+#include <stdexcept>
#include <logger.h>
#include <curl/curl.h>
@@ -11,6 +12,28 @@
namespace newsbeuter {
+// wrapped curl handle for exception safety and so on
+// see also: https://github.com/gsauthof/ccurl
+class curl_handle {
+ private:
+ CURL *h;
+ curl_handle(const curl_handle &);
+ curl_handle &operator=(const curl_handle &);
+ public:
+ curl_handle()
+ : h(0)
+ {
+ h = curl_easy_init();
+ if (!h)
+ throw std::runtime_error("Can't obtain curl handle");
+ }
+ ~curl_handle()
+ {
+ curl_easy_cleanup(h);
+ }
+ CURL *ptr() { return h; }
+};
+
class utils {
public:
static std::vector<std::string> tokenize(const std::string& str, std::string delimiters = " \r\n\t");
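
The curl_handle class added above is a small RAII wrapper: the constructor acquires the easy handle (and throws if libcurl cannot provide one), the destructor releases it, and copying is disabled. A minimal usage sketch follows; it assumes the utils.h include path, and fetch_one() with its URL are made up for illustration only:

// Hypothetical usage sketch of the curl_handle wrapper defined above.
#include <curl/curl.h>
#include "utils.h"   // assumed include path for newsbeuter::curl_handle

static void fetch_one(const char *url) {
    newsbeuter::curl_handle handle;                 // curl_easy_init() happens here
    curl_easy_setopt(handle.ptr(), CURLOPT_URL, url);
    curl_easy_setopt(handle.ptr(), CURLOPT_TIMEOUT, 30L);
    if (curl_easy_perform(handle.ptr()) != CURLE_OK) {
        // even on early return or an exception, the destructor still runs
        // curl_easy_cleanup(), so the handle cannot leak
        return;
    }
}                                                   // handle released here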
46 rss/parser.cpp
@@ -37,6 +37,11 @@ parser::~parser() {
struct header_values {
time_t lastmodified;
std::string etag;
+
+ header_values()
+ : lastmodified(0)
+ {
+ }
};
static size_t handle_headers(void * ptr, size_t size, size_t nmemb, void * data) {
@@ -47,8 +52,13 @@ static size_t handle_headers(void * ptr, size_t size, size_t nmemb, void * data)
header[size*nmemb] = '\0';
if (!strncasecmp("Last-Modified:", header, 14)) {
- values->lastmodified = curl_getdate(header+14, NULL);
- LOG(LOG_DEBUG, "handle_headers: got last-modified %s (%d)", header+14, values->lastmodified);
+ time_t r = curl_getdate(header+14, NULL);
+ if (r == -1) {
+ LOG(LOG_DEBUG, "handle_headers: last-modified %s (curl_getdate FAILED)", header+14);
+ } else {
+ values->lastmodified = curl_getdate(header+14, NULL);
+ LOG(LOG_DEBUG, "handle_headers: got last-modified %s (%d)", header+14, values->lastmodified);
+ }
} else if (!strncasecmp("ETag:",header, 5)) {
values->etag = std::string(header+5);
utils::trim(values->etag);
@@ -60,13 +70,16 @@ static size_t handle_headers(void * ptr, size_t size, size_t nmemb, void * data)
return size * nmemb;
}
-feed parser::parse_url(const std::string& url, time_t lastmodified, const std::string& etag, newsbeuter::remote_api * api, const std::string& cookie_cache) {
+feed parser::parse_url(const std::string& url, time_t lastmodified, const std::string& etag, newsbeuter::remote_api * api, const std::string& cookie_cache, CURL *ehandle) {
std::string buf;
CURLcode ret;
- CURL * easyhandle = curl_easy_init();
+ CURL * easyhandle = ehandle;
if (!easyhandle) {
- throw exception(_("couldn't initialize libcurl"));
+ easyhandle = curl_easy_init();
+ if (!easyhandle) {
+ throw exception(_("couldn't initialize libcurl"));
+ }
}
if (ua) {
@@ -101,21 +114,20 @@ feed parser::parse_url(const std::string& url, time_t lastmodified, const std::s
curl_easy_setopt(easyhandle, CURLOPT_PROXYTYPE, prxtype);
- header_values hdrs = { 0, "" };
+ header_values hdrs;
+ curl_easy_setopt(easyhandle, CURLOPT_HEADERDATA, &hdrs);
+ curl_easy_setopt(easyhandle, CURLOPT_HEADERFUNCTION, handle_headers);
- curl_slist * custom_headers = NULL;
-
- if (lastmodified != 0) {
- curl_easy_setopt(easyhandle, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
+ curl_easy_setopt(easyhandle, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
+ if (lastmodified != 0)
curl_easy_setopt(easyhandle, CURLOPT_TIMEVALUE, lastmodified);
- curl_easy_setopt(easyhandle, CURLOPT_HEADERDATA, &hdrs);
- curl_easy_setopt(easyhandle, CURLOPT_HEADERFUNCTION, handle_headers);
- }
+ else
+ curl_easy_setopt(easyhandle, CURLOPT_TIMEVALUE, 0);
+
+ curl_slist * custom_headers = NULL;
if (etag.length() > 0) {
custom_headers = curl_slist_append(custom_headers, utils::strprintf("If-None-Match: %s", etag.c_str()).c_str());
curl_easy_setopt(easyhandle, CURLOPT_HTTPHEADER, custom_headers);
- curl_easy_setopt(easyhandle, CURLOPT_HEADERDATA, &hdrs);
- curl_easy_setopt(easyhandle, CURLOPT_HEADERFUNCTION, handle_headers);
}
ret = curl_easy_perform(easyhandle);
@@ -124,6 +136,7 @@ feed parser::parse_url(const std::string& url, time_t lastmodified, const std::s
et = hdrs.etag;
if (custom_headers) {
+ curl_easy_setopt(easyhandle, CURLOPT_HTTPHEADER, 0);
curl_slist_free_all(custom_headers);
}
@@ -136,7 +149,8 @@ feed parser::parse_url(const std::string& url, time_t lastmodified, const std::s
LOG(LOG_USERERROR, _("Error: trying to download feed `%s' returned HTTP status code %ld."), url.c_str(), status);
}
- curl_easy_cleanup(easyhandle);
+ if (!ehandle)
+ curl_easy_cleanup(easyhandle);
if (ret != 0) {
LOG(LOG_ERROR, "rsspp::parser::parse_url: curl_easy_perform returned err %d: %s", ret, curl_easy_strerror(ret));
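
parse_url now takes an optional pre-initialized CURL handle. When the caller supplies one, the function configures and uses it but skips the final curl_easy_cleanup(), so the handle, along with libcurl's connection and DNS caches attached to it, survives into the next fetch. To make reuse safe, per-request state is reset: CURLOPT_TIMECONDITION and CURLOPT_TIMEVALUE are now set on every call, the header callback is always installed, and CURLOPT_HTTPHEADER is cleared before the custom header list is freed. A minimal standalone sketch of the effect, assuming only libcurl (the URLs are placeholders):

// Two transfers on one easy handle: libcurl keeps the connection to the host
// open between them, so the second fetch skips DNS lookup and TCP setup.
#include <curl/curl.h>
#include <cstdio>

static size_t discard(void *, size_t size, size_t nmemb, void *) {
    return size * nmemb;   // throw the body away; only the transfer matters here
}

int main() {
    CURL *h = curl_easy_init();
    if (!h)
        return 1;
    curl_easy_setopt(h, CURLOPT_WRITEFUNCTION, discard);

    const char *urls[] = { "http://example.com/feed1.xml",
                           "http://example.com/feed2.xml" };
    for (int i = 0; i < 2; ++i) {
        curl_easy_setopt(h, CURLOPT_URL, urls[i]);
        CURLcode rc = curl_easy_perform(h);   // second call reuses the connection
        printf("%s -> %d\n", urls[i], (int)rc);
    }
    curl_easy_cleanup(h);
    return 0;
}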
2 rss/rsspp.h
@@ -75,7 +75,7 @@ class parser {
public:
parser(unsigned int timeout = 30, const char * user_agent = 0, const char * proxy = 0, const char * proxy_auth = 0, curl_proxytype proxy_type = CURLPROXY_HTTP);
~parser();
- feed parse_url(const std::string& url, time_t lastmodified = 0, const std::string& etag = "", newsbeuter::remote_api * api = 0, const std::string& cookie_cache = "");
+ feed parse_url(const std::string& url, time_t lastmodified = 0, const std::string& etag = "", newsbeuter::remote_api * api = 0, const std::string& cookie_cache = "", CURL *ehandle = 0);
feed parse_buffer(const char * buffer, size_t size, const char * url = NULL);
feed parse_file(const std::string& filename);
time_t get_last_modified() { return lm; }
47 src/controller.cpp
@@ -706,7 +706,7 @@ void controller::mark_all_read(unsigned int pos) {
}
}
-void controller::reload(unsigned int pos, unsigned int max, bool unattended) {
+void controller::reload(unsigned int pos, unsigned int max, bool unattended, curl_handle *easyhandle) {
LOG(LOG_DEBUG, "controller::reload: pos = %u max = %u", pos, max);
if (pos < feeds.size()) {
std::tr1::shared_ptr<rss_feed> oldfeed = feeds[pos];
@@ -717,6 +717,7 @@ void controller::reload(unsigned int pos, unsigned int max, bool unattended) {
bool ignore_dl = (cfg.get_configvalue("ignore-mode") == "download");
rss_parser parser(oldfeed->rssurl(), rsscache, &cfg, ignore_dl ? &ign : NULL, api);
+ parser.set_easyhandle(easyhandle);
LOG(LOG_DEBUG, "controller::reload: created parser");
try {
oldfeed->set_status(DURING_DOWNLOAD);
@@ -789,10 +790,48 @@ void controller::reload_indexes(const std::vector<int>& indexes, bool unattended
v->set_status("");
}
+struct feed_cmp {
+ const std::vector<std::tr1::shared_ptr<rss_feed> > &feeds;
+ feed_cmp(const std::vector<std::tr1::shared_ptr<rss_feed> > &f)
+ : feeds(f)
+ {
+ }
+ void extract(std::string &s, const std::string &url) const
+ {
+ size_t p = url.find("//");
+ p = (p == std::string::npos) ? 0 : p+2;
+ std::string suff(url.substr(p));
+ p = suff.find('/');
+ s = suff.substr(0, p);
+ }
+ bool operator()(unsigned a, unsigned b) const
+ {
+ std::tr1::shared_ptr<rss_feed> x = feeds[a];
+ std::tr1::shared_ptr<rss_feed> y = feeds[b];
+ const std::string &u = x->rssurl();
+ const std::string &v = y->rssurl();
+
+ std::string domain1, domain2;
+ extract(domain1, u);
+ extract(domain2, v);
+ std::reverse(domain1.begin(), domain1.end());
+ std::reverse(domain2.begin(), domain2.end());
+ return domain1 < domain2;
+ }
+};
+
void controller::reload_range(unsigned int start, unsigned int end, unsigned int size, bool unattended) {
- for (unsigned int i=start;i<=end;i++) {
- LOG(LOG_DEBUG, "controller::reload_range: reloading feed #%u", i);
- this->reload(i, size, unattended);
+
+ std::vector<unsigned> v;
+ for (unsigned i=start;i<=end;++i)
+ v.push_back(i);
+ std::sort(v.begin(), v.end(), feed_cmp(feeds));
+
+ curl_handle easyhandle;
+
+ for (std::vector<unsigned>::iterator i = v.begin(); i!= v.end(); ++i) {
+ LOG(LOG_DEBUG, "controller::reload_range: reloading feed #%u", *i);
+ this->reload(*i, size, unattended, &easyhandle);
}
}
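
The new feed_cmp comparator orders the indices in a reload range by the host part of each feed URL, reversed, so feeds served from the same host (and, roughly, the same parent domain, since reversing puts the top-level domain first) end up next to each other. Combined with the shared curl_handle passed into reload(), consecutive reloads can then reuse the already-open connection. A hypothetical standalone illustration of that sort key, with made-up URLs:

// Illustration of the sort key used by feed_cmp above: strip the scheme,
// keep the host, reverse it, and compare the reversed strings.
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

static std::string reversed_host(const std::string &url) {
    size_t p = url.find("//");
    p = (p == std::string::npos) ? 0 : p + 2;      // skip "http://" etc., if present
    std::string suff = url.substr(p);
    std::string host = suff.substr(0, suff.find('/'));
    std::reverse(host.begin(), host.end());
    return host;
}

static bool by_reversed_host(const std::string &a, const std::string &b) {
    return reversed_host(a) < reversed_host(b);
}

int main() {
    std::vector<std::string> urls;
    urls.push_back("http://rss.example.com/a.xml");
    urls.push_back("http://news.other.org/feed");
    urls.push_back("http://www.example.com/b.xml");

    std::sort(urls.begin(), urls.end(), by_reversed_host);

    // the two example.com feeds now sit next to each other, so back-to-back
    // fetches on the shared handle can hit the same server
    for (size_t i = 0; i < urls.size(); ++i)
        std::cout << urls[i] << "\n";
    return 0;
}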
4 src/rss_parser.cpp
@@ -20,7 +20,7 @@
namespace newsbeuter {
rss_parser::rss_parser(const std::string& uri, cache * c, configcontainer * cfg, rss_ignores * ii, remote_api * a)
- : my_uri(uri), ch(c), cfgcont(cfg), skip_parsing(false), is_valid(false), ign(ii), api(a) {
+ : my_uri(uri), ch(c), cfgcont(cfg), skip_parsing(false), is_valid(false), ign(ii), api(a), easyhandle(0) {
is_ttrss = cfgcont->get_configvalue("urls-source") == "ttrss";
}
@@ -158,7 +158,7 @@ void rss_parser::download_http(const std::string& uri) {
if (!ign || !ign->matches_lastmodified(uri)) {
ch->fetch_lastmodified(uri, lm, etag);
}
- f = p.parse_url(uri, lm, etag, api, cfgcont->get_configvalue("cookie-cache"));
+ f = p.parse_url(uri, lm, etag, api, cfgcont->get_configvalue("cookie-cache"), easyhandle ? easyhandle->ptr() : 0);
LOG(LOG_DEBUG, "rss_parser::download_http: lm = %d etag = %s", p.get_last_modified(), p.get_etag().c_str());
if (p.get_last_modified() != 0 || p.get_etag().length() > 0) {
LOG(LOG_DEBUG, "rss_parser::download_http: lastmodified old: %d new: %d", lm, p.get_last_modified());
