diff --git a/doxygen/lang/900_release_notes.dox.tmpl b/doxygen/lang/900_release_notes.dox.tmpl index cd08793977..857951f8f8 100644 --- a/doxygen/lang/900_release_notes.dox.tmpl +++ b/doxygen/lang/900_release_notes.dox.tmpl @@ -8,7 +8,9 @@ Bugfix release; see below for more information @subsection qore_1_0_10_bug_fixes Bug Fixes in Qore - - fixed C++ foreign thread registration APIs to allow for TID reuse to provide thread affinity for language + - implemented the @ref Qore::parse_url(string, int) variant to allow for percent decoding in URL strings + (issue 4332) + - fixed C++ foreign thread registration APIs to support TID reuse to provide thread affinity for language modules such as Python and Java with successive rapid transitions into %Qore in callbacks (issue 4331) diff --git a/examples/test/qore/functions/parse_url.qtest b/examples/test/qore/functions/parse_url.qtest index 0f312ab460..c43ea96f4c 100755 --- a/examples/test/qore/functions/parse_url.qtest +++ b/examples/test/qore/functions/parse_url.qtest @@ -12,6 +12,7 @@ public class parseUrlTest inherits QUnit::Test { constructor() : Test("parse_url test", "1.0") { + addTestCase("decode test", \decodeTest()); addTestCase("slash test", \slashTest()); addTestCase("Test 1", \test()); addTestCase("port test", \portTest()); @@ -20,6 +21,22 @@ public class parseUrlTest inherits QUnit::Test { set_return_value(main()); } + decodeTest() { + hash h = parse_url("https://user-%3cname%3e%40example.com%2f" + "token:pass%3f%3aword@qoretechnologies.zendesk.com/api/v2/users%40str", QURL_DECODE); + assertEq("user-@example.com/token", h.username); + assertEq("pass?:word", h.password); + assertEq("qoretechnologies.zendesk.com", h.host); + assertEq("/api/v2/users%40str", h.path); + + h = parse_url("https://user-%3cname%3e%40example.com%2f" + "token:pass%3f%3aword@qoretechnologies.zendesk.com/api/v2/users%40str", QURL_DECODE_PATH); + assertEq("user-@example.com/token", h.username); + assertEq("pass?:word", h.password); + 
assertEq("qoretechnologies.zendesk.com", h.host); + assertEq("/api/v2/users@str", h.path); + } + slashTest() { hash h = parse_url("https://user@example.com/token:api-token@qoretechnologies.zendesk.com/api/v2/users"); assertEq("user@example.com/token", h.username); diff --git a/include/qore/QoreURL.h b/include/qore/QoreURL.h index a4ad3e96e3..a527ba648d 100644 --- a/include/qore/QoreURL.h +++ b/include/qore/QoreURL.h @@ -6,7 +6,7 @@ Qore Programming Language - Copyright (C) 2003 - 2019 Qore Technologies, s.r.o. + Copyright (C) 2003 - 2021 Qore Technologies, s.r.o. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), @@ -35,22 +35,13 @@ #define _QORE_QOREURL_H +#define QURL_KEEP_BRACKETS (1 << 0) +#define QURL_DECODE (1 << 1) +#define QURL_DECODE_PATH (1 << 2) +#define QURL_DECODE_ANY (QURL_DECODE | QURL_DECODE_PATH) + //! helps with parsing URLs and provides access to URL components through Qore data structures class QoreURL { -private: - //! private implementation of the class - struct qore_url_private* priv; - - DLLLOCAL void zero(); - DLLLOCAL void reset(); - DLLLOCAL void parseIntern(const char* url, ExceptionSink* xsink); - - //! this function is not implemented; it is here as a private function in order to prohibit it from being used - DLLLOCAL QoreURL(const QoreURL&); - - //! this function is not implemented; it is here as a private function in order to prohibit it from being used - DLLLOCAL QoreURL& operator=(const QoreURL&); - public: //! creates an empty structure /** @see QoreURL::parse() @@ -59,27 +50,29 @@ class QoreURL { //! parses the URL string passed /** you can check if the URL was valid by calling QoreURL::isValid() after this call - @param url the URL string to parse + @param url the URL string to parse */ DLLEXPORT QoreURL(const char* url); //! 
parses the URL string passed /** you can check if the URL was valid by calling QoreURL::isValid() after this call - @param url the URL string to parse + @param url the URL string to parse */ DLLEXPORT QoreURL(const QoreString* url); //! parses the URL string passed /** you can check if the URL was valid by calling QoreURL::isValid() after this call - @param url the URL string to parse - @param keep_brackets if this argument is true then if the hostname or address is enclosed in square brackets, then the brackets will be included in the \c "host" key output as well + @param url the URL string to parse + @param keep_brackets if this argument is true then if the hostname or address is enclosed in square brackets, + then the brackets will be included in the \c "host" key output as well */ DLLEXPORT QoreURL(const char* url, bool keep_brackets); //! parses the URL string passed /** you can check if the URL was valid by calling QoreURL::isValid() after this call - @param url the URL string to parse - @param keep_brackets if this argument is true then if the hostname or address is enclosed in square brackets, then the brackets will be included in the \c "host" key output as well + @param url the URL string to parse + @param keep_brackets if this argument is true then if the hostname or address is enclosed in square brackets, + then the brackets will be included in the \c "host" key output as well */ DLLEXPORT QoreURL(const QoreString* url, bool keep_brackets); @@ -96,29 +89,94 @@ class QoreURL { */ DLLEXPORT QoreURL(const QoreString* url, bool keep_brackets, ExceptionSink* xsink); + //! parses the URL string passed + /** you can check if the URL was valid by calling QoreURL::isValid() after this call + + @param url the URL string to parse + @param options a bitfield of %Qore URL options + + @since %Qore 1.0.10 + */ + DLLEXPORT QoreURL(const char* url, int options); + + //! 
parses the URL string passed + /** you can check if the URL was valid by calling QoreURL::isValid() after this call + + @param url the URL string to parse + @param options a bitfield of %Qore URL options + + @since %Qore 1.0.10 + */ + DLLEXPORT QoreURL(const QoreString& url, int options = 0); + + //! parses the URL string passed + /** you can check if the URL was valid by calling QoreURL::isValid() after this call + + @param url the URL string to parse + @param options a bitfield of %Qore URL options + + @since %Qore 1.0.10 + */ + DLLEXPORT QoreURL(const std::string& url, int options = 0); + + //! parses the URL string passed + /** you can check if the URL was valid by calling QoreURL::isValid() after this call + + @param xsink for Qore-language exceptions + @param url the URL string to parse + @param options a bitfield of %Qore URL options + + @since %Qore 1.0.10 + */ + DLLEXPORT QoreURL(ExceptionSink* xsink, const char* url, int options); + + //! parses the URL string passed + /** you can check if the URL was valid by calling QoreURL::isValid() after this call + @param xsink for Qore-language exceptions + @param url the URL string to parse + @param options a bitfield of %Qore URL options + + @since %Qore 1.0.10 + */ + DLLEXPORT QoreURL(ExceptionSink* xsink, const QoreString& url, int options = 0); + + //! parses the URL string passed + /** you can check if the URL was valid by calling QoreURL::isValid() after this call + + @param xsink for Qore-language exceptions + @param url the URL string to parse + @param options a bitfield of %Qore URL options + + @since %Qore 1.0.10 + */ + DLLEXPORT QoreURL(ExceptionSink* xsink, const std::string& url, int options = 0); + //! frees all memory and destroys the structure DLLEXPORT ~QoreURL(); //! parses the URL string passed /** If a url was already parsed previously, all memory is freed before parsing the new string. 
- You can check if the URL was valid by calling QoreURL::isValid() after this call + You can check if the URL was valid by calling QoreURL::isValid() after this call + @param url the URL string to parse */ DLLEXPORT int parse(const char* url); //! parses the URL string passed /** If a url was already parsed previously, all memory is freed before parsing the new string. - You can check if the URL was valid by calling QoreURL::isValid() after this call + You can check if the URL was valid by calling QoreURL::isValid() after this call + @param url the URL string to parse */ DLLEXPORT int parse(const QoreString* url); //! parses the URL string passed /** If a url was already parsed previously, all memory is freed before parsing the new string. - You can check if the URL was valid by calling QoreURL::isValid() after this call + You can check if the URL was valid by calling QoreURL::isValid() after this call @param url the URL string to parse - @param keep_brackets if this argument is true then if the hostname or address is enclosed in square brackets, then the brackets will be included in the \c "host" key output as well + @param keep_brackets if this argument is true then if the hostname or address is enclosed in square brackets, + then the brackets will be included in the \c "host" key output as well @return 0 if the URL was parsed successfully, -1 if not */ @@ -126,10 +184,11 @@ class QoreURL { //! parses the URL string passed /** If a url was already parsed previously, all memory is freed before parsing the new string. 
- You can check if the URL was valid by calling QoreURL::isValid() after this call + You can check if the URL was valid by calling QoreURL::isValid() after this call @param url the URL string to parse - @param keep_brackets if this argument is true then if the hostname or address is enclosed in square brackets, then the brackets will be included in the \c "host" key output as well + @param keep_brackets if this argument is true then if the hostname or address is enclosed in square brackets, + then the brackets will be included in the \c "host" key output as well @return 0 if the URL was parsed successfully, -1 if not */ @@ -137,10 +196,11 @@ class QoreURL { //! parses the URL string passed /** If a url was already parsed previously, all memory is freed before parsing the new string. - You can check if the URL was valid by calling QoreURL::isValid() after this call + You can check if the URL was valid by calling QoreURL::isValid() after this call @param url the URL string to parse - @param keep_brackets if this argument is true then if the hostname or address is enclosed in square brackets, then the brackets will be included in the \c "host" key output as well + @param keep_brackets if this argument is true then if the hostname or address is enclosed in square brackets, + then the brackets will be included in the \c "host" key output as well @param xsink for Qore-language exceptions @return 0 if the URL was parsed successfully, -1 if not @@ -151,6 +211,81 @@ class QoreURL { */ DLLEXPORT int parse(const QoreString* url, bool keep_brackets, ExceptionSink* xsink); + //! parses the URL string passed + /** If a url was already parsed previously, all memory is freed before parsing the new string. + + @param url the URL string to parse + @param options a bitfield of %Qore URL options + + @return 0 if the URL was parsed successfully, -1 if not + + @since %Qore 1.0.10 + */ + DLLEXPORT int parse(const char* url, int options); + + //! 
parses the URL string passed + /** If a url was already parsed previously, all memory is freed before parsing the new string. + + @param url the URL string to parse + @param options a bitfield of %Qore URL options + + @return 0 if the URL was parsed successfully, -1 if not + + @since %Qore 1.0.10 + */ + DLLEXPORT int parse(const QoreString& url, int options = 0); + + //! parses the URL string passed + /** If a url was already parsed previously, all memory is freed before parsing the new string. + + @param url the URL string to parse + @param options a bitfield of %Qore URL options + + @return 0 if the URL was parsed successfully, -1 if not + + @since %Qore 1.0.10 + */ + DLLEXPORT int parse(const std::string& url, int options = 0); + + //! parses the URL string passed + /** If a url was already parsed previously, all memory is freed before parsing the new string. + + @param xsink for Qore-language exceptions + @param url the URL string to parse + @param options a bitfield of %Qore URL options + + @return 0 if the URL was parsed successfully, -1 if not + + @since %Qore 1.0.10 + */ + DLLEXPORT int parse(ExceptionSink* xsink, const char* url, int options = 0); + + //! parses the URL string passed + /** If a url was already parsed previously, all memory is freed before parsing the new string. + + @param xsink for Qore-language exceptions + @param url the URL string to parse + @param options a bitfield of %Qore URL options + + @return 0 if the URL was parsed successfully, -1 if not + + @since %Qore 1.0.10 + */ + DLLEXPORT int parse(ExceptionSink* xsink, const QoreString& url, int options = 0); + + //! parses the URL string passed + /** If a url was already parsed previously, all memory is freed before parsing the new string. 
+ + @param xsink for Qore-language exceptions + @param url the URL string to parse + @param options a bitfield of %Qore URL options + + @return 0 if the URL was parsed successfully, -1 if not + + @since %Qore 1.0.10 + */ + DLLEXPORT int parse(ExceptionSink* xsink, const std::string& url, int options = 0); + //! returns true if the URL string parsed is valid /** @return true if the URL string parsed is valid */ @@ -225,6 +360,20 @@ class QoreURL { @return a pointer to the hostname (0 if none present), caller owns the memory returned */ DLLEXPORT char* take_host(); + +private: + //! private implementation of the class + struct qore_url_private* priv; + + DLLLOCAL void zero(); + DLLLOCAL void reset(); + DLLLOCAL void parseIntern(const char* url, ExceptionSink* xsink); + + //! this function is not implemented; it is here as a private function in order to prohibit it from being used + DLLLOCAL QoreURL(const QoreURL&); + + //! this function is not implemented; it is here as a private function in order to prohibit it from being used + DLLLOCAL QoreURL& operator=(const QoreURL&); }; #endif diff --git a/lib/QoreURL.cpp b/lib/QoreURL.cpp index ed7a18d5e1..dd5f738798 100644 --- a/lib/QoreURL.cpp +++ b/lib/QoreURL.cpp @@ -4,7 +4,7 @@ Qore Programming Language - Copyright (C) 2003 - 2020 Qore Technologies, s.r.o. + Copyright (C) 2003 - 2021 Qore Technologies, s.r.o. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), @@ -39,6 +39,80 @@ #include struct qore_url_private { +public: + QoreStringNode* protocol, *path, *username, *password, *host; + int port; + + DLLLOCAL qore_url_private() { + zero(); + } + + DLLLOCAL ~qore_url_private() { + reset(); + } + + DLLLOCAL void zero() { + protocol = path = username = password = host = 0; + port = 0; + } + + DLLLOCAL void reset() { + if (protocol) + protocol->deref(); + if (path) + path->deref(); + if (username) + username->deref(); + if (password) + password->deref(); + if (host) + host->deref(); + } + + DLLLOCAL int parse(const char* url, int options = 0, ExceptionSink* xsink = nullptr) { + reset(); + zero(); + parse_intern(url, options, xsink); + if (xsink && !*xsink && !isValid()) { + xsink->raiseException("PARSE-URL-ERROR", "URL '%s' cannot be parsed", url); + } + return isValid() ? 0 : -1; + } + + DLLLOCAL bool isValid() const { + return (host && host->strlen()) || (path && path->strlen()); + } + + // destructive + DLLLOCAL QoreHashNode* getHash() { + QoreHashNode* h = new QoreHashNode(hashdeclUrlInfo, nullptr); + qore_hash_private* ph = qore_hash_private::get(*h); + if (protocol) { + ph->setKeyValueIntern("protocol", protocol); + protocol = nullptr; + } + if (path) { + ph->setKeyValueIntern("path", path); + path = nullptr; + } + if (username) { + ph->setKeyValueIntern("username", username); + username = nullptr; + } + if (password) { + ph->setKeyValueIntern("password", password); + password = nullptr; + } + if (host) { + ph->setKeyValueIntern("host", host); + host = nullptr; + } + if (port) + ph->setKeyValueIntern("port", port); + + return h; + } + private: DLLLOCAL void invalidate() { if (host) { @@ -51,9 +125,11 @@ struct qore_url_private { } } - DLLLOCAL void parse_intern(const char* buf, bool keep_brackets, ExceptionSink* xsink) { - if (!buf || !buf[0]) + DLLLOCAL void 
parse_intern(const char* buf, int options, ExceptionSink* xsink) { + if (!buf || !buf[0]) { return; + } + bool keep_brackets = options & QURL_KEEP_BRACKETS; printd(5, "QoreURL::parse_intern(%s)\n", buf); @@ -67,7 +143,7 @@ struct qore_url_private { protocol = new QoreStringNode(sbuf.c_str(), protocol_separator); // convert to lower case protocol->tolwr(); - //printd(5, "QoreURL::parse_intern protocol: %s\n", protocol->getBuffer()); + //printd(5, "QoreURL::parse_intern protocol: %s\n", protocol->c_str()); sbuf = sbuf.substr(protocol_separator + 3); } @@ -77,6 +153,9 @@ struct qore_url_private { || (sbuf[0] == '\\' && sbuf[1] == '\\')) && sbuf.find('@') == std::string::npos) { path = new QoreStringNode(sbuf.c_str()); + if (options & QURL_DECODE_ANY) { + decodeStrings(options, xsink); + } return; } @@ -95,7 +174,7 @@ struct qore_url_private { if (first_slash != std::string::npos) { // get pathname if not at EOS path = new QoreStringNode(sbuf.c_str() + first_slash); - //printd(5, "QoreURL::parse_intern path: '%s'\n", path->getBuffer()); + //printd(5, "QoreURL::parse_intern path: '%s'\n", path->c_str()); // get copy of hostname string for localized searching and invasive parsing sbuf = sbuf.substr(0, first_slash); //printd(5, "QoreURL::sbuf: '%s' size: %d\n", sbuf.c_str(), sbuf.size()); @@ -194,85 +273,42 @@ struct qore_url_private { host = new QoreStringNode(sbuf.c_str()); } } - } - - static void doInvalidPortException(ExceptionSink* xsink, const char* buf) { - xsink->raiseException("PARSE-URL-ERROR", - "URL '%s' has an invalid port value; it must be between 0 and 65535", - buf); - } - -public: - QoreStringNode* protocol, *path, *username, *password, *host; - int port; - - DLLLOCAL qore_url_private() { - zero(); - } - DLLLOCAL ~qore_url_private() { - reset(); - } - - DLLLOCAL void zero() { - protocol = path = username = password = host = 0; - port = 0; - } - - DLLLOCAL void reset() { - if (protocol) - protocol->deref(); - if (path) - path->deref(); - if (username) - 
username->deref(); - if (password) - password->deref(); - if (host) - host->deref(); - } - - DLLLOCAL int parse(const char* url, bool keep_brackets = false, ExceptionSink* xsink = nullptr) { - reset(); - zero(); - parse_intern(url, keep_brackets, xsink); - if (xsink && !*xsink && !isValid()) - xsink->raiseException("PARSE-URL-ERROR", "URL '%s' cannot be parsed", url); - return isValid() ? 0 : -1; - } - - DLLLOCAL bool isValid() const { - return (host && host->strlen()) || (path && path->strlen()); + // perform percent decoding, if required + if (options & QURL_DECODE_ANY) { + decodeStrings(options, xsink); + } } - // destructive - DLLLOCAL QoreHashNode* getHash() { - QoreHashNode* h = new QoreHashNode(hashdeclUrlInfo, nullptr); - qore_hash_private* ph = qore_hash_private::get(*h); - if (protocol) { - ph->setKeyValueIntern("protocol", protocol); - protocol = nullptr; + DLLLOCAL void decodeStrings(int options, ExceptionSink* xsink) { + if (username && !username->empty()) { + SimpleRefHolder holder(username); + username = decodeString(username, xsink); } - if (path) { - ph->setKeyValueIntern("path", path); - path = nullptr; + if (password && !password->empty()) { + SimpleRefHolder holder(password); + password = decodeString(password, xsink); } - if (username) { - ph->setKeyValueIntern("username", username); - username = nullptr; + if (host && !host->empty()) { + SimpleRefHolder holder(host); + host = decodeString(host, xsink); } - if (password) { - ph->setKeyValueIntern("password", password); - password = nullptr; + if ((options & QURL_DECODE_PATH) && path && !path->empty()) { + SimpleRefHolder holder(path); + path = decodeString(path, xsink); } - if (host) { - ph->setKeyValueIntern("host", host); - host = nullptr; - } - if (port) - ph->setKeyValueIntern("port", port); + } - return h; + static QoreStringNode* decodeString(QoreStringNode* str, ExceptionSink* xsink) { + assert(xsink); + QoreStringNodeHolder decoded_str(new QoreStringNode(QCS_UTF8)); + 
decoded_str->concatDecodeUrl(*str, xsink); + return *xsink ? nullptr : decoded_str.release(); + } + + static void doInvalidPortException(ExceptionSink* xsink, const char* buf) { + xsink->raiseException("PARSE-URL-ERROR", "URL '%s' has an invalid port value; it must be between 0 and 65535", + buf); } }; @@ -280,97 +316,154 @@ QoreURL::QoreURL() : priv(new qore_url_private) { } QoreURL::QoreURL(const char* url) : priv(new qore_url_private) { - parse(url); + parse(url); } QoreURL::QoreURL(const QoreString* url) : priv(new qore_url_private) { - parse(url->getBuffer()); + parse(url->c_str()); } QoreURL::QoreURL(const char* url, bool keep_brackets) : priv(new qore_url_private) { - parse(url, keep_brackets); + parse(url, keep_brackets ? QURL_KEEP_BRACKETS : 0); } QoreURL::QoreURL(const QoreString* url, bool keep_brackets) : priv(new qore_url_private) { - parse(url->getBuffer(), keep_brackets); + parse(url->c_str(), keep_brackets ? QURL_KEEP_BRACKETS : 0); } QoreURL::QoreURL(const QoreString* url, bool keep_brackets, ExceptionSink* xsink) : priv(new qore_url_private) { - parse(url, keep_brackets, xsink); + parse(url, keep_brackets ? 
QURL_KEEP_BRACKETS : 0, xsink); +} + +QoreURL::QoreURL(const char* url, int options) : priv(new qore_url_private) { + parse(url, options); +} + +QoreURL::QoreURL(const QoreString& url, int options) : priv(new qore_url_private) { + parse(url, options); +} + +QoreURL::QoreURL(const std::string& url, int options) : priv(new qore_url_private) { + parse(url, options); +} + +QoreURL::QoreURL(ExceptionSink* xsink, const char* url, int options) : priv(new qore_url_private) { + parse(xsink, url, options); +} + +QoreURL::QoreURL(ExceptionSink* xsink, const QoreString& url, int options) : priv(new qore_url_private) { + parse(xsink, url, options); +} + +QoreURL::QoreURL(ExceptionSink* xsink, const std::string& url, int options) : priv(new qore_url_private) { + parse(xsink, url, options); } QoreURL::~QoreURL() { - delete priv; + delete priv; } int QoreURL::parse(const char* url) { - return priv->parse(url); + return priv->parse(url); } int QoreURL::parse(const QoreString* url) { - return priv->parse(url->getBuffer()); + return priv->parse(url->c_str()); } int QoreURL::parse(const char* url, bool keep_brackets) { - return priv->parse(url, keep_brackets); + return priv->parse(url, keep_brackets); } int QoreURL::parse(const QoreString* url, bool keep_brackets) { - return priv->parse(url->getBuffer(), keep_brackets); + return priv->parse(url->c_str(), keep_brackets); } int QoreURL::parse(const QoreString* url, bool keep_brackets, ExceptionSink* xsink) { - TempEncodingHelper tmp(url, QCS_UTF8, xsink); - if (*xsink) - return -1; - return priv->parse(tmp->c_str(), keep_brackets, xsink); + TempEncodingHelper tmp(url, QCS_UTF8, xsink); + if (*xsink) { + return -1; + } + return priv->parse(tmp->c_str(), keep_brackets, xsink); +} + +int QoreURL::parse(const char* url, int options) { + return priv->parse(url, options); +} + +int QoreURL::parse(const QoreString& url, int options) { + return priv->parse(url.c_str(), options); +} + +int QoreURL::parse(const std::string& url, int options) { + 
return priv->parse(url.c_str(), options); +} + +int QoreURL::parse(ExceptionSink* xsink, const char* url, int options) { + return priv->parse(url, options, xsink); +} + +int QoreURL::parse(ExceptionSink* xsink, const QoreString& url, int options) { + TempEncodingHelper tmp(url, QCS_UTF8, xsink); + if (*xsink) { + return -1; + } + return priv->parse(tmp->c_str(), options, xsink); +} + +int QoreURL::parse(ExceptionSink* xsink, const std::string& url, int options) { + TempEncodingHelper tmp(url, QCS_UTF8, xsink); + if (*xsink) { + return -1; + } + return priv->parse(tmp->c_str(), options, xsink); } bool QoreURL::isValid() const { - return (priv->host && priv->host->strlen()) || (priv->path && priv->path->strlen()); + return (priv->host && priv->host->strlen()) || (priv->path && priv->path->strlen()); } const QoreString* QoreURL::getProtocol() const { - return priv->protocol; + return priv->protocol; } const QoreString* QoreURL::getUserName() const { - return priv->username; + return priv->username; } const QoreString* QoreURL::getPassword() const { - return priv->password; + return priv->password; } const QoreString* QoreURL::getPath() const { - return priv->path; + return priv->path; } const QoreString* QoreURL::getHost() const { - return priv->host; + return priv->host; } int QoreURL::getPort() const { - return priv->port; + return priv->port; } // destructive QoreHashNode* QoreURL::getHash() { - return priv->getHash(); + return priv->getHash(); } char* QoreURL::take_path() { - return priv->path ? priv->path->giveBuffer() : 0; + return priv->path ? priv->path->giveBuffer() : nullptr; } char* QoreURL::take_username() { - return priv->username ? priv->username->giveBuffer() : 0; + return priv->username ? priv->username->giveBuffer() : nullptr; } char* QoreURL::take_password() { - return priv->password ? priv->password->giveBuffer() : 0; + return priv->password ? priv->password->giveBuffer() : nullptr; } char* QoreURL::take_host() { - return priv->host ? 
priv->host->giveBuffer() : 0; + return priv->host ? priv->host->giveBuffer() : nullptr; } diff --git a/lib/ql_misc.qpp b/lib/ql_misc.qpp index d2e7b6579f..9ff85cb36c 100644 --- a/lib/ql_misc.qpp +++ b/lib/ql_misc.qpp @@ -4,7 +4,7 @@ Qore Programming Language - Copyright (C) 2003 - 2019 Qore Technologies, s.r.o. + Copyright (C) 2003 - 2021 Qore Technologies, s.r.o. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), @@ -196,6 +196,24 @@ static const char* tlist[] = { "OPTION", "ALGORITHM", "FUNCTION" }; #define SIGXRES 0 #endif +/** @defgroup parse_url_options URL Parsing Options + + @since %Qore 1.0.10 + */ +//@{ +//! If the hostname or address is enclosed in square brackets, the brackets will be included in the \c "host" key +/** Square brackets are used by some %Qore methods to denote IPv6 addresses; for example see + @ref Qore::Socket::connect() "Socket::connect()" +*/ +const QURL_KEEP_BRACKETS = QURL_KEEP_BRACKETS; + +//! Perform percent decoding on the \c "host", \c "username", and \c "password" fields +const QURL_DECODE = QURL_DECODE; + +//! Decodes all fields like @ref QURL_DECODE plus also performs percent decoding on the \c "path" field +const QURL_DECODE_PATH = QURL_DECODE_PATH; +//@} + //! a hash describing a parsed URL /** @since %Qore 0.9.3 */ @@ -1000,30 +1018,48 @@ string get_class_name(object obj) [flags=CONSTANT] { return new QoreStringNode(obj->getClass()->getName()); } -//! 
Parses a URL string and returns a hash of the components; throws an exception if the string cannot be parsed as a URL -/** @param url the URL to parse (ex: \c "https://user:pass@host:8080/path"); either a hostname or path is required at a minimum or a \c PARSE-URL-ERROR exception is raised - @param keep_brackets if this argument is true then if the hostname or address is enclosed in square brackets, then the brackets will be included in the \c "host" key output as well; square brackets are used by some %Qore methods to denote IPv6 addresses; for example see @ref Qore::Socket::connect() "Socket::connect()" +//! Parses a URL string and returns a hash of the components +/** Throws an exception if the string cannot be parsed as a URL; does not perform percent decoding - @return a hash of the components of the URL with the following keys (if data in the URL is present; note that at least either the \c "host" or the \c "path" keys will always be returned if no \c PARSE-URL-ERROR is raised): + @param url the URL to parse (ex: \c "https://user:pass@host:8080/path"); either a hostname or path is required at + a minimum or a \c PARSE-URL-ERROR exception is raised + @param keep_brackets if this argument is true then if the hostname or address is enclosed in square brackets, then + the brackets will be included in the \c "host" key output as well; square brackets are used by some %Qore methods + to denote IPv6 addresses; for example see @ref Qore::Socket::connect() "Socket::connect()" + + @return a hash of the components of the URL with the following keys (if data in the URL is present; note that at + least either the \c "host" or the \c "path" keys will always be returned if no \c PARSE-URL-ERROR is raised): - \c protocol: the scheme in the URL (ex: \c "http") - - \c path: any path given in the URL; the path will be prefixed by \c "/" if a hostname is found in the URL argument string, otherwise it will not if it was not given as such in the argument string - - \c username: 
any username given in the URL - - \c password: any password given in the URL - - \c host: any hostname given in the URL; note that this key will be given if no other information can be found in the URL argument and the URL argument string has no \c "/" characters; depending on the usage context for this function, this may actually be a filename + - \c path: any path given in the URL; the path will be prefixed by \c "/" if a hostname is found in the URL + argument string, otherwise it will not if it was not given as such in the argument string - not subjected to + percent decoding (see note below) + - \c username: any username given in the URL - not subjected to percent decoding (see note below) + - \c password: any password given in the URL - not subjected to percent decoding (see note below) + - \c host: any hostname given in the URL; note that this key will be given if no other information can be found in + the URL argument and the URL argument string has no \c "/" characters; depending on the usage context for this + function, this may actually be a filename - \c port: any port number given in the URL @par Example: @code{.py} -hash h = parse_url(url_string); +hash h = parse_url(url_string, True); @endcode @throw PARSE-URL-ERROR The URL string given could not be parsed - @note URLs with UNIX sockets are generally supported in Qore with the following syntax: scheme://socket=url_encoded_path/path, where url_encoded_path is a path with URL-encoding as performed by @ref encode_url() "encode_url(string, True)"; for example: \c "http://socket=%2ftmp%socket-dir%2fsocket-file-1/url/path"; this allows a filesystem path to be used in the host portion of the URL and for the URL to include a URL path as well. 
+ @note + - URLs with UNIX sockets are generally supported in Qore with the following syntax: + scheme://socket=url_encoded_path/path, where url_encoded_path is a + path with URL-encoding as performed by @ref encode_url() "encode_url(string, True)"; for example: + \c "http://socket=%2ftmp%2fsocket-dir%2fsocket-file-1/url/path"; this allows a filesystem path to be used in the + host portion of the URL and for the URL to include a URL path as well. + - none of the string fields returned here are subjected to percent decoding; to decode percent encoding, call + @ref Qore::decode_url() "decode_url()" on the strings manually or use the @ref parse_url(string, int) variant + with the @ref QURL_DECODE or @ref QURL_DECODE_PATH option @see parseURL() for a version of this function that does not throw exceptions if the URL cannot be parsed */ -hash parse_url(string url, bool keep_brackets = False) { +hash parse_url(string url, bool keep_brackets) { QoreURL qurl(url, keep_brackets, xsink); if (*xsink) return QoreValue(); @@ -1031,16 +1067,70 @@ hash parse_url(string url, bool keep_brackets = False) { return qurl.getHash(); } -//! Parses a URL string and returns a hash of the components; if the URL cannot be parsed then @ref nothing is returned -/** @param url the URL to parse (ex: \c "https://user:pass@host:8080/path"); either a hostname or path is required at a minimum or the function will return @ref nothing - @param keep_brackets if this argument is true then if the hostname or address is enclosed in square brackets, then the brackets will be included in the \c "host" key output as well; square brackets are used by some %Qore methods to denote IPv6 addresses; for example see @ref Qore::Socket::connect() "Socket::connect()" +//!
Parses a URL string and returns a hash of the components +/** Throws an exception if the string cannot be parsed as a URL + + @param url the URL to parse (ex: \c "https://user:pass@host:8080/path"); either a hostname or path is required at + a minimum or a \c PARSE-URL-ERROR exception is raised + @param options a bitfield of @ref parse_url_options - @return a hash of the components of the URL with the following keys (if data in the URL is present; note that at least either the \c "host" or the \c "path" keys will always be returned if a hash is returned): + @return a hash of the components of the URL with the following keys (if data in the URL is present; note that at + least either the \c "host" or the \c "path" keys will always be returned if no \c PARSE-URL-ERROR is raised): - \c protocol: the scheme in the URL (ex: \c "http") - - \c path: any path given in the URL; the path will be prefixed by \c "/" if a hostname is found in the URL argument string, otherwise it will not if it was not given as such in the argument string - - \c username: any username given in the URL - - \c password: any password given in the URL - - \c host: any hostname given in the URL; note that this key will be given if no other information can be found in the URL argument and the URL argument string has no \c "/" characters; depending on the usage context for this function, this may actually be a filename + - \c path: any path given in the URL; the path will be prefixed by \c "/" if a hostname is found in the URL + argument string, otherwise it will not if it was not given as such in the argument string; subject to percent + decoding if @ref QURL_DECODE_PATH passed in \a options + - \c username: any username given in the URL; subject to percent decoding if @ref QURL_DECODE or + @ref QURL_DECODE_PATH passed in \a options + - \c password: any password given in the URL; subject to percent decoding if @ref QURL_DECODE or + @ref QURL_DECODE_PATH passed in \a options + - \c host: any hostname given
in the URL; note that this key will be given if no other information can be found in + the URL argument and the URL argument string has no \c "/" characters; depending on the usage context for this + function, this may actually be a filename; subject to percent decoding if @ref QURL_DECODE or + @ref QURL_DECODE_PATH passed in \a options + - \c port: any port number given in the URL + + @par Example: + @code{.py} +hash h = parse_url(url_string, QURL_DECODE); + @endcode + + @throw PARSE-URL-ERROR The URL string given could not be parsed + + @note + - URLs with UNIX sockets are generally supported in Qore with the following syntax: + scheme://socket=url_encoded_path/path, where url_encoded_path is a + path with URL-encoding as performed by @ref encode_url() "encode_url(string, True)"; for example: + \c "http://socket=%2ftmp%2fsocket-dir%2fsocket-file-1/url/path"; this allows a filesystem path to be used in the + host portion of the URL and for the URL to include a URL path as well. + + @see + - parseURL() for a version of this function that does not throw exceptions if the URL cannot be parsed +*/ +hash parse_url(string url, *int options) { + QoreURL qurl(xsink, url, options); + return qurl.isValid() ? qurl.getHash() : QoreValue(); +} + +//!
Parses a URL string and returns a hash of the components; if the URL cannot be parsed then @ref nothing is returned +/** @param url the URL to parse (ex: \c "https://user:pass@host:8080/path"); either a hostname or path is required at + a minimum or the function will return @ref nothing + @param keep_brackets if this argument is true then if the hostname or address is enclosed in square brackets, then + the brackets will be included in the \c "host" key output as well; square brackets are used by some %Qore methods + to denote IPv6 addresses; for example see @ref Qore::Socket::connect() "Socket::connect()" + + @return a hash of the components of the URL with the following keys (if data in the URL is present; note that at + least either the \c "host" or the \c "path" keys will always be returned if a hash is returned): + - \c protocol: the scheme in the URL (ex: \c "http") + - \c path: any path given in the URL; the path will be prefixed by \c "/" if a hostname is found in the URL + argument string, otherwise it will not if it was not given as such in the argument string - not subjected to + percent decoding (see note below) + - \c username: any username given in the URL - not subjected to percent decoding (see note below) + - \c password: any password given in the URL - not subjected to percent decoding (see note below) + - \c host: any hostname given in the URL; note that this key will be given if no other information can be found in + the URL argument and the URL argument string has no \c "/" characters; depending on the usage context for this + function, this may actually be a filename - not subjected to percent decoding (see note below) + - \c port: any port number given in the URL @par Example: @@ -1048,7 +1138,15 @@ hash parse_url(string url, bool keep_brackets = False) { *hash h = parseURL(url_string); @endcode - @note URLs with UNIX sockets are generally supported in Qore with the following syntax: scheme://socket=url_encoded_path/path, where 
url_encoded_path is a path with URL-encoding as performed by @ref encode_url() "encode_url(string, True)"; for example: \c "http://socket=%2ftmp%socket-dir%2fsocket-file-1/url/path"; this allows a filesystem path to be used in the host portion of the URL and for the URL to include a URL path as well. + @note + - URLs with UNIX sockets are generally supported in Qore with the following syntax: + scheme://socket=url_encoded_path/path, where url_encoded_path is a + path with URL-encoding as performed by @ref encode_url() "encode_url(string, True)"; for example: + \c "http://socket=%2ftmp%2fsocket-dir%2fsocket-file-1/url/path"; this allows a filesystem path to be used in the + host portion of the URL and for the URL to include a URL path as well. + - none of the string fields returned here are subjected to percent decoding; to decode percent encoding, call + @ref Qore::decode_url() "decode_url()" on the strings manually or use the @ref Qore::parse_url(string, int) variant + with the @ref QURL_DECODE or @ref QURL_DECODE_PATH options @see parse_url() for a version of this function that throws exceptions if the URL cannot be parsed */ diff --git a/qlib/ConnectionProvider/InvalidConnection.qc b/qlib/ConnectionProvider/InvalidConnection.qc index 41440aeddd..704131f9ab 100644 --- a/qlib/ConnectionProvider/InvalidConnection.qc +++ b/qlib/ConnectionProvider/InvalidConnection.qc @@ -1,7 +1,7 @@ # -*- mode: qore; indent-tabs-mode: nil -*- # Qore InvalidConnection class definition -/* InvalidConnection.qc Copyright 2016 - 2019 Qore Technologies, s.r.o. +/* InvalidConnection.qc Copyright 2016 - 2021 Qore Technologies, s.r.o.
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), @@ -41,7 +41,7 @@ public namespace ConnectionProvider { */ deprecated constructor(string n_name, string n_desc, string n_url, *hash n_opts, string n_error, hash urlh) - : AbstractConnection(n_name, n_desc, n_url, {"monitor": False}, n_opts ?? {}) { + : AbstractConnection(n_name, n_desc, n_url, {"monitor": False}, n_opts ?? {}) { error = n_error; } @@ -59,7 +59,7 @@ public namespace ConnectionProvider { - \c error a custom error string */ constructor(string name, string description, string url, hash attributes = {}, hash options = {}) - : AbstractConnection(name, description, url, attributes, options) { + : AbstractConnection(name, description, url, attributes, options) { if (attributes.error.val()) { self.error = attributes.error; } @@ -78,7 +78,8 @@ public namespace ConnectionProvider { #! throws an exception because the object is invalid private object getImpl(bool connect = True, *hash rtopts) { - throw "INVALID-CONNECTION", sprintf("connection %y (type %y url %y) is invalid and therefore could not be loaded: %s", name, urlh.protocol, url, error); + throw "INVALID-CONNECTION", sprintf("connection %y (type %y url %y) is invalid and therefore could not " + "be loaded: %s", name, urlh.protocol, url, error); } #! returns \c "invalid"