#  Python URI

From an interview question for Quora:

http://www.businessinsider.com/heres-the-test-you-have-to-pass-to-work-at-quora-silicon-valleys-hot-new-86-million-startup-2010-4?IR=T

In Python, write a class or module with a bunch of functions for manipulating a URI. For this exercise, pretend that the urllib, urllib2, and urlparse modules don't exist. You can use other standard Python modules, such as re, for this. The focus of the class or module you write should be around usage on the web, so you'll want to have things that make it easier to update or append a querystring var, get the scheme for a URI, etc., and you may want to include ways to figure out the domain for a URL (British-site.co.uk, us-site.com, etc.)

We're looking for correctness (you'll probably want to read the relevant RFCs; make sure you handle edge cases), and elegance of your API (does it let you do the things you commonly want to do with URIs in a really straightforward way?), as well as coding style. If you don't know Python already, then this is also an exercise in learning new things quickly and well. Your code should be well-commented and documented and conform to the guidelines in the PEP 8 Style Guide for Python Code. Include some instructions and examples of usage in your documentation. You may also want to write unit tests.


In [97]:
import re


class Uri(object):
    """
    Uniform resource identifier identifying an
    abstract or physical resource.

    The uri is held as one string (``uri_string``).  Every property parses
    that string when read and every setter rewrites it, so ``str(uri)``
    always reflects the latest changes.

    Example usage:

    >>> uri = Uri('http://user:secret@www.website.com/path?page=1#top')
    >>> uri.scheme
    'http'
    >>> uri.host
    'www.website.com'
    >>> uri.query
    {'page': '1'}
    >>> uri.scheme = 'https'
    >>> uri.fragment = ''
    >>> str(uri)
    'https://user:secret@www.website.com/path?page=1'

    Assigning an empty string (or None) to a component removes that
    component together with its delimiter.

    See https://tools.ietf.org/html/rfc3986
    """

    # Dunder Methods...

    def __init__(self, uri_string):
        """
        Construct a new uri.

        :param uri_string: The string representation of the uri.
        :type uri_string: str
        """
        self.uri_string = uri_string

    def __str__(self):
        """
        Get the string representation of this uri.

        :return: String representation of this uri.
        :rtype: str
        """
        return self.uri_string

    def __repr__(self):
        """
        Get an unambiguous representation of this uri for debugging.

        :return: Text of the form ``Uri('...')``.
        :rtype: str
        """
        return '%s(%r)' % (type(self).__name__, self.uri_string)

    # Public Properties...

    @property
    def scheme(self):
        """
        Get the uri scheme, this is the first part of the uri,
        for example http, ftp, file.

        :return: The uri scheme, or '' when the uri has none.
        :rtype: str
        """
        return self._get_uri_part(2)

    @scheme.setter
    def scheme(self, value):
        """
        Set the uri scheme, this is the first part of the uri,
        for example http, ftp, file.

        :param value: The uri scheme to set, '' removes the scheme.
        :type value: str
        """
        self._set_uri_part(2, value)

    @property
    def authority(self):
        """
        Get the authority, this is the optional userinfo followed by
        the host (the text between '//' and the path).

        :return: The authority, or '' when the uri has none.
        :rtype: str
        """
        return self._get_uri_part(4)

    @authority.setter
    def authority(self, value):
        """
        Set the authority, this is the optional userinfo followed by
        the host.

        :param value: The authority to set, '' removes the authority.
        :type value: str
        """
        self._set_uri_part(4, value)

    @property
    def username(self):
        """
        Get the username part of the userinfo.

        :return: The username, or '' when there is no userinfo.
        :rtype: str
        """
        return self.userinfo.partition(':')[0]

    @username.setter
    def username(self, value):
        """
        Set the username part of the userinfo, keeping any password.

        The value is inserted verbatim, it is not percent-encoded.

        :param value: The username to set.
        :type value: str
        """
        # keep the original ':' separator and password untouched
        _, separator, password = self.userinfo.partition(':')
        self.userinfo = (value or '') + separator + password

    @property
    def password(self):
        """
        Get the password part of the userinfo.

        Everything after the first ':' is the password, so a password
        which itself contains ':' is returned in full.

        :return: The password, or '' when none is present.
        :rtype: str
        """
        return self.userinfo.partition(':')[2]

    @password.setter
    def password(self, value):
        """
        Set the password part of the userinfo, keeping the username.

        The value is inserted verbatim, it is not percent-encoded.

        :param value: The password to set, '' removes the password.
        :type value: str
        """
        username = self.username
        self.userinfo = username + ':' + value if value else username

    @property
    def userinfo(self):
        """
        Get the userinfo in the form username:password
        (password is optional).

        :return: The userinfo, or '' when the authority has none.
        :rtype: str
        """
        # the last '@' delimits the userinfo; rpartition yields '' for
        # the first element when the authority contains no '@' at all
        return self.authority.rpartition('@')[0]

    @userinfo.setter
    def userinfo(self, value):
        """
        Set the userinfo in the form username:password
        (password is optional).

        :param value: The userinfo to set, '' removes the userinfo.
        :type value: str
        """
        host = self.host
        self.authority = value + '@' + host if value else host

    @property
    def host(self):
        """
        Get the host part of the authority, this is everything after
        the userinfo and therefore includes any ':port' suffix.

        :return: The host part of the authority.
        :rtype: str
        """
        return self.authority.rpartition('@')[2]

    @host.setter
    def host(self, value):
        """
        Set the host part of the authority (including any ':port'
        suffix), keeping the userinfo.

        :param value: The host to set.
        :type value: str
        """
        userinfo = self.userinfo
        host = value or ''
        self.authority = userinfo + '@' + host if userinfo else host

    @property
    def path(self):
        """
        Get the path of the resource.

        :return: The path, or '' when the uri has none.
        :rtype: str
        """
        # TODO : Special case for file uri
        return self._get_uri_part(5)

    @path.setter
    def path(self, value):
        """
        Set the path of the resource.

        When the uri has an authority a leading '/' is added to a
        non-empty path that lacks one, otherwise the path would merge
        into the authority.

        :param value: The path to set.
        :type value: str
        """
        # TODO : Special case for file uri
        self._set_uri_part(5, value)

    @property
    def query(self):
        """
        Get the decoded query parameters of the uri as a dictionary,
        for example:

        >>> Uri('http://www.web.com?param1=value1;param2=value2').query
        {'param1': 'value1', 'param2': 'value2'}

        Parameters may be separated by ';' or '&'.  A parameter without
        '=' maps to '', and when a name is repeated the last value wins.
        Percent-escapes are decoded as UTF-8; '+' is left untouched.

        :return: The query parameters, empty when the uri has no query.
        :rtype: dict
        """
        parameters = {}
        for pair in self._separator_regex.split(self._get_uri_part(7)):
            if not pair:
                continue
            # split on the first '=' only so values may contain '='
            name, _, value = pair.partition('=')
            parameters[self._percent_decode(name)] = \
                self._percent_decode(value)
        return parameters

    @query.setter
    def query(self, query_parameters):
        """
        Set query parameters of the uri from a dictionary, the names and
        values are percent-encoded to ensure a valid uri results even
        when they hold characters with a special meaning, for example:

        >>> uri = Uri('http://www.web.com')
        >>> uri.query = {'param1': 'val/ue1'}
        >>> str(uri)
        'http://www.web.com?param1=val%2Fue1'

        Parameters are joined with ';'.  An empty dictionary removes the
        query from the uri.

        :param query_parameters: Mapping of parameter name to value.
        :type query_parameters: dict
        """
        encode = self._percent_encode
        value = ';'.join(
            '='.join((encode(name), encode(parameter)))
            for name, parameter in query_parameters.items()
        )
        self._set_uri_part(7, value)

    @property
    def fragment(self):
        """
        Get the fragment part of the uri.

        :return: The fragment, or '' when the uri has none.
        :rtype: str
        """
        return self._get_uri_part(9)

    @fragment.setter
    def fragment(self, value):
        """
        Set the fragment part of the uri.

        :param value: The fragment, '' removes the fragment.
        :type value: str
        """
        self._set_uri_part(9, value)

    # Protected Class Members...

    # The generic uri regex from RFC 3986 appendix B.  The groups used
    # are 2 (scheme), 4 (authority), 5 (path), 7 (query), 9 (fragment).
    # DOTALL lets the final group consume any remaining text, so the
    # pattern always matches the complete string.
    _regex = re.compile(
        r'^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?',
        re.DOTALL
    )

    # separators accepted between query parameters
    _separator_regex = re.compile(r'[;&]')

    # a run of one or more consecutive percent-escapes
    _escape_regex = re.compile(r'(?:%[0-9A-Fa-f]{2})+')

    # characters which never need percent-encoding (RFC 3986 section 2.3)
    _unreserved = frozenset(
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        'abcdefghijklmnopqrstuvwxyz'
        '0123456789-._~'
    )

    # Protected Class Methods...

    @classmethod
    def _percent_encode(cls, text):
        """
        Percent-encode every character which is not unreserved.

        :param text: The text to encode, non-string values are
            converted with str() first.
        :return: The encoded text.
        :rtype: str
        """
        if not isinstance(text, (bytes, type(u''))):
            text = str(text)
        # escapes are defined on octets, so work on the UTF-8 bytes
        data = text if isinstance(text, bytes) else text.encode('utf-8')
        encoded = []
        for octet in bytearray(data):
            char = chr(octet)
            if char in cls._unreserved:
                encoded.append(char)
            else:
                encoded.append('%%%02X' % octet)
        return ''.join(encoded)

    @classmethod
    def _percent_decode(cls, text):
        """
        Replace percent-escapes in the text with the characters they
        stand for, malformed escapes are left as they are.

        :param text: The text to decode.
        :type text: str
        :return: The decoded text.
        :rtype: str
        """
        def decode_run(match):
            run = match.group(0)
            octets = bytes(bytearray(
                int(pair, 16) for pair in run[1:].split('%')
            ))
            if isinstance(run, bytes):
                # byte string input (Python 2 str) stays a byte string
                return octets
            # consecutive escapes are decoded together so that
            # multi-byte UTF-8 sequences survive
            return octets.decode('utf-8', 'replace')

        return cls._escape_regex.sub(decode_run, text)

    # Protected Instance Methods...

    def _get_uri_part(self, index):
        """
        Get one component of the uri by its regex group index.

        :param index: Group index within ``_regex``.
        :type index: int
        :return: The component, '' when it is absent from the uri or
            the index does not name a group.
        :rtype: str
        """
        match = self._regex.match(self.uri_string)
        try:
            value = match.group(index)
        except IndexError:
            return ''
        return '' if value is None else value

    def _set_uri_part(self, index, value):
        """
        Replace one component of the uri, leaving the rest intact.

        The uri is split into its five components and put back together
        following RFC 3986 section 5.3, so each delimiter (':', '//',
        '?', '#') is only written when its component is present.

        :param index: Group index of the component within ``_regex``,
            one of 2, 4, 5, 7 or 9.
        :type index: int
        :param value: The new value, '' or None removes the component.
        :type value: str
        :raises ValueError: If the index is not a component index.
        """
        parts = (2, 4, 5, 7, 9)
        if index not in parts:
            raise ValueError('unsupported uri part index: %r' % (index,))

        match = self._regex.match(self.uri_string)

        # None marks an absent component, which differs from one that
        # is present but empty (for example the query of 'http://a/b?')
        components = dict((part, match.group(part)) for part in parts)
        components[index] = value if value else None

        scheme, authority, path, query, fragment = [
            components[part] for part in parts
        ]
        path = path or ''

        # with an authority the path must be empty or start with '/',
        # otherwise it would be read back as part of the authority
        if authority is not None and path and not path.startswith('/'):
            path = '/' + path

        pieces = []
        if scheme is not None:
            pieces.extend((scheme, ':'))
        if authority is not None:
            pieces.extend(('//', authority))
        pieces.append(path)
        if query is not None:
            pieces.extend(('?', query))
        if fragment is not None:
            pieces.extend(('#', fragment))

        self.uri_string = ''.join(pieces)
    
    
def uri_test(uri_string):
    """
    Print a uri followed by each of its non-empty components.

    The components are printed one per line in the order scheme,
    userinfo, username, password, authority, host, path, query and
    fragment; empty components are skipped.

    :param uri_string: The string representation of the uri to inspect.
    :type uri_string: str
    """
    component_names = (
        'scheme',
        'userinfo',
        'username',
        'password',
        'authority',
        'host',
        'path',
        'query',
        'fragment',
    )
    uri = Uri(uri_string)
    print(uri_string)
    for component_name in component_names:
        component = getattr(uri, component_name)
        if component:
            print(component)
        
        
# uri_test('http://username:password@www.website.com/path?param=1#fragment')
# uri_test('http://username:password@www.website.com/path?param=1')
# uri_test('http://username@www.website.com/path?param=1')
# uri_test('http://www.website.com/path?param=1')
# uri_test('http://www.website.com/path')
# uri_test('http://www.website.com')
# uri_test('http://www.website.com')

# Demo 1: swap the scheme and attach a fragment, then show the result.
url = Uri('http://username:password@www.website.com/path')
url.scheme = 'ftp'
url.fragment = 'hello'
print(url)


# Demo 2: read the path of a file uri.  Only the text after the next '/'
# counts as path here; 'somepath' directly follows '//' and is therefore
# parsed as the authority.
url = Uri('file://somepath/file')
print(url.path)

ftp://username:password@www.website.com/path#hello
/file
