In [1]:
%%file README.MD
收集python有用函数集


Writing README.MD


In [3]:
%%file flatten_list.py
# -*- coding: UTF-8 -*- 
# Flatten nested lists into a single flat list

def flatten(x):
    """Flatten a uniformly nested list of lists into a single flat list.

    Only the first element is inspected to decide whether ``x`` is nested,
    so every element of a nested level is expected to be a list as well.
    A list whose first element is not a list (and an empty list) is
    returned unchanged.

    >>> flatten([[1]])
    [1]
    >>> flatten([[1, 2], [3, 4]])
    [1, 2, 3, 4]
    >>> flatten([[[1], [2]], [[3], [4]]])
    [1, 2, 3, 4]
    >>> flatten([])
    []
    """
    # Imported locally so the block does not depend on the third-party
    # ``toolz`` package, which this file never imported.
    from itertools import chain

    # ``x and ...`` guards against IndexError on an empty list.
    if x and isinstance(x[0], list):
        return list(chain.from_iterable(map(flatten, x)))
    return x

Writing flatten_list.py


In [2]:
%%file internal_utils.py
"""
Utility functions that are unrelated to datashape
Do not import datashape modules into this module.  See util.py in that case
"""

from __future__ import print_function, division, absolute_import

import keyword
import re


class IndexCallable(object):
    """ Expose a one-argument function through subscript syntax.

    ``obj[key]`` simply calls the wrapped function with ``key``.

    >>> def inc(x):
    ...     return x + 1
    >>> I = IndexCallable(inc)
    >>> I[3]
    4
    """
    __slots__ = ('fn',)

    def __init__(self, fn):
        self.fn = fn

    def __getitem__(self, key):
        wrapped = self.fn
        return wrapped(key)


def remove(predicate, seq):
    """Return the items of ``seq`` for which ``predicate`` is false.

    The result is whatever the builtin ``filter`` returns.
    """
    def rejected(item):
        return not predicate(item)

    return filter(rejected, seq)


# Taken from theano/theano/gof/sched.py
# Avoids licensing issues because this was written by Matthew Rocklin
def reverse_dict(d):
    """Invert a dependency dict.

    Every value listed under a key of ``d`` becomes a key of the result,
    mapped to the tuple of original keys that listed it.  Keys of ``d``
    with no values do not appear in the result.

    >>> d = {'a': (1, 2), 'b': (2, 3), 'c':()}
    >>> reverse_dict(d)  # doctest: +SKIP
    {1: ('a',), 2: ('a', 'b'), 3: ('b',)}

    :note: the tuples are built in the iteration order of ``d``, so when
        that order is not deterministic the order inside each tuple is
        not deterministic either.
    """
    flipped = {}
    for parent, children in d.items():
        for child in children:
            flipped[child] = flipped.get(child, tuple()) + (parent, )
    return flipped


# Taken from theano/theano/gof/sched.py
# Avoids licensing issues because this was written by Matthew Rocklin
def _toposort(edges):
    """ Topologically sort a graph with Kahn's algorithm [1].

    inputs:
        edges - a dict of the form {a: {b, c}} where b and c depend on a
    outputs:
        a list of nodes ordered so that every node comes after the nodes
        it depends on

    Raises ValueError when the dependencies contain a cycle.

    >>> _toposort({1: (2, 3), 2: (3, )})
    [1, 2, 3]

    Closely follows the wikipedia page [2]
    [1] Kahn, Arthur B. (1962), "Topological sorting of large networks",
    Communications of the ACM
    [2] http://en.wikipedia.org/wiki/Toposort#Algorithms
    """
    # For every node, the set of nodes it is still waiting on.
    pending = {}
    for node, parents in reverse_dict(edges).items():
        pending[node] = set(parents)

    # Nodes with no incoming edge can be emitted straight away.
    ready = set(node for node in edges if node not in pending)
    ordered = []

    while ready:
        current = ready.pop()
        ordered.append(current)
        for child in edges.get(current, ()):
            waiting = pending[child]
            assert current in waiting
            waiting.remove(current)
            if not waiting:
                ready.add(child)

    # Any node still waiting on a dependency is part of a cycle.
    if any(pending.get(node) for node in edges):
        raise ValueError("Input has cycles")
    return ordered


# Taken from toolz
# Avoids licensing issues because this version was authored by Matthew Rocklin
def groupby(func, seq):
    """ Bucket the items of ``seq`` by the value ``func`` returns for them.

    Returns a dict mapping each key to the list of items that produced
    it, in the order the items were encountered.

    >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']
    >>> groupby(len, names) # doctest: +SKIP
    {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}
    >>> iseven = lambda x: x % 2 == 0
    >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8])
    {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}

    See Also:
        ``countby``
    """
    groups = {}
    for item in seq:
        groups.setdefault(func(item), []).append(item)
    return groups


def isidentifier(s):
    """Return True when ``s`` looks like an ASCII Python identifier.

    Keywords are reported as identifiers too (they match the pattern
    anyway; the explicit keyword check is kept from the original).

    The pattern is anchored with ``\\Z`` rather than ``$`` because ``$``
    also matches just before a trailing newline, which made strings such
    as ``'abc\\n'`` pass.
    """
    return (keyword.iskeyword(s) or
            re.match(r'[_a-zA-Z][_a-zA-Z0-9]*\Z', s) is not None)

Writing internal_utils.py


In [9]:
%%file redis_storage.py
# -*- coding: UTF-8 -*- 
import abc
import redis

__all__ = ['storage']


def storage(storage_type, **kwargs):
    """Build the storage back end selected by ``storage_type``.

    A falsy ``storage_type`` gives an in-memory ``DictStorage``;
    ``'redis'`` gives a ``RedisStorage`` created with ``kwargs``.
    Any other value raises ValueError.
    """
    if not storage_type:
        return DictStorage()
    if storage_type == 'redis':
        return RedisStorage(**kwargs)
    raise ValueError('storage_type not supported.')


class Storage(object):
    """Common interface of the key/value back ends in this module.

    The ``abc.abstractmethod`` markers show which methods a back end is
    expected to provide.  The class does not use ``abc.ABCMeta``, so the
    markers are not enforced when a subclass is instantiated.
    """

    @abc.abstractmethod
    def __init__(self, flush_db=False, **kwargs):
        """
        Initializes an object to store values. If the storage needs a database connection, it also
        connects to the database.
        """

    @abc.abstractmethod
    def __del__(self):
        """
        Deletes in-memory data structures
        """

    @abc.abstractmethod
    def __getitem__(self, item):
        """
        Looks up `item` and returns its value. None is returned if `item` is not present.
        """

    @abc.abstractmethod
    def __setitem__(self, key, value):
        """
        Stores `value` under `key`. A missing `key` is added to the storage; an existing value
        is overwritten.
        """

    def incrby(self, key, incr):
        """
        Adds `incr` to the integer stored under `key` and returns the new value as an int.
        A key that is not in the storage is first set to 0. Membership is tested with `in`.
        """
        absent = key not in self
        if absent:
            self[key] = 0
        self[key] = int(self[key]) + incr
        return int(self[key])


class RedisStorage(Storage):
    """Storage back end that keeps its values in a Redis database."""

    def __init__(self, flush_db=False, **kwargs):
        """
        Creates the Redis client. `kwargs` are passed to `redis.StrictRedis` unchanged.
        When `flush_db` is true the selected database is flushed first.
        """
        r = redis.StrictRedis(**kwargs)
        if flush_db:
            r.flushdb()
        self._r = r

    def __del__(self):
        # `_r` is missing when __init__ raised before assigning it.
        try:
            del self._r
        except AttributeError:
            pass

    def __getitem__(self, key):
        """
        Returns the value stored under `key`, converted to int when possible.
        None is returned if `key` is not present.
        """
        val = self._r.get(key)
        try:
            return int(val)
        except (TypeError, ValueError):
            # TypeError: the key is missing and `val` is None.
            # ValueError: the stored value is not an integer.
            return val

    def __setitem__(self, key, value):
        self._r.set(key, value)

    def __contains__(self, key):
        # A key whose stored value is empty counts as absent, same as a missing key.
        return bool(self._r.get(key))

    def smembers(self, key):
        """
        Get all the members in a set.
        """
        res = self._r.smembers(key)
        # Members made only of digits are converted to integers.
        return set(int(el) if el.isdigit() else el for el in res)

    def sadd(self, key, value):
        """
        Insert a `value` into a set.
        """
        self._r.sadd(key, value)

    def sclear(self, key):
        """
        Clear the contents of a set
        """
        self._r.delete(key)

    def keys(self):
        return self._r.keys()


class DictStorage(Storage):
    """Storage back end that keeps its values in an in-memory dict."""

    def __init__(self):
        self._items = dict()

    def __del__(self):
        self._items.clear()
        del self._items

    def __getitem__(self, key):
        """
        Returns the value stored under `key`, or None if `key` is not present.
        """
        return self._items.get(key)

    def __setitem__(self, key, value):
        self._items[key] = value

    def __contains__(self, key):
        return key in self._items

    def smembers(self, key):
        """
        Get all the members in a set. An unknown `key` gives an empty set.
        """
        return self._items[key] if key in self._items else set()

    def sadd(self, key, value):
        """
        Insert a `value` into a set, creating the set if `key` is new.
        """
        self._items.setdefault(key, set()).add(value)

    def sclear(self, key):
        """
        Clear the contents of a set. An unknown `key` is ignored, as in
        RedisStorage.sclear.
        """
        members = self._items.get(key)
        if members is not None:
            members.clear()

    def keys(self):
        return self._items.keys()
    
def prepender(func):
    """
    Decorator for methods: prepends `self._prefix` to the first argument after `self`
    before calling `func`.

    `self` must provide a `_prefix` attribute that can be concatenated with that argument.
    Remaining positional arguments and any keyword arguments are passed through unchanged.
    The wrapper keeps the name and docstring of `func`.
    """
    # Local import so the block does not depend on the file's import section.
    import functools

    @functools.wraps(func)
    def prepend(self, *args, **kwargs):
        prepended = self._prefix + args[0]
        return func(self, prepended, *args[1:], **kwargs)
    return prepend

# NOTE(review): not referenced by any live code visible in this file - confirm whether it can go.
pre='dddd'
class Terms(object):
    """Base class for term collections kept in a shared store.

    Subclasses are expected to set ``self._prefix``; this class only
    reads it.
    """

    def __init__(self, store=None):
        self._items = store

    @property
    def terms(self):
        """
        Returns every stored key that starts with the prefix, with the prefix removed.
        """
        stripped = []
        for key in self._items.keys():
            if key.startswith(self._prefix):
                stripped.append(key[len(self._prefix):])
        return stripped


class OriginalTerms(Terms):
    """Occurrence counts of the original terms, stored under a key prefix.

    NOTE(review): an earlier revision used the prefix 't:' (standing for
    `term:`); the current default is 'tre' - confirm which is intended.
    """

    def __init__(self, store, pre='tre'):
        # Set the prefix before the base initializer runs.
        self._prefix = pre
        super(OriginalTerms, self).__init__(store)

    @prepender
    def __setitem__(self, word, count):
        """
        Adds the `word` to the original terms. The number of occurrences is specified in `count`.
        """
        # Debug output of the prefixed key; kept from the original.
        print(word)
        self._items.incrby(word, count)

    @prepender
    def __getitem__(self, word):
        """
        Returns the number of occurrences of `word` in the corpus or 0 if it wasn't present.
        """
        # Debug output of the prefixed key; written so it parses on Python 2 and 3.
        print(word)
        return self._items[word] if word in self._items else 0



### test: connection settings used by the Redis smoke test below
redis_host = 'localhost'
redis_port = 6379
redis_db = 5


def redis_storage():
    """Return a Redis-backed storage for the module-level test settings.

    The database is not flushed.
    """
    connection = dict(host=redis_host, port=redis_port, db=redis_db)
    return storage('redis', flush_db=False, **connection)


# Smoke test. It sits at module level, so it runs whenever this file is executed or imported.
store=redis_storage()
oi = OriginalTerms(store,'ddd')
# The prefix passed above is 'ddd', so the key handed to the store is 'ddd' + 'foo'.
oi['foo']=1

# NOTE(review): this second client passes only db; host/port are left to the client's defaults - confirm.
r = redis.StrictRedis(db=redis_db)
# NOTE(review): this reads 't:foo', but the write above used the 'ddd' prefix - confirm the intended key.
print r.get('t:foo')

Overwriting redis_storage.py


In [10]:
%%file string2dict.py
# -*- coding: UTF-8 -*- 
"""
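Convert a string into a dict.

Background: I store database connection settings (user name, password, host, port and so on) in a database as a single string. To check with MySQLdb whether those settings actually work, the user name/password/host/port etc. have to be passed as arguments to MySQLdb.Connect(). That requires converting a string such as '{"host":"192.168.11.22", "port":3306, "user":"abc","passwd":"123", "db":"mydb", "connect_timeout":10}' into a dict.

In Python I know of the following 3 ways to turn a string into a dict:
1. ast.literal_eval() - the one I usually use. It needs Python 2.6 or later and is described as safer than a plain eval(); I have not looked into the details.
2. eval() - a reasonable choice as long as the string content is controlled/trusted.
3. json.loads() - the loads function from json works well, but note that strings in the keys/values come back as unicode.

Example code: https://github.com/smilejay/python/blob/master/py2014/string2dict.py
The same approaches work for other literals, for example a list '[...]'.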
"""
import MySQLdb
import ast
import json

def my_run():
    """Demonstrate three ways of parsing a dict literal and test the settings.

    The sample string is parsed with ``ast.literal_eval``, ``eval`` and
    ``json.loads``; the type and value of each result are printed.  The
    ``literal_eval`` result is then used to open a MySQLdb connection.
    Prints 'right' when the connection succeeds, otherwise 'wrong'
    followed by the error.  Nothing is returned and no exception escapes.
    """
    try:
        s = '{"host":"192.168.11.22", "port":3306, "user":"abc",\
              "passwd":"123", "db":"mydb", "connect_timeout":10}'
        d = ast.literal_eval(s)
        print(type(d))
        print(d)
        # NOTE: eval() executes arbitrary code.  It is acceptable here only
        # because `s` is a hard-coded literal; never use it on external input.
        d1 = eval(s)
        print(type(d1))
        print(d1)
        d2 = json.loads(s)
        print(type(d2))
        print(d2)
        conn = MySQLdb.Connect(host=d['host'], port=d['port'], user=d['user'],
                               passwd=d['passwd'], db=d['db'],
                               connect_timeout=d['connect_timeout'])
        try:
            print('right')
        finally:
            # The connection is only a probe; close it so it is not leaked.
            conn.close()
    except Exception as e:
        # Deliberately broad: any failure means the settings are unusable.
        print('wrong %s' % e)
 
 
# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    my_run()


Writing string2dict.py


In [None]:
%%file timeutils.py
import datetime
import sys

try:
    from urllib.parse import urlparse
except ImportError:
    # python 2
    from urlparse import urlparse

# True when running under a Python 2 interpreter.
PY2 = sys.version_info[0] == 2


def parse_time(timestr):
    """Parse a ``YYYY-MM-DD HH:MM:SS[.ffffff]`` string into a datetime.

    The date and time may be separated by a space or by ``T``.  A falsy
    ``timestr`` gives the current UTC time.  Raises ValueError when the
    string matches neither format.
    """
    if not timestr:
        return datetime.datetime.utcnow()

    iso_text = timestr.replace(' ', 'T')
    strptime = datetime.datetime.strptime
    try:
        # Try the variant with fractional seconds first.
        return strptime(iso_text, "%Y-%m-%dT%H:%M:%S.%f")
    except ValueError:
        return strptime(iso_text, "%Y-%m-%dT%H:%M:%S")


def datestr(then, now=None):
    """Converts time to a human readable string.

    Wrapper over ``_web_datestr`` that reports any sub-second difference
    as 'Just now'.

        >>> from datetime import datetime
        >>> datestr(datetime(2010, 1, 2), datetime(2010, 1, 1))
        '1 day ago'
    """
    s = _web_datestr(then, now)
    # Match the singular stems: the plural-only check missed
    # '1 millisecond ago' and '1 microsecond ago'.
    if 'millisecond' in s or 'microsecond' in s:
        s = 'Just now'
    return s


def _web_datestr(then, now=None):
    """
    datestr utility from web.py (public domain).
    source: https://github.com/webpy/webpy

    Converts a (UTC) datetime object to a nice string representation of
    how far `then` lies from `now`.

    `then` may be a datetime or a date (a date is treated as midnight of
    that day); a falsy `then` gives ''. A falsy `now` is replaced by the
    current UTC time.

    Differences of 1 to 3 days are reported as 'N day(s) ago/from now';
    larger ones as a month-and-day string, with the year appended when
    the years differ or `then` lies in the future. Smaller differences
    are reported in hours, minutes, seconds, milliseconds or microseconds.

        >>> from datetime import datetime, timedelta
        >>> d = datetime(1970, 5, 1)
        >>> _web_datestr(d, now=d)
        '0 microseconds ago'
        >>> for t, v in {
        ...   timedelta(microseconds=1): '1 microsecond ago',
        ...   timedelta(microseconds=2): '2 microseconds ago',
        ...   -timedelta(microseconds=1): '1 microsecond from now',
        ...   -timedelta(microseconds=2): '2 microseconds from now',
        ...   timedelta(microseconds=2000): '2 milliseconds ago',
        ...   timedelta(seconds=2): '2 seconds ago',
        ...   timedelta(seconds=2*60): '2 minutes ago',
        ...   timedelta(seconds=2*60*60): '2 hours ago',
        ...   timedelta(days=2): '2 days ago',
        ... }.items():
        ...     assert _web_datestr(d, now=d+t) == v
        >>> _web_datestr(datetime(1970, 1, 1), now=d)
        'January  1'
        >>> _web_datestr(datetime(1969, 1, 1), now=d)
        'January  1, 1969'
        >>> _web_datestr(datetime(1970, 6, 1), now=d)
        'June  1, 1970'
        >>> _web_datestr(None)
        ''
    """
    # Formats a signed count `n` of unit `what`; a negative `n` means the future.
    # NOTE(review): `//` floors, so a negative `n` with a divisor rounds away from zero.
    def agohence(n, what, divisor=None):
        if divisor: n = n // divisor

        out = str(abs(n)) + ' ' + what       # '2 day'
        if abs(n) != 1: out += 's'           # '2 days'
        out += ' '                           # '2 days '
        if n < 0:
            out += 'from now'
        else:
            out += 'ago'
        return out                           # '2 days ago'

    oneday = 24 * 60 * 60  # seconds in a day

    if not then: return ""
    if not now: now = datetime.datetime.utcnow()
    # Objects whose class is named "DateTime" are converted with fromtimestamp().
    # NOTE(review): presumably a third-party timestamp-like type - confirm.
    if type(now).__name__ == "DateTime":
        now = datetime.datetime.fromtimestamp(now)
    if type(then).__name__ == "DateTime":
        then = datetime.datetime.fromtimestamp(then)
    elif type(then).__name__ == "date":
        # A plain date becomes midnight of that day.
        then = datetime.datetime(then.year, then.month, then.day)

    delta = now - then
    # Whole seconds between the two times, truncated toward zero.
    deltaseconds = int(delta.days * oneday + delta.seconds + delta.microseconds * 1e-06)
    deltadays = abs(deltaseconds) // oneday
    if deltaseconds < 0: deltadays *= -1 # fix for oddity of floor

    if deltadays:
        # Up to three days away: relative wording.
        if abs(deltadays) < 4:
            return agohence(deltadays, 'day')

        # Trick to drop the leading zero of the day: 'June 03' becomes 'June  3'
        # (the zero is replaced by a space, so two spaces remain).
        # Even though the %e format in strftime does that, it doesn't work on Windows.
        out = then.strftime('%B %d').replace(" 0", "  ")

        if then.year != now.year or deltadays < 0:
            out += ', %s' % then.year
        return out

    if int(deltaseconds):
        # The comparisons are strict: exactly one hour is reported in minutes,
        # exactly one minute in seconds.
        if abs(deltaseconds) > (60 * 60):
            return agohence(deltaseconds, 'hour', 60 * 60)
        elif abs(deltaseconds) > 60:
            return agohence(deltaseconds, 'minute', 60)
        else:
            return agohence(deltaseconds, 'second')

    # Less than a second apart: fall back to the microsecond part.
    deltamicroseconds = delta.microseconds
    if delta.days: deltamicroseconds = int(delta.microseconds - 1e6) # datetime oddity
    if abs(deltamicroseconds) > 1000:
        return agohence(deltamicroseconds, 'millisecond', 1000)

    return agohence(deltamicroseconds, 'microsecond')


def truncate(text, width):
    """Shorten ``text`` to at most ``width`` characters.

    Text that already fits is returned unchanged.  Longer text is cut
    and ends in "...".  When ``width`` is below 3 there is no room for
    the ellipsis, so the text is cut to ``width`` characters (a negative
    width gives an empty string).
    """
    if len(text) <= width:
        return text
    if width < 3:
        # A negative slice bound here used to return more than `width` characters.
        return text[:max(width, 0)]
    return text[:width - 3] + "..."

def get_host_name(url):
    """Return the host part of ``url`` without userinfo or port.

    The case of the host is preserved.  A bracketed IPv6 literal is
    returned without its brackets.  A string with no network location
    gives ''.
    """
    netloc = urlparse(url).netloc
    # Drop a leading "user:password@" if present.
    host = netloc.rpartition('@')[2]
    if host.startswith('['):
        # IPv6 literal: the colons inside the brackets are not port separators.
        return host[1:].partition(']')[0]
    return host.partition(':')[0]

###methods 2
class Unit(object):
    """A named unit of time.

    Holds the singular name, the plural name (the name plus "s") and the
    value passed as ``nsecs``.
    """

    def __init__(self, name, nsecs):
        self.name, self.plural = name, name + "s"
        self.nsecs = nsecs


# Units used by format_duration; each one is defined in terms of the previous.
MINUTE = Unit("minute", 60)
HOUR = Unit("hour", 60 * MINUTE.nsecs)
DAY = Unit("day", 24 * HOUR.nsecs)
WEEK = Unit("week", 7 * DAY.nsecs)


def format_duration(td):
    """Render a timedelta as text such as "1 day 2 hours 5 minutes".

    Weeks are only used when the duration is a whole number of weeks.
    Anything left over below one minute is dropped, so a duration
    shorter than a minute gives an empty string.
    """
    seconds_left = int(td.total_seconds())
    parts = []

    for unit in (WEEK, DAY, HOUR, MINUTE):
        # Say "8 days" instead of "1 week 1 day"
        if unit == WEEK and seconds_left % unit.nsecs:
            continue

        count, seconds_left = divmod(seconds_left, unit.nsecs)
        if count < 1:
            continue
        if count == 1:
            parts.append("1 %s" % unit.name)
        else:
            parts.append("%d %s" % (count, unit.plural))

    return " ".join(parts)

from datetime import timedelta as td
import datetime
 
# Demo: format the two-hour gap between two timestamps.
t1 = datetime.datetime.strptime("2017-9-06 10:30:00", "%Y-%m-%d %H:%M:%S")
t2 = datetime.datetime.strptime("2017-9-06 12:30:00", "%Y-%m-%d %H:%M:%S")
tf=t2-t1
# NOTE(review): the return value is discarded here - confirm whether it should be printed.
format_duration(tf)