### Collections Module - Counter

In [1]:
# counter
from collections import Counter

In [3]:
l = [1,1,1,1,1,1,12,12,2,2,2,23,3,3,3,4,4,4,44]
Counter(l)

Counter({1: 6, 12: 2, 2: 3, 23: 1, 3: 3, 4: 3, 44: 1})

In [4]:
s = 'sgoadgjksnfkjskjdnfksjdfnksdjfnksdjn'
Counter(s)

Counter({'s': 6,
         'g': 2,
         'o': 1,
         'a': 1,
         'd': 5,
         'j': 6,
         'k': 6,
         'n': 5,
         'f': 4})

In [6]:
s = 'How many times does each word show up in this sentence show show up up and tell me me me'
words = s.split()

Counter(words)

Counter({'How': 1,
         'many': 1,
         'times': 1,
         'does': 1,
         'each': 1,
         'word': 1,
         'show': 3,
         'up': 3,
         'in': 1,
         'this': 1,
         'sentence': 1,
         'and': 1,
         'tell': 1,
         'me': 3})

In [7]:
c = Counter(words)
c.most_common(2)

[('show', 3), ('up', 3)]

In [8]:
sum(c.values())

20

### Collections Module - defaultdict

In [9]:
from collections import defaultdict

In [10]:
d = {'k1':1}

In [11]:
d['k1']

1

In [12]:
d['k2']

KeyError: 'k2'

In [13]:
d = defaultdict(object)

In [14]:
d['one']

<object at 0x1316f261170>

In [16]:
for item in d:
    print (item)

one


In [17]:
d = defaultdict(lambda:0)
d['one']

0

In [18]:
d['two'] = 2

In [19]:
d

defaultdict(<function __main__.<lambda>()>, {'one': 0, 'two': 2})

### Collections Module - OrderedDict

In [23]:
d = {}

d['a'] = 1
d['b'] = 2
d['c'] = 3
d['d'] = 4
d['e'] = 5

In [24]:
d

{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}

In [26]:
for k,v in d.items():
    print (k,v)

a 1
b 2
c 3
d 4
e 5


In [27]:
from collections import OrderedDict

In [28]:
d = OrderedDict()

In [29]:
d['a'] = 1
d['b'] = 2
d['c'] = 3
d['d'] = 4
d['e'] = 5

In [30]:
for k,v in d.items():
    print(k,v)

a 1
b 2
c 3
d 4
e 5


In [35]:
d1 = OrderedDict()
d1['a'] = 1
d1['b'] = 2

d2 = OrderedDict()
d2['b'] = 2
d2['a'] = 1

In [36]:
print (d1 == d2)

False


### Collections Module - namedtuple

In [37]:
t = (1,2,3)

In [38]:
t[0]

1

In [39]:
from collections import namedtuple

In [40]:
Dog = namedtuple('Dog', 'age breed name')

In [41]:
sam = Dog(age=2, breed='Lab', name='Sammy')

In [42]:
sam

Dog(age=2, breed='Lab', name='Sammy')

In [43]:
sam.age

2

In [44]:
sam[0]

2

In [45]:
Cat = namedtuple('Cat', 'fur claws name')

In [46]:
c = Cat(fur='Fuzzy', claws=False, name='Kitty')

In [47]:
c

Cat(fur='Fuzzy', claws=False, name='Kitty')

### Datetime

In [48]:
import datetime

In [49]:
t = datetime.time(5,25,1)

In [51]:
print (t)

05:25:01


In [52]:
t.min

datetime.time(0, 0)

In [53]:
datetime.time

datetime.time

In [55]:
print(datetime.time.min)

00:00:00


In [56]:
print(datetime.time.max)

23:59:59.999999


In [57]:
today = datetime.date.today()

In [58]:
print(today)

2019-08-26


In [59]:
today.timetuple()

time.struct_time(tm_year=2019, tm_mon=8, tm_mday=26, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=0, tm_yday=238, tm_isdst=-1)

In [60]:
today.day

26

In [62]:
print(datetime.date.min)

0001-01-01


In [64]:
d1 = datetime.date(2015, 3, 11)
print(d1)

2015-03-11


In [65]:
d2 = d1.replace(year=1990)

In [66]:
d2

datetime.date(1990, 3, 11)

In [67]:
d1-d2

datetime.timedelta(days=9131)

### Python Debugger

In [68]:
import pdb

In [71]:
x = [1,3,4]  # a list, whereas y and z below are plain ints
y = 2
z = 3
# int + int: this addition succeeds and prints 5
result = y + z
print(result)
# Pause execution here and open the interactive (Pdb) prompt so x, y and z can be inspected
pdb.set_trace()
# int + list is not a supported operand pair, so this raises TypeError if execution continues
result2 = y + x
print(result2)

5
--Return--
> <ipython-input-71-14481e838565>(8)<module>()->None
-> pdb.set_trace()
(Pdb) x
[1, 3, 4]
(Pdb) y
2
(Pdb) z
3
(Pdb) q


BdbQuit: 

### Timing your code - timeit module

In [78]:
import timeit

In [79]:
'0-1-2-3-....-99'

'0-1-2-3-....-99'

In [81]:
timeit.timeit('"-".join(str(n) for n in range(100))',number=10000)

0.5435876999999891

In [82]:
timeit.timeit('"-".join([str(n) for n in range(100)])',number=10000)

0.4638727999990806

In [83]:
%timeit "-".join(str(n) for n in range(100))

56.4 µs ± 4.24 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [84]:
%timeit "-".join([str(n) for n in range(100)])

52.5 µs ± 2.59 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


### Regular Expressions

In [1]:
import re

In [4]:
patterns = ['term1', 'term2']

In [5]:
text = 'This is a string with term1, but not the other term'

In [6]:
# Report, for every candidate pattern, whether it occurs anywhere in `text`.
for pattern in patterns:
    print('Searching for "%s" in:\n "%s"\n' % (pattern, text))

    # re.search gives back a Match object on success and None otherwise,
    # so its truthiness selects which message is printed.
    print('Match was found. \n' if re.search(pattern, text) else 'No Match was found.\n')

Searching for "term1" in:
 "This is a string with term1, but not the other term"

Match was found. 

Searching for "term2" in:
 "This is a string with term1, but not the other term"

No Match was found.



In [7]:
print(re.search('h', 'w'))

None


In [8]:
match = re.search(patterns[0],text)

In [9]:
type(match)

re.Match

In [10]:
match.start()

22

In [11]:
match.end()

27

In [12]:
split_term = '@'

phrase = 'What is your email, is it hello@gmail.com?'

In [13]:
re.split(split_term, phrase)

['What is your email, is it hello', 'gmail.com?']

In [14]:
'hello world'.split()

['hello', 'world']

In [15]:
re.findall('match', 'Here is one match, here is another match')

['match', 'match']

In [16]:
def multi_re_find(patterns,phrase):
    """Print every match of each regex in ``patterns`` found in ``phrase``.

    Args:
        patterns: iterable of regex patterns accepted by ``re.findall``.
        phrase: the string to search.

    For each pattern, in order, this prints a header line containing the
    pattern's repr, then the list returned by ``re.findall``, then a
    separator. Nothing is returned.
    """
    header = 'Searching the phrase using the re check: %r'
    for regex in patterns:
        # Header goes out before the search runs, so it is visible even if
        # the pattern turns out to be invalid.
        print(header % (regex))
        matches = re.findall(regex, phrase)
        print(matches)
        print('\n')  # explicit newline plus print's own newline

In [17]:
# Sample text built from runs of "s", "d" and "." for exercising quantifiers.
test_phrase = 'sdsd..sssddd...sdddsddd...dsds...dsssss...sdddd'

# Each quantifier applies only to the "d" immediately before it.
test_patterns = [
    'sd*',      # s followed by zero or more d's
    'sd+',      # s followed by one or more d's
    'sd?',      # s followed by zero or one d
    'sd{3}',    # s followed by exactly three d's
    'sd{2,3}',  # s followed by two or three d's
]

multi_re_find(test_patterns, test_phrase)

Searching the phrase using the re check: 'sd*'
['sd', 'sd', 's', 's', 'sddd', 'sddd', 'sddd', 'sd', 's', 's', 's', 's', 's', 's', 'sdddd']


Searching the phrase using the re check: 'sd+'
['sd', 'sd', 'sddd', 'sddd', 'sddd', 'sd', 'sdddd']


Searching the phrase using the re check: 'sd?'
['sd', 'sd', 's', 's', 'sd', 'sd', 'sd', 'sd', 's', 's', 's', 's', 's', 's', 'sd']


Searching the phrase using the re check: 'sd{3}'
['sddd', 'sddd', 'sddd', 'sddd']


Searching the phrase using the re check: 'sd{2,3}'
['sddd', 'sddd', 'sddd', 'sddd']




In [18]:
# character sets: [...] matches any single character listed inside the brackets
test_phrase = 'sdsd..sssddd...sdddsddd...dsds...dsssss...sdddd'

test_patterns = [
    '[sd]',    # a single character that is either s or d
    's[sd]+',  # s followed by one or more characters that are each s or d
]

multi_re_find(test_patterns, test_phrase)

Searching the phrase using the re check: '[sd]'
['s', 'd', 's', 'd', 's', 's', 's', 'd', 'd', 'd', 's', 'd', 'd', 'd', 's', 'd', 'd', 'd', 'd', 's', 'd', 's', 'd', 's', 's', 's', 's', 's', 's', 'd', 'd', 'd', 'd']


Searching the phrase using the re check: 's[sd]+'
['sdsd', 'sssddd', 'sdddsddd', 'sds', 'sssss', 'sdddd']




In [19]:
# exclusion
test_phrase = 'This is a string! But it has punctuation. How can we remove it?'

In [20]:
re.findall('[^!.? ]+',test_phrase)

['This',
 'is',
 'a',
 'string',
 'But',
 'it',
 'has',
 'punctuation',
 'How',
 'can',
 'we',
 'remove',
 'it']

In [21]:
# character ranges: inside [...], a-z style spans stand for every character in the range
test_phrase = 'This is an example sentence. Lets see if we can find some letters.'

test_patterns = [
    '[a-z]+',       # one or more consecutive lower case letters
    '[A-Z]+',       # one or more consecutive upper case letters
    '[a-zA-Z]+',    # one or more consecutive letters of either case
    '[A-Z][a-z]+',  # one upper case letter followed by one or more lower case letters
]

multi_re_find(test_patterns, test_phrase)

Searching the phrase using the re check: '[a-z]+'
['his', 'is', 'an', 'example', 'sentence', 'ets', 'see', 'if', 'we', 'can', 'find', 'some', 'letters']


Searching the phrase using the re check: '[A-Z]+'
['T', 'L']


Searching the phrase using the re check: '[a-zA-Z]+'
['This', 'is', 'an', 'example', 'sentence', 'Lets', 'see', 'if', 'we', 'can', 'find', 'some', 'letters']


Searching the phrase using the re check: '[A-Z][a-z]+'
['This', 'Lets']




In [22]:
# escape codes: raw strings keep the backslashes intact for the regex engine
test_phrase = 'This is a string with some numbers 1233 and a symbol #hashtag'

test_patterns = [
    r'\d+',  # sequence of digits
    r'\D+',  # sequence of non-digits
    r'\s+',  # sequence of whitespace
    r'\S+',  # sequence of non-whitespace
    r'\w+',  # sequence of word characters (alphanumerics and underscore)
    r'\W+',  # sequence of non-word characters
]

multi_re_find(test_patterns, test_phrase)

Searching the phrase using the re check: '\\d+'
['1233']


Searching the phrase using the re check: '\\D+'
['This is a string with some numbers ', ' and a symbol #hashtag']


Searching the phrase using the re check: '\\s+'
[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']


Searching the phrase using the re check: '\\S+'
['This', 'is', 'a', 'string', 'with', 'some', 'numbers', '1233', 'and', 'a', 'symbol', '#hashtag']


Searching the phrase using the re check: '\\w+'
['This', 'is', 'a', 'string', 'with', 'some', 'numbers', '1233', 'and', 'a', 'symbol', 'hashtag']


Searching the phrase using the re check: '\\W+'
[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' #']


