# reduce, accumulate

In [5]:
import operator

from functools import reduce
from itertools import accumulate

**reduce**

In [12]:
# Pair every i in 0..99 with i * 100; zip walks the two ranges in lockstep.
a = list(zip(range(100), range(0, 100 * 100, 100)))
# Preview only the first ten pairs instead of dumping the whole list.
a[:10]

[(0, 0),
 (1, 100),
 (2, 200),
 (3, 300),
 (4, 400),
 (5, 500),
 (6, 600),
 (7, 700),
 (8, 800),
 (9, 900)]

In [14]:
# Collapse all pairs into one: keep the largest first item and add up the second items.
reduce(
    lambda acc, pair: (max(acc[0], pair[0]), acc[1] + pair[1]),
    a,
)

(99, 495000)

In [15]:
# Unpack the range straight into a list literal: the integers 0 through 9.
a = [*range(10)]
a

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [16]:
# operator.add is the function form of `+`, so this folds the list into its sum.
reduce(operator.add, a)

45

**accumulate**

In [22]:
# Running product of a[1:]; operator.mul is the function form of `*`.
# The first element is skipped (it is 0 for the list built above, which would zero every product).
list(accumulate(a[1:], operator.mul))

[1, 2, 6, 24, 120, 720, 5040, 40320, 362880]

**Count events of a certain type**

In [26]:
# Sample event log: every record is a 3-tuple whose second item is the event type
# and whose last item is the event count used by the tasks below.
# NOTE(review): the first item looks like a record/user id — confirm.
event_log = [
    (11214, 'search', 5),
    (11215, 'item_view', 1),
    (11216, 'item_viewphone', 10),
    (11217, 'item_view', 2),
    (11218, 'item_viewphone', 4),
    (11219, 'item_view', 6),
    (11210, 'item_viewphone', 2),
    (11234, 'item_view', 4),
    (11264, 'item_view', 3),
    (11224, 'item_viewphone', 1),
    (11204, 'search', 6),
    (12214, 'search', 34),
    (13214, 'item_view', 3),
    (14214, 'item_view', 1000),
    (15214, 'item_viewphone', 2000),
    (16214, 'item_viewphone', 3444),
    (17214, 'item_view', 0),
    (18214, 'item_viewphone', 12),
    (19214, 'search', 244),
    (29214, 'item_viewphone', 4),
    (30214, 'item_view', 56),
    (48214, 'item_viewphone', 5),
    (67214, 'item_view', 2),
]

1. Посчитать количество аномальных полей. Будем считать поле аномальным, если у него какое-то слишком большое значение для метрики. Например, количество событий > 999

Можно, например, использовать filter() или list-comp

In [32]:
# Records whose event count exceeds this threshold are treated as anomalies.
anomaly_events_num = 999

# Each True counts as 1, so the sum is the number of anomalous records.
sum(count > anomaly_events_num for _, _, count in event_log)

3

In [33]:
cnt = 0

# Same tally done by hand: step through the anomalous records only and count them.
for elem in (record for record in event_log if record[2] > anomaly_events_num):
    cnt += 1

2. Посчитать общее количество событий заданного типа (`eventtype`) по неаномальным записям

In [47]:
eventtype = 'item_view'

# Keep only the non-anomalous records of the chosen type, then fold them into a single
# (largest first item, event type, total event count) tuple.
reduce(
    lambda acc, cur: (max(acc[0], cur[0]), eventtype, acc[-1] + cur[-1]),
    (
        record
        for record in event_log
        if record[1] == eventtype and record[-1] <= anomaly_events_num
    ),
)

(67214, 'item_view', 77)

# Dicts

## Creation

In [48]:
# The empty literal builds the same empty dict as calling dict().
some_empty_dict = {}
print(some_empty_dict, type(some_empty_dict))

{} <class 'dict'>


### Simple example

In [50]:
# Keys of several hashable types are mixed here: strings, an int and a tuple.
# Values can be anything, including a mutable list.
student_card = {
    'name': 'Denis',
    'year': 2015,
    'core subjects': ('Statistics', 'Algorimthms'),
    'optional subjects': ['Urban Studies', 'Basic Physics'],
    42: 'always the answer',
    (42,): 'always the answer',
}

# Display the freshly built dict.
student_card

{'name': 'Denis',
 'year': 2015,
 'core subjects': ('Statistics', 'Algorimthms'),
 'optional subjects': ['Urban Studies', 'Basic Physics'],
 42: 'always the answer',
 (42,): 'always the answer'}

In [52]:
# Assigning to a key that is not in the dict yet adds a new key:value pair.
student_card['surname'] = 'Belyakov'

In [53]:
# Display the dict again: the new 'surname' entry shows up at the end.
student_card

{'name': 'Denis',
 'year': 2015,
 'core subjects': ('Statistics', 'Algorimthms'),
 'optional subjects': ['Urban Studies', 'Basic Physics'],
 42: 'always the answer',
 (42,): 'always the answer',
 'surname': 'Belyakov'}

## Внутри

Ключ может быть только hashable

    An object is hashable if it has a hash value which never changes during its lifetime (it
    needs a __hash__() method), and can be compared to other objects (it needs an
    __eq__() method). Hashable objects which compare equal must have the same hash
    value. […]

Данное требование существует, потому что внутри dict -- это hash таблица!

Операции доставания значения по ключу, добавления и проверки нахождения -- в среднем **O(1)**. 

` Может быть O(n), если у вас отвратительная хеш-функция `

![hashtable](https://upload.wikimedia.org/wikipedia/commons/thumb/7/7d/Hash_table_3_1_1_0_1_0_0_SP.svg/1200px-Hash_table_3_1_1_0_1_0_0_SP.svg.png)

## Basic operations

### Get values

In [55]:
# get() never raises KeyError: when the key is missing it returns the supplied default.
# 'age' is deliberately a key that student_card does not contain, so the default comes back.
student_card.get('age', 'key is not there')

'key is not there'

### Remove values

In [57]:
# pop() removes the key and returns the value that was stored under it.
student_card.pop((42,))

'always the answer'

In [58]:
# Display the dict after the pop: the (42,) key is gone.
student_card

{'name': 'Denis',
 'year': 2015,
 'core subjects': ('Statistics', 'Algorimthms'),
 'optional subjects': ['Urban Studies', 'Basic Physics'],
 42: 'always the answer',
 'surname': 'Belyakov'}

### Update with many `key:value` pairs

In [61]:
# update() overwrites the value of an existing key ('name') in place and appends
# the keys that were not present yet ('a', 'b') in the order they are given.
student_card.update({'name': 1, 'a': 1, 'b': 2})

In [62]:
# Display the dict after the bulk update.
student_card

{'name': 1,
 'year': 2015,
 'core subjects': ('Statistics', 'Algorimthms'),
 'optional subjects': ['Urban Studies', 'Basic Physics'],
 42: 'always the answer',
 'surname': 'Belyakov',
 'a': 1,
 'b': 2}

### Iterate over

In [63]:
# Iterating a dict walks its keys; keys() spells that out explicitly.
for key in student_card.keys():
    print(key)

name
year
core subjects
optional subjects
42
surname
a
b


In [64]:
# items() yields (key, value) pairs; print each pair separated by a single space.
for key, value in student_card.items():
    print(f'{key} {value}')

name 1
year 2015
core subjects ('Statistics', 'Algorimthms')
optional subjects ['Urban Studies', 'Basic Physics']
42 always the answer
surname Belyakov
a 1
b 2


In [65]:
# values() returns a dict_values view over the stored values, without the keys.
student_card.values()

dict_values([1, 2015, ('Statistics', 'Algorimthms'), ['Urban Studies', 'Basic Physics'], 'always the answer', 'Belyakov', 1, 2])

### clear

In [66]:
# clear() removes every entry in place; the same dict object is left empty.
student_card.clear()

In [67]:
# Display the dict after clear(): it is empty now.
student_card

{}

## Sample task

### Let's count how many times each symbol occurs in a string!

In [68]:
# Input for the counting task; it mixes upper- and lower-case letters, spaces and a comma.
sample_string = 'Eddie ate dynamite, goodbye Eddie'

In [72]:
# Maps each character to the number of times it occurs; filled in by the loop below.
symbols = {}

# your code
# {s.lower(): sample_string.lower().count(s) for s in sample_string} -- not very efficient because of count()

In [73]:
# Tally every character of the lower-cased string; get() supplies 0 for a character
# that has not been seen yet, so no explicit membership check is needed.
for s in sample_string.lower():
    symbols[s] = symbols.get(s, 0) + 1

In [74]:
# Display the per-character counts.
symbols

{'e': 7,
 'd': 6,
 'i': 3,
 ' ': 4,
 'a': 2,
 't': 2,
 'y': 2,
 'n': 1,
 'm': 1,
 ',': 1,
 'g': 1,
 'o': 2,
 'b': 1}

In [75]:
from collections import defaultdict

In [80]:
# defaultdict(int) creates a missing key on first access with the value int(), i.e. 0.
symbols = defaultdict(int)

In [81]:
# No membership check is needed: an unseen character starts from the default 0.
for s in sample_string.lower():
    symbols[s] += 1

In [82]:
# Display the counts collected in the defaultdict.
symbols

defaultdict(int,
            {'e': 7,
             'd': 6,
             'i': 3,
             ' ': 4,
             'a': 2,
             't': 2,
             'y': 2,
             'n': 1,
             'm': 1,
             ',': 1,
             'g': 1,
             'o': 2,
             'b': 1})

What can we do to make our solution above better?

Bonus: try to use `defaultdict` *(from collections import defaultdict)*

# Sets

Коллекция уникальных объектов

In [83]:
a = ['one', 'two', 'two', 'one']
# Unpacking the list into a set literal drops the duplicates, exactly like set(a).
{*a}

{'one', 'two'}

set -- mutable, frozenset -- immutable

In [84]:
# Two sets that overlap in the values 4 and 5, used by the set operations below.
first_set = {1, 2, 3, 4, 5}
second_set = {4, 5, 6, 7, 8}

In [85]:
# Intersection (elements present in both sets), written as an operator and as a method.
first_set & second_set, first_set.intersection(second_set)

({4, 5}, {4, 5})

In [86]:
# Union (elements present in either set), written as an operator and as a method.
first_set | second_set, first_set.union(second_set)

({1, 2, 3, 4, 5, 6, 7, 8}, {1, 2, 3, 4, 5, 6, 7, 8})

In [90]:
# Difference: elements of second_set that are not in first_set.
second_set - first_set

{6, 7, 8}

Внутри хеш-таблица, что дает нам операцию проверки вхождения за O(1) при хорошей хеш-функции

Пример:

In [91]:
import random

In [92]:
good_values = set(range(100))

# Draw 100000 random integers from [1, 500] one at a time and keep those that are
# members of good_values; every membership test against the set is a hash lookup.
filtered_values = [
    val
    for val in (random.randint(1, 500) for _ in range(100000))
    if val in good_values
]

In [93]:
# Sanity check: nothing outside good_values (i.e. >= 100) slipped through the filter.
assert not any(x >= 100 for x in filtered_values)

Задачка. Дедуплицируем список с сохранением порядка

In [119]:
# The values 0..9999 repeated three times in a row, so every value has two later duplicates.
a = list(range(10000)) * 3

In [105]:
%%time

# Baseline: remember the already-seen values in a list.
seen_values = []
dedup_a = []

for value in a:
    # `in` on a list scans it element by element, so this check slows down as the list grows.
    if value not in seen_values:
        # First occurrence: keep it in the result (order preserved) and mark it as seen.
        dedup_a.append(value)
        seen_values.append(value)
        

CPU times: user 452 ms, sys: 0 ns, total: 452 ms
Wall time: 451 ms


In [106]:
%%time

# Same algorithm, but the already-seen values live in a set.
seen_values = set()
dedup_a = []

for value in a:
    # `in` on a set is a hash lookup, so the check does not scan the stored values.
    if value not in seen_values:
        # First occurrence: keep it in the result (order preserved) and mark it as seen.
        dedup_a.append(value)
        seen_values.add(value)

CPU times: user 5.36 ms, sys: 105 µs, total: 5.47 ms
Wall time: 5.16 ms


Другая задачка. 2-sum

In [111]:
a = [-1, -6, -3, 1, 2, 3, 4, 5, 6, 7, 3, 6, 8, 2, 10]
target_sum = 7

# Build the set once so that every membership test below is a hash lookup
# instead of a linear scan of the list.
set_a = set(a)
ans = set()

for elem in a:
    # The value that would complete `elem` to target_sum.
    pair_elem = target_sum - elem
    # NOTE: when target_sum == 2 * elem the element is matched with itself even if it
    # occurs only once in `a`; this cannot happen for an odd target and integer values.
    if pair_elem in set_a:
        # Store each pair as (smaller, larger) so (3, 4) and (4, 3) collapse into one entry.
        ans.add((min(elem, pair_elem), max(elem, pair_elem)))

ans

{(-3, 10), (-1, 8), (1, 6), (2, 5), (3, 4)}