# Школа алготрейдеров. Блок торгового ПО и программирования
## Занятие 2. Стандартная библиотека языка Python. Обработка табличных данных с помощью библиотеки pandas

### [Тип `set`](https://docs.python.org/3/library/stdtypes.html#set) (неупорядоченные множества)

In [None]:
s = {1, 7, 'abc', 3.2}
s

In [None]:
len(s), list(s)

In [None]:
for x in s:
    print(x * 2)

In [None]:
[x * 2 for x in s]

#### Получить все уникальные элементы списка

In [None]:
a = [x % 7 for x in range(10) if x % 3 in (0, 2)]
a

In [None]:
set(a)

In [None]:
list(set(a))

In [None]:
{x % 7 for x in range(10) if x % 3 in (0, 2)}

#### Операции с множествами

In [None]:
a = set()  # пустое множество
b = {2, 3, 4}
a.add(1)
a.add(2)
b.remove(4)
a, b

In [None]:
a | b

In [None]:
a & b

In [None]:
a - b

In [None]:
a ^ b

In [None]:
a ^= b
a

In [None]:
1 in a, 2 in a

In [None]:
{3, 1, 2} > {3, 2}

In [None]:
{3, 1, 2} > {2, 1, 3}

#### Ограничения на элементы

In [None]:
s = set()
s.add(8)
s.add('text')
s.add(True)
s

In [None]:
s.add([5, 6, 7])  # элементы изменяемых типов нельзя класть в set

In [None]:
s.add((5, 6, 7))
s

#### Неизменяемый `set` — `frozenset`

In [None]:
s.add({1})

In [None]:
s.update((frozenset({2, 3}), frozenset({3, 2})))
s

#### Особенности

In [None]:
{False, True, 0, 1, 0.0, 1.0}

In [None]:
0 == False == 0.0 != 1 == True == 1.0

### [Тип `dict`](https://docs.python.org/3/library/stdtypes.html#mapping-types-dict) (словари)

In [None]:
d = {5: 'five', '7': 'seven', 3.14: '≈ pi', (9, 0): []}
d

In [None]:
type({})  # пустой словарь

In [None]:
d[9, 0]

In [None]:
d[9, 0] = 49
d

In [None]:
d[frozenset({-3, 'ab'})] = {89}
d

In [None]:
{x: x ** 2 for x in range(10)}

In [None]:
a = {1: 2}
b = a
b[1] = 89
a, b

In [None]:
from copy import deepcopy  # deepcopy lives in the standard `copy` module

a = {1: 2}
b = deepcopy(a)  # independent copy: later changes to b do not affect a
b[1] = 89
a, b

In [None]:
d = {'One': 1, 'Two': 2, 'Three': 3}
for x in d:
    print(x)

In [None]:
for key in d.keys():
    print(key)

In [None]:
type(d.keys())

In [None]:
for value in d.values():
    print(value)

In [None]:
for item in d.items():
    print(item)

In [None]:
for key, value in d.items():
    print('d[{}] = {}'.format(key, value))

In [32]:
list(d.items())

[('One', 1), ('Two', 2), ('Three', 3)]

#### Распаковка словарей

In [43]:
a = {'one': 1, 'two': 2}
{'three': 3, **a, 'four': 4}

{'four': 4, 'one': 1, 'three': 3, 'two': 2}

In [44]:
'one = {one}, two = {two}'.format(**a)

'one = 1, two = 2'

### Функции

In [2]:
def find_all_squares_in_range(max_value):
    """Return the perfect squares 0, 1, 4, ... that do not exceed max_value.

    The result is a list in ascending order; it is empty when max_value
    is negative.
    """
    result = []
    base = 0
    while True:
        square = base * base
        if square > max_value:
            break
        result.append(square)
        base += 1
    return result

In [3]:
find_all_squares_in_range(0)

[0]

In [4]:
find_all_squares_in_range(111)

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

In [5]:
sum(find_all_squares_in_range(10))

14

In [6]:
def find_all_squares_in_range(max_value, skip_even=False):
    """Return the perfect squares that do not exceed max_value, ascending.

    When skip_even is true, squares of even bases (0, 2, 4, ...) are
    left out, so only squares of odd numbers remain.
    """
    found = []
    base = 0
    while base * base <= max_value:
        # Keep the square unless we were asked to skip even bases and this one is even.
        if not skip_even or base % 2 != 0:
            found.append(base * base)
        base += 1
    return found

In [7]:
find_all_squares_in_range(111, True)

[1, 9, 25, 49, 81]

In [8]:
find_all_squares_in_range(111, skip_even=True)

[1, 9, 25, 49, 81]

In [9]:
def strange_abs(x):
    """Return -x for negative x; return None for everything else.

    The None result for non-negative input is deliberate: it mirrors a
    function that falls off its end without reaching a return statement.
    """
    return -x if x < 0 else None

In [12]:
print(strange_abs(-9))

9


In [81]:
print(strange_abs(9))

None


#### [Лямбда-функции](https://docs.python.org/3.5/tutorial/controlflow.html#lambda-expressions)

In [82]:
average = lambda x, y: (x + y) / 2
average

<function __main__.<lambda>>

In [84]:
average(6, 9)

7.5

#### Сортировка по параметру

In [85]:
a = [(1, 'one'), (4, 'four'), (2, 'two'), (3, 'three')]
a.sort()
a

[(1, 'one'), (2, 'two'), (3, 'three'), (4, 'four')]

In [88]:
a.sort(key=lambda item: item[1])  # «ключ» для сортировки — 2-й элемент каждого кортежа
a

[(4, 'four'), (1, 'one'), (3, 'three'), (2, 'two')]

In [90]:
companies = ['Finam', 'MOEX', 'Yandex', 'Google']
sorted(companies)

['Finam', 'Google', 'MOEX', 'Yandex']

In [92]:
# сравниваем сначала по длине, затем по названию; обратный порядок
sorted(companies, key=lambda name: (len(name), name), reverse=True)

['Yandex', 'Google', 'Finam', 'MOEX']

#### Обработка произвольного числа аргументов

In [25]:
def print_all_args(*args, **kwargs):
    """Print the positional arguments (as a tuple) and the keyword arguments (as a dict)."""
    template = 'Positional arguments: {},\nkeyword arguments: {}'
    message = template.format(args, kwargs)
    print(message)
    
print_all_args(2, 'text', None, first=1, second=2, third=3)

Positional arguments: (2, 'text', None),
keyword arguments: {'first': 1, 'third': 3, 'second': 2}


#### Генераторы

In [15]:
def find_all_squares_in_range_better(max_value):
    """Lazily yield the perfect squares 0, 1, 4, ... that do not exceed max_value.

    Being a generator, it produces values one at a time instead of
    building a list in memory.
    """
    base = 0
    square = 0
    while square <= max_value:
        yield square
        base += 1
        square = base * base

In [16]:
type(find_all_squares_in_range_better(10))

generator

In [17]:
sum(find_all_squares_in_range_better(10))

14

### [PEP8](https://www.python.org/dev/peps/pep-0008/): рекомендации по оформлению кода

### [`Fraction`](https://docs.python.org/3/library/fractions.html#module-fractions) — рациональные дроби

### [`Decimal`](https://docs.python.org/3/library/decimal.html#module-decimal) — вещественные числа произвольной точности

### [Модуль `datetime`](https://docs.python.org/3/library/datetime.html#module-datetime): работа с датой и временем

In [15]:
import datetime as dt
dt.time(21, 4, 39)

datetime.time(21, 4, 39)

In [16]:
from datetime import time
time(21, 4, 39)

datetime.time(21, 4, 39)

In [17]:
ts = dt.datetime(2016, 6, 3, 17, 24, 4)
ts

datetime.datetime(2016, 6, 3, 17, 24, 4)

In [18]:
ts.day

3

In [19]:
ts.minute

24

In [20]:
ts.strftime('%d.%m.%Y %H:%M:%S')

'03.06.2016 17:24:04'

In [21]:
dt.datetime.strptime('03.06.2016 17:24:04', '%d.%m.%Y %H:%M:%S')

datetime.datetime(2016, 6, 3, 17, 24, 4)

In [24]:
lecture_start = dt.datetime(2016, 9, 8, 20, 45)
lecture_end = dt.datetime(2016, 9, 8, 22, 15)
td = lecture_end - lecture_start
td

datetime.timedelta(0, 5400)

In [27]:
td.total_seconds()

5400.0

In [45]:
lecture_start + td

datetime.datetime(2016, 9, 8, 22, 15)

### Загрузка данных из файлов

#### [Загружаем котировки](http://www.finam.ru/profile/moex-akcii/pllc-yandex-n-v/export/?market=1&em=388383&code=YNDX&apply=0&df=7&mf=8&yf=2016&from=07.09.2016&dt=7&mt=8&yt=2016&to=07.09.2016&p=7&f=YNDX_160907_160907&e=.txt&cn=YNDX&dtf=1&tmf=1&MSOR=1&mstime=on&mstimever=1&sep=1&sep2=1&datf=1&at=1)

![](finam-2-yndx-params.png)

In [80]:
%ls

 Том в устройстве C не имеет метки.
 Серийный номер тома: B019-4922

 Содержимое папки C:\Users\User\Documents\IPython Notebooks\Финам\jupyter-notebooks

07.09.2016  22:21    <DIR>          .
07.09.2016  22:21    <DIR>          ..
07.09.2016  21:41    <DIR>          .ipynb_checkpoints
06.09.2016  22:14            68 675 finam-1.ipynb
07.09.2016  22:21           250 757 finam-2.ipynb
07.09.2016  22:09            33 693 finam-2-yndx-params.png
07.09.2016  22:09           162 023 YNDX_160901_160907.csv
               4 файлов        515 148 байт
               3 папок   2 294 702 080 байт свободно


In [67]:
filename = 'YNDX_160901_160907.csv'
file = open(filename)

In [100]:
file.read()[:100]

'YNDX,0,20160901,100010,1431.000000000,9\nYNDX,0,20160901,100010,1430.500000000,6\nYNDX,0,20160901,1000'

In [None]:
file = open(filename)

In [73]:
file.readline()

'YNDX,0,20160901,100010,1431.000000000,23\n'

In [101]:
# Read every line of the file into a list; the with-block closes the
# file handle as soon as reading is finished instead of leaking it.
with open(filename) as csv_file:
    lines = csv_file.readlines()
lines[:10]

['<TICKER>,<PER>,<DATE>,<TIME>,<LAST>,<VOL>\n',
 'YNDX,0,20160901,100003,1430.500000000,7\n',
 'YNDX,0,20160901,100010,1431.000000000,1\n',
 'YNDX,0,20160901,100010,1431.000000000,23\n',
 'YNDX,0,20160901,100010,1431.000000000,9\n',
 'YNDX,0,20160901,100010,1430.500000000,6\n',
 'YNDX,0,20160901,100014,1438.000000000,1\n',
 'YNDX,0,20160901,100018,1431.500000000,16\n',
 'YNDX,0,20160901,100018,1431.500000000,5\n',
 'YNDX,0,20160901,100018,1431.500000000,5\n']

In [115]:
# Iterating a file yields its lines: the first one goes to `header`,
# the starred target collects all remaining lines into the list `lines`.
# The with-block makes sure the file is closed afterwards.
with open(filename) as csv_file:
    header, *lines = csv_file
header, lines[:10]

('<TICKER>,<PER>,<DATE>,<TIME>,<LAST>,<VOL>\n',
 ['YNDX,0,20160901,100003,1430.500000000,7\n',
  'YNDX,0,20160901,100010,1431.000000000,1\n',
  'YNDX,0,20160901,100010,1431.000000000,23\n',
  'YNDX,0,20160901,100010,1431.000000000,9\n',
  'YNDX,0,20160901,100010,1430.500000000,6\n',
  'YNDX,0,20160901,100014,1438.000000000,1\n',
  'YNDX,0,20160901,100018,1431.500000000,16\n',
  'YNDX,0,20160901,100018,1431.500000000,5\n',
  'YNDX,0,20160901,100018,1431.500000000,5\n',
  'YNDX,0,20160901,100018,1430.500000000,21\n'])

In [116]:
header = [name[1:-1].capitalize() for name in header.strip().split(',')]
header

['Ticker', 'Per', 'Date', 'Time', 'Last', 'Vol']

In [119]:
# Preview: split only the first 10 rows into fields rather than
# splitting the whole file and discarding most of the result.
[
    line.strip().split(',')
    for line in lines[:10]
]

[['YNDX', '0', '20160901', '100003', '1430.500000000', '7'],
 ['YNDX', '0', '20160901', '100010', '1431.000000000', '1'],
 ['YNDX', '0', '20160901', '100010', '1431.000000000', '23'],
 ['YNDX', '0', '20160901', '100010', '1431.000000000', '9'],
 ['YNDX', '0', '20160901', '100010', '1430.500000000', '6'],
 ['YNDX', '0', '20160901', '100014', '1438.000000000', '1'],
 ['YNDX', '0', '20160901', '100018', '1431.500000000', '16'],
 ['YNDX', '0', '20160901', '100018', '1431.500000000', '5'],
 ['YNDX', '0', '20160901', '100018', '1431.500000000', '5'],
 ['YNDX', '0', '20160901', '100018', '1430.500000000', '21']]

In [124]:
# zip позволяет одновременно пройтись по двум последовательностям одинаковой длины
[
    {
        name: value
        for name, value in zip(header, line.strip().split(','))
    }
    for line in lines
][:5]

[{'Date': '20160901',
  'Last': '1430.500000000',
  'Per': '0',
  'Ticker': 'YNDX',
  'Time': '100003',
  'Vol': '7'},
 {'Date': '20160901',
  'Last': '1431.000000000',
  'Per': '0',
  'Ticker': 'YNDX',
  'Time': '100010',
  'Vol': '1'},
 {'Date': '20160901',
  'Last': '1431.000000000',
  'Per': '0',
  'Ticker': 'YNDX',
  'Time': '100010',
  'Vol': '23'},
 {'Date': '20160901',
  'Last': '1431.000000000',
  'Per': '0',
  'Ticker': 'YNDX',
  'Time': '100010',
  'Vol': '9'},
 {'Date': '20160901',
  'Last': '1430.500000000',
  'Per': '0',
  'Ticker': 'YNDX',
  'Time': '100010',
  'Vol': '6'}]