In [1]:
import course;course.header()

AttributeError: module 'pandas' has no attribute 'set_option'

# The csv module

There are several ways to interact with files that contain data in a "comma separated value" format.

We cover the [basic csv module](https://docs.python.org/3/library/csv.html), as it is sometimes really helpful to retain only a fraction of the information of a csv to avoid memory overflow.

In [2]:
import csv

with open("../data/amino_acid_properties.csv") as aap:
    aap_reader = csv.DictReader(aap, delimiter=",") 
    for line_dict in aap_reader:
        print(line_dict)
        break

FileNotFoundError: [Errno 2] No such file or directory: '../data/amino_acid_properties.csv'

We can also use the csv module to write csvs, or tab separated value files if we change the delimiter to "\t"

In [3]:
with open("../data/test.csv", "w") as output:
    aap_writer = csv.DictWriter(output, fieldnames=["Name", "3-letter code"]) #, extrasaction="ignore")
    aap_writer.writeheader()
    aap_writer.writerow({"Name": "Alanine", "3-letter code": "Ala", "1-letter code": "A"})

FileNotFoundError: [Errno 2] No such file or directory: '../data/test.csv'

In [4]:
!cat ../data/test.csv

Der Befehl "cat" ist entweder falsch geschrieben oder
konnte nicht gefunden werden.


In [5]:
# fix it
with open("c", "w") as output:
    aap_writer = csv.DictWriter(output, fieldnames=["Name", "3-letter code"], extrasaction='ignore')
    aap_writer.writeheader()
    aap_writer.writerow({"Name": "Alanine", "3-letter code": "Ala", "1-letter code": "A"})

# Collections - high performance containers ... sorta

## [collections.Counter](https://docs.python.org/3.7/library/collections.html#counter-objects)
A counter tool is provided to support convenient and rapid tallies. For example

In [6]:
from collections import Counter
s = """
MQRLMMLLATSGACLGLLAVAAVAAAGANPAQRDTHSLLPTHRRQKRDWIWNQMHIDEEK
NTSLPHHVGKIKSSVSRKNAKYLLKGEYVGKVFRVDAETGDVFAIERLDRENISEYHLTA
VIVDKDTGENLETPSSFTIKVHDVNDNWPVFTHRLFNASVPESSAVGTSVISVTAVDADD
PTVGDHASVMYQILKGKEYFAIDNSGRIITITKSLDREKQARYEIVVEARDAQGLRGDSG
TATVLVTLQDINDNFPFFTQTKYTFVVPEDTRVGTSVGSLFVEDPDEPQNRMTKYSILRG
DYQDAFTIETNPAHNEGIIKPMKPLDYEYIQQYSFIVEATDPTIDL RYMSPPAGNRAQVI
"""
Counter(s)

Counter({'\n': 7,
         'M': 8,
         'Q': 14,
         'R': 20,
         'L': 24,
         'A': 29,
         'T': 28,
         'S': 23,
         'G': 20,
         'C': 1,
         'V': 31,
         'N': 16,
         'P': 17,
         'D': 28,
         'H': 10,
         'K': 18,
         'W': 3,
         'I': 23,
         'E': 21,
         'Y': 13,
         'F': 13,
         ' ': 1})

In [None]:
# Counter objects can be added together
Counter("AABB") + Counter("BBCC")

In [7]:
# Works with any type of object that are comparable
Counter([(1, 1), (1, 2), (2, 1), (1, 1)])

Counter({(1, 1): 2, (1, 2): 1, (2, 1): 1})

## [collections.deque](https://docs.python.org/3.7/library/collections.html#deque-objects)
Deque \[deck\] or double-ended queue can be used for many tasks, e.g. building a sliding window

In [9]:
from collections import deque
s = """MQRLMMLLATSGACLGLLAVAAVAAAGANPAQRDTHSLLPTHRRQKRDWIWNQMHIDEEKNTSLPHHVGKIKSSVSRKNAKYLLKGEYVGKVFRVDAETGDVFAIERLDRENISEYHLTA"""
window = deque([], maxlen=5)

In [None]:
for pos, aa in enumerate(s):
    window.append(aa)
    print(window)
    if pos > 7:
        break

In [None]:
Counter(window)

## [collections.defaultdicts](https://docs.python.org/3.7/library/collections.html#defaultdict-objects)
Defaultdicts are like dicts yet they treat missing values not with an error, thus testing if key exists is not neccessary and makes life easier :) Ofcourse, one needs to define the default value that is taken if a key is not existent. 

I use it a lot for counting 
```python
counter["error"] += 1
```
or collecting elements in lists
```python
sorter["typeA"].append({"name": "John"})
```

In [None]:
from collections import defaultdict

ddict_int = defaultdict(int)
#                        ^---- default factory
ddict_list = defaultdict(list)

In [None]:
ddict_int[10] += 10
ddict_int

In [None]:
ddict_int[0]

In [None]:
def default_factory_with_prefilled_dictionary():
    return {"__name": "our custom dict", "errors": 0}
ddict_custom = defaultdict(default_factory_with_prefilled_dictionary)


In [None]:
ddict_custom[10] += 10

In [None]:
ddict_custom["what_ever_key"]

In [None]:
ddict_custom[10]['errors'] += 10

In [None]:
ddict_custom