# Problems

### Comparing lists
Write a function, returning True (False) if two lists do (do not) contain the same elements.

- First imagine that elements in the list do not repeat,
```python
# these lists are now considered the same
a = [1, 2, 3, 1]
b = [3, 2, 1]
```
- then if some elements can repeat.
```python
# now these lists are are not considered the same
a = [1, 2, 3, 1]
b = [3, 2, 1]
```



In [6]:
def compare(x: list, y: list) -> bool:
    return set(x) == set(y)

compare([1, 2, 3], [3, 2,2, 1])

True

In [9]:
def compareD(x: list, y: list) -> bool:
    """Compare two lists.
     The algorithm is O(n*log(n)) due to sorting algorithm, comparison is only O(n)."""
    return sorted(x) == sorted(y)

compareD([1, 2, 3], [3, 2, 2, 1])

False

In [7]:
from collections import defaultdict

def count(x: list) -> dict:
    """Count the number of occurrences of each element in the list.
    The algorithm is O(n)."""
    cetnosti = defaultdict(int)
    for a in x:
        cetnosti[a] += 1
    return cetnosti

def compareEffective(x: list, y: list) -> bool:
    """Compare two lists.
    The algorithm is O(n)."""
    return count(x) == count(y)

compareEffective([1, 2, 3], [3, 2, 2, 1])

False

In [12]:
from random import randint
import timeit

l1 = [randint(1, 100) for _ in range(int(1e5))]
l2 = [randint(1, 100) for _ in range(int(1e5))]

# do 100 iterations for each function and compare the time
%timeit -n 100 compareD(l1, l2)
%timeit -n 100 compareEffective(l1, l2)

13.9 ms ± 91.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
8.13 ms ± 99.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


### n-grams
Count the number of [n-grams](https://en.wikipedia.org/wiki/N-gram) in a text. Sort them by frequency.
- for each n-gram find which letters follow it.

You can use for example the text [here](https://ipnp.cz/strelecek/supplementary/23ZS/poe.txt) and read it by lines:

```python
# create empty dictionary using defaultdict

for row in open('poe.txt'):
    # save n-grams from line into dictionary 
```

In [None]:
from collections import defaultdict

n = 3
kgrams = defaultdict(int)

for row in open('poe.txt'):
    # separate the line to n-grams
    for i in range(len(row)-n):
        kgrams[row[i:i+n]] += 1

# create pairs (frequency, n-gram)
pairs = [ (frequency, gram) for gram, frequency in kgrams.items() ]

# sort the pairs and print them
for frequency, gram in reversed(sorted(pairs)):
    print(frequency, gram)

### Parsing the input
Parse the following input
```
first line: 3 blue, 4 red; 1 red, 2 green, 6 blue; 2 green
second line: 1 blue, 2 green; 3 green, 4 blue, 1 red; 1 green, 1 blue
third line: 8 green, 6 blue, 20 red; 5 blue, 4 red, 13 green; 5 green, 1 red
```
as a list of lists of dictionary. Elements of the outer list are the rounds, the inner list elements are separated by semicolon, and the dictionary consists of colors.
The resulting object should look like
```python
[
    [{'blue': 3, 'red': 4}, 
     {'blue': 6, 'red': 1, 'green': 2}, 
     {'green': 2}],
    ...
 ]
```

In [None]:
from collections import defaultdict
colors=[]
for l in open("colors.txt"):
    line = l.split(":")[1].rstrip("\n").split(";")
    linelist = []
    for i in line:
        d = i.split(",")
        
        linedict = defaultdict(int)
        for elem in d:
            num,col = elem.split()
            linedict[col]=num
        linelist.append(linedict)
    colors.append(linelist)

print(colors[0])

[defaultdict(<class 'int'>, {'blue': '3', 'red': '4'}), defaultdict(<class 'int'>, {'red': '1', 'green': '2', 'blue': '6'}), defaultdict(<class 'int'>, {'green': '2'})]


In [None]:
def reverse_lines(filename: str, outputname: str)->None:
    """Prints the lines of a file in reverse order into outputname."""
    with open(outputname, 'w') as outfile:
        f = list(open(filename))
        # the last line doesn't have a newline character
        outfile.write(f[-1]+'\n') 
        for line in reversed(f[1::-2]):
            outfile.write(line) 
        outfile.write(f[0].rstrip())

def reverse_lines_and_words(filename: str, outputname: str)->None:
    """Prints the lines of a file in reverse order into outputname. 
    Now reverse even the words in lines."""
    def reverse_line(line: str)->str:
        return ' '.join(reversed(line.split()))

    with open(outputname, 'w') as outfile:
        f = list(open(filename))
        # the last line doesn't have a newline character
        outfile.write(reverse_line(f[-1])+'\n')
        for line in reversed(f[1::-2]):
            outfile.write(reverse_line(line)+'\n') 
        outfile.write(reverse_line(f[0].rstrip()))

reverse_lines('input.txt', 'output.txt')
reverse_lines_and_words('input.txt', 'output2.txt')

# Problematic problems

### Generating some nonsense
Use the previous code for generating text. *Choose the first n-gram randomly, then randomly choose the next letter from the list of letters that follow the n-gram. For random selection, you can use*
```python
import random
random.choice(your_list_of_letters)
```

In [None]:
from collections import defaultdict
import random

n = 3

text = ""
for row in open('poe.txt'):
    text += row[:-1] + ' '

# create a dictionary of continuations
continuation = defaultdict(list)
for i in range(len(text)-n-1):
    continuation[text[i:i+n]] += text[i+n]

# start from random n-gram
gen = random.choice(list(continuation))

# append random continuations, from the most probable ones
while len(gen) < 300:
    p = continuation[gen[-n:]]
    if p:
        gen += random.choice(p)
    # if the n-gram is not in the dictionary, choose another random one
    else:
        gen += '/' + random.choice(list(continuation))

print(gen)
