# LAB3 CDI-FIB

In [1]:
import math
from math import floor

## Entropy

In [2]:
def source_fromtext(txt, n=1):
    """Count every length-``n`` window of ``txt`` and list the counts sorted by window.

    Windows overlap: one window starts at each index from 0 up to
    ``len(txt) - n``.

    Args:
        txt: Text to scan.
        n: Window length (defaults to 1, i.e. single characters).

    Returns:
        A list of ``(window, count)`` tuples in ascending order of ``window``.
        The list is empty when ``txt`` is shorter than ``n``.
    """
    counts = {}
    for start in range(len(txt) - n + 1):
        window = txt[start:start + n]
        counts[window] = counts.get(window, 0) + 1

    # Windows are unique dict keys, so sorting the pairs sorts by window alone.
    return sorted(counts.items())

In [3]:
def entropy(txt, k=1, pre=""):
    """Empirical entropy, in bits, of the length-``k`` blocks of ``txt`` that follow ``pre``.

    A block starting at index ``i`` is counted only when the ``len(pre)``
    characters immediately before ``i`` equal ``pre``. With the default empty
    ``pre`` every (overlapping) block is counted.

    Args:
        txt: Text to analyse.
        k: Block length.
        pre: Context that must immediately precede a block for it to count.

    Returns:
        ``-sum(p * log2(p))`` over the relative frequencies of the counted
        blocks; ``0`` when no block is counted.
    """
    context_len = len(pre)
    counts = {}
    matches = 0
    for pos in range(len(txt) - k + 1):
        # Near the start of the text the slice is shorter than `pre`
        # (or empty), so it can only match when `pre` itself is empty.
        if txt[pos - context_len:pos] != pre:
            continue
        block = txt[pos:pos + k]
        counts[block] = counts.get(block, 0) + 1
        matches += 1

    result = 0
    for count in counts.values():
        prob = count / matches
        result -= prob * math.log2(prob)
    return result

def entropy_src(src):
    """Entropy, in bits, of a source given as symbol frequencies.

    Args:
        src: Iterable of ``(symbol, frequency)`` pairs, or a mapping from
            symbol to frequency. Frequencies are normalised by their total.

    Returns:
        ``-sum(p * log2(p))`` over the normalised frequencies. Symbols with
        frequency zero contribute nothing (the usual ``0 * log 0 = 0``
        convention) instead of raising a math domain error. An empty source
        or one whose frequencies sum to zero yields ``0``.
    """
    freqs = dict(src)
    total = sum(freqs.values())
    if total == 0:
        # Nothing to normalise by; avoid dividing by zero.
        return 0

    probs = [v / total for v in freqs.values()]
    return sum([-p * math.log2(p) for p in probs if p != 0])

In [4]:
def mean_length(src,code):
    """Frequency-weighted average codeword length.

    Args:
        src: Sequence of ``(symbol, frequency)`` entries.
        code: Sequence of codewords; ``code[i]`` is the codeword of ``src[i]``.

    Returns:
        ``sum(frequency_i * len(code[i])) / sum(frequency_i)``.
    """
    weighted_lengths = []
    total_weight = 0
    for position, entry in enumerate(src):
        weight = entry[1]
        weighted_lengths.append(weight * len(code[position]))
        total_weight += weight

    return sum(weighted_lengths) / total_weight

def product_prob(src):
    """Build the source of ordered symbol pairs from a single-symbol source.

    Args:
        src: Iterable of ``(symbol, weight)`` pairs (or a mapping).

    Returns:
        A list of ``(first + second, weight_first * weight_second)`` tuples for
        every ordered pair of symbols, first symbol varying slowest. If two
        different pairs concatenate to the same string, the later one
        overwrites the earlier one's weight.
    """
    weights = dict(src)
    pair_weights = {
        first + second: w_first * w_second
        for first, w_first in weights.items()
        for second, w_second in weights.items()
    }
    return list(pair_weights.items())

In [5]:
def kraft_inequality(lengths, q):
    """Check the Kraft-McMillan inequality ``sum(q ** -l) <= 1``.

    The sum is computed with exact rational arithmetic. A floating-point sum
    can land just above 1 for a complete code (for example nine ternary
    codewords of length 2), which would wrongly reject it.

    Args:
        lengths: Iterable of codeword lengths.
        q: Size of the code alphabet.

    Returns:
        ``True`` when the lengths satisfy the inequality, ``False`` otherwise.
    """
    from fractions import Fraction

    base = Fraction(q)
    return sum(base ** -l for l in lengths) <= 1

def format_to_alf(number, base, length, alf):
    """Write ``number`` in the given base using the digits of ``alf``, left-padded.

    Args:
        number: Non-negative integer to convert.
        base: Numeric base.
        length: Minimum number of digits; shorter results are left-padded
            with ``alf[0]``.
        alf: Digit alphabet; ``alf[d]`` is the symbol for digit value ``d``.

    Returns:
        A string, most significant digit first. The result is always a
        ``str`` (each digit goes through ``str()``), including for zero with
        ``length == 1``, which previously returned the raw ``alf[0]`` object.
    """
    is_zero = number == 0

    # Collect digits least-significant first.
    digits = []
    while number > 0:
        digits.append(alf[int(number % base)])
        number //= base

    if is_zero:
        # Zero is always written with at least one digit.
        digits.append(alf[0])

    # Appending here pads on the left once the list is reversed.
    while len(digits) < length:
        digits.append(alf[0])

    return ''.join(str(d) for d in reversed(digits))

In [6]:
from collections import Counter
def canonical_code(L,q=2, alf = [0,1]):
    """Build a canonical ``q``-ary code with the given codeword lengths.

    Codewords of equal length receive consecutive values; the first value of
    each length is derived from the number of shorter codewords.

    Args:
        L: Iterable of codeword lengths, one per symbol.
        q: Size of the code alphabet.
        alf: Digit alphabet passed to ``format_to_alf``. The default list is
            only read, never modified.

    Returns:
        A list of codewords in the same order as ``L``; an empty list for
        empty input; or, when the lengths violate the Kraft-McMillan
        inequality, the explanatory string below.
    """
    # Materialise once: L is traversed several times and may be a dict view
    # or a one-shot iterable.
    L = list(L)
    if not L:
        return []

    if not kraft_inequality(L, q):
        return 'The entry does not satisfy Kraft-McMillan inequality.'

    bl_count = Counter(L)
    bl_count[0] = 0

    # next_code[l] is the numeric value of the next unused codeword of length l.
    next_code = {}
    code = 0
    for l in range(1, max(L) + 1):
        code = (code + bl_count[l-1])*q
        next_code[l] = code

    def_code = []
    for length in L:
        def_code.append(format_to_alf(next_code[length], q, length, alf))
        next_code[length] += 1
    return def_code

In [7]:
def huffman_code(txt, src, package_size):
    """Build a binary Huffman code for ``src`` and return it in canonical form.

    The two lightest nodes are merged repeatedly. A merged node is named by
    concatenating the symbols it contains, so every symbol must be exactly
    ``package_size`` characters long for the node to be split back into its
    symbols.

    Args:
        txt: Unused; kept so existing callers keep working.
        src: Sequence of ``(symbol, frequency)`` pairs.
        package_size: Length in characters of every symbol in ``src``.

    Returns:
        A list of ``(symbol, codeword)`` pairs in the order of ``src``, with
        codewords over ``'0'``/``'1'``. An empty source gives ``[]`` and a
        one-symbol source gives that symbol the codeword ``'0'``.
    """
    # d_nodes[symbol] counts the merges the symbol takes part in, which is
    # its depth in the Huffman tree and therefore its codeword length.
    d_nodes = {}
    for c in src:
        d_nodes[c[0]] = 0

    sorted_d = sorted(src, key=lambda x: x[1])

    if len(sorted_d) <= 1:
        # No merge happens, so every depth would be 0; give the lone symbol
        # a one-bit codeword instead of an empty one.
        return [(symbol, '0') for symbol in d_nodes]

    while len(sorted_d) > 1:
        first, second = sorted_d[0], sorted_d[1]

        # Every symbol inside either merged node moves one level deeper.
        for node in (first, second):
            for i in range(0, len(node[0]), package_size):
                d_nodes[node[0][i:i+package_size]] += 1

        sorted_d[1] = (first[0] + second[0], first[1] + second[1])
        sorted_d.pop(0)
        # The sort is stable and the merged node sits first, so on equal
        # frequency it stays ahead of the untouched nodes.
        sorted_d = sorted(sorted_d, key=lambda x: x[1])

    return list(zip(d_nodes.keys(), canonical_code(d_nodes.values(), 2, ['0','1'])))

In [8]:
def arithmetic_encode_bin(txt,src,k):
    """Arithmetic-encode ``txt`` into a bit string using ``k``-bit interval bounds.

    Characters of ``txt`` are accumulated until they spell a symbol of
    ``src``; that symbol then narrows the current interval. Characters left
    over at the end that do not complete a symbol are not encoded.

    Args:
        txt: Text to encode.
        src: Sequence of ``(symbol, frequency)`` pairs.
        k: Number of bits used for the interval bounds.

    Returns:
        The emitted bits followed by a final ``'1'``.
    """
    total = sum([x[1] for x in src])
    init_probs = [(x[0], x[1]/total) for x in src]
    cumulative_probs = [0] + [sum(p[1] for p in init_probs[:i+1]) for i in range(len(init_probs))]
    symbols = [entry[0] for entry in init_probs]

    low = '0' * k
    high = '1' * k
    emitted = ""
    pending = 0   # underflow bits waiting for the next emitted bit
    chunk = ""

    for ch in txt:
        chunk += ch
        if chunk not in symbols:
            continue

        # Narrow [low, high] to the sub-interval of the symbol just read.
        base = int(low, 2)
        width = int(high, 2) - base + 1
        idx = symbols.index(chunk)
        new_low = base + int(floor(width * cumulative_probs[idx]))
        new_high = base + int(floor(width * cumulative_probs[idx+1]) - 1)
        low = bin(new_low)[2:].zfill(k)
        high = bin(new_high)[2:].zfill(k)

        # Shared leading bit: emit it, then the pending bits with the
        # opposite value, and shift both bounds left.
        while low[0] == high[0]:
            bit = low[0]
            emitted += bit
            emitted += ('1' if bit == '0' else '0') * pending
            pending = 0
            low = low[1:] + '0'
            high = high[1:] + '1'

        # Bounds of the form 01... / 10...: drop the second bit of each and
        # remember one more pending bit.
        while low[:2] == '01' and high[:2] == '10':
            low = low[0] + low[2:] + '0'
            high = high[0] + high[2:] + '1'
            pending += 1

        chunk = ""

    return emitted + '1'

In [9]:
def arithmetic_decode_bin(code, k, src, l):
    """Decode a bit string produced by the ``k``-bit binary arithmetic encoder.

    Args:
        code: String of ``'0'``/``'1'`` characters.
        k: Number of bits used for the interval bounds (same as when encoding).
        src: Sequence of ``(symbol, frequency)`` pairs (same as when encoding).
        l: Number of characters to recover; decoding stops once the output
            reaches this length.

    Returns:
        The decoded text.

    Raises:
        ValueError: If the current code value falls in no symbol interval.
            Previously this case looped forever.
    """
    suma = sum([x[1] for x in src])
    init_probs = [(x[0],x[1]/suma) for x in src]
    cumulative_probs = [0] + [sum(p[1] for p in init_probs[:i+1]) for i in range(len(init_probs))]

    alpha = '0' * k
    beta = '1' * k
    # The code is implicitly followed by zeros, so a code shorter than k bits
    # must be zero-padded on the right before it is read as a k-bit value.
    gamma = code[:k].ljust(k, '0')
    used = min(k, len(code))
    x = ''

    def next_bit():
        """Return the next unread bit of ``code``, or ``'0'`` past its end."""
        nonlocal used
        if used >= len(code):
            return '0'
        bit = code[used]
        used += 1
        return bit

    while len(x) != l:
        low = int(alpha, 2)
        delta = int(beta,2) - low + 1
        value = int(gamma, 2)

        # Find the symbol whose sub-interval contains the current value.
        for pos in range(len(init_probs)):
            lower = low + int(floor(delta * cumulative_probs[pos]))
            upper = low + int(floor(delta * cumulative_probs[pos+1]) - 1)
            if lower <= value <= upper:
                x += init_probs[pos][0]
                alpha = bin(lower)[2:].zfill(k)
                beta = bin(upper)[2:].zfill(k)
                break
        else:
            raise ValueError('code value does not fall in any symbol interval')

        if len(x) >= l:
            break

        # Mirror the encoder: shift out a shared leading bit.
        while alpha[0] == beta[0]:
            alpha = alpha[1:] + '0'
            beta = beta[1:] + '1'
            gamma = gamma[1:] + next_bit()

        # Mirror the encoder: drop the second bit while bounds are 01... / 10...
        while alpha[:2] == '01' and beta[:2] == '10':
            alpha = alpha[0] + alpha[2:] + '0'
            beta = beta[0] + beta[2:] + '1'
            gamma = gamma[0] + gamma[2:] + next_bit()
    return x

In [10]:
def encode(txta,corr):
    """Encode ``txta`` by replacing each symbol with its codeword.

    The text is scanned left to right; as soon as the pending piece of text
    is a key of ``corr`` its codeword is emitted and a new piece starts.

    Args:
        txta: Text to encode.
        corr: Mapping, or iterable of pairs, from symbol to codeword.

    Returns:
        The concatenated codewords, or ``'Message could not be encoded'`` when
        the end of the text is reached with an unmatched piece pending.
    """
    table = dict(corr)
    encoded = ''
    start = 0
    for end in range(len(txta) + 1):
        piece = txta[start:end]
        if piece in table:
            encoded += table[piece]
            start = end

    if start == len(txta):
        return encoded
    return 'Message could not be encoded'

In [11]:
def decode(txtb,corr):
    """Decode the string ``txtb`` by replacing each codeword with its symbol.

    The string is scanned left to right; as soon as the pending piece equals
    a codeword of ``corr`` its symbol is emitted and a new piece starts.

    Args:
        txtb: Encoded string.
        corr: Mapping, or iterable of pairs, from symbol to codeword.

    Returns:
        The decoded text, or ``'Message could not be decoded'`` when the end
        of the input is reached with an unmatched piece pending.
    """
    corr = dict(corr)

    # Inverse table for constant-time lookups instead of scanning the list of
    # codewords at every step. When several symbols share a codeword the
    # first one wins, as with list.index().
    symbol_of = {}
    for symbol, codeword in corr.items():
        symbol_of.setdefault(codeword, symbol)

    txt_decoded = ''
    i = 0
    for j in range(len(txtb) + 1):
        substring = txtb[i:j]
        if substring in symbol_of:
            txt_decoded += symbol_of[substring]
            i = j

    if i != len(txtb): # all the text could not be processed
        return 'Message could not be decoded'
    return txt_decoded

## Testing

In [None]:
#quijote = open("quijote_clean.txt","r",encoding="utf-8").read(); quijote[:1000]

In [None]:
#[entropy(quijote,k) for k in range(1,5)]

In [None]:
#[entropy(quijote,k)/k for k in range(1,5)]

In [None]:
#entropy(quijote,1," ")

In [None]:
#[entropy(quijote,1,"q"), entropy(quijote,1,"a"), entropy(quijote,1,"j"), entropy(quijote,1,"m")]

In [None]:
#[entropy(quijote,2,pre="res"), entropy(quijote,1,pre="quij"), entropy(quijote,3,pre="al"), entropy(quijote,1,pre="espadas")]

In [None]:
#[entropy(quijote,k+1) - entropy(quijote,k) for k in range(1,5)]

In [None]:
#src = source_fromtext(quijote); print(src)

In [None]:
#huf = huffman_code(quijote,src,1)
#print(huf)

In [None]:
#cod = [y for x,y in huf];
#entropy(quijote,1), mean_length(src,cod)

In [None]:
#src2 = source_fromtext(quijote,2);
#print(src2)

In [None]:
#huf2 = huffman_code(quijote,src2,2)
#print(huf2)

In [None]:
#cod2 = [y for x,y in huf2];
#entropy(quijote,2), mean_length(src2,cod2)

In [None]:
#txt="001110001000000000001000"; src = source_fromtext(txt); print(src)

In [None]:
#cod = arithmetic_encode_bin(txt,src,8);
#print(cod)

In [None]:
#txt_decoded = arithmetic_decode_bin(cod,8,src,len(txt));
#txt_decoded == txt

In [None]:
#txt="Setze jutges d'un jutjat mengen fetge d'un penjat.";
#src = source_fromtext(txt);
#print(src);

In [None]:
#cod = arithmetic_encode_bin(txt,src,8);
#cod

In [None]:
#txt_decoded = arithmetic_decode_bin(cod,8,src,len(txt));
#txt_decoded == txt

In [None]:
#len(txt)*entropy(txt,1),len(cod)

## Atenea (intento 1)

### Ejercicio 1

In [None]:
txt = "dont kill her bosh you go for her looks you slit her nostrils you notch her ears like a sow by god thats keep your opinion to yourself it will be safest for you ill tie her to the bed if she bleeds to death is that my fault ill not cry if she does my friend youll help me in this thing for my sake thats why youre here i mightnt be able alone if you flinch ill kill you do you understand that and if i have to kill you ill kill her and then i reckon nobodyll ever know"
src = [(' ', 72190), ('a', 23858), ('b', 5017), ('c', 6697), ('d', 15027), ('e', 36243), ('f', 6113), ('g', 6699), ('h', 19838), ('i', 19165), ('j', 662), ('k', 3070), ('l', 12294), ('m', 7309), ('n', 20475), ('o', 23601), ('p', 4825), ('q', 180), ('r', 15584), ('s', 18060), ('t', 29363), ('u', 9107), ('v', 2433), ('w', 8111), ('x', 412), ('y', 6809), ('z', 158)]

In [None]:
# Arithmetic-encode the exercise text with 20-bit interval bounds (k=20).
answer = arithmetic_encode_bin(txt,src,20);
print(answer)

### Ejercicio 2

In [None]:
txt = "1101110101111001111011111010011101111001111111100111111111111111110110010111011101100111111110111111111100011111111101011001110101111101111010101111011101101101100111110111101011110111111101101111111101011111111101111101111111111111110111111111101111011111111110110111101111011111011111111101111011110111101111011101111111011011011111100101111111111101111111110110011111101101111111011101110011111111011110101111011111111111111111101111111111010111111011111111111011110110111101110111111101111111101110011111111111100101011101111111110111101111100101110111110101111110111111101111011101110111111101111111011101110111011001111101010111011011111111011111100111111111110111110111011111101111111111100111101101111110010111011011111111011011101110011111111010101101111111111111011011011111111111111111110111010101011011101011011111101101011111111111111111110111010101111111111111101010111011011110111101111111111101111111011111111010101011010110111111100010010110111111010111101101011011111010101101111011011101011110110111110110011111110111111011111111111111011011111111111111110011100110110111111110101111010111111111111111111111111101111101010111111011111111111111111111101111111101101111111111111111111011011101011111111111001111011110011110110101011101110111110101111111111111011111111111111111110111101111011111011101111011101111011011111101111111111111111111111101100101111111110011110101011111111010101111111111110110101111101101111011011111101101111011011110111110101111011111110111101111001111111110111101110111011011011011110111111101111111111111111001111111110011011010111111011111011011111111111111111111111111111111111011111111111110101011110111111111111111111111101111011110111101110101110111110101101011111101111111111111111111011101111111111011011110110110111111111111111101101111011111011111011110111111011101011111110110101111101011111110011111011101101101111111000111110101101101111110111001111011101011110111111110111111111101111111111110001111111110111101011011101111111111111
11101111100101110111110111111111111111010111111101101111111111001011111111101111110111111011011011010101011111111100010111101100110111111100101111001110111110110011101111011110111101011111111111111100111111001010111101100111111111110111111101001110101110111101111101111111011101011010101110101101101111011111111101011111110111011111111111110111101101111110101101111111101111111101010010101110101111110111111111001111111011111010101111110110101010111111001010111111111110111111111111111111110111101111111100101101011111111011110111111010111111101011101111111110101110111110111111111111111111111011111111011111111111111111011010111011111011111111111011011111101101111111111111011111111110010110111111111011011111111110111100111111111101110011111111101011101111101110101111001111011111101111111111111111111101011101111110111111111111111100111111100111111011111111100111111111101110111101111101111011111100111011101110111110110111011111111111011101111110111111111110111100111111111111111101111111101101111111101111011110111101011111010111111011001111101111111001111011110011111110110011111101110111101111101111111111011110111110111011101101111010111101111110111010110111110111101000110111111110101101111111101101111111111111001111011100111111111011111111110101010101111111111100111111111111011011111101011111011011011111111111101110111110110011110011001111010111111011101111111111010111111111011101111111110110111111011011111101111101110010111111101110111111111111111111111111111111111110110101011110111011001111111111111101111011110111111110111111101111011111111111101011011101001101111111111110100011111111111011111111111011001111111101111111101111111101111101111111011111011011111101011111001011111011101111111111110100101001011110111111101101101111111111111110111100110111111101011110111111111011110111110101110110101111011111001111110111101111111110101011111011111111111111111111011111110111101101101111101111011111111111011111101110111101111111111111111011101110111011011101111100110110111110111010
11110111111101001011001011011111111110111111111111010101011111111011111111011011111101111111101011011011111111011101101101111111111011111111111110111101111011111111111101011111011011111111011110101111110101110111111001011011001011111011111111111111111111111101011111111101111110110111111111111110111111011011111111101110101110111110101111111111101101101111011111110011110011110111111111101110111111111101110111111101110111111111011111010111101111110111101111111111111111111100111101111101111111111011111111101111111111111111110111101111111111001111010111111111110010110101010111111111110111110110111111110101101111010011111111101111111011001011101011011110111101010111111011111110111101111110110110101011110101010111110101111111101101110111110100110111111101101011110111001111111111110010101111111111010110111111011011101110111111111111010101111111111111010110011101111111011011111011111111111101101010111111110101110111011110011101101111110111110111101111011111101111011011111111011111001111110101111010101111101111110100111111101101001111111100111101111111111111100011011111110011111111111111111111111111111110110001111111010111111100101011111111110111101111110110111111011111111111111011011111111111111111101111111101110111111111111001011011111011111111111111101011110111111100111110111111111111111111111110111111001111010010111111111110111111101101110110111111111111111101010111111110011110111111111101111101111111001101101101111101001010111011011111111111111110111011010111011101101111011101111010111011101110101111011110101110110101111111111111101110111010111110101111111110111110111011010101111111011100101101111111011011111111011111111111110111110111101111111101111111111111111011111110111100110111110111010110111111011111101101111111011111110101101101111110111101101101011011110111111101110111110011111111111010111111010011111010111111111010101111110101101110111101111111011111110111110111111111111011101111110011110111111111111101110110111100011111101011101011111011011010101111111111011111
101101110110111111110111111011111101111011011110010011111111110001111010111101011011111011101101001111110111111011001011111111101111011111111011111111111111111011101111011011101111111110111101101011111100100111111111110110110011110111010110110010011111101011101110110111110111111111011111110111111110110101111111111110011111110100111111010111110011011110111111011111011010110110111011111111011100111111110111111111011011110101111110110111011110111111110111101111011101111111110111011101010011111011111111101111011111101101111111111110001111101111110101001110110111110111101010111011111111101011111111101111010111111011110011111011111111111101001111111101011101101011111011101010101010111011011111100111001101111111101111111111111110111110111111111110111011101111111111011110101110101101111111110111011110011111111111110110101111110111101101111111101110110101011001111111010111010101111111101110111111110011101111110011111110111"
src = [('00', 10), ('01', 209), ('10', 162), ('11', 604)]

In [None]:
# Arithmetic-encode the bit text with 16-bit interval bounds (k=16);
# the source symbols here are the two-character blocks listed in src.
answer = arithmetic_encode_bin(txt,src,16);
print(answer)

### Ejercicio 3

In [None]:
txt = "them too come along huck weve been in here a long time its getting late i reckon im hungry too well eat and smoke when we get to the skiff they presently emerged into the clump of sumach bushes looked warily out found the coast clear and were soon lunching and smoking in the skiff as the sun dipped toward the horizon they pushed out and got under way tom skimmed up the shore through the long twilight chatting"
src = [(' ', 72190), ('a', 23858), ('b', 5017), ('c', 6697), ('d', 15027), ('e', 36243), ('f', 6113), ('g', 6699), ('h', 19838), ('i', 19165), ('j', 662), ('k', 3070), ('l', 12294), ('m', 7309), ('n', 20475), ('o', 23601), ('p', 4825), ('q', 180), ('r', 15584), ('s', 18060), ('t', 29363), ('u', 9107), ('v', 2433), ('w', 8111), ('x', 412), ('y', 6809), ('z', 158)]

In [None]:
# Build the Huffman code for single-character symbols (package_size=1),
# then encode the text with the resulting symbol -> codeword table.
huf = huffman_code(txt,src,1)
corr = dict(huf)
answer = encode(txt,corr)
print(answer)

### Ejercicio 4

In [None]:
src = [(' ', 72190), ('a', 23858), ('b', 5017), ('c', 6697), ('d', 15027), ('e', 36243), ('f', 6113), ('g', 6699), ('h', 19838), ('i', 19165), ('j', 662), ('k', 3070), ('l', 12294), ('m', 7309), ('n', 20475), ('o', 23601), ('p', 4825), ('q', 180), ('r', 15584), ('s', 18060), ('t', 29363), ('u', 9107), ('v', 2433), ('w', 8111), ('x', 412), ('y', 6809), ('z', 158)]
code = "111001110100100000101010011010000110001000000110001000000100011101110001111010100011010010001011111111000000101011001010000101111011010010011101000110111001001011011010000010110010001111001110111010011000011000101101011001001000110010111101010001010001100011011011010010011111011000101110000100010001110011101111100011001101110100110110011010101111001110100100010101110010000011010010000010100000011011100011010100110011101011111010011001011101010000000100011101100010000101111000010110000010011100000010000100000110010010001111101001100010101001011100110011110101010101001100111000100110011010001011110010101001100100011011010000010111110010010111101100100000110011100100010101110111000100000001010101011101010011011000001101101000000011101110000001101001111110001111100101101101000101000000111101010110101001001000011001101011110101010000011000111100101100100110001010111010010101101011000000001100010000100101110111100001010000010000001000001010011100101110001011111100000011101010011101101010110010011001100101111111001010011100011010110001110010011110001110000110001000010010101011000100110100000001000100100000100110100000111100011100101110001010011010110111110001101101111111010100100100111111010100011010010101111110101110101110011101100010100111111111000001011110001100100000011100001100110100000011100101000111100111011000010111011110101100000011101001000111010101111011001001011000000101101101001110000100000010110011110110011001101011100001000111000111010110100010011001111110110011001110000101101010001001001111011110011110100001101101010111100110111110011111100011010001110111000110110000001010001100001011100001100010011010000110100110010010010101110001011001010010100111001000100100111000011010011101010110011110010101011000010111100001111101001001001001010011010011100100111101111001001000001100110111110100110000001010000100011100110000100101011010001011001110001001111111011001010100010100110111100100000001110110010001000011111100000110001100111001111100101100010111010001
10100001011111000101001110111111011111111111001001100001111111110000010000000111110101000111110000110010000001111110100011010111010100111011001001101101011111111010000110100011101000110001110011100101110011000101001000100011111011000001000000100101111010101110010001010010010100011111010001010000110000110101001010100001110000111100100101011000010100000111110100001100100011110110111010001010001100000111111100111111110100110011010011001000110110110110011000111010011000101000100010110010001110001100001111111101000001101001110010001000101110011101011001111010000100110010110100111010111000000110101011110110010010100000010000010001010011001000101110111110000101110001100110001000100110010000111000011010111111111011000000100000111011111011101011111010000011000000010010000011010000000110001101010101010001100011110110100100011011100011100110000111110011100000011111011110100110011111010110101001111001001001101010010101111101110010010000010110110000100011010001000010010101101100001110000001000110011111011001010001011100001100011011110110010111001110010010110111101100010100001000101010010010111001101001011100100001010111110001111000110010111101100011100001001011100101000101101001100011101010101001000000000110101110111001000100011011110001001001001000011101000110100100010101110110111100001100111010111101111101010100101000000011010100001111110100000011011001110001101111010000000010011011011111011001101011100110101110001110110101100110100110011000001000100001111000011000001110011011011111010000001000001111011101100010111000101"

In [None]:
# Arithmetic-decode with 22-bit interval bounds, stopping once 872 characters are recovered.
answer = arithmetic_decode_bin(code,22,src,872)
print(answer)

### Ejercicio 5

In [None]:
src = [('00', 3325), ('01', 197), ('10', 13), ('11', 799)]
code = "010010011001110011001101110101001010101100000110000101100110110101111001010111001111010000010001010101001011010001100101111111010000101000100011011010100100011100101100001000100110010000000011111000111011001101101011011000110100100010001100111000110101101100110011100011100010100000110101010111010101000111000011111111101010001111101011111100110111000101000000010011011111111001100001100010010011111101110011011111110101000010000011011100001000110111110110010010111110101000010010011111111111000100010100011001100010111010010110001001001001111100010110110010011100101100111101011001101111010000101010100011101010010010100000101010100111101101010101000001101011001010100011000110111011111101000110000011111111010100000100011010100101111101111101101010000001101011110111000101000101101110010000100001100011010111000000001000010110010110110010110010111110111111111101100010001011001111011011100001000011111110011010011100100111111100111000111110011001110000110011001110110111110011010100110111101100001001111110110101100010111011000101011100111101001010010011110011001000100001000000000111110110010101101111111101001101100001100011111111111000011001100001111100100100101101100101111110111100000101011110001000000100111101101011011110010010111000011111011111010110010011111100011000000100111110101100111110011010110100100011010100010111011110111010111011000101001111000000110100110100101000001011111000010111000011000111011000100000100000010110110001001111101011101111000101111101011100011101111101100100111100001000010011111111101101101001100100110101101000111000110100110010000101010011000110010101101101111100000010011101000010111110110011001110100010100010111100010010110011101100000010101100100001000101110000000110011101110100000001011111100001100101111010100001110010011010010100001011001110101100110001001011100001001010000011000010001011010110010001010111111100001001111101100001011001000101111111111001111011100111001101010111000010000010010101000101100001010011001001100010011001010000
0101110000010001001101100011010000101010011101000000000010101110000100110100010000011001111000011110011110001010000000101111110010110100100010010101101010101000110011101010101110110100000100100000110011110100110010010110110101110000000010110110001110001001111011000100001010100111110010111001110111101100100110011111111001111010011000100111111001010011111001101000001110101111101001110101100111100011110101110011011001001010101001111110100110111101100000000100011001001111011001010101100100010000111110100111001110011011011111111111001000001101001101100010011111011110111010100001101101111110111111001001011100111101110000110001111011101100111111011101110110111001110101100000101111011001101101010001101111010011110110010010100101100100001100010110011111110110000111001000011000000011111100001110101"

In [None]:
# Arithmetic-decode with 15-bit interval bounds, stopping once 5914 characters are recovered;
# each decoded symbol is one of the two-character blocks listed in src.
answer = arithmetic_decode_bin(code,15,src,5914)
print(answer)

### Ejercicio 6

In [None]:
src = [(' ', 72190), ('a', 23858), ('b', 5017), ('c', 6697), ('d', 15027), ('e', 36243), ('f', 6113), ('g', 6699), ('h', 19838), ('i', 19165), ('j', 662), ('k', 3070), ('l', 12294), ('m', 7309), ('n', 20475), ('o', 23601), ('p', 4825), ('q', 180), ('r', 15584), ('s', 18060), ('t', 29363), ('u', 9107), ('v', 2433), ('w', 8111), ('x', 412), ('y', 6809), ('z', 158)]
code = "101101100101001000010111110100100111010110000101110100010011110000011011101001100001011011101000100111100000101101011110101110110101110100100100011011101010011011001010111100011100100010010111011110100100110111101101011100000110110111110001011011001010010100110100111110111111000110111011001001101001001101111011010111010001001111000001011011001010111110100011010010111100101001100101110100000110010100111011110101001111010011110100101110000010111001000100110110101010110100011110000011100011010100111101000101011101010011111110011110001110010011011101100101111101011110001110010001001000110000011101111010100111100001001000011110111111100001001010001100110011000111001000100101000011001010011010010111101001000111100001011100000010000111010010111101011011001011101000100010011111010001100101001110001001111100100011000001001111100101100010000100001011111010010110110011110001110010010000100111010010111001111110001011011001001011001011100100111011110101001111010011110100101001000100110110010111001001100010010001000010110110011110001110010001111010001011011001010010101111001101001011010101100111101010011111000011110000010110110010100111101100111010110011100000101110010011101001001111110010100010000110110100111000111110001111010100100010110010111001001110011001000100100011000001100010010010110110010010110011111110010111010111110001011011001111000111001001011100111101000101010011001100000111000100111110010001100000011001111110101010010111001111000001011100111010111010010110001011010111000001111010111101101100001000011000010110100111110100101001011100100110001101001111000111111000010010001100000101011110101010100110100010110110010100110000101101001111101001010011100111010010111110100101110010011011001010010101001000111100010110101100010100101001011011001001011001000100110110110011110001110010011011011110010110010110110010100011010011110110101001001111000000100001101110110010010001101110101001011100100110000111101011101111001010011111000011001111110101010010111001111000000111
10000001100111101000110100101110000010100100101001100011111100101111011101100011001111110100011100011010100111101000111101011110110110110001101001001111010111100011100100111000110101001111010001011011001010010011101011000010111010001110001001111100110101011011000100111100000111111110111100110011111100011110101001010001101111001111010011110001110010011011011110010110001100101001010100110011000001110010100111111100101001011011001001011001111001110110011100101001111111001010001111011001111001110110011011001011110001001110100101000110010100011001001100000111101100111010100000011001111010001010011001001101111111110110010101101000100111111110010111010001110001001110101011111110000101011111100101101011000110100111110011010101000010010001100000111000011111111111100101110000001100111101000011010011110110101101000111100111011100110000010011100111000001111111101111001100011100101010011100011010010011111111111010111010001111111101111001010101101111101110101001001111000"

In [None]:
# Rebuild the Huffman code from the source (the first argument of
# huffman_code is not used by it), then decode the bit string with it.
huf = huffman_code(code,src,1)
corr = dict(huf)
answer = decode(code,corr)
print(answer)

### Ejercicio 7

In [None]:
src = [('a', 3348), ('b', 50315), ('c', 1191), ('d', 12), ('e', 3705), ('f', 1934), ('g', 4), ('h', 12439), ('i', 227), ('j', 616), ('k', 1), ('l', 1687), ('m', 13664), ('n', 92701), ('o', 179), ('p', 1), ('q', 313), ('r', 7), ('s', 59803), ('t', 3), ('u', 2), ('v', 43347), ('w', 37965), ('x', 50), ('y', 7), ('z', 3)]

In [None]:
# Source of ordered symbol pairs, each weighted by the product of its two frequencies.
pp = product_prob(src)
# Huffman code over the pairs; every pair symbol is 2 characters long.
huf = huffman_code('',pp,2)
# huf lists the symbols in the same order as pp, so the codewords line up
# positionally with pp as mean_length requires.
huf_code = [x[1] for x in huf]
m_length = mean_length(pp, huf_code)
v_entropy = entropy_src(pp)
# Difference between the mean codeword length and the entropy of the pair source.
answer = m_length - v_entropy
print(answer)

### Ejercicio 8

In [3]:
# Read the whole corpus; the context manager closes the file handle,
# which the bare open(...).read() left open.
with open("quijote_clean.txt","r",encoding="utf-8") as quijote_file:
    quijote = quijote_file.read()
print(quijote[:1000])

el ingenioso hidalgo don quijote de la mancha tasa yo juan gallo de andrada escribano de camara del rey nuestro senor de los que residen en su consejo certifico y doy fe que habiendo visto por los senores del un libro intitulado el ingenioso hidalgo de la mancha compuesto por miguel de cervantes saavedra tasaron cada pliego del dicho libro a tres maravedis y medio el cual tiene ochenta y tres pliegos que al dicho precio monta el dicho libro docientos y noventa maravedis y medio en que se ha de vender en papel y dieron licencia para que a este precio se pueda vender y mandaron que esta tasa se ponga al principio del dicho libro y no se pueda vender sin ella y para que dello conste di la presente en valladolid a veinte dias del mes de deciembre de mil y seiscientos y cuatro anos juan gallo de andrada testimonio de las erratas este libro no tiene cosa digna que no corresponda a su original en testimonio de lo haber correcto di esta fee en el colegio de la madre de dios de los teologos de 

In [None]:
# Entropy of the 2-character blocks that immediately follow "pe" in the corpus.
answer = entropy(quijote,2,pre="pe")
print(answer)

## Atenea 2

### Ejercicio 1

In [12]:
txt = "clasped each other suddenly in an agony of fright which of us does he mean gasped huckleberry i dono peep through the crack quick no you tom i cant i cant do it huck please tom there tis again oh lordy im thankful whispered tom i know his voice its bull harbison if mr harbison owned a slave named bull tom would have spoken of him as harbisons bull but a son or a dog of that name was bull harbison oh thats good i tell you tom i was most scared to death id a bet anything it was a stray dog the dog howled again the boys hearts sank once more oh"
src = [(' ', 72190), ('a', 23858), ('b', 5017), ('c', 6697), ('d', 15027), ('e', 36243), ('f', 6113), ('g', 6699), ('h', 19838), ('i', 19165), ('j', 662), ('k', 3070), ('l', 12294), ('m', 7309), ('n', 20475), ('o', 23601), ('p', 4825), ('q', 180), ('r', 15584), ('s', 18060), ('t', 29363), ('u', 9107), ('v', 2433), ('w', 8111), ('x', 412), ('y', 6809), ('z', 158)]

In [13]:
# Arithmetic-encode the exercise text with 21-bit interval bounds (k=21).
answer = arithmetic_encode_bin(txt,src,21);
print(answer)

0100011111111100001100010111111101000010000111000100000110110010100100111111001100111001001010100101001111010010011110111110000100110111101010100100110011010110010110100000011001001100101111110110100000100010111100001110001000001000110110001101101110010001110001011101000001110101000111010001011000000010010111010111001111110010011101010101001010111001110000101110011100001010110000001100101101101000101110100010000010000001000001011111010111001011000000000010111011001110000110100010100011101001011000110010110010001010101010110100101011000110100111011011000000110000001101010111001011010101101100110111011111100000001001110011101111001011010001010111100000000110110010001111101000111000101010001000111001101100010111001100001001111111011110111110101111110100101100000010110000000010111110011011100111001001010110001011100101100100001100101101000000001000101010110001101111000110100011100010000111110011110010011011000001110010001111010101100100011000011000101010110011010010011011111101000011001111

### Ejercicio 2

In [15]:
txt = "1101010101010111111110111101111111111111111101111101110101011111111101011111011111111111111111110111010101111100011011111111111101111011110111111101110001011011111111011111110101011111111001111111111111111101101011111111111111111111111111011101111100111111011011011101111111111101110111110101101111011101111111111101011111111110110100110111111101111111011111100101111111111100111110110101111111110111101111011101011111001111111111011111100110111110001101111111111111110111001110011101110111111111011111101101110111111101101110011110110111111011011101111111011111011111111001011101111111010111111111001110011111010101111111011111101101011111011111011111111101111110111110110111111111110101111111111111111101011101111111110101111111111101110101111101011111111101111101011111011111111111111111011111111111111111111111111101101111100111011111111101111111101111110011111001111111011101010111011111111001100111110111111011011101111111110111110101001111011111111101111110111111111111010101011101111101110011011111111101111111101101011111111111111101110001011101111101111101111100111101000101011101111111100111111111110101111101110111010111111110101101111111110011011101111110111111011111011111111101111111111111111111111101111111111111110111010011111111111100011111011111111111001101111111011010110111111101111111111111011011111100110101111111111101110111110101111101111001111111110111111111110110111111111011111101111100111111111001110111111011011111111111111111100101011101111111111011110111110111111111111111011011110111111111011111000111111100011001111111111001110111001110011111111111110111010101110111101111011011010111111101101101101101111111111111110101111111111111001111111111110111111111111111111111011101111111111111011111111111011101111101011111111101010110010101111101010111110111100111110101001100011101111111011101001111110111010110110110111111111111011111111101010101111111101100010101001110111101110111011111011011011111111100111110110110111111010111111111111101111101111111111111110
10111011100111111111010111110111110010110111111111101111111111111111111111110111111001111010110111101110111010101111110010111111111100111011101110111111101111111111111111101011111101111011111011110111111110101111110111110110111011111111101011111111111111001010101111100111100001101111101111101110010000101111111111101111111111111111101110011011111100111111110111101011011011111110101111100111111010111101111111111110101111111110111111111010111110111111111011111110111111111110111110100111011010101010011111101011011111000111111010011110111100111011111100111110101111101111111111111111111111101011101111111111111011111011011111011101100110100111011110111111111001110110011111110111111111101111111111111110111111101110111111111111111111101101101110111001111111101111101101111111011111111111100110001011111101011110111011111100101111110111111111100011101000100010111110111011111111111101111111011011000110111110111011001011011111110011100110111011111011111110101111111101111101111111101000101010111001101111100011111101111011111010011110111101111011111111011111111110111111110100011011111111111001111111111110111111111111110000101111101111101111101100000110101010111110011111101011101100111100111111011110111111100110111111111011101110011111100010111111111111111110011110111111111111111011111011011111111110111010101100111110111111011111111111011010111111111111111110111001111011111011001110111011101111100011111100110111111111111101111111011111111010111011101100111111111010111011101110111011111111101010111110111010101110011000011111111111111011101111011111111110111001101011101111101011011111111110101000011111011110111011111011111111011000100110101110011110111111110111101010111111111101101111111011111111101110011111111111101101111010101011100110101010110111101010101111111110111000111111111111111011111111111111101010111110110011010000111011111111111111111111001010011111101111110001111100111110101010111111111011111111110111101111111110110111110110111111101111001111111111111110001011110010101111
11101011111011101111110111001111100100101011101100111011011111011001111111111111111011101111111110110011111000101111111100001011111111101001101111011110111110011111001111101111101111101001111001110011101010101111101111111110111110110111101111111111111110111111101110011111101110111101111110101011111111111111111110100110111011111111111110001110111011101011111101101011001011111111111111101100011110111010100011111100110011100111111111000111111010100001101111111110111011111110111111111011111111110111101110110110101111110110111101111111111111001110101010111010101111111101111011111110111010101111111011110111101110111110111101111010111100101111110111111111111111111110011111111111111011011011101010101110111010111111000111110111010111101011111111111111110011111111111111101111111110111110111110111001100100101011111011111011111111101110111110111110111011111111111110110011101111111011111111111100111111111111111011111011101111111111111111111111111111111111111011111110111110110111111100101110101110111110111111111111111101111011111111111111111111011110111010111010111111111110101110111111111111111111100111111111111111111110101111010111111101111110111110101011111001111011111111100110111111111110111110111111011111111110011011101010100110111010111110111111111011111111111111111011111011111111111010010011110110111111101011111101110110111111100110111110101110101010111011111110111111111111111100100010111111111111111111101101111010111010111111111010101010111111100110111110111111101110111110101111111011101101010110100101011111101111011110111110101111111001101011111110110011111110111111101011110111110111111011101111111010011111110111111111111101011110011101111011111011100111111101111111111011111111111111111110111011111111110111001111111010110111111101101111001110111111101011110111110111111110011011111111101111101111111111100110111111111011101111111110011111111111111111101110101110111100001110111110101111111011101110111111111111111101101011111111111110111111101101111101111111111110101110111010
11111111001111111111111011011111111111101110111111111110111101101101111011111111111111111111111111101111101111101111001111111011111010111011111010111111101110111111100010111111111011110110101011111111101111001111111111111111011111111111111011111110111111111111101111101111101111111101111111101111011111111111101111101111111111111011111111101111101100000110011111111100101111011111111111101111111110001011101111111010111110111111101111111011101010011111110110101011101111101011111011111111100001111101111010111111001111111111110100111011111111111011111111111111111111111111111111111010011111111111111111011011011110111111111111111110101111101111101110001011111110111111011010110001101111001011111011111000001111111011101110111111111111111011110011111011001110111011111011111111110111111110011111111110111110111111011111111111001110111101111110111111111111111000111111111111111111100010110001111011111111111111100110001111100110111001101110111111100110011011111011111101100101111110110011110111101110111111111011111111011111110011111111111111111111111111111111111111111111100011101111111011101001101101111111101111111111001111111111101110001011111111111010111011110010111010111101110110111111110111111011110111111111111110111111111111111111111111111100111110111110111111101111111111111111111111001011111111101010111111001111101110111011011011100111111111111111111111111011101110101111011101111111101111111111111011101111101111011100111110011001111111111010111111100111111011101110011111101111111001111000101111110000111100111110111011111110111011101111101111111111111111111011100001111111111011111110111110111111101111111110111111111011101111111110111111111110111010111101111111111111111111111011111101111101111111111111111011101111111110111111111111111101111111001011010111111011100111111111111110111111111100111011111110011000110011111111110111111111101011111010011010100100111111110010100111101110111100100101100111111110010011011110111111101011111111111111111111111111101001111111101111111111110111
1111001111101101111011111110111111111010111110111"
# Source model as (symbol, weight) pairs, where each symbol is one of the four 2-bit blocks.
# NOTE(review): the weights are presumably occurrence counts of each block — confirm their origin.
src = [('00', 49), ('01', 295), ('10', 94), ('11', 764)]

In [16]:
# Arithmetic-encode the binary string in `txt` using the 2-bit block weights in `src`.
# NOTE(review): the third argument (15) is presumably the coder's working precision in bits —
# confirm against the definition of arithmetic_encode_bin.
answer = arithmetic_encode_bin(txt, src, 15)
print(answer)

0110011000011001000010011011100001110110100110010011100001011110100100110110010011000110000011011100010000111100011010101110001110111100001010000010111000001101001000000001011000110001111101111111100000111011010101001100011000110010111101100000010111000110101111010100001111001011000010011010110001000110100101110001110101000000101111011010011000010011010001111100010011011011101011110100011010100000011110101111010101101011111010110000110000100010101001000011011010100111101111100110111100111010111111101100101101010111111111000111001100100100001000010100000010111111000100101111011001100010111111110100010000111001010010100000101100010010011101110000111111001101001100010101111001010111100010100000111111101111110101111110000011011111000110100110101111100001101011010001010111001100100011100101000001001110111011111110001011000011011000001101011000110011101010011100111111100100001111011011110101010001000111100110110110100101010011111011110110001000001000010101100100010100011001011001001000011000

### Ejercicio 3

In [18]:
txt = "spent it was broad daylight before he found himself fairly abreast the island bar he rested again until the sun was well up and gilding the great river with its splendor and then he plunged into the stream a little later he paused dripping upon the threshold of the camp and heard joe say no toms true blue huck and hell come back he wont desert he knows that would be a disgrace to a pirate and toms too proud for that sort of thing hes up to something or other now i wonder what well the things is ours anyway aint they pretty near but not yet huck the writing says they are if he aint back here to breakfast which he is exclaimed tom with fine dramatic effect stepping grandly into camp a sumptuous breakfast of"
# Source model as (symbol, weight) pairs: the space character followed by the letters a-z.
# NOTE(review): the integer weights look like occurrence counts from a reference corpus — confirm their origin.
src = [(' ', 72190), ('a', 23858), ('b', 5017), ('c', 6697), ('d', 15027), ('e', 36243), ('f', 6113), ('g', 6699), ('h', 19838), ('i', 19165), ('j', 662), ('k', 3070), ('l', 12294), ('m', 7309), ('n', 20475), ('o', 23601), ('p', 4825), ('q', 180), ('r', 15584), ('s', 18060), ('t', 29363), ('u', 9107), ('v', 2433), ('w', 8111), ('x', 412), ('y', 6809), ('z', 158)]

In [19]:
# Build a Huffman code from the weight table and use it to encode `txt`.
# NOTE(review): the third argument (1) is presumably the block size in symbols —
# confirm against the definition of huffman_code.
huf = huffman_code(txt, src, 1)

# dict() requires `huf` to be an iterable of 2-item pairs; the resulting mapping is
# what encode() receives. Presumably the pairs are (symbol, codeword) — TODO confirm.
corr = dict(huf)

answer = encode(txt, corr)
print(answer)

1010111011010110001011000111101100111101010010100011011011010100101001100000110000100111110110010111111001011010110011011001011110001001110100101000110010100111000100111110010001100000011001111110101010010111001111000001110000100011111010110011111100001001101101101001010100101010110010110110010100011110101100101001000110000011011001001101000011001010011010010110101011010111000000100111001010001111000001111001000101101111100100101101100101001010111100100000111101010010100011110101011100111001001111001110110001001000110000011100101111100111000011110001110010010110110010100111001110100101010010110011010011111111110010111010001111010111101101100001111011101000101011101111001010110001100010011101000010010001100000101101100101100000011001010011101111001111100100011100101011100000011110001011100100101101100101001010101111010010101001110100001000011001011110111011110010101001100101001011010111010000110010100111011010011110010100101110000011000110100111111011111011011110001110010011110011101110

### Ejercicio 4

In [21]:
txt = "0110101111101110100111101010000011100111011010110000001100111110010110101011101110101110011001100001111101110001111011001110001100000101101011101100110000000110010101111100111011010001111000001101110100011000011101000010100111111001101100010001110001101101000110110110100111101100001011101010101101001011000000001010110101011010111111001100100110000110011111100000011110001111000011010110001000110100001110100011101011001110010001001011000110100001001100111011111110111110011111011110110100011000000111110000101110100001001001000110100000101011100111011000010100000100000000011110100100110011111111111011001101110100000101100111101000001010101101101101100111000100111000011100100111001001001001001000001011100111100001111010100001011001101000011001000000000011101010100100100001100011101111110110000111100011101000100110100011111000111011000010110110001101110101100100000010111101011110100000110100011001111000111000011011000001101010010111100111110110001100000010010001011100011011000110000001000110010010100001100001111001111001001010110000011000001110010000110000101010001101101100000011100110001100000101001111011001101010011100100010111110111011001010010101111110110110001110001111100000100001001011011000001010000011101100111111100100000100111101110010010011101001011110010001110110001101001000001110101101100110110010101100001011010000000111010000010011110110010010001001000101101101010000001101001000001111100010111010000100001010000100111100000110101010011000101111000101001111001011111010101000010001000001011110110100000101111110000110111000111111010111111100110111100111101110001000010000110000011100000101000010000100011111100010000111111011101111011101010100101011010001001010100100110011100110001001011100111011011010111000000111100101000011011111001111000101111001001011011001110000000011000000110101010101110111000010011010110001111010011111100110001010011101010000100001011111111111001110101100000001101010110010000110110010100001011000100010000011100000100000100110010110010
11110110111110100111101100101111011000110010111110111011110011100110011110100001110111101000010001011011000001010101101011100100111110000100000010001000011010100111001011010000010000000110010111111111111001011000111010010000101111010110011111110001001110100101011001110100000000101001001111011111100000001010011100001010100011001000101110010010101011000000101101110001001110101000110000101101001110011110111011001001111111110011000011001011011010101000100101010111101011111101110101010100011001101001011110111010100011111110001000010011101011010110010110111011101001101110100111101111011101111101011001100010110010100010100011011100101011100000010011101100100001101111010101101110101000010101011100000100111111110010100101000100000011101011111100111100111100011001101001001011000010100110010100001011010110110111011010000100111100001110101000001111011110100011100111001001011010011011000011011100000111000011000010001101110100011111001110010011100111110101111011011000001000100011000011111100001000011000101110111100010101011100001001110110100001111011111010111111001100000101000001010010010101001001111101100100110010100011001110100101111001010000100001110101010001000101111100011101011011001101101010111111001110001001010000110111101011110110100110100001010010000001101001111110000011001110111100001111110011010000111100101001000100010011100111011111100110110110101010111011101100001010101010101010100110100010111011001010110011111000110001101011010010010101100000011011010000010111000010011010100111001111000000000011111110111101100111011010111001010010111100100001011110000011001110111011100010111110010111"
# Source model as (symbol, weight) pairs: the space character followed by the letters a-z.
# NOTE(review): the integer weights look like occurrence counts from a reference corpus — confirm their origin.
src = [(' ', 72190), ('a', 23858), ('b', 5017), ('c', 6697), ('d', 15027), ('e', 36243), ('f', 6113), ('g', 6699), ('h', 19838), ('i', 19165), ('j', 662), ('k', 3070), ('l', 12294), ('m', 7309), ('n', 20475), ('o', 23601), ('p', 4825), ('q', 180), ('r', 15584), ('s', 18060), ('t', 29363), ('u', 9107), ('v', 2433), ('w', 8111), ('x', 412), ('y', 6809), ('z', 158)]

In [22]:
# Arithmetic-decode the bit string in `txt` back into text using the weight table in `src`.
# NOTE(review): 21 is presumably the coder's working precision in bits and 890 the number of
# symbols to recover — confirm against the definition of arithmetic_decode_bin.
answer = arithmetic_decode_bin(txt, 21, src, 890)
print(answer)

ever done to me when im gone oh tom you aint dying are you dont tom oh dont maybe i forgive everybody sid groan tell em so sid and sid you give my window sash and my cat with one eye to that new girl thats come to town and tell her but sid had snatched his clothes and gone tom was suffering in reality now so handsomely was his imagination working and so his groans had gathered quite a genuine tone sid flew downstairs and said oh aunt polly come toms dying dying yesm dont wait come quick rubbage i dont believe it but she fled upstairs nevertheless with sid and mary at her heels and her face grew white too and her lip trembled when she reached the bedside she gasped out you tom tom whats the matter with you oh auntie im whats the matter with you what is the matter with you child oh auntie my sore toes mortified the old lady sank down into a chair and laughed a little then cried a


### Ejercicio 5

In [24]:
txt = "1110011110000110011011011011010001111111110001000111000001000010111011110001000100010010101101100111111011011100111100000001011000111011001110001011110010111011111110011110100011100110000001110101100000010101100111001100000100011100011100111111101011100111110011100011110001001010010100000001100011001001101011110101110001101111011001111001010011001011100001001001101001011000011101101001101110000000110001011111101010001110101111101000100101101001001000111111000101000110100101011010000110100001101100111001000010111110100011110100110000011100111000010100011110110101011011110010011100100111111010100001100100111001001111010111011000111111111101000100110100011101100011010000101101110010001100100110010111100101000101110100000100010110111100111011110110100111011111000100111101101101100110101000011001001000010111100111001000100111011001110110110101101110010000001101011110100100001101011101001001101101101101100000010101011111101001011011010111011101001111011011000100101111111001000110110010110110100110001111101011001111001101111000110000011010011000101111000001001110100000110111011100101101111110110101000111110000011000011000101101101001010001110010100011000100000011011010101101001010100110111101111011000110011001111111011101110101000000010000011101110011001010111101100101111111110010111000100110010110110000000111011001001000011000000001110001010110000000010100001110110101001100110111011101001001010001011111101101000010010100010011101011100010001010000110000111011010100001111000110101110000001100110111010000101001110100110110111011111111100010101000110100101010000010010101101100110000010111011110000001111100100111000000001010111101100001001101010101100011011111101000011011101011111101110101011001000100110000010111011001111101001000010001011011001000110100000001011110101110101000001010010001010100100110110010101000110111100111000011011110100101111000000000001000010000100010001110010001110011001110100100100101000110110001000101100110001100110101010111100000111011101011010
1010101010000110110111010110101001001110010000001110000001001110110000101001000101000101110101110110100110001100100110100110101011111011011100110100100110011010001011100110110101111010110110110111000010010001010101110100011011110111010110111111000101110101010000110101111111101011011010011001000011000111000100111010010111101110010011000100101111110110011001000001010000100111101111100000010101100010000001001001111011110100101111000110000110111111110100110101001110101110111101110111110111001010011100011010100101001110001111011011110011001010100100011100111010110100010110111011001001011001110111000010010000100000000110100100011010000100011010111011010111011111111010001101001010111001101000011000111000010000000111000110110111010011101000111001100111100110111000100011111101111111001111011111000000100111"
# Source model as (symbol, weight) pairs, where each symbol is one of the four 2-bit blocks.
# NOTE(review): the weights are presumably occurrence counts of each block — confirm their origin.
src = [('00', 3386), ('01', 148), ('10', 38), ('11', 740)]

In [25]:
# Arithmetic-decode the bit string in `txt` using the 2-bit block weights in `src`.
# NOTE(review): 16 is presumably the coder's working precision in bits and 5794 the length
# of the message to recover — confirm against the definition of arithmetic_decode_bin.
answer = arithmetic_decode_bin(txt, 16, src, 5794)
print(answer)

1100000011000001110000010000000000000000001101000011100000000000001111000011000011000011000000000000000000000000001100000000110000110011000000000011000000000000111111010011000000000000000011000000000000001100010011000111000000000000010000110000110000000000000011110000000000000000001000110011000000000100000000110000000011000011000000000000000000110000000000000000001100110000001100000000000011000000110000110000000011000000000000000000110000000011000000000001101111001111000000000000001100010000000000001100000001000000000000001100001100000000000011001100000000000000000011000011000000000000000000001100110000000000010000000000000000000011110000110011000000001100000111000000000011000011000000000000000011010000000000000000000000000000110000000000001100001100000000000000111100000000000000000000110000000000000000110011111100010100000011000011000000000000110000000000000000000000001100000000110000110000001100000011000011110000000000000000000000001100110011000000000000000000000011000000000000110011

### Ejercicio 6

In [27]:
txt = "1101111001111010010100100101100011000100110001011001101100101000100111000010101001101011000000111001100010011000101100110110010111001011101011111111001010010110110010111111011001110010011011010011011011001011101000111100101000111101010100010001111000101100110001001011110001110010001001000111110000110010011010111010000111111000001111010101001111110010101011110110011101101011101011100001011101111011110011111100010101011011111001110010011101001001111101101100101001011011001011111100011110110011000101100100010011011011111011101010011110010100001001011000100110011100100011111001110010010111101011111000101110010010111001111010001101101111001011001100110011101011000000111111010000100110011100100100111100000010000101001100111111111100101110100011001011110101011010110000010110110010100110110100111111010100011011001011000101100101101100101011111010000110010101001100010100010111001111001010110110110010111010000100100011000001010110111010011010110111010111001111110001101101101001010100101101100101110000001000011101011110011100011100011001010111000001010100111110010001100000100111100000111111101001011111011101011010001110001100110010100101101011100000111100111011001110001101010011110100010110110010100111000010011010000101100011000001001111000001011011001010011100111010010011111110010111111001001101011000001100110011001111111000101001010101001011011001011101001010011110101100111101011101101011101001011100000101110011110100011110101100100101100011110100001111011000111101110100011000010111111110011111001001110000111110100101001001011000101110011110100010110110011110100001111010000100111101111000111100110010010101001111010010100111111100100111001111100010100111000011111100111110011010010110100001001110111110111101010010100110111011001011100000101101101101010011111001110010110001011011001010011100111001100110011110100010101111010111100011100101111000111001000100100000100111001110000011100001001010011001111001100001011100000101101111000001100101001000101101011101010000010110110
010010110011100011010010111011111111101100101011100000101101100101001110011101010011111001000110000011110101111011011000011110001000111100111010010111010010011011011001010100110010111101110111100101010010101110110100100011100111001010110100010011110000011001011111100101101011001110111101001011010010110001011110011111100001101111001101111111110110010101110110010111010110101111100011110101100111101011101101011101001011100000111101011110110110000100001010011011110011000110000101110100001111011101000101101100101001100001011111111001111100110100010101111001101001010001011000100111110011100101100010110110110100101010100100111100000010111101000110011001110101100011111000101110011110100011110101011101001010011100110011000010111010101000110111010010000011111010011111000011101111010010011111000011111001110010010111101011111000110110111100101100110001001100010110011111010011111000011011001010001001110000101010011010110000010110110010111111000010001111000101100111001100101111000111001001011100100011011110011010101100111100101000100010011111010001110011001010011111000111010010100110001001111101100000101110010010101100101010101111011000111"
# Source model as (symbol, weight) pairs: the space character followed by the letters a-z.
# NOTE(review): the integer weights look like occurrence counts from a reference corpus — confirm their origin.
src = [(' ', 72190), ('a', 23858), ('b', 5017), ('c', 6697), ('d', 15027), ('e', 36243), ('f', 6113), ('g', 6699), ('h', 19838), ('i', 19165), ('j', 662), ('k', 3070), ('l', 12294), ('m', 7309), ('n', 20475), ('o', 23601), ('p', 4825), ('q', 180), ('r', 15584), ('s', 18060), ('t', 29363), ('u', 9107), ('v', 2433), ('w', 8111), ('x', 412), ('y', 6809), ('z', 158)]

In [28]:
# Rebuild the Huffman code from the weight table and use it to decode the bit string in `txt`.
# NOTE(review): the third argument (1) is presumably the block size in symbols —
# confirm against the definition of huffman_code.
huf = huffman_code(txt, src, 1)

# dict() requires `huf` to be an iterable of 2-item pairs; the resulting mapping is
# what decode() receives. Presumably the pairs are (symbol, codeword) — TODO confirm.
corr = dict(huf)

answer = decode(txt, corr)
print(answer)

come oh dont be afeard i dont believe theyll bother us we aint doing any harm if we keep perfectly still maybe they wont notice us at all ill try to tom but lord im all of a shiver listen the boys bent their heads together and scarcely breathed a muffled sound of voices floated up from the far end of the graveyard look see there whispered tom what is it its devil fire oh tom this is awful some vague figures approached through the gloom swinging an old fashioned tin lantern that freckled the ground with innumerable little spangles of light presently huckleberry whispered with a shudder its the devils sure enough three of em lordy tom were goners can you pray ill try but dont you be afeard they aint going to hurt us now i lay me down to sleep i


### Ejercicio 7

In [12]:
# Source model as (symbol, weight) pairs for the letters a-z (no space symbol in this table).
# NOTE(review): the integer weights are presumably occurrence counts — confirm their origin.
src = [('a', 1), ('b', 54651), ('c', 1739), ('d', 112322), ('e', 49), ('f', 33427), ('g', 1), ('h', 48749), ('i', 14532), ('j', 5839), ('k', 20), ('l', 101668), ('m', 9649), ('n', 13), ('o', 125075), ('p', 1), ('q', 5), ('r', 15), ('s', 82), ('t', 4), ('u', 20305), ('v', 84633), ('w', 25), ('x', 3), ('y', 3), ('z', 5233)]

In [14]:
# Pair source: every concatenation of two symbols of `src`, weighted by the product
# of the two individual weights.
pp = product_prob(src)

# Huffman code for the pair source.
# NOTE(review): the empty text and the block size 2 are passed as-is; huffman_code is
# defined elsewhere — confirm it ignores the text when a source table is supplied.
huf = huffman_code('', pp, 2)

# Keep only the second field of every entry (presumably the codeword — TODO confirm).
# mean_length() matches pp[i] with huf_code[i] by position, so this assumes huffman_code
# returns its entries in the same order as `pp` — verify.
huf_code = [entry[1] for entry in huf]

# Weighted mean codeword length and entropy of the pair source, both in bits per pair.
m_length = mean_length(pp, huf_code)
v_entropy = entropy_src(pp)

# Gap between the mean code length and the entropy of the pair source.
answer = m_length - v_entropy
print(answer)

0.024409267995515016


### Ejercicio 8

In [8]:
# Load the cleaned Quijote corpus as a single UTF-8 string. The context manager
# guarantees the file handle is closed as soon as the text has been read, even if
# reading raises.
with open("quijote_clean.txt", "r", encoding="utf-8") as quijote_file:
    quijote = quijote_file.read()

# Preview the first 1000 characters as a sanity check on the loaded text.
print(quijote[:1000])

el ingenioso hidalgo don quijote de la mancha tasa yo juan gallo de andrada escribano de camara del rey nuestro senor de los que residen en su consejo certifico y doy fe que habiendo visto por los senores del un libro intitulado el ingenioso hidalgo de la mancha compuesto por miguel de cervantes saavedra tasaron cada pliego del dicho libro a tres maravedis y medio el cual tiene ochenta y tres pliegos que al dicho precio monta el dicho libro docientos y noventa maravedis y medio en que se ha de vender en papel y dieron licencia para que a este precio se pueda vender y mandaron que esta tasa se ponga al principio del dicho libro y no se pueda vender sin ella y para que dello conste di la presente en valladolid a veinte dias del mes de deciembre de mil y seiscientos y cuatro anos juan gallo de andrada testimonio de las erratas este libro no tiene cosa digna que no corresponda a su original en testimonio de lo haber correcto di esta fee en el colegio de la madre de dios de los teologos de 

In [33]:
# Entropy, in bits, of the 3-character blocks that immediately follow an occurrence
# of "qu" in the corpus (only positions preceded by the prefix are counted).
answer = entropy(quijote, k=3, pre="qu")
print(answer)

4.867598561440164
