## 1 Hiztegi iragarleak

In [1]:
def aurrizkiak(w):
    """Yield the proper prefixes of ``w``, shortest first.

    The prefixes have lengths 1 .. len(w)-1, so neither the empty prefix
    nor ``w`` itself is produced. Sequences shorter than two items yield
    nothing.
    """
    yield from (w[:cut] for cut in range(1, len(w)))
def get_predict(h):
    """Build a prefix -> predicted word dictionary from word counts.

    Args:
        h: dictionary mapping each word to its number of occurrences.

    Returns:
        A dictionary that maps every proper prefix of the words in ``h``
        to the word with the highest count among those starting with that
        prefix. A word only replaces the current prediction when its count
        is strictly greater, so on ties the word visited first is kept.
    """
    pred = {}
    for word, count in h.items():
        for prefix in aurrizkiak(word):
            current = pred.get(prefix)
            # A prefix without a prediction yet competes against count 0.
            if h.get(current, 0) < count:
                pred[prefix] = word
    return pred

Erabilpen adibide bat (ez da eskatzen):

In [2]:
# Demo: list the proper prefixes generated for a few sample words.
for w in ("ez", "erre", "eraman"):
    prefixes = tuple(aurrizkiak(w))
    print(w, "-->", *prefixes)

ez --> e
erre --> e er err
eraman --> e er era eram erama


In [3]:
# Demo: word counts for a tiny vocabulary and the predictor built from them.
h = {
    "ez": 30,
    "erre": 15,
    "eraman": 8,
}
pred = get_predict(h)
# Bare expression so the notebook displays the resulting dictionary.
pred

{'e': 'ez',
 'er': 'erre',
 'err': 'erre',
 'era': 'eraman',
 'eram': 'eraman',
 'erama': 'eraman'}

In [4]:
def saved_chars(w, pred):
    """Return how many characters of ``w`` the predictor ``pred`` saves.

    The proper prefixes of ``w`` are tried from shortest to longest. As
    soon as one of them is predicted as ``w``, the remaining characters
    (``len(w) - len(prefix)``) count as saved. The result is 0 when a
    prefix has no prediction at all or when no prefix predicts ``w``.
    """
    for prefix in aurrizkiak(w):
        suggestion = pred.get(prefix)
        if suggestion is None:
            # Unknown prefix: stop searching, nothing is saved.
            break
        if suggestion == w:
            return len(w) - len(prefix)
    return 0

Erabilpen adibide bat (ez da eskatzen):

In [5]:
# Demo: characters saved for each sample word with the current `pred`.
for w in ("ez", "erre", "eraman"):
    saved = saved_chars(w, pred)
    print(w, "-->", saved)

ez --> 1
erre --> 2
eraman --> 3


In [6]:
def percent_saved_chars(pred, filename, encoding="utf8"):
    """Return the percentage of characters saved by ``pred`` on a text file.

    Every whitespace-separated word of the file is looked up with
    ``saved_chars``; the result is the saved characters divided by the
    total characters of all words, times 100.

    Args:
        pred: prefix -> predicted word dictionary.
        filename: path of the text file to evaluate.
        encoding: text encoding used to open the file.

    Returns:
        The saved percentage as a float, or 0.0 when the file contains no
        words at all (empty or whitespace-only file).
    """
    n_saved = n_total = 0
    with open(filename, encoding=encoding) as f:
        for line in f:
            for w in line.split():
                n_saved += saved_chars(w, pred)
                n_total += len(w)
    # Without any word there is nothing to save; avoid ZeroDivisionError.
    if n_total == 0:
        return 0.0
    return n_saved / n_total * 100

Erabilpen adibide bat (ez da eskatzen):

In [7]:
# Demo: count the words of bible.txt, build the predictor from those counts
# and measure the percentage of characters it saves on the same text.
with open("bible.txt", encoding="utf8") as f:
    h = {}
    for w in (token for line in f for token in line.split()):
        h[w] = h.get(w, 0) + 1

print(f"words: {sum(h.values())} , unique: {len(h)}")

pred = get_predict(h)
print(f"prefixes: {len(pred)}")

# Show only the first ten prefix --> word entries, one per line.
examples = [f"  {k} --> {v}" for k, v in pred.items()][:10]
print("some examples:", *examples, sep="\n")

x = percent_saved_chars(pred, "bible.txt")
print(f"saved chars: %{x:.1f}")

words: 790957 , unique: 12871
prefixes: 25650
some examples:
  i --> in
  t --> the
  th --> the
  b --> be
  be --> before
  beg --> begat
  begi --> beginning
  begin --> beginning
  beginn --> beginning
  beginni --> beginning
saved chars: %40.2


## 2 Konplexutasun tenporal asintotikoa

### 2a)

```python
def batura_zehatza_v1(z, b):
    """Return True when z holds two different values x and b-x (sum b).

    Pairs of equal values are excluded by the ``x!=b-x`` test. The trailing
    comments give the number of steps charged to each line, n = len(z).
    """
    s = set(z)                     # n
    for x in z:                    # [1,n] (body runs [1,n] times)
        if b-x in s and x!=b-x:    # | [1,n] x 1
            return True            # |
    return False                   # [0,1]
```

Multzoa sortzeak dagoeneko $n$ pausu suposatzen dituenez, berdin dio `for` kontrol egitura $1$ edo $n$ aldiz exekutatzen den; izan ere, behin exekutatzean $t(n)=n+2$ da, eta $n$ aldiz exekutatzean $t(n)=3n+1$. Beraz, orden zehatza dugu: $\Theta(n)$.

### 2b)

Demagun kasu ona eta txarra existitzen direla...

**Kasu ona**: zerrendako lehenengo bi balioen batura $b$ da:

```python
def batura_zehatza_v2(z, b):
    """Return True when two elements at different positions of z sum to b.

    Step counts annotated for the best case: z[0] + z[1] == b, so every
    executed line runs exactly once.
    """
    for i in range(len(z)-1):          # 1
        for j in range(i+1, len(z)):   # 1
            if z[i]+z[j] == b:         # | 1
                return True            # |
    return False
```

**Kasu txarra**: zerrendan ez daude $b$ batzen duten bi elementu:

```python
def batura_zehatza_v2(z, b):
    """Return True when two elements at different positions of z sum to b.

    Step counts annotated for the worst case: no pair sums to b, so every
    pair (i, j) with i < j is examined. n = len(z).
    """
    for i in range(len(z)-1):          # n (body runs n-1 times)
        for j in range(i+1, len(z)):   # n-1 + n-2 + ... + 1
            if z[i]+z[j] == b:         # n-1 + n-2 + ... + 1
                return True            # 0
    return False                       # 1
```

Beraz,
* **Kasu ona**: $t(n)=3 \to \Omega(1)$
* **Kasu txarra**: $t(n)= n + 2 \cdot \frac{(n-1)\cdot n}{2} + 1 \to O(n^2)$

### 2c)

Demagun kasu ona eta txarra existitzen direla...

**Kasu ona**: zerrendako balio txikienaren eta handienaren batura $b$ da:

```python
def batura_zehatza_v3(z, b):
    """Return True when two elements at different positions of z sum to b.

    Works on a sorted copy of z: i starts at the smallest value and j at the
    largest, and one of them moves inward depending on how z[i]+z[j]
    compares with b.

    Step counts annotated for the best case: smallest + largest == b.
    """
    z = sorted(z)        # n log n
    i = 0                # | 1
    j = len(z)-1         # |
    while i<j:           # 1
        b2 = z[i]+z[j]   # | 1
        if b2==b:        # |
            return True  # |
        elif b2<b:
            i+=1
        else :
            j-=1
    return False
```

**Kasu txarra**: zerrendan ez daude $b$ batzen duten bi elementu:

```python
def batura_zehatza_v3(z, b):
    """Return True when two elements at different positions of z sum to b.

    Works on a sorted copy of z: i starts at the smallest value and j at the
    largest, and one of them moves inward depending on how z[i]+z[j]
    compares with b.

    Step counts annotated for the worst case: no pair sums to b. n = len(z).
    NOTE(review): i and j start n-1 apart and every pass closes the gap by
    one, so the body runs at most n-1 times; the counts below are an upper
    bound and the asymptotic order is unaffected.
    """
    z = sorted(z)        # n log n
    i = 0                # | 1
    j = len(z)-1         # |
    while i<j:           # n+1 (body runs n times)
        b2 = z[i]+z[j]   # | n x 1
        if b2==b:        # |
            return True  # |
        elif b2<b:       # |
            i+=1         # |
        else :           # |
            j-=1         # |
    return False         # 1
```

Beraz,
* **Kasu ona**: $t(n)=3 + n \log n$
* **Kasu txarra**: $t(n)= 3 + 2n +  n \log n$

Biak $n \log n$ ordenekoak direnez, ez da kasu on/txarrik existitzen eta orden zehatza dugu: $\Theta(n \log n)$.

## 3 `IndexSet` klasea

In [67]:
class IndexSet:
    """Set of integer indices in the closed range ``[0, n]``.

    Membership is stored in a list of ``n + 1`` booleans, so adding,
    removing and testing an index is a single list access, and iteration
    yields the members in increasing order.

    Attributes:
        z: list of booleans; ``z[i]`` is true when ``i`` is in the set.
        n: largest index the set can hold.
        len: number of indices currently in the set.
    """

    # Instances are mutable and define __eq__, so they are not hashable.
    __hash__ = None

    def __init__(self, n, it=()):
        """Create a set able to hold indices ``0..n`` with the items of ``it``.

        Raises:
            IndexError: if an item of ``it`` is outside ``[0, n]``.
        """
        self.z = [False] * (n + 1)
        self.n = n
        self.len = 0
        for i in it:
            self.add(i)

    def add(self, i):
        """Insert index ``i``; adding an index already present is a no-op.

        Raises:
            IndexError: if ``i`` is outside ``[0, n]``.
        """
        if i < 0 or i > self.n:
            raise IndexError("wrong index")
        if not self.z[i]:
            self.z[i] = True
            self.len += 1

    def rank(self):
        """Return ``n``, the largest index the set can hold."""
        return self.n

    def remove(self, i):
        """Delete index ``i`` from the set.

        Raises:
            IndexError: if ``i`` is not currently in the set.
        """
        if i not in self:
            raise IndexError("wrong index")
        self.z[i] = False
        self.len -= 1

    def __contains__(self, i):
        """Return True when ``i`` is in the set, False for anything else."""
        try:
            return 0 <= i <= self.n and self.z[i]
        except TypeError:
            # Values that cannot be compared with or used as an index
            # (e.g. strings, non-integral floats) are never members.
            return False

    def __len__(self):
        """Return the number of indices in the set."""
        return self.len

    def __eq__(self, other):
        """Two index sets are equal when their boolean tables are equal.

        Because the tables have ``n + 1`` entries, sets with the same
        members but a different ``n`` compare unequal.
        """
        if not isinstance(other, IndexSet):
            # Let Python try the reflected comparison / default inequality.
            return NotImplemented
        return self.z == other.z

    def __str__(self):
        """Return the same text as ``repr``."""
        return repr(self)

    def __repr__(self):
        """Return an expression that rebuilds an equal set when evaluated."""
        return f'IndexSet({self.n},{list(self)!r})'

    def __iter__(self):
        """Yield the indices in the set in increasing order."""
        for i, present in enumerate(self.z):
            if present:
                yield i

    def union(self, other):
        """Return a new set with the indices of both sets.

        The result can hold indices up to the larger ``n`` of the two.
        """
        return IndexSet(max(self.n, other.n), [*self, *other])

    def intersection(self, other):
        """Return a new set with the indices present in both sets.

        The result can hold indices up to the smaller ``n`` of the two.
        """
        # zip stops at the shorter table, i.e. at the smaller n.
        common = [i for i, (x, y) in enumerate(zip(self.z, other.z)) if x and y]
        return IndexSet(min(self.n, other.n), common)

Erabilpen adibide bat (ez da eskatzen):

In [69]:
# Demo: a set able to hold indices 0..7, created with four members.
s = IndexSet(7,(0,3,4,6))
# Membership tests, including indices outside the valid range.
print(f"3 in s: {3 in s}")
print(f"2 in s: {2 in s}")
print(f"-1 in s: {-1 in s}")
print(f"10 in s: {10 in s}")
# Iteration (unpacked) and the string form.
print("*s:", *s)
print("s:", s)
# The repr is a constructor call, so evaluating it rebuilds an equal set.
print("eval(repr(s)))==s:", eval(repr(s)) == s)
# Union and intersection with a set that has a different maximum index.
s2 = IndexSet(10,(1,2,4,6,10))
print("s2:", s2)
print("s.union(s2):", s.union(s2))
print("s.intersection(s2):", s.intersection(s2))

3 in s: True
2 in s: False
-1 in s: False
10 in s: False
*s: 0 3 4 6
s: IndexSet(7,[0, 3, 4, 6])
eval(repr(s)))==s: True
s2: IndexSet(10,[1, 2, 4, 6, 10])
s.union(s2): IndexSet(10,[0, 1, 2, 3, 4, 6, 10])
s.intersection(s2): IndexSet(7,[4, 6])
