# Hiztegi baten inplementazioa


## Kolisioen ebazpena II : Helbideratze Irekia (*Open addressing*)

* $g$ gako bati taulan dagokion posizioa ez da nahitaez $i \; = \; hash(g)\; \% \; N$ izango
* $i$ posiziotik abiatuko gara
    * elementua topatu arte
    * gelaxka huts bat topatu arte
* $LF \; \le \; 1$
   * Kolisio tasa mugatzeko: $LF \; < \; 0.7$

Hiztegi berri bat sortzeko funtzioa, `dict()`-en baliokidea

In [4]:
def h_new(n=7):
    """Return a fresh table: a list with *n* slots, every one set to ``None``."""
    return [None for _ in range(n)]

Sortu hiztegi bat eta *eskuz* gorde ondokoak: `3:'hiru'` eta `'lau':4`

In [5]:
# Build an empty table with the default number of slots and store two pairs by
# hand, each one at its home slot hash(key) % len(h).
# NOTE: no probing is done here, so if both keys mapped to the same slot the
# second assignment would overwrite the first.
h = h_new()
h[hash(3)%len(h)] = (3,'hiru')
h[hash('lau')%len(h)] = ('lau',4)
print(h)

[None, None, ('lau', 4), (3, 'hiru'), None, None, None]


Hiztegi baten `(gako,balio)` bikote zerrenda bueltatuko duen funtzioa, `dict.items()`-ren baliokidea

In [6]:
def h_items(h):
    """Return the occupied slots of table *h* as a new list, in slot order.

    Falsy slots (the ``None`` marker used for an empty slot) are left out.
    """
    return [slot for slot in h if slot]

In [7]:
# Print every slot of the underlying list (empty slots included), then only
# the pairs that h_items() returns.
print('h zerrendako elementuak:',*h)
print('h hiztegiko elementuak:',*h_items(h))

h zerrendako elementuak: None None ('lau', 4) (3, 'hiru') None None None
h hiztegiko elementuak: ('lau', 4) (3, 'hiru')


Hiztegi baten tamaina bueltatuko duen funtzioa, `len()`-ren baliokidea

In [5]:
def h_len(h):
    """Return how many slots of table *h* are occupied (truthy)."""
    return sum(1 for slot in h if slot)

def h_len2(h):
    """Return the number of stored pairs, computed as the length of ``h_items(h)``."""
    pairs = h_items(h)
    return len(pairs)

In [6]:
# len(h) counts the slots of the underlying list; h_len() and h_len2() count
# only the occupied ones.
print(f'h zerrendaren tamaina: {len(h)}')
print(f'h hiztegiaren tamaina: {h_len(h)}')
print(f'h hiztegiaren tamaina: {h_len2(h)}')

h zerrendaren tamaina: 7
h hiztegiaren tamaina: 2
h hiztegiaren tamaina: 2


Hiztegi batetik karaktere kate bat sortzeko funtzioa, `str()`-ren baliokidea

In [7]:
def h_str(h):
    """Return a dict-like text rendering of table *h*.

    Each stored pair is written as ``repr(key):repr(value)``; pairs are joined
    with `` , `` and the whole thing is wrapped in ``{ `` and `` }``.
    """
    body = ' , '.join(f'{key!r}:{value!r}' for key, value in h_items(h))
    return '{ ' + body + ' }'

In [8]:
# str(h) renders the underlying list; h_str() renders only the stored pairs.
print(str(h))
print(h_str(h))

[None, None, ('lau', 4), (3, 'hiru'), None, None, None]
{ 'lau':4 , 3:'hiru' }


Gako bati dagokion balioa kontsultatzeko funtzioa, `dict.get()`-en baliokidea

In [16]:
def h_get(h,k,d=None):
    """Return the value stored under key *k* in table *h*, or *d* if absent.

    Lookup uses linear probing: start at the home slot ``hash(k) % len(h)``
    and step forward (wrapping around) until the key or an empty slot is found.

    Parameters:
        h: the table, a list whose slots are ``None`` or ``(key, value)`` tuples.
        k: the key to look up (must be hashable).
        d: value returned when *k* is not stored (default ``None``).
    """
    size = len(h)
    if size == 0:
        # An empty table holds nothing; also avoids a modulo by zero.
        return d
    i = hash(k) % size
    # Visit each slot at most once so that a completely full table that does
    # not contain k ends the search instead of probing forever.
    for _ in range(size):
        slot = h[i]
        if not slot:
            # An empty slot ends the probe sequence: k is not stored.
            break
        k2, v2 = slot
        if k2 == k:
            return v2
        i = (i + 1) % size
    return d

In [19]:
# Show the table, then look up several keys with h_get(); it returns its
# default (None) for a key that is not stored.
# The last line contrasts a lookup by key (4) with direct indexing of the
# underlying list (h[2]).
print(h)
print(h_str(h))
print(f'hizt[3] = {h_get(h,3)}')
print(f'hizt["hiru"] = {h_get(h,"hiru")}')
print(f'hizt["lau"] = {h_get(h,"lau")}')
print(f'hizt[4] = {h_get(h,4)} , baina h[2]={h[2]}')

[None, None, ('lau', 4), (3, 'hiru'), None, None, None]
{ 'lau':4 , 3:'hiru' }
hizt[3] = hiru
hizt["hiru"] = None
hizt["lau"] = 4
hizt[4] = None , baina h[2]=('lau', 4)


Hiztegi batetan gako bati dagokion balioa gordeko duen funtzioa, `h[k]=v`-en baliokidea.

In [23]:
def h_put(h,k,v,check_lf=True):
    """Store value *v* under key *k* in table *h* (in place).

    Uses linear probing from the home slot ``hash(k) % len(h)``. If *k* is
    already stored its pair is replaced where it sits; otherwise the pair goes
    into the first empty slot found.

    Parameters:
        h: the table, a list whose slots are ``None`` or ``(key, value)`` tuples.
        k: the key (must be hashable).
        v: the value to associate with *k*.
        check_lf: when true, grow the table after an insertion that brings the
            load factor (stored pairs / slots) to 0.7 or more.

    Raises:
        ValueError: if *k* is not stored and the table has no empty slot.
    """
    size = len(h)
    i = hash(k) % size if size else 0
    # Probe each slot at most once; the for/else reports a table with no free
    # slot (including a zero-slot table) instead of looping forever.
    for _ in range(size):
        slot = h[i]
        if not slot:
            break
        if slot[0] == k:
            # Existing key: overwrite in place and stop, so the pair is not
            # stored a second time further along the probe sequence.
            h[i] = (k, v)
            return
        i = (i + 1) % size
    else:
        raise ValueError('h_put: the table has no free slot')
    h[i] = (k, v)
    # Keep LF < 0.7. max() guarantees real growth even for tiny tables, where
    # int(size * 1.7) can equal size.
    if check_lf and h_len(h) / size >= 0.7:
        h_resize(h, max(size + 1, int(size * 1.7)))

Hiztegi baten *barne tamaina* aldatuko duen funtzioa

In [24]:
def h_resize(h,n,check_lf=None):
    """Change the number of slots of table *h* to *n*, in place.

    The stored pairs are collected, the list is emptied and refilled with *n*
    empty slots, and every pair is re-inserted with ``h_put`` (its slot depends
    on the table length, so it must be recomputed).

    Parameters:
        h: the table to resize.
        n: the new number of slots; must be at least the number of stored pairs.
        check_lf: forwarded to ``h_put``. When left as ``None`` the load-factor
            check is enabled only if the table is shrinking (``n < len(h)``).

    Raises:
        ValueError: if *n* is smaller than the number of stored pairs.
    """
    items = h_items(h)
    # Validate before clearing, so a bad size does not destroy the contents.
    if n < len(items):
        raise ValueError('h_resize: n is smaller than the number of stored items')
    if check_lf is None:
        check_lf = n < len(h)
    h.clear()
    h.extend([None] * n)
    for k,v in items :
        h_put(h,k,v,check_lf)

`h_put()` eta `h_resize()` biak batera frogatuko ditugu...

In [27]:
# Build a table with h_put() instead of by hand, print it, then change its
# slot count to 9 with h_resize() and print it again.
h = h_new()
h_put(h,3,'hiru')
h_put(h,'lau',4)

print('-'*50+'\n',len(h),h)
print(h_str(h))

h_resize(h,9)
print('-'*50+'\n',len(h),h)
print(h_str(h))


--------------------------------------------------
 7 [None, None, ('lau', 4), (3, 'hiru'), None, None, None]
{ 'lau':4 , 3:'hiru' }
--------------------------------------------------
 9 [None, None, None, (3, 'hiru'), None, ('lau', 4), None, None, None]
{ 3:'hiru' , 'lau':4 }


Frogak egin ditzagun....

In [32]:
# For each n, fill a fresh table with the integer keys 0..n-1 (value: the key
# as a string), then print the slot count and the number of stored pairs.
# The table itself is printed only when it has fewer than 1000 slots.
for n in 10,100,1000,10000 :
    h = h_new()
    for i in range(n):
        h_put(h,i,str(i))
    print(len(h),h_len(h))
    if len(h) < 1000 :
        print(h)

18 10
[(0, '0'), (1, '1'), (2, '2'), (3, '3'), (4, '4'), (5, '5'), (6, '6'), (7, '7'), (8, '8'), (9, '9'), None, None, None, None, None, None, None, None]
146 100
[(0, '0'), (1, '1'), (2, '2'), (3, '3'), (4, '4'), (5, '5'), (6, '6'), (7, '7'), (8, '8'), (9, '9'), (10, '10'), (11, '11'), (12, '12'), (13, '13'), (14, '14'), (15, '15'), (16, '16'), (17, '17'), (18, '18'), (19, '19'), (20, '20'), (21, '21'), (22, '22'), (23, '23'), (24, '24'), (25, '25'), (26, '26'), (27, '27'), (28, '28'), (29, '29'), (30, '30'), (31, '31'), (32, '32'), (33, '33'), (34, '34'), (35, '35'), (36, '36'), (37, '37'), (38, '38'), (39, '39'), (40, '40'), (41, '41'), (42, '42'), (43, '43'), (44, '44'), (45, '45'), (46, '46'), (47, '47'), (48, '48'), (49, '49'), (50, '50'), (51, '51'), (52, '52'), (53, '53'), (54, '54'), (55, '55'), (56, '56'), (57, '57'), (58, '58'), (59, '59'), (60, '60'), (61, '61'), (62, '62'), (63, '63'), (64, '64'), (65, '65'), (66, '66'), (67, '67'), (68, '68'), (69, '69'), (70, '70'), (71,

Zenbaki osoen hash balioa bere burua delako, `h` zerrendaren tamaina gordetzen ari garen `i` gako handiena baino handiagoa izatea nahikoa da... **EZ DA KOLISIORIK EGONGO**

In [31]:
# Same experiment with string keys: str(i) is the key and i the value.
# Prints the slot count and the number of stored pairs, and the table itself
# only when it has fewer than 1000 slots.
for n in 10,100,1000,10000 :
    h = h_new()
    for i in range(n):
        h_put(h,str(i),i)
    print(len(h),h_len(h))
    if len(h) < 1000 :
        print(h)

18 10
[('5', 5), ('3', 3), None, None, ('9', 9), None, ('4', 4), None, None, ('2', 2), ('8', 8), ('6', 6), None, None, ('0', 0), None, ('1', 1), ('7', 7)]
146 100
[('90', 90), None, None, ('36', 36), ('12', 12), None, ('21', 21), None, ('48', 48), None, ('17', 17), ('19', 19), ('31', 31), ('59', 59), ('85', 85), ('58', 58), ('95', 95), None, None, ('94', 94), ('9', 9), ('16', 16), ('20', 20), ('39', 39), ('42', 42), ('64', 64), ('68', 68), ('7', 7), ('74', 74), None, ('84', 84), ('98', 98), ('66', 66), ('11', 11), ('53', 53), ('28', 28), ('87', 87), None, ('88', 88), ('38', 38), ('54', 54), ('13', 13), ('8', 8), ('69', 69), ('56', 56), ('79', 79), ('89', 89), None, None, ('78', 78), None, None, ('29', 29), ('44', 44), ('33', 33), None, None, None, None, None, ('4', 4), None, None, ('67', 67), None, None, ('10', 10), ('37', 37), ('70', 70), ('80', 80), ('91', 91), ('99', 99), None, None, None, ('46', 46), None, None, ('34', 34), ('6', 6), ('25', 25), ('61', 61), ('62', 62), ('52', 52), 

Lehenengo kasuan, gakoak `[0,n)` tarteko zenbakiak izan beharrean ausazkoak izan balira...

In [33]:
from random import randrange
# Same experiment with random integer keys drawn from range(1000000000).
# randrange() may return the same key twice, so the number of stored pairs
# can be lower than n.
for n in 10,100,1000,10000 :
    h = h_new()
    for i in range(n):
        h_put(h,randrange(1000000000),'KAKA!')
    print(len(h),h_len(h))
    if len(h) < 1000 :
        print(h)

18 10
[None, (993856429, 'KAKA!'), (230995352, 'KAKA!'), (401499811, 'KAKA!'), (906925018, 'KAKA!'), None, (531333816, 'KAKA!'), None, (796369742, 'KAKA!'), None, (161651566, 'KAKA!'), None, None, None, None, (382689429, 'KAKA!'), (216732202, 'KAKA!'), (929514149, 'KAKA!')]
146 100
[(323231589, 'KAKA!'), None, (167658518, 'KAKA!'), (66316560, 'KAKA!'), None, (757034241, 'KAKA!'), (388340003, 'KAKA!'), None, (270628674, 'KAKA!'), (586624651, 'KAKA!'), (394040432, 'KAKA!'), (698428679, 'KAKA!'), (885151292, 'KAKA!'), (889495377, 'KAKA!'), (959884167, 'KAKA!'), (823705151, 'KAKA!'), (120451476, 'KAKA!'), (609655134, 'KAKA!'), (612200938, 'KAKA!'), (377299783, 'KAKA!'), (447297446, 'KAKA!'), (847366793, 'KAKA!'), (148077602, 'KAKA!'), (362163243, 'KAKA!'), (369259134, 'KAKA!'), (384575995, 'KAKA!'), (100185080, 'KAKA!'), (119361889, 'KAKA!'), (754784696, 'KAKA!'), (784116674, 'KAKA!'), None, (771853121, 'KAKA!'), None, None, (652252260, 'KAKA!'), (526180238, 'KAKA!'), None, (267590881, 'KA