## Importing the required packages

In [0]:
import random
import pandas as pd
import pylab as pl
import numpy as np
import time
import math

## About the dataset: (Synthetic)
#### Library dataset with the ID's of the students stored who enter the library through the E-Gates.
We convert the dataset from a Panda's dataframe to an Array using numpy. The integer values are stored in the array called "Hash_values".

There are 1068 entries (number of values to be hashed 'n'). As we are considering Open Address hashing, we do not want load factor (n/h, where 'h' is the size of the hash table) to ever exceed 1. Therefore, we choose a prime near the value of n and greater than it. Therefore, h = 1069, which is define as "hash_size".

## Reading and cleaning of the dataset 

In [2]:
url = 'https://raw.githubusercontent.com/arjunmann73/Machine-Learning/master/Regression/Simple%20Multivariate%20Linear%20Regression/FuelConsumption.csv'
csv_data = pd.read_csv(url)
data = csv_data["CO2EMISSIONS"]
Hash_values = np.asanyarray(data)
data = csv_data["CO2EMISSIONS"]
Hash_values = np.asanyarray(data)
Hash_values = Hash_values*10
len(Hash_values)

1067

## Creating the hash tables
---
We create 2 hash tables to check and compare different rehashing functions. In this lab, we will be looking at:
### - Folding method of rehashing (hash_table_double1)
### - Congruential method of rehashing (hash_table_double2)



In [0]:
hash_table_double1 = [None] * 1069
hash_table_double2 = [None] * 1069
hash_size = 1069

## Defining the Auxiliary hash function and the rehashing functions

In [0]:
def Auxiliary(k):
    return (k % hash_size)

def Folding(k):
    k = k*k
    c = bin(k)
    c = c[2:]
    c = c[0:5] + c[len(c)-5]
    c = int(c)
    return c

def Congruential(k):
  a = 6 * math.floor(hash_size/29) + 7 
  return (a*k)%hash_size

## Defining the Double Hash functions

In [0]:
def Hash_Folding(k, i):
    return (Auxiliary(k) + i*Folding(k)) % hash_size

def Hash_Congruential(k, i):
    return (Auxiliary(k) + i*Congruential(k)) % hash_size

## 1) Folding Method
---
#### a) Insertion of elements in the hash table



In [0]:
def hash_double_fold(key):
    i = 0 
    while(i != 1068):
        j = Hash_Folding(key, i)
        if(hash_table_double1[j] == None):
            hash_table_double1[j] = key;
            return 
        else:
            i += 1
    print("Overflow")

#### b) Searching an element in the hash table

In [0]:
def lookup_folding(val):
    code = Auxiliary(val)
    loc = code
    flag = 0
    i = 0
    while(hash_table_double1[loc] != None):
        if(hash_table_double1[loc] == val):
            flag = 1
            break;
        else:
            i+=1
            loc = Hash_Folding(val, i)
            if(loc == code):
                break;
    return [i+1, flag]

### Load Factor ≅ 1

In [0]:
for j in range(1067):
    hash_double_fold(Hash_values[j])

#### Time Analysis and Number of Key Comparisions for Load Factor ≅ 1

In [10]:
value_success = []
value_fail = []
comparisions_success = []
comparisions_fail = []
time_taken_success = []
time_taken_fail = []

for x in range(10000):
  
  j = random.randint(180,420)
  j = j*10
  
  start_time = time.time()
  a = lookup_folding(j)
  
  temp = (time.time() - start_time)*1000
  
  if(a[1]):
    comparisions_success.append(a[0])
    time_taken_success.append(temp)
    value_success.append(j)
    
  else:
    comparisions_fail.append(a[0])
    time_taken_fail.append(temp)
    value_fail.append(j)

print("Successful Searches: " , len(value_success))
print("Average Comparisions: " , sum(comparisions_success)/len(comparisions_success) , " Average time taken in milliseconds: " , (sum(time_taken_success)/len(time_taken_success))*1000)
print("Unsuccessful Searches: ", len(value_fail))
print("Average Comparisions: " , sum(comparisions_fail)/len(comparisions_fail) , " Average time taken in milliseconds: " , (sum(time_taken_fail)/len(time_taken_fail))*1000)

Successful Searches:  5370
Average Comparisions:  1.9361266294227188  Average time taken in milliseconds:  2.320829494483422
Unsuccessful Searches:  4630
Average Comparisions:  336.2611231101512  Average time taken in milliseconds:  444.1136160370598


### Load Factor ≅ 0.75

In [0]:
hash_table_double1 = [None] * 1069
for j in range(802):
    hash_double_fold(Hash_values[j])

#### Time Analysis and Number of Key Comparisions for Load Factor ≅ 0.75

In [12]:
value_success = []
value_fail = []
comparisions_success = []
comparisions_fail = []
time_taken_success = []
time_taken_fail = []

for x in range(10000):
  
  j = random.randint(180,420)
  j = j*10
  
  start_time = time.time()
  a = lookup_folding(j)
  
  temp = (time.time() - start_time)*1000
  
  if(a[1]):
    comparisions_success.append(a[0])
    time_taken_success.append(temp)
    value_success.append(j)
    
  else:
    comparisions_fail.append(a[0])
    time_taken_fail.append(temp)
    value_fail.append(j)

print("Successful Searches: " , len(value_success))
print("Average Comparisions: " , sum(comparisions_success)/len(comparisions_success) , " Average time taken in milliseconds: " , (sum(time_taken_success)/len(time_taken_success))*1000)
print("Unsuccessful Searches: ", len(value_fail))
print("Average Comparisions: " , sum(comparisions_fail)/len(comparisions_fail) , " Average time taken in milliseconds: " , (sum(time_taken_fail)/len(time_taken_fail))*1000)

Successful Searches:  5031
Average Comparisions:  1.2285827867223216  Average time taken in milliseconds:  1.3220833869559512
Unsuccessful Searches:  4969
Average Comparisions:  4.606761923928356  Average time taken in milliseconds:  5.923375010514408


### Load Factor ≅ 0.5

In [0]:
hash_table_double1 = [None] * 1069
for j in range(535):
    hash_double_fold(Hash_values[j])

#### Time Analysis and Number of Key Comparisions for Load Factor ≅ 0.5

In [14]:
value_success = []
value_fail = []
comparisions_success = []
comparisions_fail = []
time_taken_success = []
time_taken_fail = []

for x in range(10000):
  
  j = random.randint(180,420)
  j = j*10
  
  start_time = time.time()
  a = lookup_folding(j)
  
  temp = (time.time() - start_time)*1000
  
  if(a[1]):
    comparisions_success.append(a[0])
    time_taken_success.append(temp)
    value_success.append(j)
    
  else:
    comparisions_fail.append(a[0])
    time_taken_fail.append(temp)
    value_fail.append(j)

print("Successful Searches: " , len(value_success))
print("Average Comparisions: " , sum(comparisions_success)/len(comparisions_success) , " Average time taken in milliseconds: " , (sum(time_taken_success)/len(time_taken_success))*1000)
print("Unsuccessful Searches: ", len(value_fail))
print("Average Comparisions: " , sum(comparisions_fail)/len(comparisions_fail) , " Average time taken in milliseconds: " , (sum(time_taken_fail)/len(time_taken_fail))*1000)

Successful Searches:  4578
Average Comparisions:  1.1114023591087812  Average time taken in milliseconds:  1.2491196198669674
Unsuccessful Searches:  5422
Average Comparisions:  1.8087421615639985  Average time taken in milliseconds:  1.9667993740418088


### Load Factor ≅ 0.25

In [0]:
hash_table_double1 = [None] * 1069
for j in range(268):
    hash_double_fold(Hash_values[j])

#### Time Analysis and Number of Key Comparisions for Load Factor ≅ 0.25

In [16]:
value_success = []
value_fail = []
comparisions_success = []
comparisions_fail = []
time_taken_success = []
time_taken_fail = []

for x in range(10000):
  
  j = random.randint(180,420)
  j = j*10
  
  start_time = time.time()
  a = lookup_folding(j)
  
  temp = (time.time() - start_time)*1000
  
  if(a[1]):
    comparisions_success.append(a[0])
    time_taken_success.append(temp)
    value_success.append(j)
    
  else:
    comparisions_fail.append(a[0])
    time_taken_fail.append(temp)
    value_fail.append(j)

print("Successful Searches: " , len(value_success))
print("Average Comparisions: " , sum(comparisions_success)/len(comparisions_success) , " Average time taken in milliseconds: " , (sum(time_taken_success)/len(time_taken_success))*1000)
print("Unsuccessful Searches: ", len(value_fail))
print("Average Comparisions: " , sum(comparisions_fail)/len(comparisions_fail) , " Average time taken in milliseconds: " , (sum(time_taken_fail)/len(time_taken_fail))*1000)

Successful Searches:  3646
Average Comparisions:  1.0957213384530993  Average time taken in milliseconds:  1.2476759432961635
Unsuccessful Searches:  6354
Average Comparisions:  1.1062322946175638  Average time taken in milliseconds:  0.7748040981851061


## Multiplicative Congruential Method

---
#### a) Insertion of elements in the hash table


In [0]:
def hash_double_congruential(key):
    i = 0
    while(i != 1068):
        j = Hash_Congruential(key, i)
        if(hash_table_double2[j] == None):
            hash_table_double2[j] = key;
            return 
        else:
            i += 1
    print("Overflow")

#### b) Searching an element in the hash table

In [0]:
def lookup2(val):
    code = Auxiliary(val)
    loc = code
    flag = 0
    i = 0
    while(hash_table_double2[loc] != None):
        if(hash_table_double2[loc] == val):
            flag = 1
            break;
        else:
            i+=1
            loc = Hash_Congruential(val, i)
            if(loc == code):
                break;
    return [i+1, flag]

### Load Factor ≅ 1

In [0]:
for j in range(1067):
    hash_double_congruential(Hash_values[j])

#### Time Analysis and Number of Key Comparisions for Load Factor ≅ 1

In [20]:
value_success = []
value_fail = []
comparisions_success = []
comparisions_fail = []
time_taken_success = []
time_taken_fail = []

for x in range(10000):
  
  j = random.randint(180,420)
  j = j*10
  
  start_time = time.time()
  a = lookup2(j)
  
  temp = (time.time() - start_time)*1000
  
  if(a[1]):
    comparisions_success.append(a[0])
    time_taken_success.append(temp)
    value_success.append(j)
    
  else:
    comparisions_fail.append(a[0])
    time_taken_fail.append(temp)
    value_fail.append(j)

print("Successful Searches: " , len(value_success))
print("Average Comparisions: " , sum(comparisions_success)/len(comparisions_success) , " Average time taken in milliseconds: " , (sum(time_taken_success)/len(time_taken_success))*1000)
print("Unsuccessful Searches: ", len(value_fail))
print("Average Comparisions: " , sum(comparisions_fail)/len(comparisions_fail) , " Average time taken in milliseconds: " , (sum(time_taken_fail)/len(time_taken_fail))*1000)

Successful Searches:  5310
Average Comparisions:  1.9661016949152543  Average time taken in milliseconds:  2.0107980501853815
Unsuccessful Searches:  4690
Average Comparisions:  340.5682302771855  Average time taken in milliseconds:  331.8543118962855


### Load Factor ≅ 0.75

In [0]:
hash_table_double2 = [None] * 1069
for j in range(802):
    hash_double_congruential(Hash_values[j])

#### Time Analysis and Number of Key Comparisions for Load Factor ≅ 0.75

In [23]:
value_success = []
value_fail = []
comparisions_success = []
comparisions_fail = []
time_taken_success = []
time_taken_fail = []

for x in range(10000):
  
  j = random.randint(180,420)
  j = j*10
  
  start_time = time.time()
  a = lookup2(j)
  
  temp = (time.time() - start_time)*1000
  
  if(a[1]):
    comparisions_success.append(a[0])
    time_taken_success.append(temp)
    value_success.append(j)
    
  else:
    comparisions_fail.append(a[0])
    time_taken_fail.append(temp)
    value_fail.append(j)

print("Successful Searches: " , len(value_success))
print("Average Comparisions: " , sum(comparisions_success)/len(comparisions_success) , " Average time taken in milliseconds: " , (sum(time_taken_success)/len(time_taken_success))*1000)
print("Unsuccessful Searches: ", len(value_fail))
print("Average Comparisions: " , sum(comparisions_fail)/len(comparisions_fail) , " Average time taken in milliseconds: " , (sum(time_taken_fail)/len(time_taken_fail))*1000)

Successful Searches:  4921
Average Comparisions:  1.3521641942694573  Average time taken in milliseconds:  1.4759083875575898
Unsuccessful Searches:  5079
Average Comparisions:  3.5481393975191966  Average time taken in milliseconds:  3.7998635836017485


### Load Factor ≅ 0.5

In [0]:
hash_table_double2 = [None] * 1069
for j in range(535):
    hash_double_congruential(Hash_values[j])

#### Time Analysis and Number of Key Comparisions for Load Factor ≅ 0.5

In [25]:
value_success = []
value_fail = []
comparisions_success = []
comparisions_fail = []
time_taken_success = []
time_taken_fail = []

for x in range(10000):
  
  j = random.randint(180,420)
  j = j*10
  
  start_time = time.time()
  a = lookup2(j)
  
  temp = (time.time() - start_time)*1000
  
  if(a[1]):
    comparisions_success.append(a[0])
    time_taken_success.append(temp)
    value_success.append(j)
    
  else:
    comparisions_fail.append(a[0])
    time_taken_fail.append(temp)
    value_fail.append(j)

print("Successful Searches: " , len(value_success))
print("Average Comparisions: " , sum(comparisions_success)/len(comparisions_success) , " Average time taken in milliseconds: " , (sum(time_taken_success)/len(time_taken_success))*1000)
print("Unsuccessful Searches: ", len(value_fail))
print("Average Comparisions: " , sum(comparisions_fail)/len(comparisions_fail) , " Average time taken in milliseconds: " , (sum(time_taken_fail)/len(time_taken_fail))*1000)

Successful Searches:  4473
Average Comparisions:  1.2412251285490723  Average time taken in milliseconds:  1.631725455407139
Unsuccessful Searches:  5527
Average Comparisions:  1.7483263976840961  Average time taken in milliseconds:  1.7340238904875356


### Load Factor ≅ 0.25

In [0]:
hash_table_double2 = [None] * 1069
for j in range(268):
    hash_double_congruential(Hash_values[j])

#### Time Analysis and Number of Key Comparisions for Load Factor ≅ 0.25

In [27]:
value_success = []
value_fail = []
comparisions_success = []
comparisions_fail = []
time_taken_success = []
time_taken_fail = []

for x in range(10000):
  
  j = random.randint(180,420)
  j = j*10
  
  start_time = time.time()
  a = lookup2(j)
  
  temp = (time.time() - start_time)*1000
  
  if(a[1]):
    comparisions_success.append(a[0])
    time_taken_success.append(temp)
    value_success.append(j)
    
  else:
    comparisions_fail.append(a[0])
    time_taken_fail.append(temp)
    value_fail.append(j)

print("Successful Searches: " , len(value_success))
print("Average Comparisions: " , sum(comparisions_success)/len(comparisions_success) , " Average time taken in milliseconds: " , (sum(time_taken_success)/len(time_taken_success))*1000)
print("Unsuccessful Searches: ", len(value_fail))
print("Average Comparisions: " , sum(comparisions_fail)/len(comparisions_fail) , " Average time taken in milliseconds: " , (sum(time_taken_fail)/len(time_taken_fail))*1000)

Successful Searches:  3653
Average Comparisions:  1.099096632904462  Average time taken in milliseconds:  1.227663146153953
Unsuccessful Searches:  6347
Average Comparisions:  1.2541358121947377  Average time taken in milliseconds:  0.9491666388432773
