## Importing the required packages

In [1]:
import random
import pandas as pd
import pylab as pl
import numpy as np
import time
import math

## About the dataset: (Synthetic)
#### Library dataset with the ID's of the students stored who enter the library through the E-Gates.
We convert the dataset from a Panda's dataframe to an Array using numpy. The integer values are stored in the array called "Hash_values".

There are 1068 entries (number of values to be hashed 'n'). As we are considering Open Address hashing, we do not want load factor (n/h, where 'h' is the size of the hash table) to ever exceed 1. Therefore, we choose a prime near the value of n and greater than it. Therefore, h = 1069, which is define as "hash_size".

## Reading and cleaning of the dataset 

In [2]:
url = 'https://raw.githubusercontent.com/arjunmann73/Machine-Learning/master/Regression/Simple%20Multivariate%20Linear%20Regression/FuelConsumption.csv'
csv_data = pd.read_csv(url)
data = csv_data["CO2EMISSIONS"]
Hash_values = np.asanyarray(data)
data = csv_data["CO2EMISSIONS"]
Hash_values = np.asanyarray(data)
Hash_values = Hash_values*10
len(Hash_values)

1067

## Creating the hash tables
---
We create 2 hash tables to check and compare different rehashing functions. In this lab, we will be looking at:
### - Folding method of rehashing (hash_table_double1)
### - Congruential method of rehashing (hash_table_double2)



In [3]:
hash_table_double1 = [None] * 1069
hash_table_double2 = [None] * 1069
hash_size = 1069

## Defining the Auxiliary hash function and the rehashing functions

In [4]:
def Auxiliary(k):
    return (k % hash_size)

def Folding(k):
    k = k*k
    c = bin(k)
    c = c[2:]
    c = c[0:5] + c[len(c)-5]
    c = int(c)
    return c

def Congruential(k):
  a = 6 * math.floor(hash_size/29) + 7 
  return (a*k)%hash_size

In [5]:
def Hash_Folding(k, i):
    return (Auxiliary(k) + i*Folding(k)) % hash_size

def Hash_Congruential(k, i):
    return (Auxiliary(k) + i*Congruential(k)) % hash_size

## 1) Folding Method
---
#### a) Insertion of elements in the hash table



In [6]:
def hash_double_fold(key):
    i = 0 
    while(i != 1068):
        j = Hash_Folding(key, i)
        if(hash_table_double1[j] == None):
            hash_table_double1[j] = key;
            return 
        else:
            i += 1
    print("Overflow")

#### b) Searching an element in the hash table

In [7]:
def lookup_folding(val):
    code = Auxiliary(val)
    loc = code
    flag = 0
    i = 0
    while(hash_table_double1[loc] != None):
        if(hash_table_double1[loc] == val):
            flag = 1
            break;
        else:
            i+=1
            loc = Hash_Folding(val, i)
            if(loc == code):
                break;
    return [i+1, flag]

### Load Factor ≅ 1

In [8]:
for j in range(1067):
    hash_double_fold(Hash_values[j])

#### Time Analysis and Number of Key Comparisions for Load Factor ≅ 1

In [9]:
value_success = []
value_fail = []
comparisions_success = []
comparisions_fail = []
time_taken_success = []
time_taken_fail = []

for x in range(10000):
  
  j = random.randint(180,420)
  j = j*10
  
  start_time = time.time()
  a = lookup_folding(j)
  
  temp = (time.time() - start_time)*1000
  
  if(a[1]):
    comparisions_success.append(a[0])
    time_taken_success.append(temp)
    value_success.append(j)
    
  else:
    comparisions_fail.append(a[0])
    time_taken_fail.append(temp)
    value_fail.append(j)

print("Successful Searches: " , len(value_success))
print("Average Comparisions: " , sum(comparisions_success)/len(comparisions_success) , " Average time taken in milliseconds: " , (sum(time_taken_success)/len(time_taken_success))*1000)
print("Unsuccessful Searches: ", len(value_fail))
print("Average Comparisions: " , sum(comparisions_fail)/len(comparisions_fail) , " Average time taken in milliseconds: " , (sum(time_taken_fail)/len(time_taken_fail))*1000)

Successful Searches:  5373
Average Comparisions:  1.9326260934301136  Average time taken in milliseconds:  3.147852968819066
Unsuccessful Searches:  4627
Average Comparisions:  340.7568618975578  Average time taken in milliseconds:  608.9336237795079


### Load Factor ≅ 0.75

In [10]:
hash_table_double1 = [None] * 1069
for j in range(802):
    hash_double_fold(Hash_values[j])

#### Time Analysis and Number of Key Comparisions for Load Factor ≅ 0.75

In [11]:
value_success = []
value_fail = []
comparisions_success = []
comparisions_fail = []
time_taken_success = []
time_taken_fail = []

for x in range(10000):
  
  j = random.randint(180,420)
  j = j*10
  
  start_time = time.time()
  a = lookup_folding(j)
  
  temp = (time.time() - start_time)*1000
  
  if(a[1]):
    comparisions_success.append(a[0])
    time_taken_success.append(temp)
    value_success.append(j)
    
  else:
    comparisions_fail.append(a[0])
    time_taken_fail.append(temp)
    value_fail.append(j)

print("Successful Searches: " , len(value_success))
print("Average Comparisions: " , sum(comparisions_success)/len(comparisions_success) , " Average time taken in milliseconds: " , (sum(time_taken_success)/len(time_taken_success))*1000)
print("Unsuccessful Searches: ", len(value_fail))
print("Average Comparisions: " , sum(comparisions_fail)/len(comparisions_fail) , " Average time taken in milliseconds: " , (sum(time_taken_fail)/len(time_taken_fail))*1000)

Successful Searches:  4957
Average Comparisions:  1.2473270123058302  Average time taken in milliseconds:  3.6136022324953587
Unsuccessful Searches:  5043
Average Comparisions:  4.544715447154472  Average time taken in milliseconds:  8.914670654877817


### Load Factor ≅ 0.5

In [12]:
hash_table_double1 = [None] * 1069
for j in range(535):
    hash_double_fold(Hash_values[j])

#### Time Analysis and Number of Key Comparisions for Load Factor ≅ 0.5

In [13]:
value_success = []
value_fail = []
comparisions_success = []
comparisions_fail = []
time_taken_success = []
time_taken_fail = []

for x in range(10000):
  
  j = random.randint(180,420)
  j = j*10
  
  start_time = time.time()
  a = lookup_folding(j)
  
  temp = (time.time() - start_time)*1000
  
  if(a[1]):
    comparisions_success.append(a[0])
    time_taken_success.append(temp)
    value_success.append(j)
    
  else:
    comparisions_fail.append(a[0])
    time_taken_fail.append(temp)
    value_fail.append(j)

print("Successful Searches: " , len(value_success))
print("Average Comparisions: " , sum(comparisions_success)/len(comparisions_success) , " Average time taken in milliseconds: " , (sum(time_taken_success)/len(time_taken_success))*1000)
print("Unsuccessful Searches: ", len(value_fail))
print("Average Comparisions: " , sum(comparisions_fail)/len(comparisions_fail) , " Average time taken in milliseconds: " , (sum(time_taken_fail)/len(time_taken_fail))*1000)

Successful Searches:  4586
Average Comparisions:  1.1140427387701701  Average time taken in milliseconds:  1.7444691281624356
Unsuccessful Searches:  5414
Average Comparisions:  1.820096047284817  Average time taken in milliseconds:  2.394226322237486


### Load Factor ≅ 0.25

In [14]:
hash_table_double1 = [None] * 1069
for j in range(268):
    hash_double_fold(Hash_values[j])

#### Time Analysis and Number of Key Comparisions for Load Factor ≅ 0.25

In [15]:
value_success = []
value_fail = []
comparisions_success = []
comparisions_fail = []
time_taken_success = []
time_taken_fail = []

for x in range(10000):
  
  j = random.randint(180,420)
  j = j*10
  
  start_time = time.time()
  a = lookup_folding(j)
  
  temp = (time.time() - start_time)*1000
  
  if(a[1]):
    comparisions_success.append(a[0])
    time_taken_success.append(temp)
    value_success.append(j)
    
  else:
    comparisions_fail.append(a[0])
    time_taken_fail.append(temp)
    value_fail.append(j)

print("Successful Searches: " , len(value_success))
print("Average Comparisions: " , sum(comparisions_success)/len(comparisions_success) , " Average time taken in milliseconds: " , (sum(time_taken_success)/len(time_taken_success))*1000)
print("Unsuccessful Searches: ", len(value_fail))
print("Average Comparisions: " , sum(comparisions_fail)/len(comparisions_fail) , " Average time taken in milliseconds: " , (sum(time_taken_fail)/len(time_taken_fail))*1000)

Successful Searches:  3632
Average Comparisions:  1.097466960352423  Average time taken in milliseconds:  2.4737896898244447
Unsuccessful Searches:  6368
Average Comparisions:  1.0970477386934674  Average time taken in milliseconds:  0.940383079662994


## Multiplicative Congruential Method

---
#### a) Insertion of elements in the hash table


In [16]:
def hash_double_congruential(key):
    i = 0
    while(i != 1068):
        j = Hash_Congruential(key, i)
        if(hash_table_double2[j] == None):
            hash_table_double2[j] = key;
            return 
        else:
            i += 1
    print("Overflow")

#### b) Searching an element in the hash table

In [17]:
def lookup2(val):
    code = Auxiliary(val)
    loc = code
    flag = 0
    i = 0
    while(hash_table_double2[loc] != None):
        if(hash_table_double2[loc] == val):
            flag = 1
            break;
        else:
            i+=1
            loc = Hash_Congruential(val, i)
            if(loc == code):
                break;
    return [i+1, flag]

### Load Factor ≅ 1

In [18]:
for j in range(1067):
    hash_double_congruential(Hash_values[j])

#### Time Analysis and Number of Key Comparisions for Load Factor ≅ 1

In [19]:
value_success = []
value_fail = []
comparisions_success = []
comparisions_fail = []
time_taken_success = []
time_taken_fail = []

for x in range(10000):
  
  j = random.randint(180,420)
  j = j*10
  
  start_time = time.time()
  a = lookup2(j)
  
  temp = (time.time() - start_time)*1000
  
  if(a[1]):
    comparisions_success.append(a[0])
    time_taken_success.append(temp)
    value_success.append(j)
    
  else:
    comparisions_fail.append(a[0])
    time_taken_fail.append(temp)
    value_fail.append(j)

print("Successful Searches: " , len(value_success))
print("Average Comparisions: " , sum(comparisions_success)/len(comparisions_success) , " Average time taken in milliseconds: " , (sum(time_taken_success)/len(time_taken_success))*1000)
print("Unsuccessful Searches: ", len(value_fail))
print("Average Comparisions: " , sum(comparisions_fail)/len(comparisions_fail) , " Average time taken in milliseconds: " , (sum(time_taken_fail)/len(time_taken_fail))*1000)

Successful Searches:  5340
Average Comparisions:  2.033895131086142  Average time taken in milliseconds:  2.2464477167593855
Unsuccessful Searches:  4660
Average Comparisions:  336.31716738197423  Average time taken in milliseconds:  526.7234830897254


### Load Factor ≅ 0.75

In [20]:
hash_table_double2 = [None] * 1069
for j in range(802):
    hash_double_congruential(Hash_values[j])

#### Time Analysis and Number of Key Comparisions for Load Factor ≅ 0.75

In [21]:
value_success = []
value_fail = []
comparisions_success = []
comparisions_fail = []
time_taken_success = []
time_taken_fail = []

for x in range(10000):
  
  j = random.randint(180,420)
  j = j*10
  
  start_time = time.time()
  a = lookup2(j)
  
  temp = (time.time() - start_time)*1000
  
  if(a[1]):
    comparisions_success.append(a[0])
    time_taken_success.append(temp)
    value_success.append(j)
    
  else:
    comparisions_fail.append(a[0])
    time_taken_fail.append(temp)
    value_fail.append(j)

print("Successful Searches: " , len(value_success))
print("Average Comparisions: " , sum(comparisions_success)/len(comparisions_success) , " Average time taken in milliseconds: " , (sum(time_taken_success)/len(time_taken_success))*1000)
print("Unsuccessful Searches: ", len(value_fail))
print("Average Comparisions: " , sum(comparisions_fail)/len(comparisions_fail) , " Average time taken in milliseconds: " , (sum(time_taken_fail)/len(time_taken_fail))*1000)

Successful Searches:  5039
Average Comparisions:  1.3514586227426078  Average time taken in milliseconds:  1.5757201706329305
Unsuccessful Searches:  4961
Average Comparisions:  3.6797016730497885  Average time taken in milliseconds:  5.83027380412924


### Load Factor ≅ 0.5

In [22]:
hash_table_double2 = [None] * 1069
for j in range(535):
    hash_double_congruential(Hash_values[j])

#### Time Analysis and Number of Key Comparisions for Load Factor ≅ 0.5

In [23]:
value_success = []
value_fail = []
comparisions_success = []
comparisions_fail = []
time_taken_success = []
time_taken_fail = []

for x in range(10000):
  
  j = random.randint(180,420)
  j = j*10
  
  start_time = time.time()
  a = lookup2(j)
  
  temp = (time.time() - start_time)*1000
  
  if(a[1]):
    comparisions_success.append(a[0])
    time_taken_success.append(temp)
    value_success.append(j)
    
  else:
    comparisions_fail.append(a[0])
    time_taken_fail.append(temp)
    value_fail.append(j)

print("Successful Searches: " , len(value_success))
print("Average Comparisions: " , sum(comparisions_success)/len(comparisions_success) , " Average time taken in milliseconds: " , (sum(time_taken_success)/len(time_taken_success))*1000)
print("Unsuccessful Searches: ", len(value_fail))
print("Average Comparisions: " , sum(comparisions_fail)/len(comparisions_fail) , " Average time taken in milliseconds: " , (sum(time_taken_fail)/len(time_taken_fail))*1000)

Successful Searches:  4516
Average Comparisions:  1.2593002657218777  Average time taken in milliseconds:  1.7678135786571791
Unsuccessful Searches:  5484
Average Comparisions:  1.7478118161925602  Average time taken in milliseconds:  1.6302805065152248


### Load Factor ≅ 0.25

In [24]:
hash_table_double2 = [None] * 1069
for j in range(268):
    hash_double_congruential(Hash_values[j])

#### Time Analysis and Number of Key Comparisions for Load Factor ≅ 0.25

In [25]:
value_success = []
value_fail = []
comparisions_success = []
comparisions_fail = []
time_taken_success = []
time_taken_fail = []

for x in range(10000):
  
  j = random.randint(180,420)
  j = j*10
  
  start_time = time.time()
  a = lookup2(j)
  
  temp = (time.time() - start_time)*1000
  
  if(a[1]):
    comparisions_success.append(a[0])
    time_taken_success.append(temp)
    value_success.append(j)
    
  else:
    comparisions_fail.append(a[0])
    time_taken_fail.append(temp)
    value_fail.append(j)

print("Successful Searches: " , len(value_success))
print("Average Comparisions: " , sum(comparisions_success)/len(comparisions_success) , " Average time taken in milliseconds: " , (sum(time_taken_success)/len(time_taken_success))*1000)
print("Unsuccessful Searches: ", len(value_fail))
print("Average Comparisions: " , sum(comparisions_fail)/len(comparisions_fail) , " Average time taken in milliseconds: " , (sum(time_taken_fail)/len(time_taken_fail))*1000)

Successful Searches:  3644
Average Comparisions:  1.1029088913282108  Average time taken in milliseconds:  1.63516935480413
Unsuccessful Searches:  6356
Average Comparisions:  1.2572372561359346  Average time taken in milliseconds:  0.7840130147579858
