Hash Table - Double Hash

In [9]:
import pandas as pd

class HashTable:
    def __init__(self, size=500000):
        self.size = size
        self.table = [None] * self.size

    def convert_string_to_number(self, key):
        result = ""
        for char in key:
            if char.isdigit():
                result += char
            else:
                result += str(ord(char) % 10)
        return int(result)

    def hash_function1(self, key):
        total = self.convert_string_to_number(key)
        return total % self.size

    def hash_function2(self, key):
        total = 0
        for char in key:
            if char.isdigit():
                total += ord(char) % 10
            else:
                total += ord(char) % 7
        return (total % (self.size - 1)) + 1

    def double_hash(self, key, i):
        return (self.hash_function1(key) + i * self.hash_function2(key)) % self.size

    def insert(self, key, value, count_collisions):
        i = 0
        hash_index = self.double_hash(key, i)
        while self.table[hash_index] is not None and self.table[hash_index][0] != key:
            i += 1
            hash_index = self.double_hash(key, i)
        count_collisions += i
        self.table[hash_index] = (key, value)

    def search(self, key):
        i = 0
        hash_index = self.double_hash(key, i)
        while self.table[hash_index] is not None:
            if self.table[hash_index][0] == key:
                return self.table[hash_index][1]
            i += 1
            hash_index = self.double_hash(key, i)
        return None

    def delete(self, key):
        i = 0
        hash_index = self.double_hash(key, i)
        while self.table[hash_index] is not None:
            if self.table[hash_index][0] == key:
                self.table[hash_index] = None
                return True
            i += 1
            hash_index = self.double_hash(key, i)
        return False

    def update(self, key, value):
        i = 0
        hash_index = self.double_hash(key, i)
        while self.table[hash_index] is not None:
            if self.table[hash_index][0] == key:
                self.table[hash_index] = (key, value)
                return True
            i += 1
            hash_index = self.double_hash(key, i)
        return False

    def contains(self, key):
        i = 0
        hash_index = self.double_hash(key, i)
        while self.table[hash_index] is not None:
            if self.table[hash_index][0] == key:
                return True
            i += 1
            hash_index = self.double_hash(key, i)
        return False

# Ví dụ sử dụng HashTable
if __name__ == "__main__":
    data_path = 'D:\Workspace\Python code\Data\Book_raw.csv'
    df = pd.read_csv(data_path)
    hash_table = HashTable()
    count_collisions = 0

    # Chèn các phần tử vào bảng băm từ DataFrame
    for index, row in df.iterrows():
        key_columns = ["ISBN"]
        value_columns = ["Book-Title", "Book-Author", "Year-Of-Publication", "Publisher", "Image-URL-M", "Image-URL-L", "Image-URL-S"]

        # Tạo khóa và giá trị
        key = row[key_columns[0]]
        value = {col: row[col] for col in value_columns}

        # Chèn vào bảng băm
        hash_table.insert(str(key), value, count_collisions)

    print("Tổng số va chạm:", count_collisions)

    # Tìm kiếm một cuốn sách
    search_key = "0385504209"
    result = hash_table.search(search_key)
    if result:
        print(f"Tìm thấy cuốn sách với ISBN {search_key}: {result}")
    else:
        print(f"Không tìm thấy cuốn sách với ISBN {search_key}")

    # Xóa một cuốn sách
    delete_key = "0385504209"
    if hash_table.delete(delete_key):
        print(f"Đã xóa cuốn sách với ISBN {delete_key}")
    else:
        print(f"Không tìm thấy cuốn sách với ISBN {delete_key} để xóa")

    # Cập nhật một cuốn sách
    update_key = "0385504209"
    new_value = {"Book-Title": "New Title", "Book-Author": "New Author", "Year-Of-Publication": "2021", "Publisher": "New Publisher", "Image-URL-M": "new_url_m", "Image-URL-L": "new_url_l", "Image-URL-S": "new_url_s"}
    if hash_table.update(update_key, new_value):
        print(f"Đã cập nhật cuốn sách với ISBN {update_key}")
    else:
        print(f"Không tìm thấy cuốn sách với ISBN {update_key} để cập nhật")

    # Kiểm tra sự tồn tại của một cuốn sách
    check_key = "0385504209"
    if hash_table.contains(check_key):
        print(f"Cuốn sách với ISBN {check_key} tồn tại trong bảng băm")
    else:
        print(f"Cuốn sách với ISBN {check_key} không tồn tại trong bảng băm")


Tổng số va chạm: 0
Tìm thấy cuốn sách với ISBN 0385504209: {'Book-Title': 'The Da Vinci Code', 'Book-Author': 'Dan Brown', 'Year-Of-Publication': '2003', 'Publisher': 'Doubleday', 'Image-URL-M': 'http://images.amazon.com/images/P/0385504209.01.MZZZZZZZ.jpg', 'Image-URL-L': 'http://images.amazon.com/images/P/0385504209.01.LZZZZZZZ.jpg', 'Image-URL-S': 'http://images.amazon.com/images/P/0385504209.01.THUMBZZZ.jpg'}
Đã xóa cuốn sách với ISBN 0385504209
Không tìm thấy cuốn sách với ISBN 0385504209 để cập nhật
Cuốn sách với ISBN 0385504209 không tồn tại trong bảng băm


In [3]:
class Node:
    def __init__(self, book):
        self.book = book
        self.left = None
        self.right = None
        self.height = 1


class AVLTree:
    def __init__(self):
        self.root = None

    def insert(self, book):
        self.root = self._insert(self.root, book)

    def _insert(self, node, book):
        if not node:
            return Node(book)
        elif book.isbn < node.book.isbn:
            node.left = self._insert(node.left, book)
        else:
            node.right = self._insert(node.right, book)

        node.height = 1 + max(self._get_height(node.left), self._get_height(node.right))
        balance = self._get_balance(node)
        # Case 1 - Left Left
        if balance < -1 and book.isbn < node.left.book.isbn:
            return self._right_rotate(node)
        # Case 2 - Right Right
        if balance > 1 and book.isbn > node.right.book.isbn:
            return self._left_rotate(node)
        # Case 3 - Left Right
        if balance < -1 and book.isbn > node.left.book.isbn:
            node.left = self._left_rotate(node.left)
            return self._right_rotate(node)
        # Case 4 - Right Left
        if balance > 1 and book.isbn < node.right.book.isbn:
            node.right = self._right_rotate(node.right)
            return self._left_rotate(node)

        return node

    def _left_rotate(self, z):
        y = z.right
        T2 = y.left

        y.left = z
        z.right = T2

        z.height = 1 + max(self._get_height(z.left), self._get_height(z.right))
        y.height = 1 + max(self._get_height(y.left), self._get_height(y.right))

        return y

    def _right_rotate(self, z):
        y = z.left
        T3 = y.right

        y.right = z
        z.left = T3

        z.height = 1 + max(self._get_height(z.left), self._get_height(z.right))
        y.height = 1 + max(self._get_height(y.left), self._get_height(y.right))

        return y

    def _get_height(self, node):
        if not node:
            return 0
        return node.height

    def _get_balance(self, node):
        if not node:
            return 0
        return self._get_height(node.right) - self._get_height(node.left)

    def search(self, isbn):
        return self._search(self.root, isbn)

    def _search(self, node, isbn):
        if node is None:
            return None
        if node.book.isbn == isbn:
            return node.book
        if isbn < node.book.isbn:
            return self._search(node.left, isbn)
        return self._search(node.right, isbn)

    def delete(self, isbn):
        if self.search(isbn) is not None:
            return self._delete(self.root, isbn)
        else:
            return None

    def _delete(self, node, isbn):
        if not node:
            return node
        if isbn < node.book.isbn:
            node.left = self._delete(node.left, isbn)
        elif isbn > node.book.isbn:
            node.right = self._delete(node.right, isbn)
        else:
            if node.left is None:
                return node.right
            elif node.right is None:
                return node.left

            temp = self._min_value_node(node.right)
            node.book = temp.book
            node.right = self._delete(node.right, temp.book.isbn)

        if not node:
            return node

        node.height = 1 + max(self._get_height(node.left), self._get_height(node.right))
        balance = self._get_balance(node)
        # Left Left
        if balance < -1 and self._get_balance(node.left) <= 0:
            return self._right_rotate(node)
        # Left Right
        if balance < -1 and self._get_balance(node.left) > 0:
            node.left = self._left_rotate(node.left)
            return self._right_rotate(node)
        # Right Right
        if balance > 1 and self._get_balance(node.right) >= 0:
            return self._left_rotate(node)
        # Right Left
        if balance > 1 and self._get_balance(node.right) < 0:
            node.right = self._right_rotate(node.right)
            return self._left_rotate(node)

        return node

    def _min_value_node(self, node):
        current = node
        while current.left is not None:
            current = current.left
        return current

    def in_order_traversal(self, node):
        if node is None:
            return
        self.in_order_traversal(node.left)
        print(node.book.isbn, end="\n")
        self.in_order_traversal(node.right)


import pandas as pd
import time
class Book:
    def __init__(self, isbn, book_title, book_author, year_of_publication, publisher, image_URL_S, image_URL_M, image_URL_L):
        self.isbn = isbn
        self.book_title = book_title
        self.book_author = book_author
        self.year_of_publication = year_of_publication
        self.publisher = publisher
        self.image_URL_S = image_URL_S
        self.image_URL_M = image_URL_M
        self.image_URL_L = image_URL_L
    def __str__(self):
        return (f"ISBN: {self.isbn}\n"
                f"Book Title: {self.book_title}\n"
                f"Author: {self.book_author}\n"
                f"Year of Publication: {self.year_of_publication}\n"
                f"Publisher: {self.publisher}\n"
                f"Image URLs:\n"
                f"  Small: {self.image_URL_S}\n"
                f"  Medium: {self.image_URL_M}\n"
                f"  Large: {self.image_URL_L}\n")

class Hash_Table:
    def __init__(self, size):
        self.size = size
        self.table = [None] * self.size
    def hash_function(self, key):
        return int(key) % self.size
    def insert(self, key, data):
        index = self.hash_function(key)
        if self.table[index] == None:
            self.table[index] = AVLTree()
            self.table[index].insert(data)
        else:
            self.table[index].insert(data)
        return self.table
    def search(self, key):
        index = self.hash_function(key)
        table_current_index = self.table[index]
        if table_current_index is not None:
            return self.table[index].search(key)
        else:
            return None

    def remove(self,key):
        index = self.hash_function(key)
        table_current_index = self.table[index]
        if table_current_index is not None:
            return self.table[index].delete(key)
        return None
def load_data(filename):
    df = pd.read_csv(filename)
    book_list = []  # List to store Book objects
    for index, row in df.iterrows():
        if row['ISBN'].strip().isdigit():
            book = Book(
                isbn=row['ISBN'],
                book_title=row['Book-Title'],
                book_author=row['Book-Author'],
                year_of_publication=row['Year-Of-Publication'],
                publisher=row['Publisher'],
                image_URL_S=row['Image-URL-S'],
                image_URL_M=row['Image-URL-M'],
                image_URL_L=row['Image-URL-L']
            )
            book_list.append(book)
        else:
            continue
    return book_list


if __name__ == "__main__":
    filename = 'D:\Workspace\Python code\Data\Book_raw.csv'
    book_list = load_data(filename)
    hash_table = Hash_Table(len(book_list))
    start_time = time.time()
    count = 0
    for i in book_list:
        hash_table.insert(i.isbn, i)
        count += 1
    end_time = time.time()
    print("Count insert: ", count)
    print(f"Time taken: {(end_time - start_time) * 1000} mili_seconds")
    print("Build Hash_table Done!")
    while True:
        print("\nMenu:")
        print("1. Insert a book")
        print("2. Search for a book")
        print("3. Delete a book")
        print("4. Current book list")
        print("5. Exit")
        choice = input("Enter your choice: ")
        print()
        if choice == '1':
            isbn = input("Enter ISBN: ")
            book_title = input("Enter Book-Title: ")
            book_author = input("Enter Book-Author: ")
            year_of_publication = input("Enter Year-Of-Publication: ")
            publisher = input("Enter Publisher: ")
            image_URL_S = input("Enter Image-URL-S: ")
            image_URL_M = input("Enter Image-URL-M: ")
            image_URL_L = input("Enter Image-URL-L: ")
            book = Book(isbn, book_title, book_author, year_of_publication, publisher, image_URL_S, image_URL_M, image_URL_L)
            start_time = time.time()
            hash_table.insert(book.isbn,book)
            end_time = time.time()
            print(f"Time taken: {(end_time - start_time) * 1000} mili_seconds")
            print("Book inserted successfully.")

        elif choice == '2':
            isbn = str(input("Enter ISBN to search: "))
            print()
            start_time = time.time()
            book = hash_table.search(isbn)
            end_time = time.time()
            print(f"Time taken: {(end_time - start_time) * 1000} mili_seconds")
            if book is not None:
                print(book)
            else:
                print("Book not found.")
        elif choice == '3':
            isbn = str(input("Enter ISBN to delete: "))
            start_time = time.time()
            remove_book = hash_table.remove(isbn)
            end_time = time.time()
            print(f"Time taken: {(end_time - start_time) * 1000} mili_seconds")
            if remove_book is not None:
                print("Book deleted successfully.")
            else:
                print("Book not found.")

        elif choice == '4':
            isbn = str(input("Enter ISBN to search: "))
            index = hash_table.hash_function(isbn)
            tree = hash_table.table[index]
            if tree:
              tree.in_order_traversal(tree.root)
            else:
              print("No books found for this ISBN.")
        elif choice == '5':
            print("Exiting program.")
            break
        else:
            print("Invalid choice. Please try again.")

Count insert:  249041
Time taken: 1392.406940460205 mili_seconds
Build Hash_table Done!

Menu:
1. Insert a book
2. Search for a book
3. Delete a book
4. Current book list
5. Exit

Exiting program.


Hash Table - Linked_List

In [6]:
import pandas as pd
import time
class Book:
    def __init__(self, isbn, book_title, book_author, year_of_publication, publisher, image_URL_S, image_URL_M, image_URL_L):
        self.isbn = isbn
        self.book_title = book_title
        self.book_author = book_author
        self.year_of_publication = year_of_publication
        self.publisher = publisher
        self.image_URL_S = image_URL_S
        self.image_URL_M = image_URL_M
        self.image_URL_L = image_URL_L
    def __str__(self):
        return (f"ISBN: {self.isbn}\n"
                f"Book Title: {self.book_title}\n"
                f"Author: {self.book_author}\n"
                f"Year of Publication: {self.year_of_publication}\n"
                f"Publisher: {self.publisher}\n"
                f"Image URLs:\n"
                f"  Small: {self.image_URL_S}\n"
                f"  Medium: {self.image_URL_M}\n"
                f"  Large: {self.image_URL_L}\n")
class Linked_List:
  def __init__(self,data):
      self.data = data
      self.next = None
class Hash_Table:
    def __init__(self, size):
        self.size = size
        self.table = [None] * self.size
    def hash_function(self, key):
        return int(key) % self.size
    def insert(self, key, data):
        index = self.hash_function(key)
        if self.table[index] == None:
            self.table[index] = Linked_List(data)
        else:
            table_current_index = self.table[index]
            while table_current_index.next is not None:
                table_current_index = table_current_index.next
            table_current_index.next = Linked_List(data)
        return self.table
    def search(self, key):
        index = self.hash_function(key)
        table_current_index = self.table[index]
        while table_current_index is not None:
            if key == table_current_index.data.isbn:
                return table_current_index.data
            table_current_index = table_current_index.next
        return None
    def remove(self,key):
        index = self.hash_function(key)
        table_current_index = self.table[index]
        prev = None
        while table_current_index is not None:
            if table_current_index.data.isbn == key:
                if prev is None:
                    self.table[index] = table_current_index.next
                else:
                    prev.next = table_current_index.next
                return self.table
            prev = table_current_index
            table_current_index = table_current_index.next
        return None
    def print_data(self):
        count = 0
        for book in self.table:
            table_current_index = book
            while table_current_index is not None:
                count += 1
                print("ISBN: ",table_current_index.data.isbn)
                table_current_index = table_current_index.next
        return count

def load_data(filename):
    df = pd.read_csv(filename)
    book_list = []  # List to store Book objects
    for index, row in df.iterrows():
        if row['ISBN'].strip().isdigit():
            book = Book(
                isbn=row['ISBN'],
                book_title=row['Book-Title'],
                book_author=row['Book-Author'],
                year_of_publication=row['Year-Of-Publication'],
                publisher=row['Publisher'],
                image_URL_S=row['Image-URL-S'],
                image_URL_M=row['Image-URL-M'],
                image_URL_L=row['Image-URL-L']
            )
            book_list.append(book)
        else:
            continue
    return book_list
if __name__ == "__main__":
    filename = 'D:\Workspace\Python code\Data\Book_raw.csv'
    book_list = load_data(filename)
    hash_table = Hash_Table(len(book_list))
    start_time = time.time()
    for i in book_list:
        hash_table.insert(i.isbn, i)
    end_time = time.time()
    print(f"Time taken: {(end_time - start_time) * 1000} mili_seconds")
    print("Build Hash_table Done!")
    while True:
        print("\nMenu:")
        print("1. Insert a book")
        print("2. Search for a book")
        print("3. Delete a book")
        print("4. Current book list")
        print("5. Exit")
        choice = input("Enter your choice: ")
        print()
        if choice == '1':
            isbn = input("Enter ISBN: ")
            book_title = input("Enter Book-Title: ")
            book_author = input("Enter Book-Author: ")
            year_of_publication = input("Enter Year-Of-Publication: ")
            publisher = input("Enter Publisher: ")
            image_URL_S = input("Enter Image-URL-S: ")
            image_URL_M = input("Enter Image-URL-M: ")
            image_URL_L = input("Enter Image-URL-L: ")
            book = Book(isbn, book_title, book_author, year_of_publication, publisher, image_URL_S, image_URL_M, image_URL_L)
            start_time = time.time()
            hash_table.insert(book.isbn,book)
            end_time = time.time()
            print(f"Time taken: {(end_time - start_time) * 1000} mili_seconds")
            print("Book inserted successfully.")

        elif choice == '2':
            isbn = str(input("Enter ISBN to search: "))
            print()
            start_time = time.time()
            book = hash_table.search(isbn)
            end_time = time.time()
            print(f"Time taken: {(end_time - start_time) * 1000} mili_seconds")
            if book is not None:
                print(book)
            else:
                print("Book not found.")
        elif choice == '3':
            isbn = str(input("Enter ISBN to delete: "))
            start_time = time.time()
            remove_book = hash_table.remove(isbn)
            end_time = time.time()
            print(f"Time taken: {(end_time - start_time) * 1000} mili_seconds")
            if remove_book is not None:
                print("Book deleted successfully.")
            else:
                print("Book not found.")

        elif choice == '4':
            count = hash_table.print_data()
            print("Count of list book: ", count)
        elif choice == '5':
            print("Exiting program.")
            break
        else:
            print("Invalid choice. Please try again.")

Time taken: 748.8057613372803 mili_seconds
Build Hash_table Done!

Menu:
1. Insert a book
2. Search for a book
3. Delete a book
4. Current book list
5. Exit

Exiting program.
