<a href="https://colab.research.google.com/github/nkcong206/Travel-Recommendation-System/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q langchain-ollama
!pip install -q langchain
!pip install -q langchain_community
!pip install -q sentence-transformers

In [2]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import re
import json
import subprocess
import time
import threading
import psycopg2

  from tqdm.autonotebook import tqdm, trange


In [3]:
model = SentenceTransformer('bkai-foundation-models/vietnamese-bi-encoder')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [4]:
!curl https://ollama.ai/install.sh | sh

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 12030    0 12030    0     0  31338      0 --:--:-- --:--:-- --:--:-- 31409
>>> Downloading ollama...
############################################################################################# 100.0%
>>> Installing ollama to /usr/local/bin...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.


In [5]:
# Start the ollama server in a new process
process = subprocess.Popen(['ollama', 'serve'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

# Function to print server output
def print_output(process):
    while True:
        output = process.stdout.readline()
        if output == b'' and process.poll() is not None:
            break
        if output:
            print(output.strip().decode('utf-8'))
        time.sleep(1)

# Start a thread to print server output
thread = threading.Thread(target=print_output, args=(process,))
thread.start()

print("Ollama server is running in the background")

Ollama server is running in the background


In [6]:
!ollama pull llama3:latest

[GIN] 2024/08/19 - 18:03:30 | 200 |      78.694µs |       127.0.0.1 | HEAD     "/"
[?25lpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠧ [?25h[?25l[2K[1Gpulling manifest ⠇ [?25h[?25l[2K[1Gpulling manifest ⠏ [?25h[?25l[2K[1Gpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠧ [?25h[?25l[2K[1Gpulling manifest ⠇ [?25h[?25l[2K[1Gpulling manifest ⠏ [?25h[?25l[2K[1Gpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manife

In [7]:
llm = OllamaLLM(model="llama3:latest")

In [8]:
template = """
Your task is to extract key information from a travel request.

    Request: "{travel_request}"

    Extract and return the result in the following format:
    {{
        "Type": "...",
        "District": "...",
        "City": "...",
        "Number_of_people": "...",
        "Price": "...",
        "Rating": "...",
        "Description": "..."
    }}

    Ensure that the "Type" is one of the following: "Hotel," "Restaurant," or "TouristAttraction." If one or two types are mentioned, return only those. If none are mentioned, include all three types.

    For any information not specified in the travel request, return `null`. Ensure that the JSON result is strictly valid JSON, with no extra text, comments, or parentheses.

    """
prompt = ChatPromptTemplate.from_template(template)
chain = prompt | llm

In [20]:
query = """
Gợi ý cho tôi 1 khách sạn ở Cầu Giấy, Hà Nội với chất lượng dịch vụ cao cấp!
"""
response = chain.invoke(query)
cleaned_json_str = re.search(r'\{.*?\}', response, re.DOTALL).group(0)
result_dict = json.loads(cleaned_json_str)
result_dict

[GIN] 2024/08/19 - 17:39:12 | 200 |  3.806090822s |       127.0.0.1 | POST     "/api/generate"


In [None]:
conn = psycopg2.connect("postgresql://travel_owner:9oaCOWkgBX4L@ep-odd-sun-a1sblbbw.ap-southeast-1.aws.neon.tech/travel?sslmode=require")
cur = conn.cursor()
cur.execute("""
-- SET search_path TO GROUP_PROJECT;
DROP SCHEMA IF EXISTS travel_database CASCADE;
CREATE SCHEMA IF NOT EXISTS travel_database;
SET search_path TO travel_database;

-- Create a custom type for the Address structure
CREATE TYPE Address AS (
	street TEXT,
	district TEXT,
	city TEXT
);

CREATE TYPE Location AS (
    Latitude DECIMAL(9, 6),
    Longitude DECIMAL(9, 6)
);

-- Create the Hotel table
CREATE TABLE Hotel (
    HotelID SERIAL NOT NULL PRIMARY KEY,
    Name VARCHAR(255),
    Address Address,
    Location Location,
    Rating DECIMAL(2, 1),
    Description TEXT,
    Img_URL JSON,
    Comments JSON
-- price_range JSONB
);

-- Create an index on the district of the Location and Rating columns for the Hotel table
CREATE INDEX HOTEL_IDX_ADDRESS_RATING ON Hotel(((Address).district), Rating);

-- Create the Price table for the Hotel table
CREATE TABLE HotelPrice (
	HPriceID SERIAL NOT NULL PRIMARY KEY,
	HotelID SERIAL NOT NULL,
	RoomType VARCHAR(255),
	Capacity INT,
	Price INT NOT NULL,
	CONSTRAINT hotel_price_foreign
    	FOREIGN KEY (HotelID)
    		REFERENCES travel_database.Hotel (HotelID)
    		ON DELETE CASCADE
   			ON UPDATE NO ACTION

);

-- Create an index on the hotelID for the HotelPrice table
CREATE INDEX IDX_HOTELPRICE_HOTELID ON HotelPrice(HotelID);

-- Create the TouristAttraction table
CREATE TABLE TouristAttraction (
    AttractionID SERIAL NOT NULL PRIMARY KEY,
    Name VARCHAR(255),
    Address Address,
    Location Location,
    AttractionType VARCHAR(255),
    Rating DECIMAL(2, 1),
    Tour_Duration VARCHAR(50),
    Description TEXT,
    Img_URL JSON,
    Comments JSON
);

-- Create an index on the district of the Location and Rating columns for the TouristAttraction table
CREATE INDEX ATTRACTION_IDX_ADDRESS_RATING ON TouristAttraction(((Address).district), Rating);

-- Create the Price table for the TouristAttraction table
CREATE TABLE AttractionPrice (
	APriceID SERIAL NOT NULL PRIMARY KEY,
	AttractionID SERIAL NOT NULL,
	TicketType VARCHAR(255),
	NumberPeople INT,
	Price INT NOT NULL,
	CONSTRAINT attraction_price_foreign
    	FOREIGN KEY (AttractionID)
    		REFERENCES travel_database.TouristAttraction (AttractionID)
    		ON DELETE CASCADE
   			ON UPDATE NO ACTION

);

-- Create an index on the AttractionID for the AttractionPrice table
CREATE INDEX IDX_ATTRACTIONPRICE_ATTRACTIONID ON AttractionPrice(AttractionID);

-- Create the Restaurant table
CREATE TABLE Restaurant (
    ResID SERIAL NOT NULL PRIMARY KEY,
    Name VARCHAR(255),
    Address Address,
    Location Location,
    Rating DECIMAL(2, 1),
    -- Price_Range VARCHAR(50),
    Description TEXT,
    Img_URL VARCHAR(255),
    Comments JSON
);

-- Create an index on the district of the Location and Rating columns for the Restaurant table
CREATE INDEX RES_IDX_ADDRESS_RATING ON Restaurant(((Address).district), Rating);
""")

# Cam kết các thay đổi vào cơ sở dữ liệu
conn.commit()


In [None]:
conn = psycopg2.connect("postgresql://travel_owner:9oaCOWkgBX4L@ep-odd-sun-a1sblbbw.ap-southeast-1.aws.neon.tech/travel?sslmode=require")
# Tạo con trỏ để thực thi các câu lệnh SQL
cur = conn.cursor()

# Đặt schema hiện tại
cur.execute("SET search_path TO travel_database;")

# Kiểm tra xem bảng Hotel có tồn tại không
cur.execute("SELECT * FROM information_schema.tables WHERE table_name = 'hotel';")
if not cur.fetchone():
    print("Bảng 'Hotel' không tồn tại trong schema 'travel_database'.")
else:
# Dữ liệu cần chèn vào bảng Hotel
    data_1 = [
        ('Grand Hotel', ('123 Main St', 'Ba Đình', 'Hà Nội'), (21.0285, 105.8542), 4.5, 'Luxury hotel with great amenities.', json.dumps(["https://example.com/image1.jpg"]), json.dumps(["asdsadsd"])),
    ]

    data_2 = [
        ('Elite Suites', ('808 Elite Ave', 'Cầu Giấy', 'Hà Nội'), (21.0295, 105.8552), 4.8, 'Exclusive suites with top-notch services.', json.dumps(["https://example.com/image11.jpg"]),json.dumps(["asdsadsd"]))
    ]

    # Chèn dữ liệu vào bảng Hotel
    cur.executemany("""
    INSERT INTO Hotel (Name, Address, Location, Rating, Description, Img_URL, Comments) VALUES (%s, %s, %s, %s, %s, %s, %s)
    """, data_1)

    # Cam kết các thay đổi vào cơ sở dữ liệu
    conn.commit()

    # Chèn dữ liệu bổ sung
    cur.executemany("""
    INSERT INTO Hotel (Name, Address, Location, Rating, Description, Img_URL, Comments) VALUES (%s, %s, %s, %s, %s, %s, %s)
    """, data_2)

    # Cam kết các thay đổi vào cơ sở dữ liệu
    conn.commit()
cur.close()
conn.close()

In [None]:
conn = psycopg2.connect("postgresql://travel_owner:9oaCOWkgBX4L@ep-odd-sun-a1sblbbw.ap-southeast-1.aws.neon.tech/travel?sslmode=require")

cur = conn.cursor()

cur.execute("SET search_path TO travel_database;")

result_dict = {
    'Type': ['Tourist Attraction'],
    'District': "Ba Đình",
    'City': 'Hà Nội',
    'number_of_people': 3,
    'Price': None,
    'Rating': None,
    'Description': None
}


cur.execute("""
SELECT
    h.HotelID,
    h.Name,
    (h.Location).Latitude AS Latitude,
    (h.Location).Longitude AS Longitude,
    h.Rating,
    h.Description,
    h.Img_URL,
    h.Comments,
    p.Price,
    p.RoomType,
    p.Capacity
FROM
    travel_database.Hotel h
JOIN
    travel_database.HotelPrice p ON h.HotelID = p.HotelID
WHERE
    (h.Rating = %s OR %s IS NULL) AND
    (p.Price = %s OR %s IS NULL) AND
    ((h.Address).district = %s OR %s IS NULL) AND
    ((h.Address).city = %s OR %s IS NULL) AND
    (h.Description = %s OR %s IS NULL);
""",
(
    result_dict['Rating'],
    result_dict['Rating'],
    result_dict['Price'],
    result_dict['Price'],
    result_dict['District'],
    result_dict['District'],
    result_dict['City'],
    result_dict['City'],
    result_dict['Description'],
    result_dict['Description']
))

rows = cur.fetchall()
for row in rows:
    print(row)
cur.close()
conn.close()

In [None]:
conn = psycopg2.connect("postgresql://travel_owner:9oaCOWkgBX4L@ep-odd-sun-a1sblbbw.ap-southeast-1.aws.neon.tech/travel?sslmode=require")
cur = conn.cursor()

# Đặt schema hiện tại
cur.execute("SET search_path TO travel_database;")

# Kiểm tra xem bảng Hotel có tồn tại không
cur.execute("SELECT * FROM information_schema.tables WHERE table_name = 'hotel';")
if not cur.fetchone():
    print("Bảng 'Hotel' không tồn tại trong schema 'travel_database'.")
else:
    result_dict = {
    'Type': ['Tourist Attraction'],
    'District': "Cầu Giấy",
    'City': 'Hà Nội',
    'number_of_people': 3,
    'Price': None,
    'Rating': 4.5,
    'Description': None
    }
    # Thực hiện truy vấn SELECT đơn giản để kiểm tra dữ liệu
    cur.execute("""
    SELECT
        h.HotelID,
        h.Name,
        (h.Location).Latitude AS Latitude,
        (h.Location).Longitude AS Longitude,
        h.Rating,
        h.Description,
        h.Img_URL,
        h.Comments,
        p.Price,
        p.RoomType,
        p.Capacity
    FROM
        travel_database.Hotel h
    JOIN
        travel_database.HotelPrice p ON h.HotelID = p.HotelID
    WHERE
    (h.Rating = %s OR %s IS NULL) AND
    ((h.Address).district = %s OR %s IS NULL)
    """,(
        result_dict['Rating'],
        result_dict['Rating'],
        result_dict['District'],
        result_dict['District']
    ))

    # Lấy kết quả và in ra
    rows = cur.fetchall()
    print(type(rows))
    for row in rows:
        print(row)

cur.close()
conn.close()


<class 'list'>
(1, 'Sông Hồng', Decimal('21.028500'), Decimal('105.854200'), Decimal('4.5'), 'Cao cấp', ['https://example.com/image1.jpg'], ['tốt'], 3000000, 'Luxury', 2)
(1, 'Sông Hồng', Decimal('21.028500'), Decimal('105.854200'), Decimal('4.5'), 'Cao cấp', ['https://example.com/image1.jpg'], ['tốt'], 1000000, 'Basic', 2)


In [13]:
# Gợi ý cho tôi 1 khách sạn ở Cầu Giấy, Hà Nội với chất lượng dịch vụ cao cấp!
query = input("Nhập câu truy vấn của bạn: ")

Nhập câu truy vấn của bạn: Gợi ý cho tôi 1 khách sạn ở Cầu Giấy, Hà Nội với chất lượng dịch vụ bình dân thôi, không cần cao cấp!


In [14]:
conn = psycopg2.connect("postgresql://travel_owner:9oaCOWkgBX4L@ep-odd-sun-a1sblbbw.ap-southeast-1.aws.neon.tech/travel?sslmode=require")
cur = conn.cursor()

cur.execute("SET search_path TO travel_database;")

cur.execute("SELECT * FROM information_schema.tables WHERE table_name = 'hotel';")
if not cur.fetchone():
    print("Bảng 'Hotel' không tồn tại trong schema 'travel_database'.")
else:

    response = chain.invoke(query)
    cleaned_json_str = re.search(r'\{.*?\}', response, re.DOTALL).group(0)
    result_dict = json.loads(cleaned_json_str)

    cur.execute("""
    SELECT
        h.HotelID,
        h.Name,
        (h.Address).street AS Street,
        (h.Address).district AS District,
        (h.Address).city AS City,
        h.Rating,
        h.Description,
        h.Comments,
        p.Price,
        p.RoomType,
        p.Capacity
    FROM
        travel_database.Hotel h
    JOIN
        travel_database.HotelPrice p ON h.HotelID = p.HotelID
    WHERE
    ((h.Address).district = %s OR %s IS NULL) AND
    ((h.Address).city = %s OR %s IS NULL)
    """,(
        result_dict['District'],
        result_dict['District'],
        result_dict['City'],
        result_dict['City']
    ))

    # Lấy kết quả và so sánh mô tả
    rows = cur.fetchall()
    descriptions = [row[6] for row in rows]
    descriptions_with_info = []

    # Mô tả từ result
    result_description = result_dict['Description'] if result_dict['Description'] else ""
    result_embedding = model.encode([result_description])

    # So sánh và sắp xếp kết quả
    for row in rows:
        description = row[6]
        embedding = model.encode([description])
        similarity = cosine_similarity(result_embedding, embedding)[0][0]
        descriptions_with_info.append((row, description, similarity))

    # Sắp xếp theo mức độ liên quan giảm dần
    descriptions_with_info.sort(key=lambda x: x[2], reverse=True)

    print(f"Yêu cầu người dùng: {query}")
    for info in descriptions_with_info:
        row, description, similarity = info
        print(f"HotelID: {row[0]}")
        print(f"Name: {row[1]}")
        print(f"Address: {row[2]}, {row[3]}, {row[4]}")
        print(f"Rating: {row[5]}")
        print(f"Description: {row[6]}")
        print(f"Similarity: {similarity:.4f}")
        print(f"Comments: {row[7]}")
        print(f"Price: {row[8]}")
        print(f"RoomType: {row[9]}")
        print(f"Capacity: {row[10]}")
        print("-" * 40)


cur.close()
conn.close()


[GIN] 2024/08/19 - 18:07:14 | 200 |  3.230104144s |       127.0.0.1 | POST     "/api/generate"
Yêu cầu người dùng: Gợi ý cho tôi 1 khách sạn ở Cầu Giấy, Hà Nội với chất lượng dịch vụ bình dân thôi, không cần cao cấp!
HotelID: 1
Name: Sông Hồng
Address: 123 PVĐ, Cầu Giấy, Hà Nội
Rating: 4.5
Description: Bình dân
Similarity: 0.4834
Comments: ['Cũng bình thường']
Price: 3000000
RoomType: Luxury
Capacity: 2
----------------------------------------
HotelID: 2
Name: Tây Sơn
Address: 808 Cổ Nhuế, Cầu Giấy, Hà Nội
Rating: 4.5
Description: Cao cấp
Similarity: 0.4393
Comments: ['KS này chất']
Price: 5000000
RoomType: Luxury
Capacity: 2
----------------------------------------
