In [2]:
!pip install -q langchain-ollama
!pip install -q langchain
!pip install -q langchain_community

In [3]:
!curl https://ollama.ai/install.sh | sh

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0>>> Downloading ollama...
100 12030    0 12030    0     0  27338      0 --:--:-- --:--:-- --:--:-- 27403
############################################################################################# 100.0%
>>> Installing ollama to /usr/local/bin...
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.


In [4]:
import subprocess
import time
import threading

# Start the Ollama server as a background child process.
# stderr is merged into stdout so that one reader drains everything the server
# writes; a separate stderr pipe that nobody reads would fill up and eventually
# block the server.
process = subprocess.Popen(['ollama', 'serve'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

def print_output(process):
    """Relay a child process's stdout to this notebook, one line at a time.

    Args:
        process: Object exposing a binary ``stdout`` stream with ``readline()``
            (for example a ``subprocess.Popen`` created with ``stdout=PIPE``).

    Returns:
        None. The function returns once the stream reaches end-of-file.
    """
    # readline() blocks until a line is available, so no sleep is needed between
    # reads; sleeping would cap the relay at one line per second and let the
    # pipe back up. b'' is the end-of-file sentinel.
    for raw_line in iter(process.stdout.readline, b''):
        # errors='replace' keeps the relay alive if the server emits bytes that
        # are not valid UTF-8.
        print(raw_line.strip().decode('utf-8', errors='replace'))

# Relay the server output from a background thread so this cell returns immediately.
# daemon=True: the relay spends its life blocked on a pipe read, so a non-daemon
# thread would keep the interpreter alive at shutdown.
thread = threading.Thread(target=print_output, args=(process,), daemon=True)
thread.start()

print("Ollama server is running in the background")

Ollama server is running in the background


In [5]:
# Download the llama3:latest model with the Ollama CLI (about 4.7 GB according to the progress output).
!ollama pull llama3:latest

Couldn't find '/root/.ollama/id_ed25519'. Generating new private key.
Your new public key is:

ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIO8d4fMVxp80rTnia5SfRFltbi3ZAJiZMvHgRKIUXJS/

[GIN] 2024/08/19 - 14:54:05 | 200 |       47.33µs |       127.0.0.1 | HEAD     "/"
[?25lpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠧ [?25h[?25l[2K[1Gpulling manifest ⠧ [?25h[?25l[2K[1Gpulling manifest ⠇ [?25h[?25l[2K[1Gpulling manifest ⠏ [?25h[?25l[2K[1Gpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest 
pulling 6a0746a1ec1a...   0% ▕▏    0 B/4.7 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling 6a0746a1ec1

In [41]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM
import re
import json

In [11]:
# LangChain client for the llama3:latest model; no server address is passed, so the library default is used.
llm = OllamaLLM(model="llama3:latest")

In [62]:
# Prompt that turns a free-form travel request into one flat JSON object.
# "Type" is declared as a JSON array in the format example because the rules
# below allow one, two or three values, and the allowed values are quoted
# without trailing punctuation so the model can copy them verbatim.
# Doubled braces ({{ }}) are literal braces; {travel_request} is the only variable.
template = """
Your task is to extract key information from a travel request.

    Request: "{travel_request}"

    Extract and return the result in the following format:
    {{
        "Type": ["..."],
        "district": "...",
        "city": "...",
        "number_of_people": "...",
        "Price": "...",
        "Rating": "...",
        "Description": "..."
    }}

    "Type" must be a JSON array whose items are taken only from these exact strings: "Hotel", "Restaurant", "TouristAttraction". If one or two types are mentioned in the request, return only those. If none are mentioned, include all three types.

    For any information not specified in the travel request, return `null`. Ensure that the JSON result is strictly valid JSON, with no extra text, comments, or parentheses.

    """
prompt = ChatPromptTemplate.from_template(template)

# Pipe the formatted prompt straight into the model; the chain returns the raw completion text.
chain = prompt | llm

In [63]:
# Bind the request to the template variable by name. A bare {"..."} literal is a
# Python set, not a dict, so the request would reach the prompt as the set's repr.
travel_request = "Tôi muốn đi du lịch Hà Nội vào cuối tuần này trong 2 ngày với gia đình 4 người bao gồm trẻ em, bạn có thể gợi ý cho tôi một lộ trình các điểm tham quan và nhà hàng nổi tiếng không?"
response = chain.invoke({"travel_request": travel_request})

# The requested object is flat, so the span from the first '{' to the first '}'
# is the whole object even when the model adds text around it.
json_match = re.search(r'\{.*?\}', response, re.DOTALL)
if json_match is None:
    # Fail with the offending text instead of an AttributeError on None.
    raise ValueError(f"No JSON object found in the model response: {response!r}")
cleaned_json_str = json_match.group(0)

[GIN] 2024/08/19 - 15:43:48 | 200 |  3.469710166s |       127.0.0.1 | POST     "/api/generate"


In [64]:
# Show the JSON text that the regex extracted from the model response.
cleaned_json_str

'{\n    "Type": ["Tourist Attraction"],\n    "district": null,\n    "city": "Hà Nội",\n    "number_of_people": 4,\n    "Price": null,\n    "Rating": null,\n    "Description": null\n}'

In [66]:
# Parse the extracted JSON text into a Python dict and display it.
result_dict = json.loads(cleaned_json_str)
result_dict

{'Type': ['Tourist Attraction'],
 'district': None,
 'city': 'Hà Nội',
 'number_of_people': 4,
 'Price': None,
 'Rating': None,
 'Description': None}

In [1]:
import psycopg2

In [90]:
# Read the connection string from the environment so the database password is
# never stored in the notebook. Set TRAVEL_DB_DSN before running, e.g.
#   postgresql://<user>:<password>@<host>/travel?sslmode=require
import os

conn = psycopg2.connect(os.environ["TRAVEL_DB_DSN"])

In [84]:
# Run the DDL that (re)creates the schema, the composite types, the tables and the indexes.
# WARNING: DROP SCHEMA ... CASCADE deletes every existing table and row in travel_database.
# `with conn` commits when the block succeeds and rolls back if a statement fails;
# `with conn.cursor()` closes the cursor afterwards.
# The foreign-key columns of the price tables are plain INTEGER rather than SERIAL:
# a referencing column must not receive auto-generated default values.
with conn:
    with conn.cursor() as cur:
        cur.execute("""
-- SET search_path TO GROUP_PROJECT;
DROP SCHEMA IF EXISTS travel_database CASCADE;
CREATE SCHEMA IF NOT EXISTS travel_database;
SET search_path TO travel_database;

-- Create a custom type for the Address structure
CREATE TYPE Address AS (
	street TEXT,
	district TEXT,
	city TEXT
);

CREATE TYPE Location AS (
    Latitude DECIMAL(9, 6),
    Longitude DECIMAL(9, 6)
);

-- Create the Hotel table
CREATE TABLE Hotel (
    HotelID SERIAL NOT NULL PRIMARY KEY,
    Name VARCHAR(255),
    Address Address,
    Location Location,
    Rating DECIMAL(2, 1),
    Description TEXT,
    Img_URL JSON,
    Comments JSON
-- price_range JSONB
);

-- Create an index on the district of the Address and the Rating column for the Hotel table
CREATE INDEX HOTEL_IDX_ADDRESS_RATING ON Hotel(((Address).district), Rating);

-- Create the Price table for the Hotel table
CREATE TABLE HotelPrice (
	HPriceID SERIAL NOT NULL PRIMARY KEY,
	HotelID INTEGER NOT NULL,
	RoomType VARCHAR(255),
	Capacity INT,
	Price INT NOT NULL,
	CONSTRAINT hotel_price_foreign
    	FOREIGN KEY (HotelID)
    		REFERENCES travel_database.Hotel (HotelID)
    		ON DELETE CASCADE
   			ON UPDATE NO ACTION

);

-- Create an index on the hotelID for the HotelPrice table
CREATE INDEX IDX_HOTELPRICE_HOTELID ON HotelPrice(HotelID);

-- Create the TouristAttraction table
CREATE TABLE TouristAttraction (
    AttractionID SERIAL NOT NULL PRIMARY KEY,
    Name VARCHAR(255),
    Address Address,
    Location Location,
    AttractionType VARCHAR(255),
    Rating DECIMAL(2, 1),
    Tour_Duration VARCHAR(50),
    Description TEXT,
    Img_URL JSON,
    Comments JSON
);

-- Create an index on the district of the Address and the Rating column for the TouristAttraction table
CREATE INDEX ATTRACTION_IDX_ADDRESS_RATING ON TouristAttraction(((Address).district), Rating);

-- Create the Price table for the TouristAttraction table
CREATE TABLE AttractionPrice (
	APriceID SERIAL NOT NULL PRIMARY KEY,
	AttractionID INTEGER NOT NULL,
	TicketType VARCHAR(255),
	NumberPeople INT,
	Price INT NOT NULL,
	CONSTRAINT attraction_price_foreign
    	FOREIGN KEY (AttractionID)
    		REFERENCES travel_database.TouristAttraction (AttractionID)
    		ON DELETE CASCADE
   			ON UPDATE NO ACTION

);

-- Create an index on the AttractionID for the AttractionPrice table
CREATE INDEX IDX_ATTRACTIONPRICE_ATTRACTIONID ON AttractionPrice(AttractionID);

-- Create the Restaurant table
CREATE TABLE Restaurant (
    ResID SERIAL NOT NULL PRIMARY KEY,
    Name VARCHAR(255),
    Address Address,
    Location Location,
    Rating DECIMAL(2, 1),
    -- Price_Range VARCHAR(50),
    Description TEXT,
    Img_URL VARCHAR(255),
    Comments JSON
);

-- Create an index on the district of the Address and the Rating column for the Restaurant table
CREATE INDEX RES_IDX_ADDRESS_RATING ON Restaurant(((Address).district), Rating);
""")


In [4]:
import json

In [None]:
import os

# Sample hotels. Python tuples are sent as SQL row values, which fill the
# composite Address (street, district, city) and Location (latitude, longitude)
# columns; the JSON columns receive serialized lists.
data_1 = [
    ('Grand Hotel', ('123 Main St', 'Ba Đình', 'Hà Nội'), (21.0285, 105.8542), 4.5, 'Luxury hotel with great amenities.', json.dumps(["https://example.com/image1.jpg"]), json.dumps(["asdsadsd"])),
]

data_2 = [
    ('Elite Suites', ('808 Elite Ave', 'Hà Đông', 'Hà Nội'), (21.0295, 105.8552), 4.8, 'Exclusive suites with top-notch services.', json.dumps(["https://example.com/image11.jpg"]), json.dumps(["asdsadsd"]))
]

insert_hotel_sql = """
INSERT INTO Hotel (Name, Address, Location, Rating, Description, Img_URL, Comments) VALUES (%s, %s, %s, %s, %s, %s, %s)
"""

# The connection string comes from the environment so that no password lives in the notebook.
conn = psycopg2.connect(os.environ["TRAVEL_DB_DSN"])
try:
    # `with conn` commits on success and rolls back on error;
    # `with conn.cursor()` closes the cursor when the block ends.
    with conn:
        with conn.cursor() as cur:
            # Resolve unqualified table names inside the project schema.
            cur.execute("SET search_path TO travel_database;")

            # Restrict the existence check to the project schema; a table named
            # 'hotel' in any other schema must not count.
            cur.execute(
                "SELECT 1 FROM information_schema.tables "
                "WHERE table_schema = 'travel_database' AND table_name = 'hotel';"
            )
            if cur.fetchone() is None:
                print("Bảng 'Hotel' không tồn tại trong schema 'travel_database'.")
            else:
                # Insert both batches in one transaction so a failure leaves no partial data.
                cur.executemany(insert_hotel_sql, data_1 + data_2)
finally:
    # Always release the connection, even when a statement fails.
    conn.close()

In [14]:
import os

# Example of the structured filters produced by the extraction step.
abc = {
    'Type': ['Tourist Attraction'],
    'District': "Ba Đình",
    'City': 'Hà Nội',
    'number_of_people': 3,
    'Price': None,
    'Rating': None,
    'Description': None
}

# Named parameters let each filter value be supplied once even though the query
# uses it twice. A value of None is sent as NULL, which makes the
# `%(name)s IS NULL` branch true and therefore switches that filter off.
hotel_filters = {
    'rating': abc['Rating'],
    'price': abc['Price'],
    'district': abc['District'],
    'city': abc['City'],
    'description': abc['Description'],
}

# The connection string comes from the environment so that no password lives in the notebook.
conn = psycopg2.connect(os.environ["TRAVEL_DB_DSN"])
try:
    with conn.cursor() as cur:
        # Resolve unqualified names inside the project schema.
        cur.execute("SET search_path TO travel_database;")

        # One result row per (hotel, price entry) pair that passes every active filter.
        cur.execute("""
        SELECT
            h.HotelID,
            h.Name,
            (h.Location).Latitude AS Latitude,
            (h.Location).Longitude AS Longitude,
            h.Rating,
            h.Description,
            h.Img_URL,
            h.Comments,
            p.Price,
            p.RoomType,
            p.Capacity
        FROM
            travel_database.Hotel h
        JOIN
            travel_database.HotelPrice p ON h.HotelID = p.HotelID
        WHERE
            (h.Rating = %(rating)s OR %(rating)s IS NULL) AND
            (p.Price = %(price)s OR %(price)s IS NULL) AND
            ((h.Address).district = %(district)s OR %(district)s IS NULL) AND
            ((h.Address).city = %(city)s OR %(city)s IS NULL) AND
            (h.Description = %(description)s OR %(description)s IS NULL);
        """, hotel_filters)

        rows = cur.fetchall()
finally:
    # Always release the connection, even when the query fails.
    conn.close()

# Print the matching rows.
for row in rows:
    print(row)

In [24]:
import psycopg2
import json

import os

# Example of the structured filters produced by the extraction step.
abc = {
    'Type': ['Tourist Attraction'],
    'District': "Cầu Giấy",
    'City': 'Hà Nội',
    'number_of_people': 3,
    'Price': None,
    'Rating': 4.5,
    'Description': None
}

# Named parameters: each value is supplied once although the query uses it twice.
# None is sent as NULL, which disables the corresponding filter.
hotel_filters = {
    'rating': abc['Rating'],
    'district': abc['District'],
}

# Connect using a DSN taken from the environment so that no password lives in the notebook.
conn = psycopg2.connect(os.environ["TRAVEL_DB_DSN"])
try:
    with conn.cursor() as cur:
        # Resolve unqualified names inside the project schema.
        cur.execute("SET search_path TO travel_database;")

        # Restrict the existence check to the project schema; a table named
        # 'hotel' in any other schema must not count.
        cur.execute(
            "SELECT 1 FROM information_schema.tables "
            "WHERE table_schema = 'travel_database' AND table_name = 'hotel';"
        )
        if cur.fetchone() is None:
            print("Bảng 'Hotel' không tồn tại trong schema 'travel_database'.")
        else:
            # One result row per (hotel, price entry) pair matching rating and district.
            cur.execute("""
            SELECT
                h.HotelID,
                h.Name,
                (h.Location).Latitude AS Latitude,
                (h.Location).Longitude AS Longitude,
                h.Rating,
                h.Description,
                h.Img_URL,
                h.Comments,
                p.Price,
                p.RoomType,
                p.Capacity
            FROM
                travel_database.Hotel h
            JOIN
                travel_database.HotelPrice p ON h.HotelID = p.HotelID
            WHERE
                (h.Rating = %(rating)s OR %(rating)s IS NULL) AND
                ((h.Address).district = %(district)s OR %(district)s IS NULL)
            """, hotel_filters)

            # Fetch the result set and print it.
            rows = cur.fetchall()
            print(type(rows))
            for row in rows:
                print(row)
finally:
    # Always release the connection, even when a statement fails.
    conn.close()


<class 'list'>
(1, 'Sông Hồng', Decimal('21.028500'), Decimal('105.854200'), Decimal('4.5'), 'Cao cấp', ['https://example.com/image1.jpg'], ['tốt'], 3000000, 'Luxury', 2)
(1, 'Sông Hồng', Decimal('21.028500'), Decimal('105.854200'), Decimal('4.5'), 'Cao cấp', ['https://example.com/image1.jpg'], ['tốt'], 1000000, 'Basic', 2)


In [25]:
!pip install -q sentence-transformers

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/227.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.7/227.1 kB[0m [31m2.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.1/227.1 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [28]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Load the Vietnamese bi-encoder by its Hugging Face model id; judging by the progress output below, its files are downloaded on first use.
model = SentenceTransformer('bkai-foundation-models/vietnamese-bi-encoder')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/123 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/6.46k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/777 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/540M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.17k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/895k [00:00<?, ?B/s]

bpe.codes:   0%|          | 0.00/1.14M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/22.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/167 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/270 [00:00<?, ?B/s]

In [36]:
import psycopg2
import json
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

import os

# Example of the structured filters produced by the extraction step.
abc = {
    'Type': ['Tourist Attraction'],
    'District': "Cầu Giấy",
    'City': 'Hà Nội',
    'number_of_people': 3,
    'Price': None,
    'Rating': 4.5,
    'Description': 'Cao cấp nhất thế giới'
}

# Named parameters: each value is supplied once although the query uses it twice.
# None is sent as NULL, which disables the corresponding filter.
hotel_filters = {
    'rating': abc['Rating'],
    'district': abc['District'],
}

# --- 1. Fetch candidate hotels ------------------------------------------------
# Connect using a DSN taken from the environment so that no password lives in the notebook.
conn = psycopg2.connect(os.environ["TRAVEL_DB_DSN"])
rows = []
try:
    with conn.cursor() as cur:
        # Resolve unqualified names inside the project schema.
        cur.execute("SET search_path TO travel_database;")

        # Restrict the existence check to the project schema; a table named
        # 'hotel' in any other schema must not count.
        cur.execute(
            "SELECT 1 FROM information_schema.tables "
            "WHERE table_schema = 'travel_database' AND table_name = 'hotel';"
        )
        hotel_table_exists = cur.fetchone() is not None

        if hotel_table_exists:
            # One result row per (hotel, price entry) pair matching rating and district.
            cur.execute("""
            SELECT
                h.HotelID,
                h.Name,
                (h.Location).Latitude AS Latitude,
                (h.Location).Longitude AS Longitude,
                h.Rating,
                h.Description,
                h.Img_URL,
                h.Comments,
                p.Price,
                p.RoomType,
                p.Capacity
            FROM
                travel_database.Hotel h
            JOIN
                travel_database.HotelPrice p ON h.HotelID = p.HotelID
            WHERE
                (h.Rating = %(rating)s OR %(rating)s IS NULL) AND
                ((h.Address).district = %(district)s OR %(district)s IS NULL)
            """, hotel_filters)
            rows = cur.fetchall()
finally:
    # Release the connection before the (slow) embedding step, and on any error.
    conn.close()

# --- 2. Rank the candidates by description similarity --------------------------
if not hotel_table_exists:
    print("Bảng 'Hotel' không tồn tại trong schema 'travel_database'.")
else:
    # Text to compare against; a missing description is treated as an empty string.
    abc_description = abc['Description'] if abc['Description'] else ""
    print(f"Mô tả từ yêu cầu người dùng: '{abc_description}'")

    if not rows:
        # Nothing to rank: encoding an empty list and computing cosine similarity
        # on it would raise, so stop here with a clear message.
        print("Không tìm thấy khách sạn nào phù hợp.")
    else:
        # Sentence encoder used for both the request and the hotel descriptions.
        model = SentenceTransformer('bkai-foundation-models/vietnamese-bi-encoder')
        abc_embedding = model.encode([abc_description])[0]

        # Column order follows the SELECT list: 0 = HotelID, 1 = Name, 5 = Description.
        hotel_ids = [row[0] for row in rows]
        hotel_names = [row[1] for row in rows]
        # Description is nullable in the schema; the encoder needs strings.
        descriptions = [row[5] or "" for row in rows]

        # Embed every description and compare each one with the request.
        embeddings = model.encode(descriptions)
        similarities = cosine_similarity([abc_embedding], embeddings)[0]

        # Most similar hotels first.
        results = list(zip(hotel_ids, hotel_names, descriptions, similarities))
        results.sort(key=lambda x: x[3], reverse=True)

        for hotel_id, name, description, similarity in results:
            print(f"Hotel ID: {hotel_id}")
            print(f"Hotel Name: {name}")
            print(f"Description: {description}")
            print(f"Similarity: {similarity:.4f}")
            print("-" * 50)


Mô tả từ yêu cầu người dùng: 'Cao cấp nhất thế giới'
Hotel ID: 2
Hotel Name: Tây Sơn
Description: Cao cấp
Similarity: 0.5107
--------------------------------------------------
Hotel ID: 1
Hotel Name: Sông Hồng
Description: Bình dân
Similarity: 0.0529
--------------------------------------------------
