<a href="https://colab.research.google.com/github/ruslanmv/watsonx-with-multimodal-llava/blob/master/6_Watsonx_Multimodal_Chat.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Hotel Recommendation with Multimodal and WatsonX
#### Developed by Ruslan Magana

In [1]:
# Install the notebook's dependencies (shell commands run from the notebook).
# transformers is installed from its GitHub repository rather than from a PyPI release.
# NOTE(review): none of these installs is version-pinned, so re-running later may pull different versions.
# NOTE(review): geopy is imported by the next cell but is not installed here — presumably it is
# preinstalled in the target environment; confirm.
!pip install --upgrade -q accelerate bitsandbytes
!pip install git+https://github.com/huggingface/transformers.git
!pip install datasets
!pip install gradio_multimodalchatbot
!pip install haversine
!pip install langchain
!pip install langchain_community
!pip install langchain_ibm
!pip install python-dotenv

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m315.1/315.1 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.5/137.5 MB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting git+https://github.com/huggingface/transformers.git
  Cloning https://github.com/huggingface/transformers.git to /tmp/pip-req-build-jdk5mru1
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers.git /tmp/pip-req-build-jdk5mru1
  Resolved https://github.com/huggingface/transformers.git to commit 54b7703682aee4bc46817ebce96fdbdfcc82e262
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: transformers
  Building wheel for transformers (pyproject.toml) ... [?25l[?25hdone
  Created wheel for transformers: filename=transformers-4.45.0.d

Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1


In [1]:
import gradio as gr
from gradio_multimodalchatbot import MultimodalChatbot
from gradio.data_classes import FileData
import os
import pandas as pd
import requests
from PIL import Image, UnidentifiedImageError
from io import BytesIO
import matplotlib.pyplot as plt
import urllib3
from transformers import pipeline
from transformers import BitsAndBytesConfig
import torch
import textwrap
import pandas as pd
import numpy as np
from haversine import haversine  # Install haversine library: pip install haversine
from transformers import AutoProcessor, LlavaForConditionalGeneration
from transformers import BitsAndBytesConfig
import torch
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer
from transformers import AutoImageProcessor
from datasets import load_dataset
from geopy.geocoders import Nominatim
import pyarrow
from dotenv import load_dotenv
from os import environ, getenv
from getpass import getpass
from pydantic import BaseModel
from langchain.prompts import PromptTemplate
from langchain_ibm import WatsonxLLM

In [2]:
# Load environment variables from a .env file so that the credential
# lookups further down (which read the environment via getenv) can find them.
load_dotenv()

# Resolve a setting from the environment, prompting for it when it is absent
def set_env(var: str):
    """Return the value of the environment variable named ``var``.

    If the variable is unset or empty, the value is requested with a
    ``getpass`` prompt labelled ``"<var>: "``, stored in the process
    environment under ``var``, and then returned.
    """
    value = getenv(var)
    if value:
        return value

    value = getpass(f"{var}: ")
    environ[var] = value
    return value

# Connection settings for IBM watsonx
class IbmConnectionParams(BaseModel):
    """Bundle of the values needed to connect to IBM watsonx.

    Callers supply ``api_key``, ``project_id`` and ``url``; the
    ``credentials`` mapping is derived from ``url`` and ``api_key`` in the
    constructor and is not accepted as an argument.
    """
    api_key: str
    project_id: str
    url: str
    # Filled in by __init__ as {"url": url, "apikey": api_key}
    credentials: dict[str, str]

    def __init__(self, api_key: str, project_id: str, url: str) -> None:
        derived_credentials = {"url": url, "apikey": api_key}
        super().__init__(
            api_key=api_key,
            project_id=project_id,
            url=url,
            credentials=derived_credentials,
        )

# Build the IBM connection parameters from the environment
def load_connection_params() -> IbmConnectionParams:
    """Collect the watsonx settings and wrap them in ``IbmConnectionParams``.

    Each value is resolved through ``set_env``, in this order:
    ``WATSONX_API_KEY``, ``PROJECT_ID``, ``WATSONX_URL``.
    """
    return IbmConnectionParams(
        api_key=set_env("WATSONX_API_KEY"),
        project_id=set_env("PROJECT_ID"),
        url=set_env("WATSONX_URL"),
    )

# Resolve the watsonx connection settings once, at cell execution time
# (load_connection_params reads them via set_env, which may prompt).
connection_params: IbmConnectionParams = load_connection_params()


# Generation parameters handed to WatsonxLLM through its `params` argument
parameters = {
    "decoding_method": "sample",
    "max_new_tokens": 300,
    "min_new_tokens": 1,
    "temperature": 0.5,
    "top_k": 50,
    "top_p": 1,
}

# Initialize the WatsonxLLM model used by generate_text_response
watsonx_llm = WatsonxLLM(
    model_id="meta-llama/llama-3-70b-instruct",
    apikey=connection_params.api_key,
    url=connection_params.url,
    project_id=connection_params.project_id,
    params=parameters,
)
# Optional smoke test (left disabled):
#watsonx_llm.invoke("What is the capital of Italy?")


# Ensure data files are available in the current working directory
current_directory = os.getcwd()
geocoded_hotels_path = os.path.join(current_directory, 'geocoded_hotels.csv')
csv_file_path = os.path.join(current_directory, 'hotel_multimodal.csv')

# Load geocoded hotels data, downloading the CSV the first time it is needed
if not os.path.isfile(geocoded_hotels_path):
    url = 'https://github.com/ruslanmv/watsonx-with-multimodal-llava/raw/master/geocoded_hotels.csv'
    # A timeout keeps a stalled connection from hanging the cell indefinitely
    response = requests.get(url, timeout=60)
    if response.status_code != 200:
        # Stop here with the HTTP status instead of falling through to
        # pd.read_csv, which would fail with an unrelated FileNotFoundError
        raise RuntimeError(f"Error downloading file. Status code: {response.status_code}")
    with open(geocoded_hotels_path, 'wb') as f:
        f.write(response.content)
    print(f"File {geocoded_hotels_path} downloaded successfully!")
else:
    print(f"File {geocoded_hotels_path} already exists.")
geocoded_hotels = pd.read_csv(geocoded_hotels_path)

# Load hotel dataset; the 'train' split is cached as a local CSV after the first download
if not os.path.exists(csv_file_path):
    dataset = load_dataset("ruslanmv/hotel-multimodal")
    df_hotels = dataset['train'].to_pandas()
    df_hotels.to_csv(csv_file_path, index=False)
    print("Dataset downloaded and saved as CSV.")
else:
    df_hotels = pd.read_csv(csv_file_path)

def get_current_location():
    """Look up an approximate position for this machine via ipinfo.io.

    Returns:
        A ``(latitude, longitude)`` tuple of floats, or ``(None, None)`` when
        the request fails, the response has no ``loc`` field, or that field
        cannot be parsed as two numbers.
    """
    try:
        # A timeout keeps a stalled lookup from blocking the caller forever
        response = requests.get('https://ipinfo.io/json', timeout=10)
        data = response.json()
        location = data.get('loc', '')
        if not location:
            return None, None
        # Convert eagerly, inside the try block: a lazy map() would defer any
        # float() error to the caller, where it would escape this handler.
        latitude, longitude = (float(part) for part in location.split(','))
        return latitude, longitude
    except Exception as e:
        print(f"An error occurred: {e}")
        return None, None

def get_coordinates(location_name):
    """Geocode ``location_name`` with Nominatim.

    Returns:
        ``(latitude, longitude)`` taken from the geocoder's result, or
        ``None`` when the geocoder returns no match.
    """
    match = Nominatim(user_agent="coordinate_finder").geocode(location_name)
    if not match:
        return None
    return match.latitude, match.longitude

def find_nearby(place=None):
    """Return the five hotels from ``geocoded_hotels`` closest to a location.

    Args:
        place: Name of the place to search around. When falsy, the position
            reported by ``get_current_location`` is used instead.

    Returns:
        A DataFrame holding the five nearest rows of ``geocoded_hotels``,
        sorted by an added ``distance_km`` column (computed with
        ``haversine``), or ``None`` when the location cannot be resolved.
        The module-level ``geocoded_hotels`` frame is left unmodified.
    """
    if place:
        coordinates = get_coordinates(place)
        if not coordinates:
            print(f"Location not found: {place}")
            return None
        latitude, longitude = coordinates
        print(f"The coordinates of {place} are: Latitude: {latitude}, Longitude: {longitude}")
    else:
        latitude, longitude = get_current_location()
        # Compare with None explicitly: 0.0 is a valid latitude/longitude and
        # must not be mistaken for a failed lookup.
        if latitude is None or longitude is None:
            print("Could not retrieve the current location.")
            return None

    origin = (latitude, longitude)
    distances = geocoded_hotels.apply(
        lambda row: haversine(origin, (row['latitude'], row['longitude'])),
        axis=1
    )

    # assign() works on a copy, so the shared frame is not mutated per request
    closest_hotels = (
        geocoded_hotels.assign(distance_km=distances)
        .sort_values(by='distance_km')
        .head(5)
    )
    print("The 5 closest locations are:\n")
    print(closest_hotels)
    return closest_hotels

# Suppress urllib3's InsecureRequestWarning. search_hotel downloads hotel images
# with verify=False, which is the kind of request this warning is about.
# NOTE(review): verify=False turns off TLS certificate checks — confirm that is intended.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Find hotels near a place and describe their photos with the image-to-text model
def search_hotel(place=None):
    """Describe up to two photos for each of the hotels nearest to ``place``.

    Args:
        place: Place name forwarded to ``find_nearby``.

    Returns:
        A DataFrame with one row per successfully processed image and the
        columns ``hotel_name``, ``hotel_id``, ``image`` and ``description``,
        ordered closest hotel first. An empty DataFrame is returned when
        ``find_nearby`` yields nothing or no image could be processed.
    """
    df_found = find_nearby(place)
    if df_found is None:
        return pd.DataFrame()

    hotel_ids = df_found["hotel_id"].values.tolist()
    # Position of each hotel in the distance-sorted result (0 = closest)
    distance_rank = {hotel_id: rank for rank, hotel_id in enumerate(hotel_ids)}

    matches = df_hotels[df_hotels['hotel_id'].isin(hotel_ids)]
    # Sort by an explicit rank column instead of writing a Categorical into a
    # slice with .loc: that assignment targets a slice of df_hotels and may
    # keep the integer dtype, which silently loses the closest-first order.
    ordered = (
        matches.assign(_distance_rank=matches['hotel_id'].map(distance_rank))
        .sort_values('_distance_rank', kind='stable')
        .drop(columns='_distance_rank')
        .reset_index(drop=True)
    )
    # Keep at most two photos per hotel, preserving the row order
    grouped_df = ordered.groupby('hotel_id', sort=False).head(2)
    description_data = []

    for _, row in grouped_df.iterrows():
        hotel_id = row['hotel_id']
        hotel_name = row['hotel_name']
        image_url = row['image_url']

        try:
            # NOTE(review): verify=False disables TLS certificate checks — confirm that is intended.
            # The timeout keeps one unresponsive image host from stalling the whole search.
            response = requests.get(image_url, verify=False, timeout=30)
            response.raise_for_status()
            img = Image.open(BytesIO(response.content))
            prompt = "USER: <image>\nAnalyze this image. Give me feedback on whether this hotel is worth visiting based on the picture. Provide a summary review.\nASSISTANT:"
            outputs = pipe_image_to_text(img, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
            # Keep only the text that follows the final assistant marker
            description = outputs[0]["generated_text"].split("\nASSISTANT:")[-1].strip()
            description_data.append({'hotel_name': hotel_name, 'hotel_id': hotel_id, 'image': img, 'description': description})
        except (requests.RequestException, UnidentifiedImageError):
            # Best effort: an unreachable or unreadable image is skipped
            print(f"Skipping image at URL: {image_url}")

    return pd.DataFrame(description_data)

# Constants
# NOTE(review): DEVICE and LOW_MEMORY are only printed in the visible part of this
# notebook; the model below is placed via device_map="auto" — confirm they are still needed.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"  # enabled by setting the env var LOW_MEMORY=1
MODEL_ID = "llava-hf/llava-1.5-7b-hf"


# Print device and memory info
print(f"Using device: {DEVICE}")
print(f"Low memory: {LOW_MEMORY}")

# Quantization configuration: load the model weights in 4-bit,
# with float16 as the compute dtype
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16
)

# Load the tokenizer associated with MODEL_ID
tokenizer_image_to_text = AutoTokenizer.from_pretrained(MODEL_ID)
# Load the image processor associated with MODEL_ID
image_processor = AutoImageProcessor.from_pretrained(MODEL_ID)
# Load the combined processor and the model once, at cell execution time
# NOTE(review): `processor` is not passed to the pipeline below and is not
# referenced elsewhere in the visible code — confirm it is needed.
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = LlavaForConditionalGeneration.from_pretrained(MODEL_ID, quantization_config=quantization_config, device_map="auto")
# Pass the tokenizer and image processor explicitly to the pipeline.
# NOTE(review): the model is already loaded with quantization_config above; the
# extra model_kwargs entry looks redundant — confirm against the pipeline docs.
pipe_image_to_text = pipeline("image-to-text", model=model, tokenizer=tokenizer_image_to_text, image_processor=image_processor, model_kwargs={"quantization_config": quantization_config})


def multimodal_results(description_df):
    """Turn hotel image descriptions into multimodal chat message pairs.

    Each row's image is saved as a PNG in the current working directory and
    referenced from the bot message through ``FileData``.

    Args:
        description_df: DataFrame with ``hotel_name``, ``description`` and
            ``image`` columns; each image object must provide ``save(path)``.

    Returns:
        A list of ``[user_message, bot_message]`` pairs, one per row. The
        user side is an empty message; the bot side carries the description
        text and the saved image.
    """
    conversation = []
    for position, (_, row) in enumerate(description_df.iterrows()):
        hotel_name = row['hotel_name']
        description = row['description']
        img = row['image']

        # Build a per-row file name: several rows can share a hotel name, and
        # reusing "<hotel_name>.png" would make later images overwrite earlier
        # ones. Characters such as "/" are replaced so the name stays a plain
        # file name.
        safe_name = "".join(
            ch if ch.isalnum() or ch in " -_" else "_" for ch in str(hotel_name)
        )
        img_path = f"{safe_name}_{position}.png"
        img.save(img_path)

        bot_msg = {
            "text": f"Here is {hotel_name}. {description}",
            "files": [{"file": FileData(path=img_path)}]
        }

        conversation.append([{"text": "", "files": []}, bot_msg])

    return conversation


#user_input="Genova Italy"
#description_df = search_hotel(user_input)
#hotel_conversation = multimodal_results(description_df)

def grouped_description(description_df):
    """Collapse per-image descriptions into one row per hotel.

    All descriptions sharing a ``hotel_id`` are joined with single spaces;
    the hotel name is re-attached from ``description_df``. The result has the
    columns ``hotel_name``, ``hotel_id`` and ``description``.
    """
    joined = (
        description_df.groupby('hotel_id')['description']
        .apply(lambda texts: ' '.join(texts.astype(str)))
        .reset_index()
    )
    name_lookup = description_df[['hotel_id', 'hotel_name']]
    with_names = joined.merge(name_lookup, on='hotel_id', how='left')
    # The merge repeats a hotel once per source row; keep the first of each
    one_per_hotel = with_names.drop_duplicates(subset='hotel_id', keep='first')
    return one_per_hotel[['hotel_name', 'hotel_id', 'description']]

def create_prompt_result(result_df):
    """Render every row of ``result_df`` as a text block for the LLM prompt.

    Each row contributes its hotel name, hotel ID and description on
    separate lines, followed by a blank line. An empty frame gives ``""``.
    """
    return "".join(
        f"Hotel Name: {row['hotel_name']}\nHotel ID: {row['hotel_id']}\nDescription: {row['description']}\n\n"
        for _, row in result_df.iterrows()
    )



def build_prompt(context_result):
    """Wrap the hotel descriptions in the recommendation prompt.

    The prompt is a system instruction followed by a request to recommend
    the best hotel from ``context_result``; it starts and ends with a newline.
    """
    # NOTE(review): the [INST] / <<SYS>> markers follow the Llama 2 chat
    # convention, while the model configured in this notebook is a Llama 3
    # instruct model — confirm the template is the intended one.
    prompt_lines = [
        "",
        "<s>[INST] <<SYS>>",
        "You are a helpful and informative chatbot assistant.",
        "<</SYS>>",
        "Based on the following hotel descriptions, recommend the best hotel:",
        f"{context_result}",
        "[/INST]",
        "",
    ]
    return "\n".join(prompt_lines)
# Text generation through the shared WatsonxLLM instance
def generate_text_response(prompt):
    """Send ``prompt`` to ``watsonx_llm`` and return whatever ``invoke`` returns."""
    return watsonx_llm.invoke(prompt)


def llm_results(description_df):
    """Ask the LLM for a recommendation based on the hotel descriptions.

    The descriptions are grouped per hotel, rendered into a prompt and sent
    to the text model. The answer comes back as a one-element list holding a
    ``[user_message, bot_message]`` pair in the chatbot's message format.
    """
    context_result = create_prompt_result(grouped_description(description_df))
    result = generate_text_response(build_prompt(context_result))

    user_side = {"text": "Based on your search...", "files": []}
    bot_side = {"text": f"**My recommendation:** {result}", "files": []}
    return [[user_side, bot_side]]

#final_recommendation = llm_results(description_df)
#final_recommendation

def chatbot_response(user_input, conversation):
    """Generator that drives one hotel search in the chat UI.

    ``conversation`` is extended in place and yielded after every addition:
    first the "looking for hotels" acknowledgement, then one message pair per
    described hotel image, then the LLM recommendation. When the search
    returns nothing, an apology is yielded instead and the generator stops.
    """
    def _text_message(text):
        # Chat message with text only and no attached files
        return {"text": text, "files": []}

    conversation.append([
        _text_message(user_input),
        _text_message(f"Looking for hotels in {user_input}..."),
    ])
    yield conversation

    description_df = search_hotel(user_input)

    nothing_found = description_df is None or description_df.empty
    if nothing_found:
        apology = _text_message(
            f"Sorry, I couldn't find any hotels for {user_input}. Please try another location."
        )
        conversation.append([_text_message(user_input), apology])
        yield conversation
        return

    # Image results first, then the LLM recommendation; each stage is only
    # computed once the previous stage has been fully streamed.
    for produce_pairs in (multimodal_results, llm_results):
        for message_pair in produce_pairs(description_df):
            conversation.append(message_pair)
            yield conversation





File /content/geocoded_hotels.csv downloaded successfully!


Downloading data:   0%|          | 0.00/285M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1140386 [00:00<?, ? examples/s]

Dataset downloaded and saved as CSV.
Using device: cuda
Low memory: False


tokenizer_config.json:   0%|          | 0.00/1.36k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/41.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/505 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/700 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/950 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/70.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

In [None]:
import gradio as gr

# Create the Gradio app with custom CSS styling
# NOTE(review): "style.css" is not created anywhere in the visible notebook —
# confirm the file exists next to it, otherwise the styling has no effect.
with gr.Blocks(css="style.css") as demo:
    # Header
    with gr.Row(elem_id="header"):
        gr.Markdown(
            """
            # 🏨 **WatsonX Hotel Recommendation with Multimodal** 🏨
            Discover the best hotels in any city with personalized recommendations powered by WatsonX!
            """,
            elem_id="title"
        )

    # Input area for place and button
    with gr.Row():
        place_input = gr.Textbox(label="Enter a place", placeholder="E.g., Segrate Milano Italy, Tokyo Japan, Genova Italy")
        send_btn = gr.Button("Search Hotels")

    # Output area to show chatbot responses (including images)
    chatbot = MultimodalChatbot(height=600, elem_id="chatbot-output")

    # chatbot_response is a generator: it receives the typed place and the
    # current chat history, and yields the updated history after each step.
    send_btn.click(chatbot_response, inputs=[place_input, chatbot], outputs=chatbot)

# Launch the Gradio app.
# With debug=True in Colab the cell keeps running so errors and logs stay
# visible (see the launch message in the cell output); set debug=False to turn that off.
demo.launch(debug=True)

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://65e48fcb427fe19dc4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


The coordinates of Segrate Milano Italy are: Latitude: 45.4907352, Longitude: 9.2948204
The 5 closest locations are:

       hotel_id                    hotel_name  chain_id  latitude  longitude  \
28407     38857                   Hotel Gamma        -1  45.48395    9.23656   
21659     29580  Residence Biancacroce Milano        -1  45.46983    9.23439   
8323      11611           Ramada Plaza Milano        73  45.50154    9.22840   
28165     38515               Hotel Lombardia        -1  45.48737    9.22355   
33244     45710                   Hotel Dieci        -1  45.47376    9.22389   

         city country      state  county       suburb postcode  \
28407  Milano  Italia  Lombardia  Milano  Municipio 3    20134   
21659  Milano  Italia  Lombardia  Milano  Municipio 3    20059   
8323   Milano  Italia  Lombardia  Milano  Municipio 2    20127   
28165  Milano  Italia  Lombardia  Milano  Municipio 3    20131   
33244  Milano  Italia  Lombardia  Milano  Municipio 3    20133   

    