In [12]:
from xgboost import XGBRegressor
import hopsworks
from openai import OpenAI
from functions.llm_chain import (
    load_model, 
    get_llm_chain, 
    generate_response, 
    generate_response_openai,
)
import pandas as pd
import os
import warnings
warnings.filterwarnings("ignore")

In [13]:
# Copy the Hopsworks API key from the local key file into the process
# environment (trailing whitespace/newline stripped) before logging in.
with open('./hopsworks-api-key.txt', 'r') as key_file:
    os.environ["HOPSWORKS_API_KEY"] = key_file.read().rstrip()

# Log in to the project and grab a handle on its feature store.
project = hopsworks.login()
fs = project.get_feature_store()

Connection closed.
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1157273
Connected. Call `.close()` to terminate connection gracefully.


In [14]:
# Handle on version 1 of the 'weather' feature group
weather_fg = fs.get_feature_group(name='weather', version=1)

# Fetch version 1 of the existing 'air_quality_fv' feature view
# (this only retrieves it; nothing is created here)
feature_view = fs.get_feature_view(name='air_quality_fv', version=1)

# Initialize batch scoring on the feature view (the value 1 is passed positionally)
feature_view.init_batch_scoring(1)

In [15]:
# Model registry handle for the logged-in project
mr = project.get_model_registry()

# Look up version 1 of 'air_quality_xgboost_model' in the registry
retrieved_model = mr.get_model(name="air_quality_xgboost_model", version=1)

# Download the model artifacts; the returned value is used below as the
# local directory that contains them
saved_model_dir = retrieved_model.download()

Connected. Call `.close()` to terminate connection gracefully.
Downloading model artifact (1 dirs, 5 files)... DONE

In [16]:
# Rebuild the XGBoost regressor from the model.json file found in the
# downloaded model directory
model_air_quality = XGBRegressor()
model_air_quality.load_model(f"{saved_model_dir}/model.json")

# Show the loaded regressor as the cell output
model_air_quality

In [20]:
import time
start_time = time.time()
from transformers import AutoTokenizer, AutoModelForCausalLM

def load_llm(model_id):
    """
    Load a causal language model and its tokenizer from a pretrained checkpoint.

    Args:
        model_id: Checkpoint identifier, passed unchanged to both
            `AutoModelForCausalLM.from_pretrained` and
            `AutoTokenizer.from_pretrained`.

    Returns:
        tuple: `(model, tokenizer)`. The model is requested with
            `torch_dtype=torch.bfloat16` and `device_map="auto"`. The
            tokenizer reuses its EOS token as the pad token and is set to
            pad on the left.
    """
    # Imported locally so the function works on a freshly restarted kernel:
    # no visible cell imports torch, yet torch.bfloat16 is needed below.
    import torch

    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.pad_token = tokenizer.eos_token
    # transformers warns "right-padding was detected" for decoder-only models
    # and asks for padding_side='left' to get correct generation results.
    tokenizer.padding_side = "left"
    return model, tokenizer

In [22]:
# Checkpoint identifier handed to load_llm
model_id = "imiraoui/OpenHermes-2.5-Mistral-7B-sharded"
# `model` and `tokenizer` are reused by later cells (pipeline construction
# and the generate_response calls)
model, tokenizer = load_llm(model_id)
print("Model and tokenizer loaded successfully.")

2024-11-12 12:21:41,253 INFO: We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/120 [00:00<?, ?B/s]



Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Model and tokenizer loaded successfully.


In [24]:
import transformers

# Text-generation pipeline built around the already-loaded model and tokenizer
text_generation_pipeline = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    # Sampling settings
    do_sample=True,
    temperature=0.4,
    top_p=1.0,
    top_k=0,
    # Output length and key/value caching
    max_new_tokens=512,
    use_cache=True,
    # The tokenizer's EOS token id is supplied as both the EOS id and the pad id
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)


In [26]:
def get_prompt_template():
    """
    Return the prompt template text for the air quality assistant.

    The template is delimited with `<|im_start|>` / `<|im_end|>` markers and
    consists of a system turn holding the instructions, a context section,
    a line stating today's date, the user turn, and the opening marker of
    the assistant turn.

    Returns:
        str: Template text with exactly three placeholders:
            `{context}`, `{date_today}` and `{question}`.
    """
    return """<|im_start|>system
You are one of the best air quality experts in the world.

###INSTRUCTIONS:
- If you don't know the answer, you will respond politely that you cannot help.
- Use the context table with air quality indicators for city provided by user to generate your answer.
- You answer should be at least one sentence.
- Do not show any calculations to the user.
- Make sure that you use correct air quality indicators for the corresponding date.
- Add a rich analysis of the air quality level, such as whether it is safe, whether to go for a walk, etc.
- Do not mention in your answer that you are using context table.
<|im_end|>

### CONTEXT:
{context}

IMPORTANT: Today is {date_today}.

<|im_start|>user
{question}<|im_end|>
<|im_start|>assistant"""

In [27]:
from langchain.chains.llm import LLMChain
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate

# Prompt object built from the template text; it declares the three
# variables the template expects
prompt = PromptTemplate(
    template=get_prompt_template(),
    input_variables=["context", "question", "date_today"],
)

# LangChain wrapper around the text-generation pipeline
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# Chain that pairs the prompt with the wrapped LLM
llm_chain = LLMChain(prompt=prompt, llm=mistral_llm, verbose=False)

In [28]:
# Greeting-only query: runs generate_response end to end with the feature
# view, weather feature group, XGBoost model, LLM, tokenizer and chain
QUESTION7 = "Hi!"

response7 = generate_response(
    QUESTION7,
    feature_view,
    weather_fg,
    model_air_quality,
    model, 
    tokenizer,
    llm_chain,
    verbose=False,
)

# Show the text returned by generate_response
print(response7)

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.



Hello! I'd be happy to help you with any air quality-related questions you may have. Could you please provide me with the name of the city you are interested in?


In [30]:
# General question about PM2_5 levels, sent through the same
# generate_response call as the greeting above
# NOTE(review): the saved output of this cell ends in KeyboardInterrupt, so
# no answer was captured for this query
QUESTION = "Can you please explain different PM2_5 air quality levels?"

response = generate_response(
    QUESTION, 
    feature_view,
    weather_fg,
    model_air_quality,
    model, 
    tokenizer,
    llm_chain,
    verbose=False,
)

# Show the text returned by generate_response
print(response)

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


KeyboardInterrupt: 

In [32]:
import locale
# Monkeypatch: make locale.getpreferredencoding() report "UTF-8" for the rest
# of the session.
# NOTE(review): presumably a workaround for a runtime whose preferred encoding
# is not UTF-8 — confirm it is still needed.
locale.getpreferredencoding = lambda: "UTF-8"
import gradio as gr
from transformers import pipeline
import numpy as np
# XGBRegressor and generate_response are already imported in the first cell;
# these two lines re-import them.
from xgboost import XGBRegressor
from functions.llm_chain import load_model, get_llm_chain, generate_response

In [33]:
# Speech-recognition pipeline used by transcribe(); built on the
# "openai/whisper-base.en" checkpoint
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

def transcribe(audio):
    """
    Convert an audio clip to text using the module-level `transcriber`.

    Args:
        audio: `(sampling_rate, samples)` tuple, where `samples` is a NumPy
            array that is either 1-D or 2-D with channels on axis 1.

    Returns:
        str: The "text" field of the transcriber result, or an empty string
            when the clip contains no samples.
    """
    sr, y = audio
    y = y.astype(np.float32)

    # Nothing recorded: np.max would raise on an empty array.
    if y.size == 0:
        return ""

    # Average over axis 1 for any 2-D input so that a single-channel (n, 1)
    # array is flattened to 1-D as well.
    if y.ndim > 1:
        y = np.mean(y, axis=1)

    # Peak-normalize, but leave all-zero (silent) clips alone: dividing by a
    # zero peak would fill the signal with NaN.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]

def generate_query_response(user_query, method, openai_api_key=None):
    """
    Answer a user query with the selected backend.

    Args:
        user_query: The question text to answer.
        method: 'Hermes LLM' to use the locally loaded model and LLM chain,
            or 'OpenAI API' to use an OpenAI client.
        openai_api_key: API key used to build the OpenAI client; only
            consulted when `method` is 'OpenAI API'.

    Returns:
        The value returned by `generate_response` / `generate_response_openai`,
        or the string "Invalid method or missing API key." when the method is
        unknown or the OpenAI key is missing.
    """
    if method == 'Hermes LLM':
        # The notebook loads the LLM into the global `model`; the previous
        # name `model_llm` is never defined and raised NameError here.
        return generate_response(
            user_query,
            feature_view,
            weather_fg,
            model_air_quality,
            model,
            tokenizer,
            llm_chain,
            verbose=False,
        )

    if method == 'OpenAI API' and openai_api_key:
        client = OpenAI(
            api_key=openai_api_key
        )
        return generate_response_openai(
            user_query,
            feature_view,
            weather_fg,
            model_air_quality,
            client=client,
            verbose=True,
        )

    return "Invalid method or missing API key."

def handle_input(text_input=None, audio_input=None, method='Hermes LLM', openai_api_key=""):
    """
    Entry point for the UI: pick the query source, validate it and answer it.

    Audio input takes precedence over text input; it is converted to text
    with `transcribe`. The resulting query is forwarded to
    `generate_query_response`.

    Args:
        text_input: Query typed by the user, if any.
        audio_input: Recorded audio passed to `transcribe`, or None.
        method: Backend name; None or empty falls back to 'Hermes LLM',
            matching the parameter default.
        openai_api_key: OpenAI key; required only for the 'OpenAI API' method.
            None is treated like an empty string.

    Returns:
        The generated answer, or a user-facing message when the API key or
        the query is missing.
    """
    # Callers may pass None for an unselected method or an empty key field;
    # normalize both so the checks below cannot raise.
    method = method or 'Hermes LLM'
    api_key = (openai_api_key or "").strip()

    if audio_input is not None:
        user_query = transcribe(audio_input)
    else:
        user_query = text_input
    # Treat None and whitespace-only queries as "no input".
    user_query = (user_query or "").strip()

    # Check if OpenAI API key is required but not provided
    if method == 'OpenAI API' and not api_key:
        return "OpenAI API key is required for this method."

    if not user_query:
        return "Please provide input either via text or voice."
    return generate_query_response(user_query, method, api_key)
    

# Gradio UI wired to handle_input: free-text box, audio input, backend
# selector and an OpenAI key field (inputs are passed in this order).
iface = gr.Interface(
    fn=handle_input,
    inputs=[
        gr.Textbox(placeholder="Type here or use voice input..."),
        gr.Audio(),
        gr.Radio(
            ["Hermes LLM", "OpenAI API"],
            # Preselect a backend: without a value the radio submits None and
            # every request is rejected until the user clicks an option.
            value="Hermes LLM",
            label="Choose the response generation method",
        ),
        gr.Textbox(label="Enter your OpenAI API key (only if you selected OpenAI API):", type="password"),
    ],
    outputs="text",
    title="🌤️ AirQuality AI Assistant 💬",
    description="Ask your questions about air quality or use your voice to interact. Select the response generation method and provide an OpenAI API key if necessary."
)

# NOTE(review): share=True also serves the app on a public *.gradio.live URL
# (see the "Running on public URL" output); confirm that is intended for an
# app that accepts API keys.
iface.launch(share=True)

config.json:   0%|          | 0.00/1.94k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/290M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/805 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.41M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

normalizer.json:   0%|          | 0.00/52.7k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/34.6k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.83k [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/185k [00:00<?, ?B/s]

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://343136510e34e69f2a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


