Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
976 changes: 732 additions & 244 deletions advanced_tutorials/air_quality/1_air_quality_feature_backfill.ipynb

Large diffs are not rendered by default.

942 changes: 848 additions & 94 deletions advanced_tutorials/air_quality/2_air_quality_feature_pipeline.ipynb

Large diffs are not rendered by default.

632 changes: 474 additions & 158 deletions advanced_tutorials/air_quality/3_air_quality_training_pipeline.ipynb

Large diffs are not rendered by default.

402 changes: 335 additions & 67 deletions advanced_tutorials/air_quality/4_air_quality_batch_inference.ipynb

Large diffs are not rendered by default.

776 changes: 776 additions & 0 deletions advanced_tutorials/air_quality/5_function_calling.ipynb

Large diffs are not rendered by default.

104 changes: 104 additions & 0 deletions advanced_tutorials/air_quality/app_gradio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import gradio as gr
from transformers import pipeline
import numpy as np
import hopsworks
import joblib
from functions.llm_chain import load_model, get_llm_chain, generate_response

# Initialize the ASR pipeline
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

def connect_to_hopsworks():
# Initialize Hopsworks feature store connection
project = hopsworks.login()
fs = project.get_feature_store()

# Retrieve the model registry
mr = project.get_model_registry()

# Retrieve the 'air_quality_fv' feature view
feature_view = fs.get_feature_view(
name="air_quality_fv",
version=1,
)

# Initialize batch scoring
feature_view.init_batch_scoring(1)

# Retrieve the 'air_quality_xgboost_model' from the model registry
retrieved_model = mr.get_model(
name="air_quality_xgboost_model",
version=1,
)

# Download the saved model artifacts to a local directory
saved_model_dir = retrieved_model.download()

# Load the XGBoost regressor model and label encoder from the saved model directory
model_air_quality = joblib.load(saved_model_dir + "/xgboost_regressor.pkl")
encoder = joblib.load(saved_model_dir + "/label_encoder.pkl")

return feature_view, model_air_quality, encoder


def retrieve_llm_chain():

# Load the LLM and its corresponding tokenizer.
model_llm, tokenizer = load_model()

# Create and configure a language model chain.
llm_chain = get_llm_chain(
model_llm,
tokenizer,
)

return model_llm, tokenizer, llm_chain


# Retrieve the feature view, air quality model and encoder for the city_name column
feature_view, model_air_quality, encoder = connect_to_hopsworks()

# Load the LLM and its corresponding tokenizer and configure a language model chain
model_llm, tokenizer, llm_chain = retrieve_llm_chain()

def transcribe(audio):
sr, y = audio
y = y.astype(np.float32)
if y.ndim > 1 and y.shape[1] > 1:
y = np.mean(y, axis=1)
y /= np.max(np.abs(y))
return transcriber({"sampling_rate": sr, "raw": y})["text"]

def generate_query_response(user_query):
response = generate_response(
user_query,
feature_view,
model_llm,
tokenizer,
model_air_quality,
encoder,
llm_chain,
verbose=False,
)
return response

def handle_input(text_input=None, audio_input=None):
if audio_input is not None:
user_query = transcribe(audio_input)
else:
user_query = text_input

if user_query:
return generate_query_response(user_query)
else:
return "Please provide input either via text or voice."

iface = gr.Interface(
fn=handle_input,
inputs=[gr.Textbox(placeholder="Type here or use voice input..."), gr.Audio()],
outputs="text",
title="🌤️ AirQuality AI Assistant 💬",
description="Ask your questions about air quality or use your voice to interact."
)

iface.launch(share=True)
99 changes: 99 additions & 0 deletions advanced_tutorials/air_quality/app_streamlit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import streamlit as st
import hopsworks
import joblib
from functions.llm_chain import load_model, get_llm_chain, generate_response
import warnings
warnings.filterwarnings('ignore')

st.title("🌤️ AirQuality AI assistant 💬")

@st.cache_resource()
def connect_to_hopsworks():
# Initialize Hopsworks feature store connection
project = hopsworks.login()
fs = project.get_feature_store()

# Retrieve the model registry
mr = project.get_model_registry()

# Retrieve the 'air_quality_fv' feature view
feature_view = fs.get_feature_view(
name="air_quality_fv",
version=1,
)

# Initialize batch scoring
feature_view.init_batch_scoring(1)

# Retrieve the 'air_quality_xgboost_model' from the model registry
retrieved_model = mr.get_model(
name="air_quality_xgboost_model",
version=1,
)

# Download the saved model artifacts to a local directory
saved_model_dir = retrieved_model.download()

# Load the XGBoost regressor model and label encoder from the saved model directory
model_air_quality = joblib.load(saved_model_dir + "/xgboost_regressor.pkl")
encoder = joblib.load(saved_model_dir + "/label_encoder.pkl")

return feature_view, model_air_quality, encoder


@st.cache_resource()
def retrieve_llm_chain():

# Load the LLM and its corresponding tokenizer.
model_llm, tokenizer = load_model()

# Create and configure a language model chain.
llm_chain = get_llm_chain(
model_llm,
tokenizer,
)

return model_llm, tokenizer, llm_chain


# Retrieve the feature view, air quality model and encoder for the city_name column
feature_view, model_air_quality, encoder = connect_to_hopsworks()

# Load the LLM and its corresponding tokenizer and configure a language model chain
model_llm, tokenizer, llm_chain = retrieve_llm_chain()

# Initialize chat history
if "messages" not in st.session_state:
st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
with st.chat_message(message["role"]):
st.markdown(message["content"])

# React to user input
if user_query := st.chat_input("How can I help you?"):
# Display user message in chat message container
st.chat_message("user").markdown(user_query)
# Add user message to chat history
st.session_state.messages.append({"role": "user", "content": user_query})

st.write('⚙️ Generating Response...')

# Generate a response to the user query
response = generate_response(
user_query,
feature_view,
model_llm,
tokenizer,
model_air_quality,
encoder,
llm_chain,
verbose=False,
)

# Display assistant response in chat message container
with st.chat_message("assistant"):
st.markdown(response)
# Add assistant response to chat history
st.session_state.messages.append({"role": "assistant", "content": response})
158 changes: 0 additions & 158 deletions advanced_tutorials/air_quality/feature_pipeline.py

This file was deleted.

Empty file.
6 changes: 3 additions & 3 deletions advanced_tutorials/air_quality/features/air_quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def shift_pm_2_5(df: pd.DataFrame, days: int = 5) -> pd.DataFrame:
"""
for shift_value in range(1, days + 1):
df[f'pm_2_5_previous_{shift_value}_day'] = df.groupby('city_name')['pm2_5'].shift(shift_value)
df = df.dropna()
return df


Expand Down Expand Up @@ -227,8 +226,9 @@ def feature_engineer_aq(df: pd.DataFrame) -> pd.DataFrame:
for i in [7, 14, 28]:
for func in [moving_std, exponential_moving_average, exponential_moving_std]:
df_res = func(df_res, i)

df_res = df_res.sort_values(by=["date", "pm2_5"]).dropna()


df_res = df_res.sort_values(by=["date", "pm2_5"])
df_res = df_res.reset_index(drop=True)

df_res['year'] = year(df_res['date'])
Expand Down
Loading