diff --git a/advanced_tutorials/air_quality/2_air_quality_feature_pipeline.ipynb b/advanced_tutorials/air_quality/2_air_quality_feature_pipeline.ipynb index 8f159c75..0d67018a 100644 --- a/advanced_tutorials/air_quality/2_air_quality_feature_pipeline.ipynb +++ b/advanced_tutorials/air_quality/2_air_quality_feature_pipeline.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "f16a717d", + "id": "932614f4", "metadata": {}, "source": [ "# **Hopsworks Feature Store** - Part 02: Feature Pipeline\n", @@ -16,7 +16,7 @@ }, { "cell_type": "markdown", - "id": "37facd6e", + "id": "fada62d6", "metadata": {}, "source": [ "### 📝 Imports" @@ -25,7 +25,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "77d2fbe5", + "id": "223d0565", "metadata": {}, "outputs": [], "source": [ @@ -39,7 +39,6 @@ "from functions.parse_weather import get_weather_data_from_open_meteo\n", "from functions.common_functions import *\n", "\n", - "\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")" ] @@ -47,7 +46,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "c14c97e6", + "id": "680489a3", "metadata": { "tags": [] }, @@ -62,7 +61,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "5b67e039", + "id": "f4e6d85a", "metadata": { "tags": [] }, @@ -70,7 +69,7 @@ { "data": { "text/plain": [ - "(datetime.date(2024, 3, 17), '2024-03-17')" + "(datetime.date(2024, 5, 14), '2024-05-14')" ] }, "execution_count": 3, @@ -88,7 +87,7 @@ }, { "cell_type": "markdown", - "id": "0c5ebe2a", + "id": "66767202", "metadata": {}, "source": [ "### 🔮 Connecting to Hopsworks Feature Store " @@ -97,7 +96,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "730eb857", + "id": "7187a854", "metadata": {}, "outputs": [ { @@ -121,7 +120,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "ddd400ad", + "id": "b1f968a6", "metadata": {}, "outputs": [], "source": [ @@ -138,7 +137,7 @@ }, { "cell_type": "markdown", - "id": "f992009d", + "id": "7dfdbf9d", "metadata": {}, "source": [ "## 🌫 Filling gaps in Air Quality data (PM2.5)" @@ -147,7 +146,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "37058d7f", + "id": "98a4a37f", "metadata": { "tags": [] }, @@ -156,8 +155,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Finished: Reading data from Hopsworks, using ArrowFlight (2.30s) \n", - "Finished: Reading data from Hopsworks, using ArrowFlight (1.48s) \n" + "Finished: Reading data from Hopsworks, using ArrowFlight (2.27s) \n", + "Finished: Reading data from Hopsworks, using ArrowFlight (1.55s) \n" ] } ], @@ -170,7 +169,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "cee48adb", + "id": "104c5525", "metadata": { "tags": [] }, @@ -190,7 +189,7 @@ { "cell_type": "code", "execution_count": 8, - "id": "49b9e259", + "id": "72cbe44b", "metadata": { "tags": [] }, @@ -199,8 +198,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "⛳️ Last update for Paris: 2024-03-15\n", - "⛳️ Last update for Columbus: 2024-03-15\n" + "⛳️ Last update for Paris: 2024-05-13\n", + "⛳️ Last update for Columbus: 2024-05-13\n" ] } ], @@ -219,7 +218,7 @@ { "cell_type": "code", "execution_count": 9, - "id": "f658d581", + "id": "2d45a468", "metadata": {}, "outputs": [], "source": [ @@ -231,7 +230,7 @@ }, { "cell_type": "markdown", - "id": "f6df0f7f", + "id": "b6e5c1c9", "metadata": {}, "source": [ "### 🧙🏼‍♂️ Parsing PM2.5 data" @@ -240,7 +239,7 @@ { "cell_type": "code", "execution_count": 10, - "id": "cc68ab56", + "id": "6835e4e4", "metadata": { "scrolled": true, "tags": [] @@ -250,144 +249,144 @@ "name": "stdout", "output_type": "stream", "text": [ - "Processed PM2_5 for Amsterdam since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Amsterdam since 2024-04-15 till 2024-05-14.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Athina since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Athina since 2024-04-15 till 2024-05-14.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Berlin since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Berlin since 2024-04-15 till 2024-05-14.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Gdansk since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Gdansk since 2024-04-15 till 2024-05-14.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Kraków since 2024-02-16 till 2024-03-17.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for Kraków since 2024-04-15 till 2024-05-14.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for London since 2024-02-16 till 2024-03-17.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for London since 2024-04-15 till 2024-05-14.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Madrid since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Madrid since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Marseille since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Marseille since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Milano since 2024-02-16 till 2024-03-17.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for Milano since 2024-04-15 till 2024-05-14.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for München since 2024-02-16 till 2024-03-17.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for München since 2024-04-15 till 2024-05-14.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Napoli since 2024-02-16 till 2024-03-17.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for Napoli since 2024-04-15 till 2024-05-14.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Paris since 2024-02-16 till 2024-03-17.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for Paris since 2024-04-15 till 2024-05-14.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Sevilla since 2024-02-16 till 2024-03-17.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for Sevilla since 2024-04-15 till 2024-05-14.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Stockholm since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Stockholm since 2024-04-15 till 2024-05-14.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Tallinn since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Tallinn since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Varna since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Varna since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Wien since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Wien since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Albuquerque since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Albuquerque since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Atlanta since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Atlanta since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Chicago since 2024-02-16 till 2024-03-17.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for Chicago since 2024-04-15 till 2024-05-14.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Columbus since 2024-02-16 till 2024-03-17.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for Columbus since 2024-04-15 till 2024-05-14.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Dallas since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Dallas since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Denver since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Denver since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Houston since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Houston since 2024-04-15 till 2024-05-14.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for Los Angeles since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Los Angeles since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for New York since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for New York since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Phoenix-Mesa since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Phoenix-Mesa since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Salt Lake City since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Salt Lake City since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for San Francisco since 2024-04-15 till 2024-05-14.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for San Francisco since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Tampa since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Tampa since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Bellevue-SE 12th St since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Bellevue-SE 12th St since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for DARRINGTON - FIR ST (Darrington High School) since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for DARRINGTON - FIR ST (Darrington High School) since 2024-02-16 till 2024-03-17.\n", - "Took 0.1 sec.\n", + "Processed PM2_5 for KENT - JAMES & CENTRAL since 2024-04-15 till 2024-05-14.\n", + "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for KENT - JAMES & CENTRAL since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for LAKE FOREST PARK TOWNE CENTER since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for LAKE FOREST PARK TOWNE CENTER since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for MARYSVILLE - 7TH AVE (Marysville Junior High) since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for MARYSVILLE - 7TH AVE (Marysville Junior High) since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for NORTH BEND - NORTH BEND WAY since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for NORTH BEND - NORTH BEND WAY since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for SEATTLE - BEACON HILL since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for SEATTLE - BEACON HILL since 2024-02-16 till 2024-03-17.\n", - "Took 0.11 sec.\n", - "\n", - "Processed PM2_5 for SEATTLE - DUWAMISH since 2024-02-16 till 2024-03-17.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for SEATTLE - DUWAMISH since 2024-04-15 till 2024-05-14.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for SEATTLE - SOUTH PARK #2 since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for SEATTLE - SOUTH PARK #2 since 2024-04-15 till 2024-05-14.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Seattle-10th & Weller since 2024-02-16 till 2024-03-17.\n", - "Took 0.13 sec.\n", + "Processed PM2_5 for Seattle-10th & Weller since 2024-04-15 till 2024-05-14.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for TACOMA - ALEXANDER AVE since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for TACOMA - ALEXANDER AVE since 2024-04-15 till 2024-05-14.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for TACOMA - L STREET since 2024-02-16 till 2024-03-17.\n", - "Took 0.66 sec.\n", + "Processed PM2_5 for TACOMA - L STREET since 2024-04-15 till 2024-05-14.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Tacoma-S 36th St since 2024-02-16 till 2024-03-17.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for Tacoma-S 36th St since 2024-04-15 till 2024-05-14.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Tukwila Allentown since 2024-02-16 till 2024-03-17.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for Tukwila Allentown since 2024-04-15 till 2024-05-14.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Tulalip-Totem Beach Rd since 2024-02-16 till 2024-03-17.\n", + "Processed PM2_5 for Tulalip-Totem Beach Rd since 2024-04-15 till 2024-05-14.\n", "Took 0.11 sec.\n", "\n", "----------------------------------------------------------------\n", - "Parsed new PM2.5 data for ALL locations up to 2024-03-17.\n", - "Took 5.44 sec.\n", + "Parsed new PM2.5 data for ALL locations up to 2024-05-14.\n", + "Took 4.74 sec.\n", "\n" ] } @@ -427,7 +426,7 @@ { "cell_type": "code", "execution_count": 11, - "id": "09db1460", + "id": "1e8c2e7c", "metadata": {}, "outputs": [ { @@ -458,22 +457,22 @@ " \n", " \n", " \n", - " 1392\n", + " 1347\n", " Tulalip-Totem Beach Rd\n", - " 2024-03-15\n", - " 14.5\n", + " 2024-05-12\n", + " 6.8\n", " \n", " \n", - " 1393\n", + " 1348\n", " Tulalip-Totem Beach Rd\n", - " 2024-03-16\n", - " 13.7\n", + " 2024-05-13\n", + " 4.1\n", " \n", " \n", - " 1394\n", + " 1349\n", " Tulalip-Totem Beach Rd\n", - " 2024-03-17\n", - " 15.1\n", + " 2024-05-14\n", + " 6.6\n", " \n", " \n", "\n", @@ -481,9 +480,9 @@ ], "text/plain": [ " city_name date pm2_5\n", - "1392 Tulalip-Totem Beach Rd 2024-03-15 14.5\n", - "1393 Tulalip-Totem Beach Rd 2024-03-16 13.7\n", - "1394 Tulalip-Totem Beach Rd 2024-03-17 15.1" + "1347 Tulalip-Totem Beach Rd 2024-05-12 6.8\n", + "1348 Tulalip-Totem Beach Rd 2024-05-13 4.1\n", + "1349 Tulalip-Totem Beach Rd 2024-05-14 6.6" ] }, "execution_count": 11, @@ -497,7 +496,7 @@ }, { "cell_type": "markdown", - "id": "7cbabf34", + "id": "8b1fec0b", "metadata": { "tags": [] }, @@ -508,7 +507,7 @@ { "cell_type": "code", "execution_count": 12, - "id": "100f0f2d", + "id": "5d65f2ad", "metadata": { "tags": [] }, @@ -521,7 +520,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "91ded83e", + "id": "57ecbd4f", "metadata": { "tags": [] }, @@ -572,75 +571,75 @@ " \n", " \n", " \n", - " 1392\n", - " Athina\n", - " 2024-03-17\n", - " 25.6\n", - " 25.8\n", - " 20.2\n", - " 14.8\n", - " 8.4\n", + " 1347\n", + " Napoli\n", + " 2024-05-14\n", + " 18.0\n", + " 16.4\n", + " 11.9\n", + " 9.3\n", + " 8.8\n", + " 6.8\n", " 10.3\n", - " 9.6\n", - " 17.2\n", + " 12.0\n", " ...\n", - " 5.376253\n", + " 3.937156\n", " 2024\n", - " 17\n", - " 3\n", - " 6\n", + " 14\n", + " 5\n", " 1\n", - " 0.970064\n", - " 0.24285\n", - " -0.781831\n", + " 0\n", + " 0.729558\n", + " -0.683919\n", + " 0.781831\n", " 0.62349\n", " \n", " \n", - " 1393\n", + " 1348\n", " Los Angeles\n", - " 2024-03-17\n", - " 28.1\n", - " 21.5\n", - " 15.1\n", - " 12.7\n", - " 16.4\n", - " 11.0\n", - " 27.0\n", - " 36.3\n", + " 2024-05-14\n", + " 19.3\n", + " 15.9\n", + " 16.8\n", + " 16.1\n", + " 16.1\n", + " 15.7\n", + " 19.9\n", + " 20.6\n", " ...\n", - " 7.696575\n", + " 5.137784\n", " 2024\n", - " 17\n", - " 3\n", - " 6\n", + " 14\n", + " 5\n", " 1\n", - " 0.970064\n", - " 0.24285\n", - " -0.781831\n", + " 0\n", + " 0.729558\n", + " -0.683919\n", + " 0.781831\n", " 0.62349\n", " \n", " \n", - " 1394\n", - " Milano\n", - " 2024-03-17\n", - " 43.4\n", - " 23.6\n", - " 16.8\n", - " 46.6\n", - " 32.4\n", - " 17.0\n", - " 21.2\n", - " 10.4\n", + " 1349\n", + " Houston\n", + " 2024-05-14\n", + " 21.5\n", + " 13.7\n", + " 12.9\n", + " 7.2\n", + " 10.1\n", + " 14.7\n", + " 9.8\n", + " 8.9\n", " ...\n", - " 22.044394\n", + " 2.577200\n", " 2024\n", - " 17\n", - " 3\n", - " 6\n", + " 14\n", + " 5\n", " 1\n", - " 0.970064\n", - " 0.24285\n", - " -0.781831\n", + " 0\n", + " 0.729558\n", + " -0.683919\n", + " 0.781831\n", " 0.62349\n", " \n", " \n", @@ -650,34 +649,34 @@ ], "text/plain": [ " city_name date pm2_5 pm_2_5_previous_1_day \\\n", - "1392 Athina 2024-03-17 25.6 25.8 \n", - "1393 Los Angeles 2024-03-17 28.1 21.5 \n", - "1394 Milano 2024-03-17 43.4 23.6 \n", + "1347 Napoli 2024-05-14 18.0 16.4 \n", + "1348 Los Angeles 2024-05-14 19.3 15.9 \n", + "1349 Houston 2024-05-14 21.5 13.7 \n", "\n", " pm_2_5_previous_2_day pm_2_5_previous_3_day pm_2_5_previous_4_day \\\n", - "1392 20.2 14.8 8.4 \n", - "1393 15.1 12.7 16.4 \n", - "1394 16.8 46.6 32.4 \n", + "1347 11.9 9.3 8.8 \n", + "1348 16.8 16.1 16.1 \n", + "1349 12.9 7.2 10.1 \n", "\n", " pm_2_5_previous_5_day pm_2_5_previous_6_day pm_2_5_previous_7_day \\\n", - "1392 10.3 9.6 17.2 \n", - "1393 11.0 27.0 36.3 \n", - "1394 17.0 21.2 10.4 \n", + "1347 6.8 10.3 12.0 \n", + "1348 15.7 19.9 20.6 \n", + "1349 14.7 9.8 8.9 \n", "\n", " ... exp_std_28_days year day_of_month month day_of_week \\\n", - "1392 ... 5.376253 2024 17 3 6 \n", - "1393 ... 7.696575 2024 17 3 6 \n", - "1394 ... 22.044394 2024 17 3 6 \n", + "1347 ... 3.937156 2024 14 5 1 \n", + "1348 ... 5.137784 2024 14 5 1 \n", + "1349 ... 2.577200 2024 14 5 1 \n", "\n", " is_weekend sin_day_of_year cos_day_of_year sin_day_of_week \\\n", - "1392 1 0.970064 0.24285 -0.781831 \n", - "1393 1 0.970064 0.24285 -0.781831 \n", - "1394 1 0.970064 0.24285 -0.781831 \n", + "1347 0 0.729558 -0.683919 0.781831 \n", + "1348 0 0.729558 -0.683919 0.781831 \n", + "1349 0 0.729558 -0.683919 0.781831 \n", "\n", " cos_day_of_week \n", - "1392 0.62349 \n", - "1393 0.62349 \n", - "1394 0.62349 \n", + "1347 0.62349 \n", + "1348 0.62349 \n", + "1349 0.62349 \n", "\n", "[3 rows x 31 columns]" ] @@ -700,7 +699,7 @@ { "cell_type": "code", "execution_count": 14, - "id": "a387ac8a", + "id": "b3c89be0", "metadata": {}, "outputs": [ { @@ -722,7 +721,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "bb4b8914", + "id": "215fc4d5", "metadata": { "tags": [] }, @@ -730,7 +729,7 @@ { "data": { "text/plain": [ - "(135, 31)" + "(90, 31)" ] }, "execution_count": 15, @@ -745,7 +744,7 @@ }, { "cell_type": "markdown", - "id": "fe63cf55", + "id": "998e1128", "metadata": {}, "source": [ "## 🌦 Filling gaps in Weather data" @@ -754,7 +753,7 @@ { "cell_type": "code", "execution_count": 16, - "id": "36a7388e", + "id": "8b9d1259", "metadata": { "tags": [] }, @@ -773,7 +772,7 @@ }, { "cell_type": "markdown", - "id": "6c868dae", + "id": "737827d6", "metadata": { "tags": [] }, @@ -784,7 +783,7 @@ { "cell_type": "code", "execution_count": 17, - "id": "0ad03c2d", + "id": "a6e9d808", "metadata": { "scrolled": true, "tags": [] @@ -794,144 +793,144 @@ "name": "stdout", "output_type": "stream", "text": [ - "Parsed weather for Amsterdam since 2024-03-15 till 2024-03-17.\n", - "Took 2.11 sec.\n", - "\n", - "Parsed weather for Athina since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Amsterdam since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Berlin since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Athina since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Gdansk since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Berlin since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Kraków since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Gdansk since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for London since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Kraków since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Madrid since 2024-03-15 till 2024-03-17.\n", - "Took 2.11 sec.\n", + "Parsed weather for London since 2024-05-13 till 2024-05-14.\n", + "Took 2.1 sec.\n", "\n", - "Parsed weather for Marseille since 2024-03-15 till 2024-03-17.\n", - "Took 2.11 sec.\n", + "Parsed weather for Madrid since 2024-05-13 till 2024-05-14.\n", + "Took 2.1 sec.\n", "\n", - "Parsed weather for Milano since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Marseille since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for München since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Milano since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Napoli since 2024-03-15 till 2024-03-17.\n", - "Took 2.11 sec.\n", + "Parsed weather for München since 2024-05-13 till 2024-05-14.\n", + "Took 2.1 sec.\n", "\n", - "Parsed weather for Paris since 2024-03-15 till 2024-03-17.\n", - "Took 2.11 sec.\n", + "Parsed weather for Napoli since 2024-05-13 till 2024-05-14.\n", + "Took 2.1 sec.\n", "\n", - "Parsed weather for Sevilla since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Paris since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Stockholm since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Sevilla since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Tallinn since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Stockholm since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Varna since 2024-03-15 till 2024-03-17.\n", - "Took 2.11 sec.\n", + "Parsed weather for Tallinn since 2024-05-13 till 2024-05-14.\n", + "Took 2.1 sec.\n", "\n", - "Parsed weather for Wien since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Varna since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Albuquerque since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Wien since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Atlanta since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Albuquerque since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Chicago since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Atlanta since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Columbus since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Chicago since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Dallas since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Columbus since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Denver since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Dallas since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Houston since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Denver since 2024-05-13 till 2024-05-14.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for Los Angeles since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Houston since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for New York since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Los Angeles since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Phoenix-Mesa since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for New York since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Salt Lake City since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Phoenix-Mesa since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for San Francisco since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Salt Lake City since 2024-05-13 till 2024-05-14.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for Tampa since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for San Francisco since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Bellevue-SE 12th St since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Tampa since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for DARRINGTON - FIR ST (Darrington High School) since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Bellevue-SE 12th St since 2024-05-13 till 2024-05-14.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for KENT - JAMES & CENTRAL since 2024-03-15 till 2024-03-17.\n", - "Took 2.11 sec.\n", + "Parsed weather for DARRINGTON - FIR ST (Darrington High School) since 2024-05-13 till 2024-05-14.\n", + "Took 2.1 sec.\n", "\n", - "Parsed weather for LAKE FOREST PARK TOWNE CENTER since 2024-03-15 till 2024-03-17.\n", - "Took 2.12 sec.\n", + "Parsed weather for KENT - JAMES & CENTRAL since 2024-05-13 till 2024-05-14.\n", + "Took 2.1 sec.\n", "\n", - "Parsed weather for MARYSVILLE - 7TH AVE (Marysville Junior High) since 2024-03-15 till 2024-03-17.\n", - "Took 2.12 sec.\n", + "Parsed weather for LAKE FOREST PARK TOWNE CENTER since 2024-05-13 till 2024-05-14.\n", + "Took 2.1 sec.\n", "\n", - "Parsed weather for NORTH BEND - NORTH BEND WAY since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for MARYSVILLE - 7TH AVE (Marysville Junior High) since 2024-05-13 till 2024-05-14.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for SEATTLE - BEACON HILL since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for NORTH BEND - NORTH BEND WAY since 2024-05-13 till 2024-05-14.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for SEATTLE - DUWAMISH since 2024-03-15 till 2024-03-17.\n", - "Took 2.11 sec.\n", + "Parsed weather for SEATTLE - BEACON HILL since 2024-05-13 till 2024-05-14.\n", + "Took 2.1 sec.\n", "\n", - "Parsed weather for SEATTLE - SOUTH PARK #2 since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for SEATTLE - DUWAMISH since 2024-05-13 till 2024-05-14.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for Seattle-10th & Weller since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for SEATTLE - SOUTH PARK #2 since 2024-05-13 till 2024-05-14.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Seattle-10th & Weller since 2024-05-13 till 2024-05-14.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for TACOMA - ALEXANDER AVE since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for TACOMA - ALEXANDER AVE since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for TACOMA - L STREET since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for TACOMA - L STREET since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Tacoma-S 36th St since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Tacoma-S 36th St since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Tukwila Allentown since 2024-03-15 till 2024-03-17.\n", - "Took 2.11 sec.\n", + "Parsed weather for Tukwila Allentown since 2024-05-13 till 2024-05-14.\n", + "Took 2.1 sec.\n", "\n", - "Parsed weather for Tulalip-Totem Beach Rd since 2024-03-15 till 2024-03-17.\n", + "Parsed weather for Tulalip-Totem Beach Rd since 2024-05-13 till 2024-05-14.\n", "Took 2.1 sec.\n", "\n", "----------------------------------------------------------------\n", - "Parsed new weather data for ALL cities up to 2024-03-17.\n", - "Took 94.79 sec.\n", + "Parsed new weather data for ALL cities up to 2024-05-14.\n", + "Took 94.69 sec.\n", "\n" ] } @@ -975,7 +974,7 @@ { "cell_type": "code", "execution_count": 18, - "id": "de4d4870", + "id": "281ebd0c", "metadata": { "tags": [] }, @@ -997,7 +996,7 @@ { "cell_type": "code", "execution_count": 19, - "id": "82d7bc88", + "id": "b7b0d0ef", "metadata": { "tags": [] }, @@ -1039,69 +1038,69 @@ " \n", " \n", " \n", - " 132\n", - " Tulalip-Totem Beach Rd\n", - " 2024-03-15\n", - " 15.3\n", - " 4.2\n", + " 87\n", + " Tukwila Allentown\n", + " 2024-05-14\n", + " 20.7\n", + " 9.8\n", " 0.0\n", " 0.0\n", " 0.0\n", " 0.0\n", - " 10.8\n", - " 22.3\n", - " 343\n", - " 1710460800000\n", + " 16.9\n", + " 25.6\n", + " 8\n", + " 1715644800000\n", " \n", " \n", - " 133\n", + " 88\n", " Tulalip-Totem Beach Rd\n", - " 2024-03-16\n", - " 21.3\n", - " 4.8\n", + " 2024-05-13\n", + " 19.4\n", + " 10.0\n", " 0.0\n", " 0.0\n", " 0.0\n", " 0.0\n", - " 9.8\n", - " 25.9\n", - " 336\n", - " 1710547200000\n", + " 21.6\n", + " 52.2\n", + " 330\n", + " 1715558400000\n", " \n", " \n", - " 134\n", + " 89\n", " Tulalip-Totem Beach Rd\n", - " 2024-03-17\n", - " 22.0\n", - " 9.2\n", + " 2024-05-14\n", + " 17.9\n", + " 7.6\n", " 0.0\n", " 0.0\n", " 0.0\n", " 0.0\n", - " 11.4\n", - " 14.0\n", - " 93\n", - " 1710633600000\n", + " 17.6\n", + " 41.0\n", + " 337\n", + " 1715644800000\n", " \n", " \n", "\n", "" ], "text/plain": [ - " city_name date temperature_max temperature_min \\\n", - "132 Tulalip-Totem Beach Rd 2024-03-15 15.3 4.2 \n", - "133 Tulalip-Totem Beach Rd 2024-03-16 21.3 4.8 \n", - "134 Tulalip-Totem Beach Rd 2024-03-17 22.0 9.2 \n", + " city_name date temperature_max temperature_min \\\n", + "87 Tukwila Allentown 2024-05-14 20.7 9.8 \n", + "88 Tulalip-Totem Beach Rd 2024-05-13 19.4 10.0 \n", + "89 Tulalip-Totem Beach Rd 2024-05-14 17.9 7.6 \n", "\n", - " precipitation_sum rain_sum snowfall_sum precipitation_hours \\\n", - "132 0.0 0.0 0.0 0.0 \n", - "133 0.0 0.0 0.0 0.0 \n", - "134 0.0 0.0 0.0 0.0 \n", + " precipitation_sum rain_sum snowfall_sum precipitation_hours \\\n", + "87 0.0 0.0 0.0 0.0 \n", + "88 0.0 0.0 0.0 0.0 \n", + "89 0.0 0.0 0.0 0.0 \n", "\n", - " wind_speed_max wind_gusts_max wind_direction_dominant unix_time \n", - "132 10.8 22.3 343 1710460800000 \n", - "133 9.8 25.9 336 1710547200000 \n", - "134 11.4 14.0 93 1710633600000 " + " wind_speed_max wind_gusts_max wind_direction_dominant unix_time \n", + "87 16.9 25.6 8 1715644800000 \n", + "88 21.6 52.2 330 1715558400000 \n", + "89 17.6 41.0 337 1715644800000 " ] }, "execution_count": 19, @@ -1123,7 +1122,7 @@ }, { "cell_type": "markdown", - "id": "7b5640f9", + "id": "68a51e7e", "metadata": { "tags": [] }, @@ -1134,18 +1133,18 @@ { "cell_type": "code", "execution_count": 20, - "id": "fd72be07", + "id": "2053131e", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ad45db9921ea44f392976d59e79f3999", + "model_id": "a08101f8499844b9aaa317091df879ab", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Uploading Dataframe: 0.00% | | Rows 0/135 | Elapsed Time: 00:00 | Remaining Time: ?" + "Uploading Dataframe: 0.00% | | Rows 0/90 | Elapsed Time: 00:00 | Remaining Time: ?" ] }, "metadata": {}, @@ -1163,7 +1162,7 @@ { "data": { "text/plain": [ - "(, None)" + "(, None)" ] }, "execution_count": 20, @@ -1179,18 +1178,18 @@ { "cell_type": "code", "execution_count": 21, - "id": "cdffe4a9", + "id": "ff55bcf8", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "afe7919348224c17b0f680c8c8067507", + "model_id": "1bc259a83b4b48b588a8651241ebe5fc", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Uploading Dataframe: 0.00% | | Rows 0/135 | Elapsed Time: 00:00 | Remaining Time: ?" + "Uploading Dataframe: 0.00% | | Rows 0/90 | Elapsed Time: 00:00 | Remaining Time: ?" ] }, "metadata": {}, @@ -1208,7 +1207,7 @@ { "data": { "text/plain": [ - "(, None)" + "(, None)" ] }, "execution_count": 21, @@ -1223,7 +1222,7 @@ }, { "cell_type": "markdown", - "id": "d03605ea", + "id": "1c2a96b0", "metadata": {}, "source": [ "---\n", diff --git a/advanced_tutorials/air_quality/5_function_calling.ipynb b/advanced_tutorials/air_quality/5_function_calling.ipynb index 751218bc..1d7baee6 100644 --- a/advanced_tutorials/air_quality/5_function_calling.ipynb +++ b/advanced_tutorials/air_quality/5_function_calling.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "98404102", + "id": "9b00e25c", "metadata": {}, "source": [ "## 📝 Imports" @@ -11,7 +11,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "4f454d56", + "id": "031b277b", "metadata": {}, "outputs": [], "source": [ @@ -21,18 +21,22 @@ { "cell_type": "code", "execution_count": 2, - "id": "61ae8738", + "id": "4f72caf8", "metadata": {}, "outputs": [], "source": [ "import joblib\n", "\n", - "from functions.llm_chain import load_model, get_llm_chain, generate_response" + "from functions.llm_chain import (\n", + " load_model, \n", + " get_llm_chain, \n", + " generate_response,\n", + ")" ] }, { "cell_type": "markdown", - "id": "026af360", + "id": "73c91640", "metadata": {}, "source": [ "## 🔮 Connect to Hopsworks Feature Store " @@ -41,7 +45,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "81617c65", + "id": "cbe50ef2", "metadata": {}, "outputs": [ { @@ -65,7 +69,7 @@ }, { "cell_type": "markdown", - "id": "e4e693cb", + "id": "c4732bcd", "metadata": {}, "source": [ "## ⚙️ Feature View Retrieval" @@ -74,7 +78,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "ccd11e5f", + "id": "489d68e6", "metadata": {}, "outputs": [], "source": [ @@ -90,7 +94,7 @@ }, { "cell_type": "markdown", - "id": "b54628d2", + "id": "b1097149", "metadata": {}, "source": [ "## 🪝 Retrieve AirQuality Model from Model Registry" @@ -99,7 +103,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "e67708ea", + "id": "c9cf3026", "metadata": {}, "outputs": [ { @@ -128,7 +132,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "5c391589", + "id": "d5a2b8a0", "metadata": {}, "outputs": [ { @@ -186,7 +190,7 @@ }, { "cell_type": "markdown", - "id": "fa25fb65", + "id": "027de50a", "metadata": {}, "source": [ "## ⬇️ LLM Loading" @@ -195,154 +199,29 @@ { "cell_type": "code", "execution_count": 7, - "id": "97fa773f", + "id": "9ab3103e", "metadata": {}, "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f2d85ff92cd64c08a58ff800ff7d17b3", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "tokenizer_config.json: 0%| | 0.00/1.60k [00:00⛓️ LangChain" @@ -384,7 +256,7 @@ { "cell_type": "code", "execution_count": 8, - "id": "38e442d4", + "id": "b8cfd2b4", "metadata": {}, "outputs": [ { @@ -398,14 +270,14 @@ "source": [ "# Create and configure a language model chain.\n", "llm_chain = get_llm_chain(\n", - " model_llm,\n", " tokenizer,\n", + " model_llm,\n", ")" ] }, { "cell_type": "markdown", - "id": "52bbd357", + "id": "d10b1460", "metadata": {}, "source": [ "## 🧬 Model Inference\n" @@ -414,53 +286,17 @@ { "cell_type": "code", "execution_count": 9, - "id": "45e27000", + "id": "15739772", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "🗓️ Today's date: Wednesday, 2024-03-27\n", + "🗓️ Today's date: Tuesday, 2024-05-14\n", "📖 \n", - "\n", - "Hello! I can help you with information about the air quality in the city. According to the data I have, on the 27th of March, 2024, the air quality in the city was considered safe for most people. The concentration of PM2.5 was at 12 µg/m³, which is within the safe limit of 25 µg/m³. The concentration of NO2 was at 20 µg/m³, which is also within the safe limit of 40 µg/m³. The concentration of O3 was at 30 µg/m³, which is below the safe limit of 40 µg/m³. \n", - "\n", - "Based on these readings, it is safe for you to go for a walk or engage in outdoor activities. However, if you have a pre-existing respiratory condition, it is always recommended to consult with your doctor before engaging in outdoor activities.\n" - ] - } - ], - "source": [ - "QUESTION7 = \"Hi!\"\n", - "\n", - "response7 = generate_response(\n", - " QUESTION7,\n", - " feature_view,\n", - " model_air_quality,\n", - " encoder,\n", - " model_llm,\n", - " tokenizer,\n", - " llm_chain,\n", - " verbose=True,\n", - ")\n", - "\n", - "print(response7)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "547321f2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "🗓️ Today's date: Wednesday, 2024-03-27\n", - "📖 \n", - "\n", - "I am an expert in air quality, but I'm unable to assist you at the moment.\n" + "===============\n", + "I am an expert in air quality analysis.\n" ] } ], @@ -483,24 +319,24 @@ }, { "cell_type": "code", - "execution_count": 11, - "id": "c346ae89", + "execution_count": 10, + "id": "ad9b2229", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Finished: Reading data from Hopsworks, using ArrowFlight (8.87s) \n", - "🗓️ Today's date: Wednesday, 2024-03-27\n", + "Finished: Reading data from Hopsworks, using ArrowFlight (8.25s) \n", + "🗓️ Today's date: Tuesday, 2024-05-14\n", "📖 Air Quality Measurements for New York:\n", "Date: 2024-01-10; Air Quality: 7.2\n", "Date: 2024-01-11; Air Quality: 5.9\n", "Date: 2024-01-12; Air Quality: 10.8\n", "Date: 2024-01-13; Air Quality: 5.9\n", "Date: 2024-01-14; Air Quality: 5.1\n", - "\n", - "The air quality in New York from January 10th to January 14th was generally within a safe range. The average air quality during this period was 6.9, which indicates good air quality. This is a suitable time for outdoor activities, such as going for a walk or bike ride.\n" + "===============\n", + "The air quality in New York from January 10th to January 14th was generally moderate. The measurements show that the air quality fluctuated during this period, with a high of 10.8 on January 12th and lows of 5.1 and 5.9 on different days. Overall, it's a good time to be outside and enjoy the fresh air, but you may want to avoid strenuous outdoor activities on the 12th.\n" ] } ], @@ -523,24 +359,24 @@ }, { "cell_type": "code", - "execution_count": 12, - "id": "a6a3cbda", + "execution_count": 11, + "id": "693b98bf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Finished: Reading data from Hopsworks, using ArrowFlight (8.12s) \n", - "🗓️ Today's date: Wednesday, 2024-03-27\n", + "Finished: Reading data from Hopsworks, using ArrowFlight (8.58s) \n", + "🗓️ Today's date: Tuesday, 2024-05-14\n", "📖 Air Quality Measurements for New York:\n", "Date: 2024-01-10; Air Quality: 7.2\n", "Date: 2024-01-11; Air Quality: 5.9\n", "Date: 2024-01-12; Air Quality: 10.8\n", "Date: 2024-01-13; Air Quality: 5.9\n", "Date: 2024-01-14; Air Quality: 5.1\n", - "\n", - "The maximum air quality in New York from 2024-01-10 to 2024-01-14 was on 2024-01-12 with an air quality level of 10.8. This level is considered unhealthy for sensitive groups and may cause breathing difficulties for some individuals. It is advisable to limit outdoor activities on days with such high air quality levels.\n" + "===============\n", + "The maximum air quality during that period in New York was on January 12th with an air quality of 10.8. This level is considered to be unhealthy for sensitive groups, and it is advisable to limit outdoor activities.\n" ] } ], @@ -563,24 +399,24 @@ }, { "cell_type": "code", - "execution_count": 13, - "id": "71cc8289", + "execution_count": 12, + "id": "a55195a0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Finished: Reading data from Hopsworks, using ArrowFlight (8.35s) \n", - "🗓️ Today's date: Wednesday, 2024-03-27\n", + "Finished: Reading data from Hopsworks, using ArrowFlight (7.79s) \n", + "🗓️ Today's date: Tuesday, 2024-05-14\n", "📖 Air Quality Measurements for New York:\n", "Date: 2024-01-10; Air Quality: 7.2\n", "Date: 2024-01-11; Air Quality: 5.9\n", "Date: 2024-01-12; Air Quality: 10.8\n", "Date: 2024-01-13; Air Quality: 5.9\n", "Date: 2024-01-14; Air Quality: 5.1\n", - "\n", - "The minimum air quality during that period was on 2024-01-14, with an air quality level of 5.1. This is considered to be good air quality, which means it is safe to go for a walk or engage in outdoor activities.\n" + "===============\n", + "The minimum air quality during that period in New York was on January 14th, with an air quality of 5.1. This indicates that the air quality on that day was quite good, and it would be safe for you to go for a walk or engage in outdoor activities.\n" ] } ], @@ -603,20 +439,20 @@ }, { "cell_type": "code", - "execution_count": 14, - "id": "d89d46e8", + "execution_count": 13, + "id": "7be0351d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Finished: Reading data from Hopsworks, using ArrowFlight (8.02s) \n", - "🗓️ Today's date: Wednesday, 2024-03-27\n", + "Finished: Reading data from Hopsworks, using ArrowFlight (7.90s) \n", + "🗓️ Today's date: Tuesday, 2024-05-14\n", "📖 Air Quality Measurements for London:\n", - "Date: 2024-03-26; Air Quality: 12.7\n", - "\n", - "The air quality in London yesterday, on March 26th, was 12.7. This indicates that the air quality was within the safe range, making it suitable for outdoor activities.\n" + "Date: 2024-05-13; Air Quality: 10.5\n", + "===============\n", + "Yesterday, the air quality in London was safe for most people. However, it might have been slightly uncomfortable for those with respiratory issues.\n" ] } ], @@ -639,26 +475,27 @@ }, { "cell_type": "code", - "execution_count": 15, - "id": "48e76dfb", + "execution_count": 14, + "id": "30c6aca3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Finished: Reading data from Hopsworks, using ArrowFlight (8.49s) \n", - "🗓️ Today's date: Wednesday, 2024-03-27\n", + "Finished: Reading data from Hopsworks, using ArrowFlight (8.33s) \n", + "🗓️ Today's date: Tuesday, 2024-05-14\n", "📖 Air Quality Measurements for London:\n", - "Date: 2024-03-18; Air Quality: 12.7\n", - "Date: 2024-03-19; Air Quality: 9.7\n", - "Date: 2024-03-20; Air Quality: 15.6\n", - "Date: 2024-03-21; Air Quality: 16.7\n", - "Date: 2024-03-22; Air Quality: 8.7\n", - "Date: 2024-03-23; Air Quality: 5.4\n", - "Date: 2024-03-24; Air Quality: 6.4\n", - "\n", - "Last week in London, the air quality was generally good. On 2024-03-19, it was slightly polluted with an air quality of 9.7. The air quality improved on 2024-03-20, reaching a moderate level of 15.6. It was quite clean on 2024-03-21 with an air quality of 16.7. However, it became slightly polluted again on 2024-03-22 with an air quality of 8.7. The air quality improved significantly on 2024-03-23, reaching a healthy level of 5.4. On 2024-03-24, the air quality was slightly better than the previous day with an air quality of 6.4. Overall, the air quality last week in London was mostly good with some slight fluctuations.\n" + "Date: 2024-05-07; Air Quality: 14.2\n", + "Date: 2024-05-08; Air Quality: 15.1\n", + "Date: 2024-05-09; Air Quality: 23.4\n", + "Date: 2024-05-10; Air Quality: 26.2\n", + "Date: 2024-05-11; Air Quality: 23.1\n", + "Date: 2024-05-12; Air Quality: 16.5\n", + "Date: 2024-05-13; Air Quality: 10.5\n", + "Date: 2024-05-14; Air Quality: 5.9\n", + "===============\n", + "Last week in London, the air quality was generally moderate to good. The readings for the days you provided show that the air quality was improving over the week, with levels ranging from 5.9 on May 14th to 14.2 on May 7th. Overall, the air quality was safe for most activities, but it would be advisable to check for any local advisories before engaging in outdoor activities.\n" ] } ], @@ -681,31 +518,31 @@ }, { "cell_type": "code", - "execution_count": 16, - "id": "dd764369", + "execution_count": 15, + "id": "3ac41382", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Finished: Reading data from Hopsworks, using ArrowFlight (8.11s) \n", - "🗓️ Today's date: Wednesday, 2024-03-27\n", + "Finished: Reading data from Hopsworks, using ArrowFlight (7.92s) \n", + "🗓️ Today's date: Tuesday, 2024-05-14\n", "📖 Air Quality Measurements for London:\n", - "Date: 2024-03-27; Air Quality: 6.4\n", - "Date: 2024-03-28; Air Quality: 9.77\n", - "Date: 2024-03-29; Air Quality: 8.71\n", - "Date: 2024-03-30; Air Quality: 8.24\n", - "Date: 2024-03-31; Air Quality: 8.57\n", - "Date: 2024-04-01; Air Quality: 8.66\n", - "Date: 2024-04-02; Air Quality: 8.18\n", - "\n", - "The air quality in London on 2024-04-02 is expected to be at a level of 8.18. This is considered to be within the moderate range, which means it is safe for most people to go outside and engage in outdoor activities. However, sensitive individuals, such as those with respiratory or cardiovascular conditions, should take precautions and limit their exposure to air pollution.\n" + "Date: 2024-05-14; Air Quality: 5.9\n", + "Date: 2024-05-15; Air Quality: 10.88\n", + "Date: 2024-05-16; Air Quality: 11.99\n", + "Date: 2024-05-17; Air Quality: 11.6\n", + "Date: 2024-05-18; Air Quality: 11.56\n", + "Date: 2024-05-19; Air Quality: 11.52\n", + "Date: 2024-05-20; Air Quality: 11.52\n", + "===============\n", + "The air quality in London on 2024-05-20 is expected to be at a moderate level, with an Air Quality index of 11.52. This is within the safe range, but it might not be the best day for outdoor activities, especially if you have respiratory issues. It would be advisable to keep an eye on the air quality and possibly choose a different day for more strenuous activities.\n" ] } ], "source": [ - "QUESTION3 = \"What will the air quality be like in London in 2024-04-02?\"\n", + "QUESTION3 = \"What will the air quality be like in London in 2024-05-20?\"\n", "\n", "response3 = generate_response(\n", " QUESTION3, \n", @@ -723,21 +560,21 @@ }, { "cell_type": "code", - "execution_count": 17, - "id": "210cde3d", + "execution_count": 16, + "id": "1c039b2e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Finished: Reading data from Hopsworks, using ArrowFlight (9.34s) \n", - "🗓️ Today's date: Wednesday, 2024-03-27\n", + "Finished: Reading data from Hopsworks, using ArrowFlight (7.62s) \n", + "🗓️ Today's date: Tuesday, 2024-05-14\n", "📖 Air Quality Measurements for Chicago:\n", - "Date: 2024-03-27; Air Quality: 3.0\n", - "Date: 2024-03-28; Air Quality: 8.06\n", - "\n", - "Based on the air quality measurements for Chicago, tomorrow's air quality is expected to be better than today. The air quality on 2024-03-28 is measured at 8.06, which is within the safe range for most people. It is generally safe to go outside and engage in outdoor activities, but people with respiratory issues may still want to take precautions.\n" + "Date: 2024-05-14; Air Quality: 15.0\n", + "Date: 2024-05-15; Air Quality: 8.76\n", + "===============\n", + "Tomorrow, the air quality in Chicago is expected to be significantly better than today. The air quality measurement for tomorrow, based on our data, is 8.76. This level indicates that the air quality is considered good, and it is safe for outdoor activities such as walking or cycling.\n" ] } ], @@ -760,31 +597,25 @@ }, { "cell_type": "code", - "execution_count": 18, - "id": "f3c2ab43", + "execution_count": 17, + "id": "c5dc44bb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Finished: Reading data from Hopsworks, using ArrowFlight (8.11s) \n", - "🗓️ Today's date: Wednesday, 2024-03-27\n", + "Finished: Reading data from Hopsworks, using ArrowFlight (7.82s) \n", + "🗓️ Today's date: Tuesday, 2024-05-14\n", "📖 Air Quality Measurements for London:\n", - "Date: 2024-03-27; Air Quality: 6.4\n", - "Date: 2024-03-28; Air Quality: 9.77\n", - "Date: 2024-03-29; Air Quality: 8.71\n", - "Date: 2024-03-30; Air Quality: 8.24\n", - "Date: 2024-03-31; Air Quality: 8.57\n", - "Date: 2024-04-01; Air Quality: 8.66\n", - "Date: 2024-04-02; Air Quality: 8.18\n", - "Date: 2024-04-03; Air Quality: 8.18\n", - "Date: 2024-04-04; Air Quality: 8.18\n", - "Date: 2024-04-05; Air Quality: 8.18\n", - "Date: 2024-04-06; Air Quality: 8.18\n", - "Date: 2024-04-07; Air Quality: 8.18\n", - "\n", - "Based on the air quality measurements for London, next Sunday, 2024-04-07, the air quality is expected to be 8.18. This level of air quality is considered safe, but it might be better to avoid strenuous outdoor activities if you have respiratory issues.\n" + "Date: 2024-05-14; Air Quality: 5.9\n", + "Date: 2024-05-15; Air Quality: 10.88\n", + "Date: 2024-05-16; Air Quality: 11.99\n", + "Date: 2024-05-17; Air Quality: 11.6\n", + "Date: 2024-05-18; Air Quality: 11.56\n", + "Date: 2024-05-19; Air Quality: 11.52\n", + "===============\n", + "Based on the air quality measurements for London, next Sunday, 2024-05-19, the air quality is expected to be at 11.52. This level falls within the moderate range, which means it is safe for most people to go outside, but those with respiratory issues may want to limit their exposure. It is advisable to check for any local alerts or updates before planning any outdoor activities.\n" ] } ], @@ -807,45 +638,29 @@ }, { "cell_type": "code", - "execution_count": 19, - "id": "956f3636", + "execution_count": 18, + "id": "aa28c252", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Finished: Reading data from Hopsworks, using ArrowFlight (7.96s) \n", - "🗓️ Today's date: Wednesday, 2024-03-27\n", + "Finished: Reading data from Hopsworks, using ArrowFlight (7.65s) \n", + "🗓️ Today's date: Tuesday, 2024-05-14\n", "📖 Air Quality Measurements for London:\n", - "Date: 2024-03-27; Air Quality: 6.4\n", - "Date: 2024-03-28; Air Quality: 9.77\n", - "Date: 2024-03-29; Air Quality: 8.71\n", - "Date: 2024-03-30; Air Quality: 8.24\n", - "Date: 2024-03-31; Air Quality: 8.57\n", - "Date: 2024-04-01; Air Quality: 8.66\n", - "Date: 2024-04-02; Air Quality: 8.18\n", - "Date: 2024-04-03; Air Quality: 8.18\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "The air quality on April 3 in London is expected to be safe for outdoor activities. The air quality index is around 8.18, which falls within the moderate range. This means that while the air may not be perfect, it is generally safe for most people to go outside and engage in physical activities.\n" + "Date: 2024-05-14; Air Quality: 5.9\n", + "Date: 2024-05-15; Air Quality: 10.88\n", + "Date: 2024-05-16; Air Quality: 11.99\n", + "Date: 2024-05-17; Air Quality: 11.6\n", + "Date: 2024-05-18; Air Quality: 11.56\n", + "===============\n", + "The air quality on May 18 in London is expected to be slightly unhealthy for sensitive groups, with a reading of 11.56. While it may not be ideal for everyone, those with respiratory issues should take extra precautions. It is still generally safe for most people to go outside, but you may want to limit prolonged exposure and consider using a mask.\n" ] } ], "source": [ - "QUESTION7 = \"What will the air quality be like on April 3 in London?\"\n", + "QUESTION7 = \"What will the air quality be like on May 18 in London?\"\n", "\n", "response7 = generate_response(\n", " QUESTION7,\n", @@ -863,16 +678,17 @@ }, { "cell_type": "code", - "execution_count": 20, - "id": "951363d5", + "execution_count": 19, + "id": "f65aae43", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "🗓️ Today's date: Wednesday, 2024-03-27\n", - "📖 \n" + "🗓️ Today's date: Tuesday, 2024-05-14\n", + "📖 \n", + "===============\n" ] }, { @@ -886,20 +702,23 @@ "name": "stdout", "output_type": "stream", "text": [ + "Sure, I'd be happy to explain the different PM2.5 air quality levels. PM2.5 refers to particulate matter with a diameter of 2.5 micrometers or less. It's a measure of the concentration of these tiny particles in the air.\n", "\n", - "Certainly! PM2.5 levels are categorized as follows:\n", + "Here are the general air quality categories based on PM2.5 levels:\n", "\n", - "1. Good (below 12 µg/m³): At this level, the air quality is considered safe for everyone, including those who are sensitive to air pollution. It's a great time to go for a walk or engage in outdoor activities.\n", + "1. Good (0-12 µg/m³): At this level, the air quality is considered to be safe and suitable for all populations, including those who are sensitive to air pollution.\n", "\n", - "2. Moderate (12-18 µg/m³): While the air quality is generally safe, people with respiratory issues or sensitivities may experience some discomfort. It's still suitable for outdoor activities, but those with breathing concerns should take precautions.\n", + "2. Moderate (12-35 µg/m³): The air quality is generally safe, but people who are sensitive to air pollution may experience mild discomfort. It's usually safe for most activities, including outdoor exercise.\n", "\n", - "3. Poor (18-25 µg/m³): At this level, air quality is considered unhealthy for sensitive groups, such as children, the elderly, and those with respiratory conditions. It's advisable to limit prolonged outdoor exertion and consider indoor activities.\n", + "3. Unhealthy for Sensitive Groups (35-55 µg/m³): People with lung or heart conditions, children, and the elderly may experience health effects. It's generally safe for most people, but sensitive groups should avoid prolonged outdoor exertion.\n", "\n", - "4. Very poor (25-35 µg/m³): The air quality is unhealthy for the general population, and it's advised to minimize outdoor activities, especially for sensitive groups.\n", + "4. Unhealthy (55-150 µg/m³): Everyone may experience health effects, including respiratory symptoms, eye irritation, and aggravation of heart and lung diseases. It's advisable to limit outdoor activities, especially for children and people with pre-existing health conditions.\n", "\n", - "5. Hazardous (above 35 µg/m³): At this level, air quality is considered severely polluted and poses a significant health risk to everyone. It's crucial to limit outdoor activities and stay indoors with air filtration systems in place.\n", + "5. Very Unhealthy (150-250 µg/m³): This level poses a significant health risk for everyone, with symptoms like respiratory problems, heart attacks, and premature death possible. It's crucial to avoid all outdoor activities, especially for sensitive groups.\n", "\n", - "Remember, these are general guidelines, and it's always best to consult local air quality advisories for the most accurate and up-to-date information.\n" + "6. Hazardous (>250 µg/m³): This is an emergency situation where the air quality is extremely dangerous. It can cause serious health effects, including serious respiratory problems, heart attacks, and even death. Everyone should avoid all outdoor activities and stay indoors with air filtration systems in place.\n", + "\n", + "Please remember that these are general guidelines, and local air quality standards may vary. It's always best to consult local air quality reports for the most accurate information.\n" ] } ], @@ -922,7 +741,7 @@ }, { "cell_type": "markdown", - "id": "b711f003", + "id": "41f8615a", "metadata": {}, "source": [ "---\n", @@ -932,8 +751,8 @@ }, { "cell_type": "code", - "execution_count": 21, - "id": "ad2bd599", + "execution_count": 20, + "id": "5538f395", "metadata": {}, "outputs": [], "source": [ @@ -946,8 +765,8 @@ }, { "cell_type": "code", - "execution_count": 22, - "id": "4ea97858", + "execution_count": 21, + "id": "5618e195", "metadata": {}, "outputs": [ { @@ -968,27 +787,28 @@ }, { "cell_type": "code", - "execution_count": 23, - "id": "1eeea10a", + "execution_count": 22, + "id": "62ae66ea", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "2024-03-27 14:42:13,003 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "Finished: Reading data from Hopsworks, using ArrowFlight (8.23s) \n", - "🗓️ Today's date: Wednesday, 2024-03-27\n", + "2024-05-14 20:28:16,300 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Finished: Reading data from Hopsworks, using ArrowFlight (8.38s) \n", + "🗓️ Today's date: Tuesday, 2024-05-14\n", "📖 Air Quality Measurements for London:\n", - "Date: 2024-03-18; Air Quality: 12.7\n", - "Date: 2024-03-19; Air Quality: 9.7\n", - "Date: 2024-03-20; Air Quality: 15.6\n", - "Date: 2024-03-21; Air Quality: 16.7\n", - "Date: 2024-03-22; Air Quality: 8.7\n", - "Date: 2024-03-23; Air Quality: 5.4\n", - "Date: 2024-03-24; Air Quality: 6.4\n", - "2024-03-27 14:42:41,602 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "Last week in London, the air quality varied, starting off at a moderate level of 12.7 on the 18th. It slightly improved to 9.7 by the 19th, indicating relatively clean air that would be quite suitable for outdoor activities. There was a slight uptick in pollutants midweek, with air quality readings reaching 15.6 and 16.7 on the 20th and 21st respectively, suggesting a decrease in air quality but still remaining within a range considered safe, although individuals with sensitivity to air pollution might have experienced some discomfort. The air quality then improved significantly towards the end of the week, dropping to very good levels of 8.7 on the 22nd, 5.4 on the 23rd, and 6.4 on the 24th, indicating cleaner air and ideal conditions for spending time outdoors.\n" + "Date: 2024-05-06; Air Quality: 16.4\n", + "Date: 2024-05-07; Air Quality: 14.2\n", + "Date: 2024-05-08; Air Quality: 15.1\n", + "Date: 2024-05-09; Air Quality: 23.4\n", + "Date: 2024-05-10; Air Quality: 26.2\n", + "Date: 2024-05-11; Air Quality: 23.1\n", + "Date: 2024-05-12; Air Quality: 16.5\n", + "Date: 2024-05-13; Air Quality: 10.5\n", + "2024-05-14 20:28:40,843 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Last week in London, the air quality varied, starting at a good level of 16.4 on the 6th of May, indicating it was quite safe for outdoor activities. It slightly improved further on the 7th with a level of 14.2, and remained fairly stable and good on the 8th at 15.1, suggesting that conditions were conducive for spending time outside. However, there was a noticeable increase in pollution levels starting from the 9th of May, peaking on the 10th with an air quality level of 26.2, which indicated a decline in air quality and might have made outdoor activities less advisable for sensitive groups. The air quality then slightly improved to 23.1 on the 11th, but still remained at levels where people with respiratory conditions should be cautious. The week ended with a return to a good air quality level of 16.5 on the 12th, making it safer again for outdoor activities. By the 13th, the air quality significantly improved to an excellent level of 10.5, suggesting very clean air and optimal conditions for all activities outdoors.\n" ] } ], @@ -1008,22 +828,22 @@ }, { "cell_type": "code", - "execution_count": 24, - "id": "441f51e2", + "execution_count": 23, + "id": "5978d4e5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "2024-03-27 14:42:43,248 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "Finished: Reading data from Hopsworks, using ArrowFlight (9.63s) \n", - "🗓️ Today's date: Wednesday, 2024-03-27\n", + "2024-05-14 20:28:42,202 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "Finished: Reading data from Hopsworks, using ArrowFlight (7.96s) \n", + "🗓️ Today's date: Tuesday, 2024-05-14\n", "📖 Air Quality Measurements for Chicago:\n", - "Date: 2024-03-27; Air Quality: 3.0\n", - "Date: 2024-03-28; Air Quality: 8.06\n", - "2024-03-27 14:43:02,325 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", - "The air quality in Chicago tomorrow is expected to be at 8.06, which indicates a moderate level of pollutants. It's still relatively safe for most people, but individuals who are especially sensitive to air pollution might want to limit their outdoor activities. It's a good day to keep an eye on any changes if you have respiratory conditions or other health concerns related to air quality.\n" + "Date: 2024-05-14; Air Quality: 15.0\n", + "Date: 2024-05-15; Air Quality: 8.76\n", + "2024-05-14 20:28:57,762 INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", + "The air quality in Chicago tomorrow will be excellent, with a reading of 8.76. It will be a wonderful day to enjoy outdoor activities, such as going for a walk or a bike ride, as the air will be very clean and healthy to breathe.\n" ] } ], @@ -1044,7 +864,7 @@ }, { "cell_type": "markdown", - "id": "d7d5577e", + "id": "76fd05ea", "metadata": {}, "source": [ "---" @@ -1053,7 +873,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -1067,7 +887,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.10.11" } }, "nbformat": 4, diff --git a/advanced_tutorials/air_quality/functions/llm_chain.py b/advanced_tutorials/air_quality/functions/llm_chain.py index d4ee9fdd..fbdb6ce3 100644 --- a/advanced_tutorials/air_quality/functions/llm_chain.py +++ b/advanced_tutorials/air_quality/functions/llm_chain.py @@ -2,8 +2,8 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig from langchain.llms import HuggingFacePipeline from langchain.prompts import PromptTemplate -from langchain.chains.llm import LLMChain from langchain.memory import ConversationBufferWindowMemory +from langchain.schema.output_parser import StrOutputParser import torch import datetime from typing import Any, Dict, Union @@ -63,36 +63,36 @@ def get_prompt_template(): instructions, previous conversation, context, date and user query. """ prompt_template = """<|im_start|>system -You are one of the best air quality experts in the world. - -###INSTRUCTIONS: -- If you don't know the answer, you will respond politely that you cannot help. -- Use the context table with air quality indicators for city provided by user to generate your answer. -- You answer should be at least one sentence. -- Do not show any calculations to the user. -- Make sure that you use correct air quality indicators for the corresponding date. -- Add a rich analysis of the air quality level, such as whether it is safe, whether to go for a walk, etc. -- Do not mention in your answer that you are using context table. -<|im_end|> - -### CONTEXT: -{context} - -IMPORTANT: Today is {date_today}. - -<|im_start|>user -{question}<|im_end|> -<|im_start|>assistant""" + You are one of the best air quality experts in the world. + + ###INSTRUCTIONS: + - If you don't know the answer, you will respond politely that you cannot help. + - Use the context table with air quality indicators for city provided by user to generate your answer. + - You answer should be at least one sentence. + - Do not show any calculations to the user. + - Make sure that you use correct air quality indicators for the corresponding date. + - Add a rich analysis of the air quality level, such as whether it is safe, whether to go for a walk, etc. + - Do not mention in your answer that you are using context table. + <|im_end|> + + ### CONTEXT: + {context} + + IMPORTANT: Today is {date_today}. + + <|im_start|>user + {question}<|im_end|> + <|im_start|>assistant""" return prompt_template -def get_llm_chain(model_llm, tokenizer): +def get_llm_chain(tokenizer, model_llm): """ Create and configure a language model chain. Args: - model_llm: The pre-trained language model for text generation. tokenizer: The tokenizer corresponding to the language model. + model_llm: The pre-trained language model for text generation. Returns: LLMChain: The configured language model chain. @@ -124,11 +124,7 @@ def get_llm_chain(model_llm, tokenizer): ) # Create LLM chain - llm_chain = LLMChain( - llm=mistral_llm, - prompt=prompt, - verbose=False, - ) + llm_chain = prompt | mistral_llm | StrOutputParser() return llm_chain @@ -176,6 +172,7 @@ def generate_response( if verbose: print(f"🗓️ Today's date: {date_today}") print(f'📖 {context}') + print('===' * 5) # Invoke the language model chain with relevant context model_output = llm_chain.invoke({ @@ -185,7 +182,7 @@ def generate_response( }) # Return the generated text from the model output - return model_output['text'].split('<|im_start|>assistant')[-1] + return model_output.split('<|im_start|>assistant')[-1].strip() def generate_response_openai( @@ -212,6 +209,7 @@ def generate_response_openai( if verbose: print(f"🗓️ Today's date: {date_today}") print(f'📖 {context}') + print('===' * 5) instructions = get_prompt_template().split('<|im_start|>user')[0]