From ac82da83a0e20659f8d5bc7062415fba5b5c9e3e Mon Sep 17 00:00:00 2001 From: Maksym Zhytnikov <63515947+Maxxx-zh@users.noreply.github.com> Date: Mon, 26 Feb 2024 11:15:09 +0200 Subject: [PATCH 1/3] Function Calling --- .../1_air_quality_feature_backfill.ipynb | 965 +++++++++--- .../2_air_quality_feature_pipeline.ipynb | 936 ++++++++++-- .../3_air_quality_training_pipeline.ipynb | 462 ++++-- .../4_air_quality_batch_inference.ipynb | 549 ++++++- .../air_quality/5_function_calling.ipynb | 1309 +++++++++++++++++ 5 files changed, 3682 insertions(+), 539 deletions(-) create mode 100644 advanced_tutorials/air_quality/5_function_calling.ipynb diff --git a/advanced_tutorials/air_quality/1_air_quality_feature_backfill.ipynb b/advanced_tutorials/air_quality/1_air_quality_feature_backfill.ipynb index f203e073..1c4bb853 100644 --- a/advanced_tutorials/air_quality/1_air_quality_feature_backfill.ipynb +++ b/advanced_tutorials/air_quality/1_air_quality_feature_backfill.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "73ee3ec9", + "id": "3cdebecf", "metadata": {}, "source": [ "# **Hopsworks Feature Store** \n", @@ -12,27 +12,42 @@ "**Note**: This tutorial does not support Google Colab.\n", "\n", "## ๐Ÿ—’๏ธ This notebook is divided into the following sections:\n", - "1. Fetch historical data\n", - "2. Connect to the Hopsworks feature store\n", - "3. Create feature groups and insert them to the feature store\n", + "\n", + "1. Fetch historical data.\n", + "2. Connect to the Hopsworks feature store.\n", + "3. 
Create feature groups and insert them to the feature store.\n", "\n", "![tutorial-flow](../../images/01_featuregroups.png)" ] }, { "cell_type": "markdown", - "id": "f04d5c5e", + "id": "0e2fd829", "metadata": {}, "source": [ - "### ๐Ÿ“ Imports" + "## ๐Ÿ“ Imports" ] }, { "cell_type": "code", - "execution_count": null, - "id": "a03d0127", + "execution_count": 1, + "id": "73f04813", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "tensorflow 2.11.0 requires protobuf<3.20,>=3.9.2, but you have protobuf 4.25.3 which is incompatible.\n", + "tensorboard 2.11.2 requires protobuf<4,>=3.9.2, but you have protobuf 4.25.3 which is incompatible.\n", + "ray 2.0.0 requires protobuf<4.0.0,>=3.15.3, but you have protobuf 4.25.3 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", + "\u001b[0m" + ] + } + ], "source": [ "!pip install -U hopsworks --quiet\n", "!pip install geopy folium streamlit-folium --q" @@ -40,8 +55,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "cd165941", + "execution_count": 2, + "id": "d46a2162", "metadata": {}, "outputs": [], "source": [ @@ -62,15 +77,7 @@ }, { "cell_type": "markdown", - "id": "ba9903fc", - "metadata": {}, - "source": [ - "---" - ] - }, - { - "cell_type": "markdown", - "id": "b7a1965a-0da7-4263-a68a-8b2e8cb753f1", + "id": "d4b00c77", "metadata": {}, "source": [ "## ๐ŸŒ Representing the Target cities " @@ -78,8 +85,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "bd578db1-69e7-4230-b3f2-807b8056283a", + "execution_count": 3, + "id": "0c4cefac", "metadata": { "tags": [] }, @@ -95,8 +102,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "ea972c52-bfad-465d-b1e1-50eeff99b482", + "execution_count": 5, + "id": "d47f6f59", "metadata": {}, "outputs": [], "source": [ @@ -109,13 +116,13 @@ " location=coords,\n", " popup=city_name,\n", " ).add_to(my_map)\n", - "my_map" + "#my_map" ] }, { "cell_type": "code", "execution_count": null, - "id": "fb5ecf81-647b-490a-92b1-f7e963413710", + "id": "53f1b78d", "metadata": {}, "outputs": [], "source": [ @@ -125,7 +132,7 @@ }, { "cell_type": "markdown", - "id": "2246ca9d", + "id": "970c179a", "metadata": {}, "source": [ "## ๐ŸŒซ Processing Air Quality data" @@ -133,7 +140,7 @@ }, { "cell_type": "markdown", - "id": "b4a1c5d1", + "id": "bbcf628e", "metadata": {}, "source": [ "### [๐Ÿ‡ช๐Ÿ‡บ EEA](https://discomap.eea.europa.eu/map/fme/AirQualityExport.htm)\n", @@ -142,12 +149,39 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "96b8be01-6286-4886-8043-56e0e49b314e", + "execution_count": 6, + "id": "c4a76e26", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": 
[ + "{'Amsterdam': [52.37, 4.89],\n", + " 'Athina': [37.98, 23.73],\n", + " 'Berlin': [52.52, 13.39],\n", + " 'Gdansk': [54.37, 18.61],\n", + " 'Krakรณw': [50.06, 19.94],\n", + " 'London': [51.51, -0.13],\n", + " 'Madrid': [40.42, -3.7],\n", + " 'Marseille': [43.3, 5.37],\n", + " 'Milano': [45.46, 9.19],\n", + " 'Mรผnchen': [48.14, 11.58],\n", + " 'Napoli': [40.84, 14.25],\n", + " 'Paris': [48.85, 2.35],\n", + " 'Sevilla': [37.39, -6.0],\n", + " 'Stockholm': [59.33, 18.07],\n", + " 'Tallinn': [59.44, 24.75],\n", + " 'Varna': [43.21, 27.92],\n", + " 'Wien': [48.21, 16.37]}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# EU Cities \n", "target_cities[\"EU\"]" @@ -155,47 +189,96 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "5bb2a868-5f3a-4065-b651-318c24826b97", + "execution_count": 12, + "id": "65dff77b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โ›ณ๏ธ Size of this dataframe: (63548, 3)\n", + "โ›ณ๏ธ Missing Values: 0\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
city_namedatepm2_5
11887Gdansk2014-09-2123.0
17498Krakรณw2019-10-2356.0
42593Paris2016-08-047.0
\n", + "
" + ], + "text/plain": [ + " city_name date pm2_5\n", + "11887 Gdansk 2014-09-21 23.0\n", + "17498 Krakรณw 2019-10-23 56.0\n", + "42593 Paris 2016-08-04 7.0" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Read the CSV file from the specified URL into a pandas DataFrame\n", - "df_eu = pd.read_csv(\"https://repo.hops.works/dev/davit/air_quality/backfill_pm2_5_eu.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5620df22-f744-4550-a81a-7e5d71aae542", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Check for missing values in the 'df_eu' DataFrame\n", - "df_eu.isna().sum().sum()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b0e23728-a01d-45bc-bf25-4a9c77f21d66", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ + "df_eu = pd.read_csv(\"https://repo.hops.works/dev/davit/air_quality/backfill_pm2_5_eu.csv\")\n", + "\n", "# Print the size of the 'df_eu' DataFrame (number of rows and columns)\n", "print(\"โ›ณ๏ธ Size of this dataframe:\", df_eu.shape)\n", "\n", + "# Check for missing values in the 'df_eu' DataFrame\n", + "print(f'โ›ณ๏ธ Missing Values: {df_eu.isna().sum().sum()}')\n", + "\n", "# Display a random sample of three rows from the 'df_eu' DataFrame\n", "df_eu.sample(3)" ] }, { "cell_type": "markdown", - "id": "c2e45567-dd6b-4e5e-a153-82a2f4f32fbc", + "id": "3f9ca2fd", "metadata": {}, "source": [ "### [๐Ÿ‡บ๐Ÿ‡ธ USEPA](https://aqs.epa.gov/aqsweb/documents/data_api.html#daily)\n", @@ -206,12 +289,35 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "c4952759-0fb9-4229-8b78-2e37cffb144d", + "execution_count": 13, + "id": "26e9d8f1", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'Albuquerque': [35.08, -106.65],\n", + " 'Atlanta': [33.75, -84.39],\n", + " 'Chicago': [41.88, -87.62],\n", + " 'Columbus': [39.96, -83.0],\n", + " 'Dallas': [32.78, 
-96.8],\n", + " 'Denver': [39.74, -104.98],\n", + " 'Houston': [29.76, -95.37],\n", + " 'Los Angeles': [34.05, -118.24],\n", + " 'New York': [40.71, -74.01],\n", + " 'Phoenix-Mesa': [33.66, -112.04],\n", + " 'Salt Lake City': [40.76, -111.89],\n", + " 'San Francisco': [37.78, -122.42],\n", + " 'Tampa': [27.95, -82.46]}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# US Cities \n", "target_cities[\"US\"]" @@ -219,49 +325,98 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "c6aceaee-9431-48fd-818a-41fbdd07575c", + "execution_count": 16, + "id": "88aeafb6", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โ›ณ๏ธ Size of this dataframe: (46037, 3)\n", + "โ›ณ๏ธ Missing Values: 0\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datecity_namepm2_5
214762015-01-14Houston11.3
263212018-11-28Los Angeles7.8
430022014-09-01Tampa11.8
\n", + "
" + ], + "text/plain": [ + " date city_name pm2_5\n", + "21476 2015-01-14 Houston 11.3\n", + "26321 2018-11-28 Los Angeles 7.8\n", + "43002 2014-09-01 Tampa 11.8" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Read the CSV file from the specified URL into a pandas DataFrame\n", - "df_us = pd.read_csv(\"https://repo.hops.works/dev/davit/air_quality/backfill_pm2_5_us.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4e7ff20e-8a1a-4fa3-b801-71beead7b5f2", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Check for missing values in the 'df_us' DataFrame\n", - "df_us.isna().sum().sum()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3818e3e1-8674-4634-9023-92be8410fba5", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ + "df_us = pd.read_csv(\"https://repo.hops.works/dev/davit/air_quality/backfill_pm2_5_us.csv\")\n", + "\n", "# Print the size of the 'df_us' DataFrame (number of rows and columns)\n", "print(\"โ›ณ๏ธ Size of this dataframe:\", df_us.shape)\n", "\n", + "# Check for missing values in the 'df_us' DataFrame\n", + "print(f'โ›ณ๏ธ Missing Values: {df_us.isna().sum().sum()}')\n", + "\n", "# Display a random sample of three rows from the 'df_us' DataFrame\n", "df_us.sample(3)" ] }, { "cell_type": "markdown", - "id": "25557752-31c8-4da9-a52c-4415c4d20ae3", + "id": "3d9caa58", "metadata": {}, "source": [ "### ๐Ÿข Processing special city - `Seattle`\n", @@ -271,72 +426,135 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "2f54d2cb-991c-47cb-a686-76c9f7a87170", + "execution_count": 15, + "id": "13e7c33a", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'Bellevue-SE 12th St': [47.60086, -122.1484],\n", + " 'DARRINGTON - FIR ST (Darrington High School)': [48.2469, -121.6031],\n", + " 'KENT - JAMES & CENTRAL': [47.38611, -122.23028],\n", + " 'LAKE 
FOREST PARK TOWNE CENTER': [47.755, -122.2806],\n", + " 'MARYSVILLE - 7TH AVE (Marysville Junior High)': [48.05432, -122.17153],\n", + " 'NORTH BEND - NORTH BEND WAY': [47.49022, -121.77278],\n", + " 'SEATTLE - BEACON HILL': [47.56824, -122.30863],\n", + " 'SEATTLE - DUWAMISH': [47.55975, -122.33827],\n", + " 'SEATTLE - SOUTH PARK #2': [47.53091, -122.3208],\n", + " 'Seattle-10th & Weller': [47.59722, -122.31972],\n", + " 'TACOMA - ALEXANDER AVE': [47.2656, -122.3858],\n", + " 'TACOMA - L STREET': [47.1864, -122.4517],\n", + " 'Tacoma-S 36th St': [47.22634, -122.46256],\n", + " 'Tukwila Allentown': [47.49854, -122.27839],\n", + " 'Tulalip-Totem Beach Rd': [48.06534, -122.28519]}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "target_cities[\"Seattle\"]" ] }, { "cell_type": "code", - "execution_count": null, - "id": "31c8505d-68bc-40b6-be0f-42d8532dbd48", + "execution_count": 17, + "id": "78ee1447", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โ›ณ๏ธ Size of this dataframe: (46479, 3)\n", + "โ›ณ๏ธ Missing Values: 0\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
city_namedatepm2_5
8709SEATTLE - BEACON HILL2015-11-059.5
6634DARRINGTON - FIR ST (Darrington High School)2014-06-241.7
45134NORTH BEND - NORTH BEND WAY2023-01-120.3
\n", + "
" + ], + "text/plain": [ + " city_name date pm2_5\n", + "8709 SEATTLE - BEACON HILL 2015-11-05 9.5\n", + "6634 DARRINGTON - FIR ST (Darrington High School) 2014-06-24 1.7\n", + "45134 NORTH BEND - NORTH BEND WAY 2023-01-12 0.3" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Read the CSV file from the specified URL into a pandas DataFrame\n", - "df_seattle = pd.read_csv(\"https://repo.hops.works/dev/davit/air_quality/backfill_pm2_5_seattle.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2f6583c9-3b2a-41c6-a020-aeede88c4867", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Check for missing values in the 'df_seattle' DataFrame\n", - "df_seattle.isna().sum().sum()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "065a5b03-28f7-475c-9c6a-4340388157d8", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ + "df_seattle = pd.read_csv(\"https://repo.hops.works/dev/davit/air_quality/backfill_pm2_5_seattle.csv\")\n", + "\n", "# Print the size of the 'df_seattle' DataFrame (number of rows and columns)\n", "print(\"โ›ณ๏ธ Size of this dataframe:\", df_seattle.shape)\n", + "\n", + "# Check for missing values in the 'df_seattle' DataFrame\n", + "print(f'โ›ณ๏ธ Missing Values: {df_seattle.isna().sum().sum()}')\n", + "\n", + "# Display a random sample of three rows\n", "df_seattle.sample(3)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "e3b17ca4-0e9d-4207-ad62-90ea9c157def", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Value Counts\n", - "df_seattle['city_name'].value_counts()" - ] - }, { "cell_type": "markdown", - "id": "c278a55d-f083-4f95-b292-92e545b9c408", + "id": "d58b59fd", "metadata": {}, "source": [ "### ๐ŸŒŸ All together" @@ -344,12 +562,94 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "0d55ae92-4bf9-43ae-8841-6767f5f68bec", + "execution_count": 19, + "id": 
"a727a333", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โ›ณ๏ธ DF shape: (156064, 3)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
city_namedatepm2_5
106487Tampa2014-06-309.3
12453Gdansk2016-04-099.0
101342Salt Lake City2020-04-295.8
46538Sevilla2017-02-128.0
117821Seattle-10th & Weller2015-12-125.7
\n", + "
" + ], + "text/plain": [ + " city_name date pm2_5\n", + "106487 Tampa 2014-06-30 9.3\n", + "12453 Gdansk 2016-04-09 9.0\n", + "101342 Salt Lake City 2020-04-29 5.8\n", + "46538 Sevilla 2017-02-12 8.0\n", + "117821 Seattle-10th & Weller 2015-12-12 5.7" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Concatenate the DataFrames df_eu, df_us, and df_seattle along the rows and reset the index\n", "df_air_quality = pd.concat(\n", @@ -365,18 +665,18 @@ }, { "cell_type": "markdown", - "id": "22896049-441d-4baf-b717-415123cb39d7", + "id": "7ff3a932", "metadata": { "tags": [] }, "source": [ - "### ๐Ÿ›  Feature Engineering" + "## ๐Ÿ›  Feature Engineering" ] }, { "cell_type": "code", - "execution_count": null, - "id": "140b468a-e0c2-44a1-8e44-4cf393407eca", + "execution_count": 20, + "id": "d1ecd72e", "metadata": { "tags": [] }, @@ -388,12 +688,23 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "87dc89c0-72a7-4be6-b4e4-03d5d32be546", + "execution_count": 21, + "id": "c559d20d", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Apply feature engineering to the df_air_quality DataFrame using the air_quality.feature_engineer_aq() function\n", "df_air_quality = air_quality.feature_engineer_aq(df_air_quality)\n", @@ -407,12 +718,23 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "94f67c89-6b39-4748-b4be-6ed3c9d57f96", + "execution_count": 22, + "id": "92722e0e", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(154533, 31)" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Print the shape (number of rows and columns) of the df_air_quality DataFrame\n", "df_air_quality.shape" @@ -420,12 +742,32 @@ }, { "cell_type": "code", - 
"execution_count": null, - "id": "ed9bc7f1-d62e-4b1f-97af-6ecd30fe4b67", + "execution_count": 23, + "id": "00608459", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['city_name', 'date', 'pm2_5', 'pm_2_5_previous_1_day',\n", + " 'pm_2_5_previous_2_day', 'pm_2_5_previous_3_day',\n", + " 'pm_2_5_previous_4_day', 'pm_2_5_previous_5_day',\n", + " 'pm_2_5_previous_6_day', 'pm_2_5_previous_7_day', 'mean_7_days',\n", + " 'mean_14_days', 'mean_28_days', 'std_7_days', 'exp_mean_7_days',\n", + " 'exp_std_7_days', 'std_14_days', 'exp_mean_14_days', 'exp_std_14_days',\n", + " 'std_28_days', 'exp_mean_28_days', 'exp_std_28_days', 'year',\n", + " 'day_of_month', 'month', 'day_of_week', 'is_weekend', 'sin_day_of_year',\n", + " 'cos_day_of_year', 'sin_day_of_week', 'cos_day_of_week'],\n", + " dtype='object')" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Retrieve and display the column names of the df_air_quality DataFrame\n", "df_air_quality.columns" @@ -433,15 +775,7 @@ }, { "cell_type": "markdown", - "id": "88a9e0ef-e9d2-4e3c-91af-c4e619b8c906", - "metadata": {}, - "source": [ - "---" - ] - }, - { - "cell_type": "markdown", - "id": "4687e802", + "id": "2ad26f52", "metadata": { "tags": [] }, @@ -451,42 +785,124 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "c46283b4", + "execution_count": 27, + "id": "25410ee6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
city_namedatetemperature_maxtemperature_minprecipitation_sumrain_sumsnowfall_sumprecipitation_hourswind_speed_maxwind_gusts_maxwind_direction_dominant
0Amsterdam2013-01-019.25.510.210.20.014.032.062.6255
1Amsterdam2013-01-027.85.60.50.50.02.022.939.6251
2Amsterdam2013-01-0310.38.22.02.00.06.022.239.2255
\n", + "
" + ], + "text/plain": [ + " city_name date temperature_max temperature_min precipitation_sum \\\n", + "0 Amsterdam 2013-01-01 9.2 5.5 10.2 \n", + "1 Amsterdam 2013-01-02 7.8 5.6 0.5 \n", + "2 Amsterdam 2013-01-03 10.3 8.2 2.0 \n", + "\n", + " rain_sum snowfall_sum precipitation_hours wind_speed_max \\\n", + "0 10.2 0.0 14.0 32.0 \n", + "1 0.5 0.0 2.0 22.9 \n", + "2 2.0 0.0 6.0 22.2 \n", + "\n", + " wind_gusts_max wind_direction_dominant \n", + "0 62.6 255 \n", + "1 39.6 251 \n", + "2 39.2 255 " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Read the CSV file from the specified URL into a pandas DataFrame for weather data\n", - "df_weather = pd.read_csv(\"https://repo.hops.works/dev/davit/air_quality/backfill_weather.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1921b61c-d002-417e-88a6-9fe1cad0a7d4", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Count the occurrences of each unique value in the 'city_name' column of the df_weather DataFrame\n", - "df_weather.city_name.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8d5dcd0a", - "metadata": {}, - "outputs": [], - "source": [ + "df_weather = pd.read_csv(\"https://repo.hops.works/dev/davit/air_quality/backfill_weather.csv\")\n", + "\n", "# Display the first three rows of the df_weather DataFrame\n", "df_weather.head(3)" ] }, { "cell_type": "markdown", - "id": "cc9b7ad6", + "id": "bae7c356", "metadata": {}, "source": [ "---" @@ -494,8 +910,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "a8f886c3-a5ac-4370-a6a2-22838ab7409e", + "execution_count": 28, + "id": "5526fc60", "metadata": { "tags": [] }, @@ -516,26 +932,29 @@ }, { "cell_type": "markdown", - "id": "f2ebd846-0420-4e4c-8a5b-0827fa91c693", + "id": "6b35ec15", "metadata": {}, "source": [ - "---" - ] - }, - { - "cell_type": "markdown", - "id": "cb6f83ba", - "metadata": {}, - "source": [ - 
"### ๐Ÿ”ฎ Connecting to Hopsworks Feature Store " + "## ๐Ÿ”ฎ Connecting to Hopsworks Feature Store " ] }, { "cell_type": "code", - "execution_count": null, - "id": "dd068240", + "execution_count": 29, + "id": "cfe90a1e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connected. Call `.close()` to terminate connection gracefully.\n", + "\n", + "Logged in to project, explore it here https://snurran.hops.works/p/5242\n", + "Connected. Call `.close()` to terminate connection gracefully.\n" + ] + } + ], "source": [ "import hopsworks\n", "\n", @@ -546,7 +965,7 @@ }, { "cell_type": "markdown", - "id": "63d8c3b9", + "id": "cef1c97b", "metadata": {}, "source": [ "## ๐Ÿช„ Creating Feature Groups" @@ -554,7 +973,7 @@ }, { "cell_type": "markdown", - "id": "4a2515c4", + "id": "749153f8", "metadata": {}, "source": [ "### ๐ŸŒซ Air Quality Data" @@ -562,13 +981,62 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "9d7088a8", + "execution_count": 30, + "id": "475f6ee6", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "DeprecationWarning: Providing event_time as a single-element list is deprecated and will be dropped in future versions. Provide the feature_name string instead.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Feature Group created successfully, explore it at \n", + "https://snurran.hops.works/p/5242/fs/5190/fg/5194\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6677d288bc0746a48faf802be7032e40", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Uploading Dataframe: 0.00% | | Rows 0/154533 | Elapsed Time: 00:00 | Remaining Time: ?" 
+ ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Launching job: air_quality_1_offline_fg_materialization\n", + "Job started successfully, you can follow the progress at \n", + "https://snurran.hops.works/p/5242/jobs/named/air_quality_1_offline_fg_materialization/executions\n" + ] + }, + { + "data": { + "text/plain": [ + "(, None)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Get or create feature group\n", "air_quality_fg = fs.get_or_create_feature_group(\n", @@ -577,23 +1045,14 @@ " version=1,\n", " primary_key=['unix_time','city_name'],\n", " event_time=[\"unix_time\"],\n", - ") " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e04a975-bb58-42e2-9abd-90e68ae37864", - "metadata": {}, - "outputs": [], - "source": [ + ") \n", "# Insert data\n", "air_quality_fg.insert(df_air_quality)" ] }, { "cell_type": "markdown", - "id": "a73a9029", + "id": "855e3754", "metadata": {}, "source": [ "### ๐ŸŒฆ Weather Data" @@ -601,10 +1060,52 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "acc2b799", + "execution_count": 31, + "id": "2e99385d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Feature Group created successfully, explore it at \n", + "https://snurran.hops.works/p/5242/fs/5190/fg/5195\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7328db1bd3b84e769e7b4ac88c1416a6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Uploading Dataframe: 0.00% | | Rows 0/168975 | Elapsed Time: 00:00 | Remaining Time: ?" 
+ ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Launching job: weather_1_offline_fg_materialization\n", + "Job started successfully, you can follow the progress at \n", + "https://snurran.hops.works/p/5242/jobs/named/weather_1_offline_fg_materialization/executions\n" + ] + }, + { + "data": { + "text/plain": [ + "(, None)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Get or create feature group\n", "weather_fg = fs.get_or_create_feature_group(\n", @@ -613,27 +1114,17 @@ " version=1,\n", " primary_key=['unix_time','city_name'],\n", " event_time=[\"unix_time\"],\n", - ") " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9583b4d1-e2e3-4f56-9e5d-23caa0c49457", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ + ") \n", "# Insert data\n", "weather_fg.insert(df_weather)" ] }, { "cell_type": "markdown", - "id": "87c668dd", + "id": "ec169da5", "metadata": {}, "source": [ + "---\n", "## โญ๏ธ **Next:** Part 02: Feature Pipeline \n", " \n", "\n", @@ -657,7 +1148,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.10.11" } }, "nbformat": 4, diff --git a/advanced_tutorials/air_quality/2_air_quality_feature_pipeline.ipynb b/advanced_tutorials/air_quality/2_air_quality_feature_pipeline.ipynb index 580e8fc7..6240eef6 100644 --- a/advanced_tutorials/air_quality/2_air_quality_feature_pipeline.ipynb +++ b/advanced_tutorials/air_quality/2_air_quality_feature_pipeline.ipynb @@ -2,19 +2,21 @@ "cells": [ { "cell_type": "markdown", - "id": "dd094af7", + "id": "2552d8bf", "metadata": {}, "source": [ "# **Hopsworks Feature Store** - Part 02: Feature Pipeline\n", "\n", "## ๐Ÿ—’๏ธ This notebook is divided into the following sections:\n", - "1. Parse Data\n", - "2. Feature Group Insertion" + "\n", + "1. Fetch Feature Groups. \n", + "2. 
Parse Data.\n", + "3. Feature Group Insertion." ] }, { "cell_type": "markdown", - "id": "a7dcc328", + "id": "b231c0db", "metadata": {}, "source": [ "### ๐Ÿ“ Imports" @@ -22,8 +24,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "364e961e", + "execution_count": 1, + "id": "edf983f6", "metadata": {}, "outputs": [], "source": [ @@ -42,8 +44,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "50d04cc5-6788-4a4c-9f87-c2e00b5fce49", + "execution_count": 2, + "id": "dbc36bb3", "metadata": { "tags": [] }, @@ -57,12 +59,23 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "b0d2261f-8907-44f4-9f1a-bd9ec5e1556f", + "execution_count": 3, + "id": "d07e46c6", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(datetime.date(2024, 2, 23), '2024-02-23')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Getting the current date\n", "today = datetime.date.today()\n", @@ -73,7 +86,7 @@ }, { "cell_type": "markdown", - "id": "d406b01d", + "id": "50e64602", "metadata": {}, "source": [ "### ๐Ÿ”ฎ Connecting to Hopsworks Feature Store " @@ -81,16 +94,35 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "8ba3cb02", + "execution_count": 4, + "id": "06651b20", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connected. Call `.close()` to terminate connection gracefully.\n", + "\n", + "Logged in to project, explore it here https://snurran.hops.works/p/5242\n", + "Connected. 
Call `.close()` to terminate connection gracefully.\n" + ] + } + ], "source": [ "import hopsworks\n", "\n", "project = hopsworks.login()\n", - "fs = project.get_feature_store() \n", - "\n", + "fs = project.get_feature_store() " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "4a6f5fc2", + "metadata": {}, + "outputs": [], + "source": [ "# Retrieve feature groups\n", "air_quality_fg = fs.get_feature_group(\n", " name='air_quality',\n", @@ -104,15 +136,7 @@ }, { "cell_type": "markdown", - "id": "c7f61053-a8c0-48a7-afa4-0e8733d2a54a", - "metadata": {}, - "source": [ - "---" - ] - }, - { - "cell_type": "markdown", - "id": "459ee37e-7e74-4051-97f6-2e03f9cac9d8", + "id": "640ab38b", "metadata": {}, "source": [ "## ๐ŸŒซ Filling gaps in Air Quality data (PM2.5)" @@ -120,12 +144,21 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "76ae9dd9-ab28-41d1-8478-5af27b7f767e", + "execution_count": 6, + "id": "edc5510b", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Finished: Reading data from Hopsworks, using ArrowFlight (3.26s) \n", + "Finished: Reading data from Hopsworks, using ArrowFlight (2.25s) \n" + ] + } + ], "source": [ "# Read data from feature groups\n", "df_air_quality = air_quality_fg.read()\n", @@ -134,8 +167,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "03063bc6-b58f-47f4-bfc6-8020ec196478", + "execution_count": 7, + "id": "cfb667b0", "metadata": { "tags": [] }, @@ -154,12 +187,21 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "5e868bdf-e91a-410a-b654-a315c605f3dc", + "execution_count": 8, + "id": "74862993", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โ›ณ๏ธ Last update for Paris: 2024-02-23\n", + "โ›ณ๏ธ Last update for Columbus: 2024-02-23\n" + ] + } + ], "source": [ "# Accessing the last updated date for the city of Paris\n", 
"paris_last_date = last_dates_aq.get(\"Paris\", \"Not available\")\n", @@ -172,9 +214,22 @@ "print(\"โ›ณ๏ธ Last update for Columbus:\", columbus_last_date)" ] }, + { + "cell_type": "code", + "execution_count": 9, + "id": "7790102a", + "metadata": {}, + "outputs": [], + "source": [ + "for city, date in last_dates_aq.items():\n", + " city_last_date = datetime.datetime.strptime(date, \"%Y-%m-%d\").date()\n", + " if (today - city_last_date) <= datetime.timedelta(days=28):\n", + " last_dates_aq[city] = (city_last_date - datetime.timedelta(days=28)).strftime(\"%Y-%m-%d\")" + ] + }, { "cell_type": "markdown", - "id": "77c4ee8d-7f7e-4bd0-a97b-c3ac0d7db50f", + "id": "67fd48eb", "metadata": {}, "source": [ "### ๐Ÿง™๐Ÿผโ€โ™‚๏ธ Parsing PM2.5 data" @@ -182,13 +237,159 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "112a7974-37cb-4195-bc71-328af428c491", + "execution_count": 10, + "id": "ed8324e7", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processed PM2_5 for Amsterdam since 2024-01-26 till 2024-02-23.\n", + "Took 0.12 sec.\n", + "\n", + "Processed PM2_5 for Athina since 2024-01-26 till 2024-02-23.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for Berlin since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for Gdansk since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for Krakรณw since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for London since 2024-01-26 till 2024-02-23.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for Madrid since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for Marseille since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for Milano since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for Mรผnchen since 2024-01-26 
till 2024-02-23.\n", + "Took 0.16 sec.\n", + "\n", + "Processed PM2_5 for Napoli since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for Paris since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for Sevilla since 2024-01-26 till 2024-02-23.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for Stockholm since 2024-01-26 till 2024-02-23.\n", + "Took 0.12 sec.\n", + "\n", + "Processed PM2_5 for Tallinn since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for Varna since 2024-01-26 till 2024-02-23.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for Wien since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for Albuquerque since 2024-01-26 till 2024-02-23.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for Atlanta since 2024-01-26 till 2024-02-23.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for Chicago since 2024-01-26 till 2024-02-23.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for Columbus since 2024-01-26 till 2024-02-23.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for Dallas since 2024-01-26 till 2024-02-23.\n", + "Took 0.14 sec.\n", + "\n", + "Processed PM2_5 for Denver since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for Houston since 2024-01-26 till 2024-02-23.\n", + "Took 0.14 sec.\n", + "\n", + "Processed PM2_5 for Los Angeles since 2024-01-26 till 2024-02-23.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for New York since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for Phoenix-Mesa since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for Salt Lake City since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for San Francisco since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for Tampa since 2024-01-26 till 
2024-02-23.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for Bellevue-SE 12th St since 2024-01-26 till 2024-02-23.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for DARRINGTON - FIR ST (Darrington High School) since 2024-01-26 till 2024-02-23.\n", + "Took 0.17 sec.\n", + "\n", + "Processed PM2_5 for KENT - JAMES & CENTRAL since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for LAKE FOREST PARK TOWNE CENTER since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for MARYSVILLE - 7TH AVE (Marysville Junior High) since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for NORTH BEND - NORTH BEND WAY since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for SEATTLE - BEACON HILL since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for SEATTLE - DUWAMISH since 2024-01-26 till 2024-02-23.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for SEATTLE - SOUTH PARK #2 since 2024-01-26 till 2024-02-23.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for Seattle-10th & Weller since 2024-01-26 till 2024-02-23.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for TACOMA - ALEXANDER AVE since 2024-01-26 till 2024-02-23.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for TACOMA - L STREET since 2024-01-26 till 2024-02-23.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for Tacoma-S 36th St since 2024-01-26 till 2024-02-23.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for Tukwila Allentown since 2024-01-26 till 2024-02-23.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for Tulalip-Totem Beach Rd since 2024-01-26 till 2024-02-23.\n", + "Took 0.1 sec.\n", + "\n", + "----------------------------------------------------------------\n", + "Parsed new PM2.5 data for ALL locations up to 2024-02-23.\n", + "Took 5.04 sec.\n", + "\n" + ] + } + ], "source": [ "# Storing the current time 
as the start time of the cell execution\n", "start_of_cell = time.time()\n", @@ -223,17 +424,78 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "1afdc6a5", + "execution_count": 11, + "id": "c47cfcb6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
city_namedatepm2_5
1302Tulalip-Totem Beach Rd2024-02-218.4
1303Tulalip-Totem Beach Rd2024-02-223.4
1304Tulalip-Totem Beach Rd2024-02-238.7
\n", + "
" + ], + "text/plain": [ + " city_name date pm2_5\n", + "1302 Tulalip-Totem Beach Rd 2024-02-21 8.4\n", + "1303 Tulalip-Totem Beach Rd 2024-02-22 3.4\n", + "1304 Tulalip-Totem Beach Rd 2024-02-23 8.7" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df_aq_raw.tail(3)" ] }, { "cell_type": "markdown", - "id": "250d9daf-83fa-49f1-bcd8-4efaeb90b99c", + "id": "9054f8d7", "metadata": { "tags": [] }, @@ -243,8 +505,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "140b468a-e0c2-44a1-8e44-4cf393407eca", + "execution_count": 12, + "id": "bc1b957c", "metadata": { "tags": [] }, @@ -256,27 +518,200 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "acc181a9-6183-45ec-aed2-8ee684e13b39", + "execution_count": 13, + "id": "be43a5b0", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
city_namedatepm2_5pm_2_5_previous_1_daypm_2_5_previous_2_daypm_2_5_previous_3_daypm_2_5_previous_4_daypm_2_5_previous_5_daypm_2_5_previous_6_daypm_2_5_previous_7_day...exp_std_28_daysyearday_of_monthmonthday_of_weekis_weekendsin_day_of_yearcos_day_of_yearsin_day_of_weekcos_day_of_week
1302Kraków2024-02-2322.218.723.819.226.727.726.265.4...17.5119332024232400.8013610.598181-0.433884-0.900969
1303Columbus2024-02-2323.317.010.322.817.89.55.65.8...5.5251282024232400.8013610.598181-0.433884-0.900969
1304Milano2024-02-2324.449.262.879.592.2119.4114.2110.1...31.1529442024232400.8013610.598181-0.433884-0.900969
\n", + "

3 rows × 31 columns

\n", + "
" + ], + "text/plain": [ + " city_name date pm2_5 pm_2_5_previous_1_day \\\n", + "1302 Krakรณw 2024-02-23 22.2 18.7 \n", + "1303 Columbus 2024-02-23 23.3 17.0 \n", + "1304 Milano 2024-02-23 24.4 49.2 \n", + "\n", + " pm_2_5_previous_2_day pm_2_5_previous_3_day pm_2_5_previous_4_day \\\n", + "1302 23.8 19.2 26.7 \n", + "1303 10.3 22.8 17.8 \n", + "1304 62.8 79.5 92.2 \n", + "\n", + " pm_2_5_previous_5_day pm_2_5_previous_6_day pm_2_5_previous_7_day \\\n", + "1302 27.7 26.2 65.4 \n", + "1303 9.5 5.6 5.8 \n", + "1304 119.4 114.2 110.1 \n", + "\n", + " ... exp_std_28_days year day_of_month month day_of_week \\\n", + "1302 ... 17.511933 2024 23 2 4 \n", + "1303 ... 5.525128 2024 23 2 4 \n", + "1304 ... 31.152944 2024 23 2 4 \n", + "\n", + " is_weekend sin_day_of_year cos_day_of_year sin_day_of_week \\\n", + "1302 0 0.801361 0.598181 -0.433884 \n", + "1303 0 0.801361 0.598181 -0.433884 \n", + "1304 0 0.801361 0.598181 -0.433884 \n", + "\n", + " cos_day_of_week \n", + "1302 -0.900969 \n", + "1303 -0.900969 \n", + "1304 -0.900969 \n", + "\n", + "[3 rows x 31 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Applying a feature engineering function 'feature_engineer_aq' to the 'df_aq_update' DataFrame\n", "df_aq_update = air_quality.feature_engineer_aq(df_aq_raw)\n", "\n", "# Dropping rows with missing values in the 'df_aq_update' DataFrame\n", "df_aq_update = df_aq_update.dropna()\n", + "\n", "df_aq_update.tail(3)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "0364873c", + "execution_count": 14, + "id": "8b046956", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Checking the total number of missing values in the 'df_aq_update' DataFrame\n", "df_aq_update.isna().sum().sum()" @@ -284,12 +719,23 @@ }, { "cell_type": "code", - 
"execution_count": null, - "id": "94f67c89-6b39-4748-b4be-6ed3c9d57f96", + "execution_count": 15, + "id": "6ae477ab", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(45, 31)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Retrieving the dimensions (number of rows and columns) of the 'df_aq_update' DataFrame\n", "df_aq_update.shape" @@ -297,15 +743,7 @@ }, { "cell_type": "markdown", - "id": "d74f5622-6f57-47b9-ac0b-dfb6617847b2", - "metadata": {}, - "source": [ - "---" - ] - }, - { - "cell_type": "markdown", - "id": "95a34c64-5b94-4c4f-b03d-14e12a106f25", + "id": "12f33e25", "metadata": {}, "source": [ "## ๐ŸŒฆ Filling gaps in Weather data" @@ -313,8 +751,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "46009853-160c-467e-abb0-3145d27c57dc", + "execution_count": 16, + "id": "32eeb729", "metadata": { "tags": [] }, @@ -333,7 +771,7 @@ }, { "cell_type": "markdown", - "id": "1fd15812-a3a9-488c-879e-181c7b815357", + "id": "fb144e4a", "metadata": { "tags": [] }, @@ -343,13 +781,159 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "ef027d28-3443-4c7c-9e85-783625301a14", + "execution_count": 17, + "id": "3ed0d92c", "metadata": { "scrolled": true, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Parsed weather for Amsterdam since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Athina since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Berlin since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Gdansk since 2024-02-23 till 2024-02-23.\n", + "Took 2.11 sec.\n", + "\n", + "Parsed weather for Krakรณw since 2024-02-23 till 2024-02-23.\n", + "Took 2.11 sec.\n", + "\n", + "Parsed weather for London since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + 
"Parsed weather for Madrid since 2024-02-23 till 2024-02-23.\n", + "Took 2.11 sec.\n", + "\n", + "Parsed weather for Marseille since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Milano since 2024-02-23 till 2024-02-23.\n", + "Took 2.11 sec.\n", + "\n", + "Parsed weather for Mรผnchen since 2024-02-23 till 2024-02-23.\n", + "Took 2.11 sec.\n", + "\n", + "Parsed weather for Napoli since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Paris since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Sevilla since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Stockholm since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Tallinn since 2024-02-23 till 2024-02-23.\n", + "Took 2.11 sec.\n", + "\n", + "Parsed weather for Varna since 2024-02-23 till 2024-02-23.\n", + "Took 2.11 sec.\n", + "\n", + "Parsed weather for Wien since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Albuquerque since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Atlanta since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Chicago since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Columbus since 2024-02-23 till 2024-02-23.\n", + "Took 2.11 sec.\n", + "\n", + "Parsed weather for Dallas since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Denver since 2024-02-23 till 2024-02-23.\n", + "Took 2.11 sec.\n", + "\n", + "Parsed weather for Houston since 2024-02-23 till 2024-02-23.\n", + "Took 2.11 sec.\n", + "\n", + "Parsed weather for Los Angeles since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for New York since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Phoenix-Mesa since 
2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Salt Lake City since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for San Francisco since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Tampa since 2024-02-23 till 2024-02-23.\n", + "Took 2.11 sec.\n", + "\n", + "Parsed weather for Bellevue-SE 12th St since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for DARRINGTON - FIR ST (Darrington High School) since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for KENT - JAMES & CENTRAL since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for LAKE FOREST PARK TOWNE CENTER since 2024-02-23 till 2024-02-23.\n", + "Took 2.11 sec.\n", + "\n", + "Parsed weather for MARYSVILLE - 7TH AVE (Marysville Junior High) since 2024-02-23 till 2024-02-23.\n", + "Took 2.11 sec.\n", + "\n", + "Parsed weather for NORTH BEND - NORTH BEND WAY since 2024-02-23 till 2024-02-23.\n", + "Took 2.11 sec.\n", + "\n", + "Parsed weather for SEATTLE - BEACON HILL since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for SEATTLE - DUWAMISH since 2024-02-23 till 2024-02-23.\n", + "Took 2.11 sec.\n", + "\n", + "Parsed weather for SEATTLE - SOUTH PARK #2 since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Seattle-10th & Weller since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for TACOMA - ALEXANDER AVE since 2024-02-23 till 2024-02-23.\n", + "Took 2.11 sec.\n", + "\n", + "Parsed weather for TACOMA - L STREET since 2024-02-23 till 2024-02-23.\n", + "Took 2.11 sec.\n", + "\n", + "Parsed weather for Tacoma-S 36th St since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Tukwila Allentown since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for 
Tulalip-Totem Beach Rd since 2024-02-23 till 2024-02-23.\n", + "Took 2.1 sec.\n", + "\n", + "----------------------------------------------------------------\n", + "Parsed new weather data for ALL cities up to 2024-02-23.\n", + "Took 94.76 sec.\n", + "\n" + ] + } + ], "source": [ "# Storing the current time as the start time of the cell execution\n", "start_of_cell = time.time()\n", @@ -388,8 +972,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "a7bff400-a2fb-48a3-a07b-5bd2a0469cd7", + "execution_count": 18, + "id": "a149c4d6", "metadata": { "tags": [] }, @@ -410,12 +994,119 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "11752b30-2f40-4668-9813-2a90199c62b8", + "execution_count": 19, + "id": "7960e1b4", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
city_namedatetemperature_maxtemperature_minprecipitation_sumrain_sumsnowfall_sumprecipitation_hourswind_speed_maxwind_gusts_maxwind_direction_dominantunix_time
42Tacoma-S 36th St2024-02-2312.12.80.00.00.00.08.012.251708646400000
43Tukwila Allentown2024-02-2313.23.30.00.00.00.06.312.63291708646400000
44Tulalip-Totem Beach Rd2024-02-2312.73.70.00.00.00.07.911.53571708646400000
\n", + "
" + ], + "text/plain": [ + " city_name date temperature_max temperature_min \\\n", + "42 Tacoma-S 36th St 2024-02-23 12.1 2.8 \n", + "43 Tukwila Allentown 2024-02-23 13.2 3.3 \n", + "44 Tulalip-Totem Beach Rd 2024-02-23 12.7 3.7 \n", + "\n", + " precipitation_sum rain_sum snowfall_sum precipitation_hours \\\n", + "42 0.0 0.0 0.0 0.0 \n", + "43 0.0 0.0 0.0 0.0 \n", + "44 0.0 0.0 0.0 0.0 \n", + "\n", + " wind_speed_max wind_gusts_max wind_direction_dominant unix_time \n", + "42 8.0 12.2 5 1708646400000 \n", + "43 6.3 12.6 329 1708646400000 \n", + "44 7.9 11.5 357 1708646400000 " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Converting the 'date' column in the 'df_aq_update' DataFrame to string format\n", "df_aq_update.date = df_aq_update.date.astype(str)\n", @@ -430,15 +1121,7 @@ }, { "cell_type": "markdown", - "id": "792dd383", - "metadata": {}, - "source": [ - "---" - ] - }, - { - "cell_type": "markdown", - "id": "5aef353d", + "id": "72b1db92", "metadata": { "tags": [] }, @@ -448,10 +1131,44 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "f81bb922", + "execution_count": 20, + "id": "403a8f41", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "13836ef8e3384504b27063ea496c122f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Uploading Dataframe: 0.00% | | Rows 0/45 | Elapsed Time: 00:00 | Remaining Time: ?" 
+ ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Launching job: air_quality_1_offline_fg_materialization\n", + "Job started successfully, you can follow the progress at \n", + "https://snurran.hops.works/p/5242/jobs/named/air_quality_1_offline_fg_materialization/executions\n" + ] + }, + { + "data": { + "text/plain": [ + "(, None)" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Insert new data\n", "air_quality_fg.insert(df_aq_update)" @@ -459,10 +1176,44 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "be0c498e", + "execution_count": 21, + "id": "fc2cb1d5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "01811207264a4dc8a78130e301d43f16", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Uploading Dataframe: 0.00% | | Rows 0/45 | Elapsed Time: 00:00 | Remaining Time: ?" 
+ ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Launching job: weather_1_offline_fg_materialization\n", + "Job started successfully, you can follow the progress at \n", + "https://snurran.hops.works/p/5242/jobs/named/weather_1_offline_fg_materialization/executions\n" + ] + }, + { + "data": { + "text/plain": [ + "(, None)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Insert new data\n", "weather_fg.insert(df_weather_update)" @@ -470,19 +1221,20 @@ }, { "cell_type": "markdown", - "id": "b50c64a1", + "id": "309617db", "metadata": {}, "source": [ + "---\n", "## โญ๏ธ **Next:** Part 03: Training Pipeline\n", " \n", "\n", - "In the following notebook you will read from a feature group and create training dataset within the feature store\n" + "In the following notebook you will create a feature view, create a training dataset, train a model and save it in the Hopsworks Model Registry." 
] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -496,7 +1248,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.10.11" }, "vscode": { "interpreter": { diff --git a/advanced_tutorials/air_quality/3_air_quality_training_pipeline.ipynb b/advanced_tutorials/air_quality/3_air_quality_training_pipeline.ipynb index 2104e1f5..1f54825c 100644 --- a/advanced_tutorials/air_quality/3_air_quality_training_pipeline.ipynb +++ b/advanced_tutorials/air_quality/3_air_quality_training_pipeline.ipynb @@ -2,28 +2,29 @@ "cells": [ { "cell_type": "markdown", - "id": "7eb83ff8", + "id": "77d695b6", "metadata": { "tags": [] }, "source": [ "# **Hopsworks Feature Store** - Part 03: Training Pipeline\n", "\n", - "This notebook explains how to read from a feature group and create training dataset within the feature store\n", + "This notebook explains how to create a feature view, create a training dataset, train a model and save it in the Hopsworks Model Registry.\n", "\n", "## ๐Ÿ—’๏ธ This notebook is divided into the following sections:\n", "\n", - "1. Fetch Feature Groups\n", - "2. Define Transformation functions\n", - "4. Create Feature Views\n", - "5. Create Training Dataset with training, validation and test splits\n", + "1. Fetch Feature Groups.\n", + "2. Create a Feature View.\n", + "3. Create a Training Dataset.\n", + "4. Train a model.\n", + "5. 
Save trained model in the Model Registry.\n", "\n", "![part2](../../images/02_training-dataset.png) " ] }, { "cell_type": "markdown", - "id": "f3b5f602-a575-49a8-bce9-a997cca936e0", + "id": "a4e180e5", "metadata": {}, "source": [ "### ๐Ÿ“ Imports" @@ -31,22 +32,39 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "ad609eec-0b46-445f-a0f5-5657e5f69866", + "execution_count": 1, + "id": "dca2f8a6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", + "\u001b[0m" + ] + } + ], "source": [ "!pip install xgboost --q" ] }, { "cell_type": "code", - "execution_count": null, - "id": "b3f2ac81-423a-4380-8fd6-b70aa55eb864", + "execution_count": 2, + "id": "e07a7998", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-02-20 21:27:58,492 INFO: generated new fontManager\n" + ] + } + ], "source": [ "import os\n", "import datetime\n", @@ -73,7 +91,7 @@ }, { "cell_type": "markdown", - "id": "a0b3bcd1", + "id": "4046f61d", "metadata": {}, "source": [ "## ๐Ÿ“ก Connecting to Hopsworks Feature Store " @@ -81,10 +99,21 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "89ad779f", + "execution_count": 3, + "id": "74e9b3c5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connected. Call `.close()` to terminate connection gracefully.\n", + "\n", + "Logged in to project, explore it here https://snurran.hops.works/p/5242\n", + "Connected. 
Call `.close()` to terminate connection gracefully.\n" + ] + } + ], "source": [ "import hopsworks\n", "\n", @@ -95,8 +124,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "735a083e", + "execution_count": 4, + "id": "90851146", "metadata": {}, "outputs": [], "source": [ @@ -113,18 +142,16 @@ }, { "cell_type": "markdown", - "id": "be427dca", + "id": "58c4715e", "metadata": {}, "source": [ - "--- \n", - "\n", - "## ๐Ÿ– Feature View Creation and Retrieving " + "## ๐Ÿ– Feature View Creation and Retrieval " ] }, { "cell_type": "code", - "execution_count": null, - "id": "cc3192d3", + "execution_count": 5, + "id": "dce6c4ac", "metadata": {}, "outputs": [], "source": [ @@ -137,8 +164,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "b3b8ba7b-b0ab-4ea5-b050-f8e1faf43c27", + "execution_count": 6, + "id": "2e63d523", "metadata": { "scrolled": true, "tags": [] @@ -151,7 +178,7 @@ }, { "cell_type": "markdown", - "id": "d83a1681", + "id": "eb62cf36", "metadata": {}, "source": [ "`Feature Views` stands between **Feature Groups** and **Training Dataset**. ะกombining **Feature Groups** we can create **Feature Views** which store a metadata of our data. 
Having **Feature Views** we can create **Training Dataset**.\n", @@ -175,10 +202,19 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "403df0b4", + "execution_count": 7, + "id": "5394c761", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Feature view created successfully, explore it at \n", + "https://snurran.hops.works/p/5242/fs/5190/fv/air_quality_fv/version/1\n" + ] + } + ], "source": [ "# Get or create the 'air_quality_fv' feature view\n", "feature_view = fs.get_or_create_feature_view(\n", @@ -190,19 +226,17 @@ }, { "cell_type": "markdown", - "id": "0c723c54", + "id": "a71e815b", "metadata": {}, "source": [ - "For now `Feature View` is saved in Hopsworks and you can retrieve it using `FeatureStore.get_feature_view()`." + "For now, your `Feature View` is saved in Hopsworks and you can retrieve it using `FeatureStore.get_feature_view()`." ] }, { "cell_type": "markdown", - "id": "6e1187a2", + "id": "409936f9", "metadata": {}, "source": [ - "---\n", - "\n", "## ๐Ÿ‹๏ธ Training Dataset Creation\n", "\n", "In Hopsworks training data is a query where the projection (set of features) is determined by the parent FeatureView with an optional snapshot on disk of the data returned by the query.\n", @@ -228,10 +262,25 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "2f5bcf22-6ff1-4995-a8c3-11a1dab396a7", + "execution_count": 8, + "id": "dcd1c91f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Finished: Reading data from Hopsworks, using ArrowFlight (8.70s) \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "VersionWarning: Incremented version to `1`.\n" + ] + } + ], "source": [ "X, _ = feature_view.training_data(\n", " description = 'Air Quality dataset',\n", @@ -240,15 +289,7 @@ }, { "cell_type": "markdown", - "id": "c995b340-5ba6-4116-b8b6-86ca34f0a0ab", - "metadata": {}, - "source": 
[ - "---" - ] - }, - { - "cell_type": "markdown", - "id": "95783124-8303-47c5-bd15-2804efa15611", + "id": "3ab95b51", "metadata": {}, "source": [ "## 🧬 Modeling" @@ -256,25 +297,25 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "5b937dec", + "execution_count": 9, + "id": "9ebd4ae2", "metadata": {}, "outputs": [], "source": [ - "# Creating a LabelEncoder object\n", + "# Create a LabelEncoder object\n", "label_encoder = LabelEncoder()\n", "\n", - "# Fitting the encoder to the data in the 'city_name' column\n", + "# Fit the encoder to the data in the 'city_name' column\n", "label_encoder.fit(X[['city_name']])\n", "\n", - "# Transforming the 'city_name' column data using the fitted encoder\n", + "# Transform the 'city_name' column data using the fitted encoder\n", "encoded = label_encoder.transform(X[['city_name']])" ] }, { "cell_type": "code", - "execution_count": null, - "id": "97cdb6bb-6c9c-44b7-9171-1b420bae9181", + "execution_count": 10, + "id": "b50de188", "metadata": { "tags": [] }, @@ -292,90 +333,167 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "4df41c7d-00bd-4203-90a1-8cc298508d68", + "execution_count": 11, + "id": "02e2f796", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# Extracting the target variable 'pm2_5' from the DataFrame 'X' and assigning it to the variable 'y'\n", + "# Extract the target variable 'pm2_5' from the DataFrame 'X' and assign it to the variable 'y'\n", "y = X.pop('pm2_5')" ] }, { "cell_type": "code", - "execution_count": null, - "id": "d0299506-195f-4ebc-b43e-347fe59db31c", + "execution_count": 12, + "id": "43cdecb5", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# Splitting the data into training and testing sets using the train_test_split function\n", + "# Split the data into training and testing sets using the train_test_split function\n", "X_train, X_test, y_train, y_test = train_test_split(\n", - " X, y, test_size=0.2, random_state=42)" + " X, \n", + " y, \n", + "
test_size=0.2, \n", + " random_state=42,\n", + ")\n", + "\n", + "X_train.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de9eb5ff", + "metadata": {}, + "outputs": [], + "source": [ + "y_train.head(3)" ] }, { "cell_type": "markdown", - "id": "8fd4e24e-7f02-4944-a309-6475b65e7846", + "id": "9bd07554", "metadata": {}, "source": [ - "### โš–๏ธ Model Validation" + "## ๐Ÿƒ๐Ÿปโ€โ™‚๏ธ Model Training" ] }, { "cell_type": "code", - "execution_count": null, - "id": "4e5be9f8-0f88-4a7e-8fc1-65ec8b02920d", + "execution_count": 17, + "id": "5c73f5b9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
+       "             colsample_bylevel=None, colsample_bynode=None,\n",
+       "             colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
+       "             enable_categorical=False, eval_metric=None, feature_types=None,\n",
+       "             gamma=None, grow_policy=None, importance_type=None,\n",
+       "             interaction_constraints=None, learning_rate=None, max_bin=None,\n",
+       "             max_cat_threshold=None, max_cat_to_onehot=None,\n",
+       "             max_delta_step=None, max_depth=None, max_leaves=None,\n",
+       "             min_child_weight=None, missing=nan, monotone_constraints=None,\n",
+       "             multi_strategy=None, n_estimators=None, n_jobs=None,\n",
+       "             num_parallel_tree=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "XGBRegressor(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=None, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=None, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=None, n_jobs=None,\n", + " num_parallel_tree=None, random_state=None, ...)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create an instance of the XGBoost Regressor\n", + "xgb_regressor = XGBRegressor()\n", + "\n", + "# Fit the XGBoost Regressor to the training data\n", + "xgb_regressor.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "id": "6f78ae54", + "metadata": {}, + "source": [ + "## โš–๏ธ Model Validation" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "431e1863", "metadata": { "tags": [] }, - "outputs": [], - "source": [ - "# Storing the current time as the start time of the cell execution\n", - "start_of_cell = time.time()\n", - "\n", - "# Creating an instance of the XGBoost Regressor\n", - "xgb_regressor = XGBRegressor()\n", - "\n", - "# Fitting the XGBoost Regressor to the training data\n", - "xgb_regressor.fit(X_train, y_train)\n", - "\n", - "# Predicting target values on the test set\n", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "โ›ณ๏ธ MSE: 23.90455409357784\n", + "โ›ณ๏ธ RMSE: 4.889228374046137\n", + "โ›ณ๏ธ R^2: 0.7686663896306777\n" + ] + } + ], + "source": [ + "# Predict target values on the test set\n", "y_pred = xgb_regressor.predict(X_test)\n", "\n", - "# 
Calculating Mean Squared Error (MSE) using sklearn\n", + "# Calculate Mean Squared Error (MSE) using sklearn\n", "mse = mean_squared_error(y_test, y_pred)\n", - "print(\"MSE:\", mse)\n", + "print(\"โ›ณ๏ธ MSE:\", mse)\n", "\n", - "# Calculating Root Mean Squared Error (RMSE) using sklearn\n", + "# Calculate Root Mean Squared Error (RMSE) using sklearn\n", "rmse = mean_squared_error(y_test, y_pred, squared=False)\n", - "print(\"RMSE:\", rmse)\n", + "print(\"โ›ณ๏ธ RMSE:\", rmse)\n", "\n", - "# Calculating R squared using sklearn\n", + "# Calculate R squared using sklearn\n", "r2 = r2_score(y_test, y_pred)\n", - "print(\"R squared:\", r2)\n", - "\n", - "# Storing the current time as the end time of the cell execution\n", - "end_of_cell = time.time()\n", - "\n", - "# Printing information about the execution, including the time taken\n", - "print(f\"Took {round(end_of_cell - start_of_cell, 2)} sec.\\n\")" + "print(\"โ›ณ๏ธ R^2:\", r2)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "ac31f9fb-7904-416a-9938-e85320340412", + "execution_count": 20, + "id": "d23df9c5", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# Creating a DataFrame 'df_' to store true and predicted values for evaluation\n", - "df_ = pd.DataFrame({\n", + "# Create a DataFrame 'df_' to store true and predicted values for evaluation\n", + "df_pred = pd.DataFrame({\n", " \"y_true\": y_test,\n", " \"y_pred\": y_pred,\n", "})" @@ -383,55 +501,71 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "f2fc8448-2150-4cfd-803d-afbd4845b59e", + "execution_count": 21, + "id": "9ca2eee4", "metadata": { "tags": [] }, - "outputs": [], - "source": [ - "# Creating a residual plot using Seaborn\n", - "residplot = sns.residplot(data=df_, x=\"y_true\", y=\"y_pred\", color='orange')\n", - "\n", - "# Adding title, xlabel, and ylabel to the residual plot\n", + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Create a residual plot using Seaborn\n", + "residplot = sns.residplot(data=df_pred, x=\"y_true\", y=\"y_pred\", color='orange')\n", + "\n", + "# Add title, xlabel, and ylabel to the residual plot\n", "plt.title('Model Residuals')\n", "plt.xlabel('Observation #')\n", "plt.ylabel('Error')\n", "\n", - "# Displaying the residual plot\n", + "# Display the residual plot\n", "plt.show()\n", "\n", - "# Getting the figure from the residual plot and displaying it separately\n", + "# Get the figure from the residual plot and displaying it separately\n", "fig = residplot.get_figure()\n", "fig.show()" ] }, { "cell_type": "code", - "execution_count": null, - "id": "5ae4e226-7a93-4e4d-8131-c1de62a7b6f9", + "execution_count": 24, + "id": "5596efc5", "metadata": { "tags": [] }, - "outputs": [], - "source": [ - "# Plotting feature importances using the plot_importance function from XGBoost\n", - "# 'xgb_regressor' is the trained XGBoost Regressor\n", - "# Setting 'max_num_features' to 25 to display the top 25 most important features\n", - "plot_importance(xgb_regressor, max_num_features=25)" - ] - }, - { - "cell_type": "markdown", - "id": "3dcea831-6c21-4396-a0ce-0631d21d1875", - "metadata": {}, - "source": [ - "---" + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot feature importances using the plot_importance function from XGBoost\n", + "plot_importance(\n", + " xgb_regressor, \n", + " max_num_features=25, # Display the top 25 most important features\n", + ")\n", + "plt.show()" ] }, { "cell_type": "markdown", - "id": "c066fe79-315e-4b85-b2ab-32d503679dc7", + "id": "4d7cf0fc", "metadata": { "tags": [] }, @@ -443,12 +577,20 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "a787ec40-6bd7-4950-aa5d-bf004e1e5ade", + "execution_count": 25, + "id": "680b8270", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connected. Call `.close()` to terminate connection gracefully.\n" + ] + } + ], "source": [ "# Retrieve the model registry\n", "mr = project.get_model_registry()" @@ -456,7 +598,7 @@ }, { "cell_type": "markdown", - "id": "7d240dc7-8a02-47b2-9667-7483508b2d24", + "id": "97bb4f4f", "metadata": {}, "source": [ "### โš™๏ธ Model Schema" @@ -464,7 +606,7 @@ }, { "cell_type": "markdown", - "id": "5c658df3-56a4-450b-90ee-127d0afe5b74", + "id": "0b0bd961", "metadata": {}, "source": [ "The model needs to be set up with a [Model Schema](https://docs.hopsworks.ai/machine-learning-api/latest/generated/model_schema/), which describes the inputs and outputs for a model.\n", @@ -474,8 +616,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "cd3f3751", + "execution_count": 26, + "id": "8314a355", "metadata": { "scrolled": true }, @@ -484,47 +626,79 @@ "from hsml.schema import Schema\n", "from hsml.model_schema import ModelSchema\n", "\n", - "# Creating input and output schemas using the 'Schema' class for features (X) and target variable (y)\n", + "# Create input and output schemas using the 'Schema' class for features (X) and target variable (y)\n", "input_schema = Schema(X)\n", "output_schema = Schema(y)\n", "\n", - "# Creating a model schema 
using 'ModelSchema' with the input and output schemas\n", + "# Create a model schema using 'ModelSchema' with the input and output schemas\n", "model_schema = ModelSchema(input_schema=input_schema, output_schema=output_schema)\n", "\n", - "# Converting the model schema to a dictionary representation\n", + "# Convert the model schema to a dictionary representation\n", "schema_dict = model_schema.to_dict()" ] }, { "cell_type": "code", - "execution_count": null, - "id": "d2777f5e", + "execution_count": 27, + "id": "ca017146", "metadata": { "scrolled": true }, "outputs": [], "source": [ - "# Creating a directory for the model artifacts if it doesn't exist\n", + "# Create a directory for the model artifacts if it doesn't exist\n", "model_dir = \"air_quality_model\"\n", "if os.path.isdir(model_dir) == False:\n", " os.mkdir(model_dir)\n", "\n", - "# Saving the label encoder and XGBoost regressor as joblib files in the model directory\n", + "# Save the label encoder and XGBoost regressor as joblib files in the model directory\n", "joblib.dump(label_encoder, model_dir + '/label_encoder.pkl')\n", "joblib.dump(xgb_regressor, model_dir + '/xgboost_regressor.pkl')\n", "\n", - "# Saving the residual plot figure as an image in the model directory\n", + "# Save the residual plot figure as an image in the model directory\n", "fig.savefig(model_dir + \"/residplot.png\")" ] }, { "cell_type": "code", - "execution_count": null, - "id": "41f6811e", + "execution_count": 28, + "id": "ebff7e6e", "metadata": {}, - "outputs": [], - "source": [ - "# Creating a Python model in the model registry named 'air_quality_xgboost_model'\n", + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c82967d3656441cf8dcaaeea8470903c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/6 [00:00 **Hopsworks Feature Store** - Part 04: Batch Inference\n", @@ -10,12 +10,14 @@ "## ๐Ÿ—’๏ธ This notebook is divided into the following sections:\n", 
"\n", "1. Load batch data.\n", - "2. Predict using model from Model Registry." + "2. Retrieve your trained model from the Model Registry.\n", + "3. Load batch data.\n", + "4. Predict batch data." ] }, { "cell_type": "markdown", - "id": "8855ee1a", + "id": "ded483d3", "metadata": {}, "source": [ "## ๐Ÿ“ Imports" @@ -23,8 +25,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "019c9226", + "execution_count": 1, + "id": "65a6a697", "metadata": {}, "outputs": [], "source": [ @@ -36,18 +38,36 @@ }, { "cell_type": "markdown", - "id": "ce2fe8a8", + "id": "c06c5552", "metadata": {}, "source": [ - "## ๐Ÿ“ก Connecting to Hopsworks Feature Store " + "## ๐Ÿ“ก Connect to Hopsworks Feature Store " ] }, { "cell_type": "code", - "execution_count": null, - "id": "39f83bc9", + "execution_count": 2, + "id": "2dcd1a04", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: log4j.properties is not found. HADOOP_CONF_DIR may be incomplete.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connected. Call `.close()` to terminate connection gracefully.\n", + "\n", + "Logged in to project, explore it here https://snurran.hops.works/p/5242\n", + "Connected. 
Call `.close()` to terminate connection gracefully.\n" + ] + } + ], "source": [ "import hopsworks\n", "\n", @@ -58,16 +78,16 @@ }, { "cell_type": "markdown", - "id": "87485ee0", + "id": "8b0649e3", "metadata": {}, "source": [ - "## โš™๏ธ Feature View Retrieval\n" + "## โš™๏ธ Feature View Retrieval" ] }, { "cell_type": "code", - "execution_count": null, - "id": "e622d6b4", + "execution_count": 3, + "id": "fc7b3682", "metadata": {}, "outputs": [], "source": [ @@ -80,18 +100,26 @@ }, { "cell_type": "markdown", - "id": "e1dac8b6", + "id": "99be7bd0", "metadata": {}, "source": [ - "## ๐Ÿ—„ Model Registry\n" + "## ๐Ÿ—„ Model Registry" ] }, { "cell_type": "code", - "execution_count": null, - "id": "ca35a9f4", + "execution_count": 4, + "id": "8d864db5", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connected. Call `.close()` to terminate connection gracefully.\n" + ] + } + ], "source": [ "# Retrieve the model registry\n", "mr = project.get_model_registry()" @@ -99,47 +127,240 @@ }, { "cell_type": "markdown", - "id": "6f3589dc", + "id": "63665afa", "metadata": {}, "source": [ - "## ๐Ÿช Retrieving model from Model Registry" + "## ๐Ÿช Retrieve model from Model Registry" ] }, { "cell_type": "code", - "execution_count": null, - "id": "6ac8014f", + "execution_count": 5, + "id": "07d9a4c0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-02-21 12:14:03,447 main ERROR Cannot access RandomAccessFile java.io.IOException: Could not create directory /srv/hops/hadoop-3.2.0.12-EE-RC0/logs java.io.IOException: Could not create directory /srv/hops/hadoop-3.2.0.12-EE-RC0/logs\n", + "\tat org.apache.logging.log4j.core.util.FileUtils.mkdir(FileUtils.java:128)\n", + "\tat org.apache.logging.log4j.core.util.FileUtils.makeParentDirs(FileUtils.java:141)\n", + "\tat 
org.apache.logging.log4j.core.appender.rolling.RollingRandomAccessFileManager$RollingRandomAccessFileManagerFactory.createManager(RollingRandomAccessFileManager.java:231)\n", + "\tat org.apache.logging.log4j.core.appender.rolling.RollingRandomAccessFileManager$RollingRandomAccessFileManagerFactory.createManager(RollingRandomAccessFileManager.java:204)\n", + "\tat org.apache.logging.log4j.core.appender.AbstractManager.getManager(AbstractManager.java:144)\n", + "\tat org.apache.logging.log4j.core.appender.OutputStreamManager.getManager(OutputStreamManager.java:100)\n", + "\tat org.apache.logging.log4j.core.appender.rolling.RollingRandomAccessFileManager.getRollingRandomAccessFileManager(RollingRandomAccessFileManager.java:107)\n", + "\tat org.apache.logging.log4j.core.appender.RollingRandomAccessFileAppender$Builder.build(RollingRandomAccessFileAppender.java:132)\n", + "\tat org.apache.logging.log4j.core.appender.RollingRandomAccessFileAppender$Builder.build(RollingRandomAccessFileAppender.java:53)\n", + "\tat org.apache.logging.log4j.core.config.plugins.util.PluginBuilder.build(PluginBuilder.java:124)\n", + "\tat org.apache.logging.log4j.core.config.AbstractConfiguration.createPluginObject(AbstractConfiguration.java:1138)\n", + "\tat org.apache.logging.log4j.core.config.AbstractConfiguration.createConfiguration(AbstractConfiguration.java:1063)\n", + "\tat org.apache.logging.log4j.core.config.AbstractConfiguration.createConfiguration(AbstractConfiguration.java:1055)\n", + "\tat org.apache.logging.log4j.core.config.AbstractConfiguration.doConfigure(AbstractConfiguration.java:664)\n", + "\tat org.apache.logging.log4j.core.config.AbstractConfiguration.initialize(AbstractConfiguration.java:258)\n", + "\tat org.apache.logging.log4j.core.config.AbstractConfiguration.start(AbstractConfiguration.java:304)\n", + "\tat org.apache.logging.log4j.core.LoggerContext.setConfiguration(LoggerContext.java:621)\n", + "\tat 
org.apache.logging.log4j.core.LoggerContext.reconfigure(LoggerContext.java:694)\n", + "\tat org.apache.logging.log4j.core.LoggerContext.reconfigure(LoggerContext.java:711)\n", + "\tat org.apache.logging.log4j.core.LoggerContext.start(LoggerContext.java:253)\n", + "\tat org.apache.logging.log4j.core.impl.Log4jContextFactory.getContext(Log4jContextFactory.java:155)\n", + "\tat org.apache.logging.log4j.core.impl.Log4jContextFactory.getContext(Log4jContextFactory.java:47)\n", + "\tat org.apache.logging.log4j.LogManager.getContext(LogManager.java:309)\n", + "\tat org.apache.log4j.Hierarchy$PrivateLogManager.getContext(Hierarchy.java:86)\n", + "\tat org.apache.log4j.Hierarchy.getContext(Hierarchy.java:99)\n", + "\tat org.apache.log4j.Category.(Category.java:188)\n", + "\tat org.apache.log4j.Logger.(Logger.java:57)\n", + "\tat org.apache.log4j.spi.RootLogger.(RootLogger.java:39)\n", + "\tat org.apache.log4j.LogManager.(LogManager.java:72)\n", + "\tat org.apache.log4j.Logger.getLogger(Logger.java:40)\n", + "\tat org.apache.commons.logging.impl.Log4JLogger.getLogger(Log4JLogger.java:262)\n", + "\tat org.apache.commons.logging.impl.Log4JLogger.(Log4JLogger.java:108)\n", + "\tat sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)\n", + "\tat sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)\n", + "\tat sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)\n", + "\tat java.lang.reflect.Constructor.newInstance(Constructor.java:423)\n", + "\tat org.apache.commons.logging.impl.LogFactoryImpl.createLogFromClass(LogFactoryImpl.java:1025)\n", + "\tat org.apache.commons.logging.impl.LogFactoryImpl.discoverLogImplementation(LogFactoryImpl.java:844)\n", + "\tat org.apache.commons.logging.impl.LogFactoryImpl.newInstance(LogFactoryImpl.java:541)\n", + "\tat org.apache.commons.logging.impl.LogFactoryImpl.getInstance(LogFactoryImpl.java:292)\n", + "\tat 
org.apache.commons.logging.impl.LogFactoryImpl.getInstance(LogFactoryImpl.java:269)\n", + "\tat org.apache.commons.logging.LogFactory.getLog(LogFactory.java:657)\n", + "\tat org.apache.hadoop.fs.FileSystem.(FileSystem.java:139)\n", + "\n", + "2024-02-21 12:14:03,450 main ERROR Could not create plugin of type class org.apache.logging.log4j.core.appender.RollingRandomAccessFileAppender for element RollingRandomAccessFile: java.lang.IllegalStateException: ManagerFactory [org.apache.logging.log4j.core.appender.rolling.RollingRandomAccessFileManager$RollingRandomAccessFileManagerFactory@1623b78d] unable to create manager for [/srv/hops/hadoop/logs/hadoop.log] with data [org.apache.logging.log4j.core.appender.rolling.RollingRandomAccessFileManager$FactoryData@c8c12ac] java.lang.IllegalStateException: ManagerFactory [org.apache.logging.log4j.core.appender.rolling.RollingRandomAccessFileManager$RollingRandomAccessFileManagerFactory@1623b78d] unable to create manager for [/srv/hops/hadoop/logs/hadoop.log] with data [org.apache.logging.log4j.core.appender.rolling.RollingRandomAccessFileManager$FactoryData@c8c12ac]\n", + "\tat org.apache.logging.log4j.core.appender.AbstractManager.getManager(AbstractManager.java:146)\n", + "\tat org.apache.logging.log4j.core.appender.OutputStreamManager.getManager(OutputStreamManager.java:100)\n", + "\tat org.apache.logging.log4j.core.appender.rolling.RollingRandomAccessFileManager.getRollingRandomAccessFileManager(RollingRandomAccessFileManager.java:107)\n", + "\tat org.apache.logging.log4j.core.appender.RollingRandomAccessFileAppender$Builder.build(RollingRandomAccessFileAppender.java:132)\n", + "\tat org.apache.logging.log4j.core.appender.RollingRandomAccessFileAppender$Builder.build(RollingRandomAccessFileAppender.java:53)\n", + "\tat org.apache.logging.log4j.core.config.plugins.util.PluginBuilder.build(PluginBuilder.java:124)\n", + "\tat 
org.apache.logging.log4j.core.config.AbstractConfiguration.createPluginObject(AbstractConfiguration.java:1138)\n", + "\tat org.apache.logging.log4j.core.config.AbstractConfiguration.createConfiguration(AbstractConfiguration.java:1063)\n", + "\tat org.apache.logging.log4j.core.config.AbstractConfiguration.createConfiguration(AbstractConfiguration.java:1055)\n", + "\tat org.apache.logging.log4j.core.config.AbstractConfiguration.doConfigure(AbstractConfiguration.java:664)\n", + "\tat org.apache.logging.log4j.core.config.AbstractConfiguration.initialize(AbstractConfiguration.java:258)\n", + "\tat org.apache.logging.log4j.core.config.AbstractConfiguration.start(AbstractConfiguration.java:304)\n", + "\tat org.apache.logging.log4j.core.LoggerContext.setConfiguration(LoggerContext.java:621)\n", + "\tat org.apache.logging.log4j.core.LoggerContext.reconfigure(LoggerContext.java:694)\n", + "\tat org.apache.logging.log4j.core.LoggerContext.reconfigure(LoggerContext.java:711)\n", + "\tat org.apache.logging.log4j.core.LoggerContext.start(LoggerContext.java:253)\n", + "\tat org.apache.logging.log4j.core.impl.Log4jContextFactory.getContext(Log4jContextFactory.java:155)\n", + "\tat org.apache.logging.log4j.core.impl.Log4jContextFactory.getContext(Log4jContextFactory.java:47)\n", + "\tat org.apache.logging.log4j.LogManager.getContext(LogManager.java:309)\n", + "\tat org.apache.log4j.Hierarchy$PrivateLogManager.getContext(Hierarchy.java:86)\n", + "\tat org.apache.log4j.Hierarchy.getContext(Hierarchy.java:99)\n", + "\tat org.apache.log4j.Category.(Category.java:188)\n", + "\tat org.apache.log4j.Logger.(Logger.java:57)\n", + "\tat org.apache.log4j.spi.RootLogger.(RootLogger.java:39)\n", + "\tat org.apache.log4j.LogManager.(LogManager.java:72)\n", + "\tat org.apache.log4j.Logger.getLogger(Logger.java:40)\n", + "\tat org.apache.commons.logging.impl.Log4JLogger.getLogger(Log4JLogger.java:262)\n", + "\tat org.apache.commons.logging.impl.Log4JLogger.(Log4JLogger.java:108)\n", + "\tat 
sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)\n", + "\tat sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)\n", + "\tat sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)\n", + "\tat java.lang.reflect.Constructor.newInstance(Constructor.java:423)\n", + "\tat org.apache.commons.logging.impl.LogFactoryImpl.createLogFromClass(LogFactoryImpl.java:1025)\n", + "\tat org.apache.commons.logging.impl.LogFactoryImpl.discoverLogImplementation(LogFactoryImpl.java:844)\n", + "\tat org.apache.commons.logging.impl.LogFactoryImpl.newInstance(LogFactoryImpl.java:541)\n", + "\tat org.apache.commons.logging.impl.LogFactoryImpl.getInstance(LogFactoryImpl.java:292)\n", + "\tat org.apache.commons.logging.impl.LogFactoryImpl.getInstance(LogFactoryImpl.java:269)\n", + "\tat org.apache.commons.logging.LogFactory.getLog(LogFactory.java:657)\n", + "\tat org.apache.hadoop.fs.FileSystem.(FileSystem.java:139)\n", + "\n", + "2024-02-21 12:14:03,451 main ERROR Unable to invoke factory method in class org.apache.logging.log4j.core.appender.RollingRandomAccessFileAppender for element RollingRandomAccessFile: java.lang.IllegalStateException: No factory method found for class org.apache.logging.log4j.core.appender.RollingRandomAccessFileAppender java.lang.IllegalStateException: No factory method found for class org.apache.logging.log4j.core.appender.RollingRandomAccessFileAppender\n", + "\tat org.apache.logging.log4j.core.config.plugins.util.PluginBuilder.findFactoryMethod(PluginBuilder.java:260)\n", + "\tat org.apache.logging.log4j.core.config.plugins.util.PluginBuilder.build(PluginBuilder.java:136)\n", + "\tat org.apache.logging.log4j.core.config.AbstractConfiguration.createPluginObject(AbstractConfiguration.java:1138)\n", + "\tat org.apache.logging.log4j.core.config.AbstractConfiguration.createConfiguration(AbstractConfiguration.java:1063)\n", + "\tat 
org.apache.logging.log4j.core.config.AbstractConfiguration.createConfiguration(AbstractConfiguration.java:1055)\n", + "\tat org.apache.logging.log4j.core.config.AbstractConfiguration.doConfigure(AbstractConfiguration.java:664)\n", + "\tat org.apache.logging.log4j.core.config.AbstractConfiguration.initialize(AbstractConfiguration.java:258)\n", + "\tat org.apache.logging.log4j.core.config.AbstractConfiguration.start(AbstractConfiguration.java:304)\n", + "\tat org.apache.logging.log4j.core.LoggerContext.setConfiguration(LoggerContext.java:621)\n", + "\tat org.apache.logging.log4j.core.LoggerContext.reconfigure(LoggerContext.java:694)\n", + "\tat org.apache.logging.log4j.core.LoggerContext.reconfigure(LoggerContext.java:711)\n", + "\tat org.apache.logging.log4j.core.LoggerContext.start(LoggerContext.java:253)\n", + "\tat org.apache.logging.log4j.core.impl.Log4jContextFactory.getContext(Log4jContextFactory.java:155)\n", + "\tat org.apache.logging.log4j.core.impl.Log4jContextFactory.getContext(Log4jContextFactory.java:47)\n", + "\tat org.apache.logging.log4j.LogManager.getContext(LogManager.java:309)\n", + "\tat org.apache.log4j.Hierarchy$PrivateLogManager.getContext(Hierarchy.java:86)\n", + "\tat org.apache.log4j.Hierarchy.getContext(Hierarchy.java:99)\n", + "\tat org.apache.log4j.Category.(Category.java:188)\n", + "\tat org.apache.log4j.Logger.(Logger.java:57)\n", + "\tat org.apache.log4j.spi.RootLogger.(RootLogger.java:39)\n", + "\tat org.apache.log4j.LogManager.(LogManager.java:72)\n", + "\tat org.apache.log4j.Logger.getLogger(Logger.java:40)\n", + "\tat org.apache.commons.logging.impl.Log4JLogger.getLogger(Log4JLogger.java:262)\n", + "\tat org.apache.commons.logging.impl.Log4JLogger.(Log4JLogger.java:108)\n", + "\tat sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)\n", + "\tat sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)\n", + "\tat 
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)\n", + "\tat java.lang.reflect.Constructor.newInstance(Constructor.java:423)\n", + "\tat org.apache.commons.logging.impl.LogFactoryImpl.createLogFromClass(LogFactoryImpl.java:1025)\n", + "\tat org.apache.commons.logging.impl.LogFactoryImpl.discoverLogImplementation(LogFactoryImpl.java:844)\n", + "\tat org.apache.commons.logging.impl.LogFactoryImpl.newInstance(LogFactoryImpl.java:541)\n", + "\tat org.apache.commons.logging.impl.LogFactoryImpl.getInstance(LogFactoryImpl.java:292)\n", + "\tat org.apache.commons.logging.impl.LogFactoryImpl.getInstance(LogFactoryImpl.java:269)\n", + "\tat org.apache.commons.logging.LogFactory.getLog(LogFactory.java:657)\n", + "\tat org.apache.hadoop.fs.FileSystem.(FileSystem.java:139)\n", + "\n", + "2024-02-21 12:14:03,452 main ERROR Null object returned for RollingRandomAccessFile in Appenders.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "SLF4J: Class path contains multiple SLF4J bindings.\n", + "SLF4J: Found binding in [jar:file:/srv/hops/hadoop-3.2.0.12-EE-RC0/share/hadoop/common/lib/log4j-slf4j-impl-2.19.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]\n", + "SLF4J: Found binding in [jar:file:/srv/hops/hadoop-3.2.0.12-EE-RC0/share/hadoop/hdfs/lib/log4j-slf4j-impl-2.19.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]\n", + "SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.\n", + "SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading model artifact (0 dirs, 6 files)... 
DONE\r" + ] + } + ], "source": [ - "# Retrieving the 'air_quality_xgboost_model' from the model registry\n", + "# Retrieve the 'air_quality_xgboost_model' from the model registry\n", "retrieved_model = mr.get_model(\n", " name=\"air_quality_xgboost_model\",\n", " version=1,\n", ")\n", "\n", - "# Downloading the saved model artifacts to a local directory\n", + "# Download the saved model artifacts to a local directory\n", "saved_model_dir = retrieved_model.download()" ] }, { "cell_type": "code", - "execution_count": null, - "id": "3812f78d", + "execution_count": 6, + "id": "08d3163c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
+       "             colsample_bylevel=None, colsample_bynode=None,\n",
+       "             colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
+       "             enable_categorical=False, eval_metric=None, feature_types=None,\n",
+       "             gamma=None, grow_policy=None, importance_type=None,\n",
+       "             interaction_constraints=None, learning_rate=None, max_bin=None,\n",
+       "             max_cat_threshold=None, max_cat_to_onehot=None,\n",
+       "             max_delta_step=None, max_depth=None, max_leaves=None,\n",
+       "             min_child_weight=None, missing=nan, monotone_constraints=None,\n",
+       "             multi_strategy=None, n_estimators=None, n_jobs=None,\n",
+       "             num_parallel_tree=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "XGBRegressor(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=None, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=None, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=None, n_jobs=None,\n", + " num_parallel_tree=None, random_state=None, ...)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Loading the XGBoost regressor model and label encoder from the saved model directory\n", + "# Load the XGBoost regressor model and label encoder from the saved model directory\n", "retrieved_xgboost_model = joblib.load(saved_model_dir + \"/xgboost_regressor.pkl\")\n", "retrieved_encoder = joblib.load(saved_model_dir + \"/label_encoder.pkl\")\n", "\n", - "# Displaying the retrieved XGBoost regressor model\n", + "# Display the retrieved XGBoost regressor model\n", "retrieved_xgboost_model" ] }, { "cell_type": "markdown", - "id": "9a762442", + "id": "5cebaf44", "metadata": {}, "source": [ "## โœจ Load Batch Data of last days\n", @@ -149,38 +370,59 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "4bd49291", + "execution_count": 7, + "id": "80fe3f50", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'2024-01-22'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Getting the current date\n", + "# Get the current date\n", "today = datetime.date.today()\n", "\n", - "# Calculating a date threshold 30 days ago from the current date\n", + "# 
Calculate a date threshold 30 days ago from the current date\n", "date_threshold = today - datetime.timedelta(days=30)\n", "\n", - "# Converting the date threshold to a string format\n", + "# Convert the date threshold to a string format\n", "str(date_threshold)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "3990e55f", + "execution_count": 8, + "id": "a12b7a70", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Finished: Reading data from Hopsworks, using ArrowFlight (7.95s) \n" + ] + } + ], "source": [ - "# Initializing batch scoring\n", + "# Initialize batch scoring\n", "feature_view.init_batch_scoring(1)\n", "\n", - "# Retrieving batch data from the feature view with a start time set to the date threshold\n", - "batch_data = feature_view.get_batch_data(start_time=date_threshold)" + "# Retrieve batch data from the feature view with a start time set to the date threshold\n", + "batch_data = feature_view.get_batch_data(\n", + " start_time=date_threshold,\n", + ")" ] }, { "cell_type": "markdown", - "id": "36f82c4a", + "id": "3ad3a21b", "metadata": {}, "source": [ "### ๐Ÿค– Making the predictions" @@ -188,44 +430,219 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "a10ff736", + "execution_count": 12, + "id": "c503ca0c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pm_2_5_previous_1_daypm_2_5_previous_2_daypm_2_5_previous_3_daypm_2_5_previous_4_daypm_2_5_previous_5_daypm_2_5_previous_6_daypm_2_5_previous_7_daymean_7_daysmean_14_daysmean_28_days...temperature_maxtemperature_minprecipitation_sumrain_sumsnowfall_sumprecipitation_hourswind_speed_maxwind_gusts_maxwind_direction_dominantcity_name_encoded
07.77.46.822.98.611.18.510.4285719.1500009.610714...8.8-3.90.00.00.00.015.132.41083
113.911.915.219.114.810.66.413.1285719.60000011.667857...7.92.50.00.00.00.013.019.48331
22.75.16.93.75.17.610.55.9428575.3071435.285714...10.77.51.91.90.04.018.539.222537
\n", + "

3 rows ร— 38 columns

\n", + "
" + ], + "text/plain": [ + " pm_2_5_previous_1_day pm_2_5_previous_2_day pm_2_5_previous_3_day \\\n", + "0 7.7 7.4 6.8 \n", + "1 13.9 11.9 15.2 \n", + "2 2.7 5.1 6.9 \n", + "\n", + " pm_2_5_previous_4_day pm_2_5_previous_5_day pm_2_5_previous_6_day \\\n", + "0 22.9 8.6 11.1 \n", + "1 19.1 14.8 10.6 \n", + "2 3.7 5.1 7.6 \n", + "\n", + " pm_2_5_previous_7_day mean_7_days mean_14_days mean_28_days ... \\\n", + "0 8.5 10.428571 9.150000 9.610714 ... \n", + "1 6.4 13.128571 9.600000 11.667857 ... \n", + "2 10.5 5.942857 5.307143 5.285714 ... \n", + "\n", + " temperature_max temperature_min precipitation_sum rain_sum \\\n", + "0 8.8 -3.9 0.0 0.0 \n", + "1 7.9 2.5 0.0 0.0 \n", + "2 10.7 7.5 1.9 1.9 \n", + "\n", + " snowfall_sum precipitation_hours wind_speed_max wind_gusts_max \\\n", + "0 0.0 0.0 15.1 32.4 \n", + "1 0.0 0.0 13.0 19.4 \n", + "2 0.0 4.0 18.5 39.2 \n", + "\n", + " wind_direction_dominant city_name_encoded \n", + "0 108 3 \n", + "1 83 31 \n", + "2 225 37 \n", + "\n", + "[3 rows x 38 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Transforming the 'city_name' column in the batch data using the retrieved label encoder\n", + "# Transform the 'city_name' column in the batch data using the retrieved label encoder\n", "encoded = retrieved_encoder.transform(batch_data['city_name'])\n", "\n", - "# Concatenating the label-encoded 'city_name' with the original batch data\n", + "# Concatenate the label-encoded 'city_name' with the original batch data\n", "X_batch = pd.concat([batch_data, pd.DataFrame(encoded)], axis=1)\n", "\n", - "# Dropping unnecessary columns ('date', 'city_name', 'unix_time') from the batch data\n", + "# Drop unnecessary columns ('date', 'city_name', 'unix_time') from the batch data\n", "X_batch = X_batch.drop(columns=['date', 'city_name', 'unix_time'])\n", "\n", - "# Renaming the newly added column with label-encoded city names to 'city_name_encoded'\n", + "# Rename the 
newly added column with label-encoded city names to 'city_name_encoded'\n", "X_batch = X_batch.rename(columns={0: 'city_name_encoded'})\n", "\n", - "# Extracting the target variable 'pm2_5' from the batch data\n", - "y_batch = X_batch.pop('pm2_5')" + "# Extract the target variable 'pm2_5' from the batch data\n", + "y_batch = X_batch.pop('pm2_5')\n", + "\n", + "X_batch.head(3)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "b597ea2b", + "execution_count": 10, + "id": "776d5f3e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([11.763566 , 12.726435 , 3.3570244, 5.7458963, 5.986775 ],\n", + " dtype=float32)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Making predictions on the batch data using the retrieved XGBoost regressor model\n", + "# Make predictions on the batch data using the retrieved XGBoost regressor model\n", "predictions = retrieved_xgboost_model.predict(X_batch)\n", "\n", - "# Displaying the first 5 predictions\n", + "# Display the first 5 predictions\n", "predictions[:5]" ] }, { "cell_type": "markdown", - "id": "80e2b142", + "id": "d2fc393e", "metadata": {}, "source": [ "---\n", @@ -234,17 +651,17 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "c208069a", + "execution_count": 11, + "id": "421d3a33", "metadata": {}, "outputs": [], "source": [ - "!python3 -m streamlit run streamlit_app.py" + "# !python3 -m streamlit run streamlit_app.py" ] }, { "cell_type": "markdown", - "id": "c97c7f97", + "id": "e83d324f", "metadata": {}, "source": [ "---\n", @@ -260,7 +677,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -274,7 +691,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.10.11" } }, "nbformat": 4, diff --git 
a/advanced_tutorials/air_quality/5_function_calling.ipynb b/advanced_tutorials/air_quality/5_function_calling.ipynb new file mode 100644 index 00000000..35b0d26e --- /dev/null +++ b/advanced_tutorials/air_quality/5_function_calling.ipynb @@ -0,0 +1,1309 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ee392cdb", + "metadata": {}, + "source": [ + "## 📝 Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "c89ab053", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", + "\u001b[0m" + ] + } + ], + "source": [ + "!pip install -r requirements.txt --quiet" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "6ef71c85", + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "import transformers\n", + "import torch\n", + "\n", + "import joblib\n", + "import inspect\n", + "import json\n", + "from typing import get_type_hints\n", + "import sys" + ] + }, + { + "cell_type": "markdown", + "id": "f6a870a7", + "metadata": {}, + "source": [ + "## 🔮 Connect to Hopsworks Feature Store " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d4b907de", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connected. Call `.close()` to terminate connection gracefully.\n", + "\n", + "Logged in to project, explore it here https://snurran.hops.works/p/5242\n", + "Connected. 
Call `.close()` to terminate connection gracefully.\n" + ] + } + ], + "source": [ + "import hopsworks\n", + "\n", + "project = hopsworks.login()\n", + "\n", + "fs = project.get_feature_store() " + ] + }, + { + "cell_type": "markdown", + "id": "abc6094c", + "metadata": {}, + "source": [ + "## ⚙️ Feature View Retrieval" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4fb6ceee", + "metadata": {}, + "outputs": [], + "source": [ + "# Retrieve the 'air_quality_fv' feature view\n", + "feature_view = fs.get_feature_view(\n", + " name='air_quality_fv',\n", + " version=1,\n", + ")\n", + "\n", + "# Initialize batch scoring\n", + "feature_view.init_batch_scoring(1)" + ] + }, + { + "cell_type": "markdown", + "id": "afaa9b97", + "metadata": {}, + "source": [ + "## 🪝 Retrieve model from Model Registry" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "dd7b9905", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connected. Call `.close()` to terminate connection gracefully.\n", + "Downloading model artifact (0 dirs, 6 files)... DONE\r" + ] + } + ], + "source": [ + "# Retrieve the model registry\n", + "mr = project.get_model_registry()\n", + "\n", + "# Retrieve the 'air_quality_xgboost_model' from the model registry\n", + "retrieved_model = mr.get_model(\n", + " name=\"air_quality_xgboost_model\",\n", + " version=1,\n", + ")\n", + "\n", + "# Download the saved model artifacts to a local directory\n", + "saved_model_dir = retrieved_model.download()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "20cb0255", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
+       "             colsample_bylevel=None, colsample_bynode=None,\n",
+       "             colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
+       "             enable_categorical=False, eval_metric=None, feature_types=None,\n",
+       "             gamma=None, grow_policy=None, importance_type=None,\n",
+       "             interaction_constraints=None, learning_rate=None, max_bin=None,\n",
+       "             max_cat_threshold=None, max_cat_to_onehot=None,\n",
+       "             max_delta_step=None, max_depth=None, max_leaves=None,\n",
+       "             min_child_weight=None, missing=nan, monotone_constraints=None,\n",
+       "             multi_strategy=None, n_estimators=None, n_jobs=None,\n",
+       "             num_parallel_tree=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "XGBRegressor(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=None, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=None, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=None, n_jobs=None,\n", + " num_parallel_tree=None, random_state=None, ...)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load the XGBoost regressor model and label encoder from the saved model directory\n", + "model_air_quality = joblib.load(saved_model_dir + \"/xgboost_regressor.pkl\")\n", + "encoder = joblib.load(saved_model_dir + \"/label_encoder.pkl\")\n", + "\n", + "# Display the retrieved XGBoost regressor model\n", + "model_air_quality" + ] + }, + { + "cell_type": "markdown", + "id": "656f0db7", + "metadata": {}, + "source": [ + "## 🗄️ Functions\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "98381fc5", + "metadata": {}, + "outputs": [], + "source": [ + "def transform_data(data, encoder):\n", + " \"\"\"\n", + " Transform the input data by encoding the 'city_name' column and dropping unnecessary columns.\n", + " \n", + " Args:\n", + " - data (DataFrame): Input data to be transformed.\n", + " - encoder (LabelEncoder): Label encoder object to encode 'city_name'.\n", + " \n", + " Returns:\n", + " - data_transformed (DataFrame): Transformed data with 'city_name_encoded' and dropped columns.\n", + " \"\"\"\n", + " \n", + " # Create a copy of the input data to avoid modifying the original data\n", + " data_transformed = 
data.copy()\n", + " \n", + " # Transform the 'city_name' column in the batch data using the retrieved label encoder\n", + " data_transformed['city_name_encoded'] = encoder.transform(data_transformed['city_name'])\n", + " \n", + " # Drop unnecessary columns from the batch data\n", + " data_transformed = data_transformed.drop(columns=['unix_time', 'pm2_5', 'city_name', 'date'])\n", + "\n", + " return data_transformed" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "fcfecdfe", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from typing import Any, Dict, List\n", + "\n", + "def get_data_for_date(date: str, city_name: str, feature_view, model) -> pd.DataFrame:\n", + " \"\"\"\n", + " Retrieve data for a specific date and city from a feature view.\n", + "\n", + " Args:\n", + " date (str): The date in the format \"%Y-%m-%d\".\n", + " city_name (str): The name of the city to retrieve data for.\n", + " feature_view: The feature view object.\n", + " model: The machine learning model used for prediction.\n", + "\n", + " Returns:\n", + " pd.DataFrame: A DataFrame containing data for the specified date and city.\n", + " \"\"\"\n", + " # Convert date string to datetime object\n", + " date_datetime = datetime.datetime.strptime(date, \"%Y-%m-%d\").date()\n", + " \n", + " # Retrieve batch data for the specified date range\n", + " batch_data = feature_view.get_batch_data(\n", + " start_time=date_datetime,\n", + " end_time=date_datetime + datetime.timedelta(days=1),\n", + " )\n", + " \n", + " # Filter batch data for the specified city\n", + " batch_data_filtered = batch_data[batch_data['city_name'] == city_name]\n", + " \n", + " return batch_data_filtered[['date', 'pm2_5']].sort_values('date').reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "abd770b1", + "metadata": {}, + "outputs": [], + "source": [ + "def get_data_in_date_range(date_start: str, date_end: str, city_name: str, feature_view, 
model) -> pd.DataFrame:\n", + " \"\"\"\n", + " Retrieve data for a specific date range and city from a feature view.\n", + "\n", + " Args:\n", + " date_start (str): The start date in the format \"%Y-%m-%d\".\n", + " date_end (str): The end date in the format \"%Y-%m-%d\".\n", + " city_name (str): The name of the city to retrieve data for.\n", + " feature_view: The feature view object.\n", + " model: The machine learning model used for prediction.\n", + "\n", + " Returns:\n", + " pd.DataFrame: A DataFrame containing data for the specified date range and city.\n", + " \"\"\"\n", + " # Convert date strings to datetime objects\n", + " date_start_dt = datetime.datetime.strptime(date_start, \"%Y-%m-%d\").date()\n", + " date_end_dt = datetime.datetime.strptime(date_end, \"%Y-%m-%d\").date()\n", + " \n", + " # Retrieve batch data for the specified date range\n", + " batch_data = feature_view.get_batch_data(\n", + " start_time=date_start_dt,\n", + " end_time=date_end_dt + datetime.timedelta(days=1),\n", + " )\n", + "\n", + " # Filter batch data for the specified city\n", + " batch_data_filtered = batch_data[batch_data['city_name'] == city_name]\n", + " \n", + " return batch_data_filtered[['date', 'pm2_5']].sort_values('date').reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "73f9937d", + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "import pandas as pd\n", + "\n", + "def get_future_data(date: str, city_name: str, feature_view, model) -> pd.DataFrame:\n", + " \"\"\"\n", + " Predicts future PM2.5 data for a specified date and city using a given feature view and model.\n", + "\n", + " Args:\n", + " date (str): The target future date in the format 'YYYY-MM-DD'.\n", + " city_name (str): The name of the city for which the prediction is made.\n", + " feature_view: The feature view used to retrieve batch data.\n", + " model: The machine learning model used for prediction.\n", + "\n", + " Returns:\n", + " 
pd.DataFrame: A DataFrame containing predicted PM2.5 values for each day starting from the target date.\n", + "\n", + " \"\"\"\n", + " # Get today's date\n", + " today = datetime.date.today()\n", + "\n", + " # Convert the target date string to a datetime object\n", + " date_in_future = datetime.datetime.strptime(date, \"%Y-%m-%d\").date()\n", + "\n", + " # Calculate the difference in days between today and the target date\n", + " difference_in_days = (date_in_future - today).days\n", + "\n", + " # Retrieve batch data for the specified date range\n", + " batch_data = feature_view.get_batch_data(\n", + " start_time=today,\n", + " end_time=today + datetime.timedelta(days=1),\n", + " )\n", + " \n", + " # Filter batch data for the specified city\n", + " batch_data_filtered = batch_data[batch_data['city_name'] == city_name]\n", + " \n", + " # Transform batch data\n", + " batch_data_transformed = transform_data(batch_data_filtered, encoder)\n", + " \n", + " # Initialize a DataFrame to store predicted PM2.5 values\n", + " predicted_pm2_5_df = pd.DataFrame({\n", + " 'date': [today.strftime(\"%Y-%m-%d\")], \n", + " 'pm2_5': batch_data_filtered['pm2_5'].values[0],\n", + " })\n", + "\n", + " # Iterate through each day starting from tomorrow up to the target date\n", + " for day_number in range(1, difference_in_days + 1):\n", + "\n", + " # Calculate the date for the current future day\n", + " date_future_day = (today + datetime.timedelta(days=day_number)).strftime(\"%Y-%m-%d\")\n", + " \n", + " # Predict PM2.5 for the current day\n", + " predicted_pm2_5 = model.predict(batch_data_transformed)\n", + "\n", + " # Update previous day PM2.5 values in the batch data for the next prediction\n", + " batch_data_transformed['pm_2_5_previous_7_day'] = batch_data_transformed['pm_2_5_previous_6_day']\n", + " batch_data_transformed['pm_2_5_previous_6_day'] = batch_data_transformed['pm_2_5_previous_5_day']\n", + " batch_data_transformed['pm_2_5_previous_5_day'] = 
batch_data_transformed['pm_2_5_previous_4_day']\n", + " batch_data_transformed['pm_2_5_previous_4_day'] = batch_data_transformed['pm_2_5_previous_3_day']\n", + " batch_data_transformed['pm_2_5_previous_3_day'] = batch_data_transformed['pm_2_5_previous_2_day']\n", + " batch_data_transformed['pm_2_5_previous_2_day'] = batch_data_transformed['pm_2_5_previous_1_day']\n", + " batch_data_transformed['pm_2_5_previous_1_day'] = predicted_pm2_5\n", + " \n", + " # Append the predicted PM2.5 value for the current day to the DataFrame\n", + " predicted_pm2_5_df = predicted_pm2_5_df._append({\n", + " 'date': date_future_day, \n", + " 'pm2_5': predicted_pm2_5[0],\n", + " }, ignore_index=True)\n", + " \n", + " return predicted_pm2_5_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "9dd29714", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Finished: Reading data from Hopsworks, using ArrowFlight (7.54s) \n", + "โ›ณ๏ธ 2024-01-10\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datepm2_5
02024-01-1020.3
\n", + "
" + ], + "text/plain": [ + " date pm2_5\n", + "0 2024-01-10 20.3" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_for_date = get_data_for_date(\n", + " '2024-01-10', \n", + " 'Paris',\n", + " feature_view,\n", + " model_air_quality,\n", + ")\n", + "print(f'โ›ณ๏ธ {data_for_date.date.max()}')\n", + "data_for_date.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "4b353d52", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Finished: Reading data from Hopsworks, using ArrowFlight (7.52s) \n", + "โ›ณ๏ธ ('2024-01-10', '2024-01-20')\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datepm2_5
02024-01-1011.7
12024-01-1115.2
22024-01-1212.1
32024-01-135.4
42024-01-143.8
\n", + "
" + ], + "text/plain": [ + " date pm2_5\n", + "0 2024-01-10 11.7\n", + "1 2024-01-11 15.2\n", + "2 2024-01-12 12.1\n", + "3 2024-01-13 5.4\n", + "4 2024-01-14 3.8" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_in_range = get_data_in_date_range(\n", + " '2024-01-10', \n", + " '2024-01-20', \n", + " 'Amsterdam',\n", + " feature_view,\n", + " model_air_quality,\n", + ")\n", + "print(f'โ›ณ๏ธ {data_in_range.date.min(), data_in_range.date.max()}')\n", + "data_in_range.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "5e896081", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Finished: Reading data from Hopsworks, using ArrowFlight (7.50s) \n", + "โ›ณ๏ธ ('2024-02-23', '2024-02-25')\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datepm2_5
02024-02-238.100000
12024-02-247.449322
22024-02-258.308480
\n", + "
" + ], + "text/plain": [ + " date pm2_5\n", + "0 2024-02-23 8.100000\n", + "1 2024-02-24 7.449322\n", + "2 2024-02-25 8.308480" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_future = get_future_data(\n", + " '2024-02-25', \n", + " 'London',\n", + " feature_view,\n", + " model_air_quality,\n", + ")\n", + "print(f'โ›ณ๏ธ {data_future.date.min(), data_future.date.max()}')\n", + "data_future.head()" + ] + }, + { + "cell_type": "markdown", + "id": "2cea7055", + "metadata": {}, + "source": [ + "## ⬇️ Model Loading" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "f817eda3", + "metadata": {}, + "outputs": [], + "source": [ + "def load_model(model_name: str):\n", + " tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)\n", + "\n", + " with torch.device(\"cuda:0\"):\n", + " model = transformers.AutoModelForCausalLM.from_pretrained(\n", + " model_name, \n", + " torch_dtype=torch.bfloat16,\n", + " ).eval()\n", + " \n", + " return tokenizer, model" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "d21b4f40", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "cdd409643efb4d7cbe4022f55d06626c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/2 [00:00⚙️ Tools \n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "12607b0c", + "metadata": {}, + "outputs": [], + "source": [ + "def get_type_name(t):\n", + " name = str(t)\n", + " if \"list\" in name or \"dict\" in name:\n", + " return name\n", + " 
else:\n", + " return t.__name__\n", + "\n", + "def serialize_function_to_json(func):\n", + " signature = inspect.signature(func)\n", + " type_hints = get_type_hints(func)\n", + "\n", + " function_info = {\n", + " \"name\": func.__name__,\n", + " \"description\": func.__doc__,\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {}\n", + " },\n", + " \"returns\": type_hints.get('return', 'void').__name__\n", + " }\n", + "\n", + " for name, _ in signature.parameters.items():\n", + " param_type = get_type_name(type_hints.get(name, type(None)))\n", + " function_info[\"parameters\"][\"properties\"][name] = {\"type\": param_type}\n", + "\n", + " return json.dumps(function_info, indent=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "8fb4bad0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"name\": \"get_data_in_date_range\",\n", + " \"description\": \"\\n Retrieve data for a specific date range and city from a feature view.\\n\\n Args:\\n date_start (str): The start date in the format \\\"%Y-%m-%d\\\".\\n date_end (str): The end date in the format \\\"%Y-%m-%d\\\".\\n city_name (str): The name of the city to retrieve data for.\\n feature_view: The feature view object.\\n model: The machine learning model used for prediction.\\n\\n Returns:\\n pd.DataFrame: A DataFrame containing data for the specified date range and city.\\n \",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"date_start\": {\n", + " \"type\": \"str\"\n", + " },\n", + " \"date_end\": {\n", + " \"type\": \"str\"\n", + " },\n", + " \"city_name\": {\n", + " \"type\": \"str\"\n", + " },\n", + " \"feature_view\": {\n", + " \"type\": \"NoneType\"\n", + " },\n", + " \"model\": {\n", + " \"type\": \"NoneType\"\n", + " }\n", + " }\n", + " },\n", + " \"returns\": \"DataFrame\"\n", + "}\n" + ] + } + ], + "source": [ + 
"print(serialize_function_to_json(get_data_in_date_range))" + ] + }, + { + "cell_type": "markdown", + "id": "bc4cfe49", + "metadata": {}, + "source": [ + "## 🔮 Function Matching \n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "a47290ef", + "metadata": {}, + "outputs": [], + "source": [ + "import xml.etree.ElementTree as ET\n", + "import re\n", + "\n", + "def extract_function_calls(completion):\n", + " completion = completion.strip()\n", + " pattern = r\"((.*?))\"\n", + " match = re.search(pattern, completion, re.DOTALL)\n", + " if not match:\n", + " return None\n", + " \n", + " multiplefn = match.group(1)\n", + " root = ET.fromstring(multiplefn)\n", + " functions = root.findall(\"functioncall\")\n", + " return [json.loads(fn.text) for fn in functions]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "501cb2b5", + "metadata": {}, + "outputs": [], + "source": [ + "def generate_hermes(prompt, model_llm, tokenizer):\n", + " fn = \"\"\"{\"name\": \"function_name\", \"arguments\": {\"arg_1\": \"value_1\", \"arg_2\": value_2, ...}}\"\"\"\n", + " prompt = f\"\"\"<|im_start|>system\n", + "You are a helpful assistant with access to the following functions:\n", + "\n", + "{serialize_function_to_json(get_data_for_date)}\n", + "\n", + "{serialize_function_to_json(get_data_in_date_range)}\n", + "\n", + "{serialize_function_to_json(get_future_data)}\n", + "\n", + "You need to choose what function to use and retrieve paramenters for this function from the user input.\n", + "IMPORTANT: Today is {datetime.date.today().strftime(\"%A\")}, {datetime.date.today()}.\n", + "IMPORTANT: If the user query contains 'will', it is very likely that you will need to use the get_future_data function\n", + "NOTE: Ignore the Feature View and Model parameters.\n", + "NOTE: Dates should be provided in the format YYYY-MM-DD.\n", + "\n", + "To use these functions respond with:\n", + "\n", + " {fn} \n", + " {fn} \n", + " ...\n", + "\n", + "\n", + "Edge 
cases you must handle:\n", + "- If there are no functions that match the user request, you will respond politely that you cannot help.<|im_end|>\n", + "<|im_start|>user\n", + "{prompt}<|im_end|>\n", + "<|im_start|>assistant\"\"\"\n", + " \n", + " tokens = tokenizer(prompt, return_tensors=\"pt\").to(model_llm.device)\n", + " input_size = tokens.input_ids.numel()\n", + " with torch.inference_mode():\n", + " generated_tokens = model_llm.generate(\n", + " **tokens, \n", + " use_cache=True, \n", + " do_sample=True, \n", + " temperature=0.2, \n", + " top_p=1.0, \n", + " top_k=0, \n", + " max_new_tokens=512, \n", + " eos_token_id=tokenizer.eos_token_id, \n", + " pad_token_id=tokenizer.eos_token_id,\n", + " )\n", + "\n", + " return tokenizer.decode(\n", + " generated_tokens.squeeze()[input_size:], \n", + " skip_special_tokens=True,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "d6c0c4ba", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "I am a helpful assistant designed to retrieve data related to air pollution. 
How can I help you with that?\n", + "====================================================================================================\n", + "[{'name': 'get_data_for_date', 'arguments': {'date': '2024-02-23', 'city_name': 'Paris'}}]\n", + "====================================================================================================\n", + "[{'name': 'get_data_for_date', 'arguments': {'date': '2024-02-22', 'city_name': 'New York'}}]\n", + "====================================================================================================\n", + "[{'name': 'get_data_in_date_range', 'arguments': {'date_start': '2024-01-10', 'date_end': '2024-01-14', 'city_name': 'London'}}]\n", + "====================================================================================================\n", + "[{'name': 'get_future_data', 'arguments': {'date': '2024-02-26', 'city_name': 'London'}}]\n", + "====================================================================================================\n", + "[{'name': 'get_future_data', 'arguments': {'date': '2024-02-25', 'city_name': 'London'}}]\n", + "====================================================================================================\n", + "[{'name': 'get_future_data', 'arguments': {'date': '2024-03-01', 'city_name': 'London'}}]\n", + "====================================================================================================\n", + "[{'name': 'get_future_data', 'arguments': {'date': '2024-03-01', 'city_name': 'Amsterdam'}}]\n", + "====================================================================================================\n" + ] + } + ], + "source": [ + "prompts = [\n", + " \"How are you?\",\n", + " \"What's the air quality today in Paris?\",\n", + " \"What was the air quality yesterday in New York?\",\n", + " \"What was the air quality from 2024-01-10 till 2024-01-14 in London?\",\n", + " \"What will the air quality be like in London in 2024-02-26?\",\n", + " \"What will the air quality be 
like in London the day after tomorrow?\",\n", + " \"What will the air quality be like in London next Friday?\",\n", + " \"What will the air quality be like on March 1 in Amsterdam?\",\n", + "]\n", + "\n", + "for prompt in prompts:\n", + " completion = generate_hermes(prompt, model_llm, tokenizer)\n", + " functions = extract_function_calls(completion)\n", + "\n", + " if functions:\n", + " print(functions)\n", + " else:\n", + " print(completion.strip())\n", + " print(\"=\"*100)" + ] + }, + { + "cell_type": "markdown", + "id": "fcd2d41b", + "metadata": {}, + "source": [ + "## 🚀 Function Calling" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "a8318c11", + "metadata": {}, + "outputs": [], + "source": [ + "def invoke_function(function, feature_view, model):\n", + " # Extract function name and arguments from input_data\n", + " function_name = function['name']\n", + " arguments = function['arguments']\n", + " \n", + " # Using Python's getattr function to dynamically call the function by its name and passing the arguments\n", + " function_output = getattr(sys.modules[__name__], function_name)(**arguments, feature_view=feature_view, model=model)\n", + " \n", + " # Round the 'pm2_5' value to 2 decimal places\n", + " function_output['pm2_5'] = function_output['pm2_5'].apply(round, ndigits=2)\n", + " return function_output" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "b1921e19", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'name': 'get_future_data',\n", + " 'arguments': {'date': '2024-03-01', 'city_name': 'Amsterdam'}}]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "functions" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "ff8cfac2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Finished: Reading data from Hopsworks, using ArrowFlight (7.39s) \n" + ] + }, + { + 
"data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datepm2_5
02024-02-236.70
12024-02-246.40
22024-02-256.32
32024-02-266.58
42024-02-276.58
\n", + "
" + ], + "text/plain": [ + " date pm2_5\n", + "0 2024-02-23 6.70\n", + "1 2024-02-24 6.40\n", + "2 2024-02-25 6.32\n", + "3 2024-02-26 6.58\n", + "4 2024-02-27 6.58" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_batch = invoke_function(functions[0], feature_view, model_air_quality)\n", + "data_batch.head()" + ] + }, + { + "cell_type": "markdown", + "id": "bbbb16e4", + "metadata": {}, + "source": [ + "## 🧬 Context Retrieval" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "5fb16d4d", + "metadata": {}, + "outputs": [], + "source": [ + "def get_context_data(user_query, model_llm, tokenizer, model_air_quality, encoder):\n", + " completion = generate_hermes(user_query, model_llm, tokenizer)\n", + " \n", + " functions = extract_function_calls(completion)\n", + " print(functions)\n", + " \n", + " if functions:\n", + " data = invoke_function(functions[0], feature_view, model_air_quality)\n", + " return '\\n'.join([f'Date: {row[1][\"date\"]}; Air Quality: {row[1][\"pm2_5\"]}' for row in data.iterrows()])\n", + "\n", + " return completion" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "71bd91c1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'name': 'get_data_in_date_range', 'arguments': {'date_start': '2024-01-10', 'date_end': '2024-01-14', 'city_name': 'New York'}}]\n", + "Finished: Reading data from Hopsworks, using ArrowFlight (7.53s) \n", + "Date: 2024-01-10; Air Quality: 7.2\n", + "Date: 2024-01-11; Air Quality: 5.9\n", + "Date: 2024-01-12; Air Quality: 10.8\n", + "Date: 2024-01-13; Air Quality: 5.9\n", + "Date: 2024-01-14; Air Quality: 5.1\n" + ] + } + ], + "source": [ + "QUESTION1 = \"What was the air quality from 2024-01-10 till 2024-01-14 in New York?\"\n", + "\n", + "data_pred_q1 = get_context_data(QUESTION1, model_llm, tokenizer, model_air_quality, encoder)\n", + "print(data_pred_q1)" + ] + }, 
+ { + "cell_type": "code", + "execution_count": 26, + "id": "7c2c755d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'name': 'get_data_for_date', 'arguments': {'date': '2024-02-22', 'city_name': 'Amsterdam'}}]\n", + "Finished: Reading data from Hopsworks, using ArrowFlight (7.49s) \n", + "Date: 2024-02-22; Air Quality: 5.2\n" + ] + } + ], + "source": [ + "QUESTION2 = \"What was the air quality yesterday in Amsterdam?\"\n", + "\n", + "data_pred_q2 = get_context_data(QUESTION2, model_llm, tokenizer, model_air_quality, encoder)\n", + "print(data_pred_q2)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "e3221e42", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'name': 'get_future_data', 'arguments': {'date': '2024-02-27', 'city_name': 'London'}}]\n", + "Finished: Reading data from Hopsworks, using ArrowFlight (7.86s) \n", + "Date: 2024-02-23; Air Quality: 8.1\n", + "Date: 2024-02-24; Air Quality: 7.45\n", + "Date: 2024-02-25; Air Quality: 8.31\n", + "Date: 2024-02-26; Air Quality: 8.57\n", + "Date: 2024-02-27; Air Quality: 8.15\n" + ] + } + ], + "source": [ + "QUESTION3 = \"What will the air quality be like in London in 2024-02-27?\"\n", + "\n", + "data_pred_q3 = get_context_data(QUESTION3, model_llm, tokenizer, model_air_quality, encoder)\n", + "print(data_pred_q3)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "5ed3ec9d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'name': 'get_future_data', 'arguments': {'date': '2024-02-25', 'city_name': 'Chicago'}}]\n", + "Finished: Reading data from Hopsworks, using ArrowFlight (7.63s) \n", + "Date: 2024-02-23; Air Quality: 14.1\n", + "Date: 2024-02-24; Air Quality: 12.87\n", + "Date: 2024-02-25; Air Quality: 9.85\n" + ] + } + ], + "source": [ + "QUESTION4 = \"What will the air quality be like in Chicago the day after 
tomorrow?\"\n", + "\n", + "data_pred_q4 = get_context_data(QUESTION4, model_llm, tokenizer, model_air_quality, encoder)\n", + "print(data_pred_q4)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "39665b4d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'name': 'get_future_data', 'arguments': {'date': '2024-03-01', 'city_name': 'London'}}]\n", + "Finished: Reading data from Hopsworks, using ArrowFlight (7.81s) \n", + "Date: 2024-02-23; Air Quality: 8.1\n", + "Date: 2024-02-24; Air Quality: 7.45\n", + "Date: 2024-02-25; Air Quality: 8.31\n", + "Date: 2024-02-26; Air Quality: 8.57\n", + "Date: 2024-02-27; Air Quality: 8.15\n", + "Date: 2024-02-28; Air Quality: 7.97\n", + "Date: 2024-02-29; Air Quality: 7.97\n", + "Date: 2024-03-01; Air Quality: 8.32\n" + ] + } + ], + "source": [ + "QUESTION5 = \"What will the air quality be like in London next Friday?\"\n", + "\n", + "data_pred_q5 = get_context_data(QUESTION5, model_llm, tokenizer, model_air_quality, encoder)\n", + "print(data_pred_q5)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "5b3d8312", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "None\n", + "\n", + "I am a machine learning model and I don't have feelings, but I am here to help you with your queries. 
How can I assist you today?\n" + ] + } + ], + "source": [ + "QUESTION6 = \"How are you?\"\n", + "\n", + "data_pred_q6 = get_context_data(QUESTION6, model_llm, tokenizer, model_air_quality, encoder)\n", + "print(data_pred_q6)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "d1af9d23", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'name': 'get_future_data', 'arguments': {'date': '2024-03-01', 'city_name': 'Amsterdam'}}]\n", + "Finished: Reading data from Hopsworks, using ArrowFlight (7.76s) \n", + "Date: 2024-02-23; Air Quality: 6.7\n", + "Date: 2024-02-24; Air Quality: 6.4\n", + "Date: 2024-02-25; Air Quality: 6.32\n", + "Date: 2024-02-26; Air Quality: 6.58\n", + "Date: 2024-02-27; Air Quality: 6.58\n", + "Date: 2024-02-28; Air Quality: 6.58\n", + "Date: 2024-02-29; Air Quality: 6.54\n", + "Date: 2024-03-01; Air Quality: 6.58\n" + ] + } + ], + "source": [ + "QUESTION7 = \"What will the air quality be like on March 1 in Amsterdam?\"\n", + "\n", + "data_pred_q7 = get_context_data(QUESTION7, model_llm, tokenizer, model_air_quality, encoder)\n", + "print(data_pred_q7)" + ] + }, + { + "cell_type": "markdown", + "id": "0bf1e6de", + "metadata": {}, + "source": [ + "---" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 5aca7ab0e60f5325badb567463b9eaa38afdd64d Mon Sep 17 00:00:00 2001 From: Maksym Zhytnikov <63515947+Maxxx-zh@users.noreply.github.com> Date: Fri, 8 Mar 2024 13:27:02 +0200 Subject: [PATCH 2/3] AirQuality FunctionCalling Chatbot --- .../1_air_quality_feature_backfill.ipynb | 81 +- 
.../2_air_quality_feature_pipeline.ipynb | 570 ++++---- .../3_air_quality_training_pipeline.ipynb | 83 +- .../4_air_quality_batch_inference.ipynb | 3 +- .../air_quality/5_function_calling.ipynb | 1254 ++++++----------- advanced_tutorials/air_quality/app.py | 100 ++ advanced_tutorials/air_quality/app_voice.py | 39 + .../air_quality/feature_pipeline.py | 158 --- .../air_quality/features/__init__.py | 0 .../air_quality/features/air_quality.py | 6 +- advanced_tutorials/air_quality/functions.py | 392 ------ .../functions/air_quality_data_retrieval.py | 164 +++ .../air_quality/functions/common_functions.py | 25 + .../functions/context_engineering.py | 191 +++ .../air_quality/functions/llm_chain.py | 202 +++ .../functions/parse_air_quality.py | 79 ++ .../air_quality/functions/parse_weather.py | 81 ++ .../air_quality/requirements.txt | 13 +- 18 files changed, 1649 insertions(+), 1792 deletions(-) create mode 100644 advanced_tutorials/air_quality/app.py create mode 100644 advanced_tutorials/air_quality/app_voice.py delete mode 100644 advanced_tutorials/air_quality/feature_pipeline.py delete mode 100644 advanced_tutorials/air_quality/features/__init__.py delete mode 100644 advanced_tutorials/air_quality/functions.py create mode 100644 advanced_tutorials/air_quality/functions/air_quality_data_retrieval.py create mode 100644 advanced_tutorials/air_quality/functions/common_functions.py create mode 100644 advanced_tutorials/air_quality/functions/context_engineering.py create mode 100644 advanced_tutorials/air_quality/functions/llm_chain.py create mode 100644 advanced_tutorials/air_quality/functions/parse_air_quality.py create mode 100644 advanced_tutorials/air_quality/functions/parse_weather.py diff --git a/advanced_tutorials/air_quality/1_air_quality_feature_backfill.ipynb b/advanced_tutorials/air_quality/1_air_quality_feature_backfill.ipynb index 1c4bb853..75f1b875 100644 --- a/advanced_tutorials/air_quality/1_air_quality_feature_backfill.ipynb +++ 
b/advanced_tutorials/air_quality/1_air_quality_feature_backfill.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "3cdebecf", + "id": "32cd155d", "metadata": {}, "source": [ "# **Hopsworks Feature Store** \n", @@ -22,7 +22,7 @@ }, { "cell_type": "markdown", - "id": "0e2fd829", + "id": "ce71c0b2", "metadata": {}, "source": [ "## ๐Ÿ“ Imports" @@ -31,7 +31,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "73f04813", + "id": "f92001bd", "metadata": {}, "outputs": [ { @@ -56,20 +56,17 @@ { "cell_type": "code", "execution_count": 2, - "id": "d46a2162", + "id": "e974d9d5", "metadata": {}, "outputs": [], "source": [ - "import datetime\n", - "import time\n", - "import requests\n", "import json\n", "\n", "import pandas as pd\n", "import folium\n", "\n", "from features import air_quality\n", - "from functions import *\n", + "from functions.common_functions import convert_date_to_unix\n", "\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")" @@ -77,7 +74,7 @@ }, { "cell_type": "markdown", - "id": "d4b00c77", + "id": "88d519dd", "metadata": {}, "source": [ "## ๐ŸŒ Representing the Target cities " @@ -86,7 +83,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "0c4cefac", + "id": "e0f7a26b", "metadata": { "tags": [] }, @@ -103,7 +100,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "d47f6f59", + "id": "f8063796", "metadata": {}, "outputs": [], "source": [ @@ -122,7 +119,7 @@ { "cell_type": "code", "execution_count": null, - "id": "53f1b78d", + "id": "fcde29f7", "metadata": {}, "outputs": [], "source": [ @@ -132,7 +129,7 @@ }, { "cell_type": "markdown", - "id": "970c179a", + "id": "2a2d3674", "metadata": {}, "source": [ "## ๐ŸŒซ Processing Air Quality data" @@ -140,7 +137,7 @@ }, { "cell_type": "markdown", - "id": "bbcf628e", + "id": "b081d3f2", "metadata": {}, "source": [ "### [๐Ÿ‡ช๐Ÿ‡บ EEA](https://discomap.eea.europa.eu/map/fme/AirQualityExport.htm)\n", @@ -150,7 +147,7 @@ { "cell_type": "code", "execution_count": 6, - 
"id": "c4a76e26", + "id": "986686f5", "metadata": { "tags": [] }, @@ -190,7 +187,7 @@ { "cell_type": "code", "execution_count": 12, - "id": "65dff77b", + "id": "be358330", "metadata": {}, "outputs": [ { @@ -278,7 +275,7 @@ }, { "cell_type": "markdown", - "id": "3f9ca2fd", + "id": "f02141bd", "metadata": {}, "source": [ "### [๐Ÿ‡บ๐Ÿ‡ธ USEPA](https://aqs.epa.gov/aqsweb/documents/data_api.html#daily)\n", @@ -290,7 +287,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "26e9d8f1", + "id": "87c439b7", "metadata": { "tags": [] }, @@ -326,7 +323,7 @@ { "cell_type": "code", "execution_count": 16, - "id": "88aeafb6", + "id": "3429aebd", "metadata": { "tags": [] }, @@ -416,7 +413,7 @@ }, { "cell_type": "markdown", - "id": "3d9caa58", + "id": "5ee7b660", "metadata": {}, "source": [ "### ๐Ÿข Processing special city - `Seattle`\n", @@ -427,7 +424,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "13e7c33a", + "id": "f401130e", "metadata": { "tags": [] }, @@ -464,7 +461,7 @@ { "cell_type": "code", "execution_count": 17, - "id": "78ee1447", + "id": "5ac26217", "metadata": { "tags": [] }, @@ -554,7 +551,7 @@ }, { "cell_type": "markdown", - "id": "d58b59fd", + "id": "e23a6e68", "metadata": {}, "source": [ "### ๐ŸŒŸ All together" @@ -563,7 +560,7 @@ { "cell_type": "code", "execution_count": 19, - "id": "a727a333", + "id": "d913087f", "metadata": { "tags": [] }, @@ -665,7 +662,7 @@ }, { "cell_type": "markdown", - "id": "7ff3a932", + "id": "268791c4", "metadata": { "tags": [] }, @@ -676,7 +673,7 @@ { "cell_type": "code", "execution_count": 20, - "id": "d1ecd72e", + "id": "aff7a97b", "metadata": { "tags": [] }, @@ -689,7 +686,7 @@ { "cell_type": "code", "execution_count": 21, - "id": "c559d20d", + "id": "1d45e480", "metadata": { "tags": [] }, @@ -719,7 +716,7 @@ { "cell_type": "code", "execution_count": 22, - "id": "92722e0e", + "id": "02c8e1e5", "metadata": { "tags": [] }, @@ -743,7 +740,7 @@ { "cell_type": "code", "execution_count": 23, - "id": "00608459", + "id": 
"4c627429", "metadata": { "tags": [] }, @@ -775,7 +772,7 @@ }, { "cell_type": "markdown", - "id": "2ad26f52", + "id": "4296b629", "metadata": { "tags": [] }, @@ -786,7 +783,7 @@ { "cell_type": "code", "execution_count": 27, - "id": "25410ee6", + "id": "52e4eb11", "metadata": {}, "outputs": [ { @@ -902,7 +899,7 @@ }, { "cell_type": "markdown", - "id": "bae7c356", + "id": "cd0d4d7b", "metadata": {}, "source": [ "---" @@ -911,7 +908,7 @@ { "cell_type": "code", "execution_count": 28, - "id": "5526fc60", + "id": "ec1e91c1", "metadata": { "tags": [] }, @@ -932,7 +929,7 @@ }, { "cell_type": "markdown", - "id": "6b35ec15", + "id": "472f7eb5", "metadata": {}, "source": [ "## ๐Ÿ”ฎ Connecting to Hopsworks Feature Store " @@ -941,7 +938,7 @@ { "cell_type": "code", "execution_count": 29, - "id": "cfe90a1e", + "id": "410f0b7b", "metadata": {}, "outputs": [ { @@ -965,7 +962,7 @@ }, { "cell_type": "markdown", - "id": "cef1c97b", + "id": "6176991c", "metadata": {}, "source": [ "## ๐Ÿช„ Creating Feature Groups" @@ -973,7 +970,7 @@ }, { "cell_type": "markdown", - "id": "749153f8", + "id": "370bbf0b", "metadata": {}, "source": [ "### ๐ŸŒซ Air Quality Data" @@ -982,7 +979,7 @@ { "cell_type": "code", "execution_count": 30, - "id": "475f6ee6", + "id": "b5a58bfc", "metadata": { "scrolled": true, "tags": [] @@ -1052,7 +1049,7 @@ }, { "cell_type": "markdown", - "id": "855e3754", + "id": "09cbe8aa", "metadata": {}, "source": [ "### ๐ŸŒฆ Weather Data" @@ -1061,7 +1058,7 @@ { "cell_type": "code", "execution_count": 31, - "id": "2e99385d", + "id": "089f45b7", "metadata": {}, "outputs": [ { @@ -1121,7 +1118,7 @@ }, { "cell_type": "markdown", - "id": "ec169da5", + "id": "34f5ffec", "metadata": {}, "source": [ "---\n", @@ -1134,7 +1131,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, diff --git a/advanced_tutorials/air_quality/2_air_quality_feature_pipeline.ipynb 
b/advanced_tutorials/air_quality/2_air_quality_feature_pipeline.ipynb index 6240eef6..472718b5 100644 --- a/advanced_tutorials/air_quality/2_air_quality_feature_pipeline.ipynb +++ b/advanced_tutorials/air_quality/2_air_quality_feature_pipeline.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "2552d8bf", + "id": "fca940b7", "metadata": {}, "source": [ "# **Hopsworks Feature Store** - Part 02: Feature Pipeline\n", @@ -16,7 +16,7 @@ }, { "cell_type": "markdown", - "id": "b231c0db", + "id": "092aa908", "metadata": {}, "source": [ "### ๐Ÿ“ Imports" @@ -25,18 +25,20 @@ { "cell_type": "code", "execution_count": 1, - "id": "edf983f6", + "id": "1856e6c3", "metadata": {}, "outputs": [], "source": [ "import datetime\n", "import time\n", - "import requests\n", "import pandas as pd\n", "import json\n", "\n", "from features import air_quality\n", - "from functions import *\n", + "from functions.parse_air_quality import get_aqi_data_from_open_meteo\n", + "from functions.parse_weather import get_weather_data_from_open_meteo\n", + "from functions.common_functions import *\n", + "\n", "\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")" @@ -45,7 +47,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "dbc36bb3", + "id": "7f50400a", "metadata": { "tags": [] }, @@ -60,7 +62,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "d07e46c6", + "id": "32ca6a86", "metadata": { "tags": [] }, @@ -68,7 +70,7 @@ { "data": { "text/plain": [ - "(datetime.date(2024, 2, 23), '2024-02-23')" + "(datetime.date(2024, 3, 8), '2024-03-08')" ] }, "execution_count": 3, @@ -86,7 +88,7 @@ }, { "cell_type": "markdown", - "id": "50e64602", + "id": "dcfc09fc", "metadata": {}, "source": [ "### ๐Ÿ”ฎ Connecting to Hopsworks Feature Store " @@ -95,7 +97,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "06651b20", + "id": "541fe4e1", "metadata": {}, "outputs": [ { @@ -119,7 +121,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "4a6f5fc2", + "id": 
"53669d5f", "metadata": {}, "outputs": [], "source": [ @@ -136,7 +138,7 @@ }, { "cell_type": "markdown", - "id": "640ab38b", + "id": "c060b74c", "metadata": {}, "source": [ "## ๐ŸŒซ Filling gaps in Air Quality data (PM2.5)" @@ -145,7 +147,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "edc5510b", + "id": "4166b886", "metadata": { "tags": [] }, @@ -154,8 +156,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Finished: Reading data from Hopsworks, using ArrowFlight (3.26s) \n", - "Finished: Reading data from Hopsworks, using ArrowFlight (2.25s) \n" + "Finished: Reading data from Hopsworks, using ArrowFlight (2.89s) \n", + "Finished: Reading data from Hopsworks, using ArrowFlight (2.31s) \n" ] } ], @@ -168,7 +170,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "cfb667b0", + "id": "4097adfc", "metadata": { "tags": [] }, @@ -188,7 +190,7 @@ { "cell_type": "code", "execution_count": 8, - "id": "74862993", + "id": "22967ce2", "metadata": { "tags": [] }, @@ -197,8 +199,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "โ›ณ๏ธ Last update for Paris: 2024-02-23\n", - "โ›ณ๏ธ Last update for Columbus: 2024-02-23\n" + "โ›ณ๏ธ Last update for Paris: 2024-03-07\n", + "โ›ณ๏ธ Last update for Columbus: 2024-03-07\n" ] } ], @@ -217,7 +219,7 @@ { "cell_type": "code", "execution_count": 9, - "id": "7790102a", + "id": "b4829636", "metadata": {}, "outputs": [], "source": [ @@ -229,7 +231,7 @@ }, { "cell_type": "markdown", - "id": "67fd48eb", + "id": "301fa83a", "metadata": {}, "source": [ "### ๐Ÿง™๐Ÿผโ€โ™‚๏ธ Parsing PM2.5 data" @@ -238,7 +240,7 @@ { "cell_type": "code", "execution_count": 10, - "id": "ed8324e7", + "id": "ed244952", "metadata": { "scrolled": true, "tags": [] @@ -248,144 +250,144 @@ "name": "stdout", "output_type": "stream", "text": [ - "Processed PM2_5 for Amsterdam since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Amsterdam since 2024-02-08 till 2024-03-08.\n", "Took 0.12 sec.\n", "\n", - "Processed PM2_5 for 
Athina since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Athina since 2024-02-08 till 2024-03-08.\n", + "Took 0.11 sec.\n", + "\n", + "Processed PM2_5 for Berlin since 2024-02-08 till 2024-03-08.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Berlin since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Gdansk since 2024-02-08 till 2024-03-08.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Gdansk since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Krakรณw since 2024-02-08 till 2024-03-08.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Krakรณw since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for London since 2024-02-08 till 2024-03-08.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for London since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Madrid since 2024-02-08 till 2024-03-08.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Madrid since 2024-01-26 till 2024-02-23.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for Marseille since 2024-02-08 till 2024-03-08.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Marseille since 2024-01-26 till 2024-02-23.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for Milano since 2024-02-08 till 2024-03-08.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Milano since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Mรผnchen since 2024-02-08 till 2024-03-08.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for Napoli since 2024-02-08 till 2024-03-08.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Mรผnchen since 2024-01-26 till 2024-02-23.\n", - "Took 0.16 sec.\n", + "Processed PM2_5 for Paris since 2024-02-08 till 2024-03-08.\n", + "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Napoli since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Sevilla since 2024-02-08 till 2024-03-08.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Paris since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Stockholm since 2024-02-08 till 
2024-03-08.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Sevilla since 2024-01-26 till 2024-02-23.\n", - "Took 0.1 sec.\n", + "Processed PM2_5 for Tallinn since 2024-02-08 till 2024-03-08.\n", + "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Stockholm since 2024-01-26 till 2024-02-23.\n", - "Took 0.12 sec.\n", + "Processed PM2_5 for Varna since 2024-02-08 till 2024-03-08.\n", + "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Tallinn since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Wien since 2024-02-08 till 2024-03-08.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Varna since 2024-01-26 till 2024-02-23.\n", - "Took 0.1 sec.\n", + "Processed PM2_5 for Albuquerque since 2024-02-08 till 2024-03-08.\n", + "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Wien since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Atlanta since 2024-02-08 till 2024-03-08.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Albuquerque since 2024-01-26 till 2024-02-23.\n", - "Took 0.1 sec.\n", + "Processed PM2_5 for Chicago since 2024-02-08 till 2024-03-08.\n", + "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Atlanta since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Columbus since 2024-02-08 till 2024-03-08.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Chicago since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Dallas since 2024-02-08 till 2024-03-08.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Columbus since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Denver since 2024-02-08 till 2024-03-08.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Dallas since 2024-01-26 till 2024-02-23.\n", - "Took 0.14 sec.\n", - "\n", - "Processed PM2_5 for Denver since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Houston since 2024-02-08 till 2024-03-08.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Houston since 2024-01-26 till 2024-02-23.\n", - "Took 0.14 sec.\n", - "\n", - "Processed PM2_5 for Los 
Angeles since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Los Angeles since 2024-02-08 till 2024-03-08.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for New York since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for New York since 2024-02-08 till 2024-03-08.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Phoenix-Mesa since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Phoenix-Mesa since 2024-02-08 till 2024-03-08.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Salt Lake City since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Salt Lake City since 2024-02-08 till 2024-03-08.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for San Francisco since 2024-01-26 till 2024-02-23.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for San Francisco since 2024-02-08 till 2024-03-08.\n", + "Took 0.26 sec.\n", "\n", - "Processed PM2_5 for Tampa since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Tampa since 2024-02-08 till 2024-03-08.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Bellevue-SE 12th St since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Bellevue-SE 12th St since 2024-02-08 till 2024-03-08.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for DARRINGTON - FIR ST (Darrington High School) since 2024-01-26 till 2024-02-23.\n", - "Took 0.17 sec.\n", - "\n", - "Processed PM2_5 for KENT - JAMES & CENTRAL since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for DARRINGTON - FIR ST (Darrington High School) since 2024-02-08 till 2024-03-08.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for LAKE FOREST PARK TOWNE CENTER since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for KENT - JAMES & CENTRAL since 2024-02-08 till 2024-03-08.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for MARYSVILLE - 7TH AVE (Marysville Junior High) since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for LAKE FOREST PARK TOWNE CENTER since 2024-02-08 till 2024-03-08.\n", "Took 0.11 sec.\n", "\n", - "Processed 
PM2_5 for NORTH BEND - NORTH BEND WAY since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for MARYSVILLE - 7TH AVE (Marysville Junior High) since 2024-02-08 till 2024-03-08.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for SEATTLE - BEACON HILL since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for NORTH BEND - NORTH BEND WAY since 2024-02-08 till 2024-03-08.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for SEATTLE - DUWAMISH since 2024-01-26 till 2024-02-23.\n", - "Took 0.1 sec.\n", + "Processed PM2_5 for SEATTLE - BEACON HILL since 2024-02-08 till 2024-03-08.\n", + "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for SEATTLE - SOUTH PARK #2 since 2024-01-26 till 2024-02-23.\n", - "Took 0.1 sec.\n", + "Processed PM2_5 for SEATTLE - DUWAMISH since 2024-02-08 till 2024-03-08.\n", + "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Seattle-10th & Weller since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for SEATTLE - SOUTH PARK #2 since 2024-02-08 till 2024-03-08.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for TACOMA - ALEXANDER AVE since 2024-01-26 till 2024-02-23.\n", - "Took 0.1 sec.\n", + "Processed PM2_5 for Seattle-10th & Weller since 2024-02-08 till 2024-03-08.\n", + "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for TACOMA - L STREET since 2024-01-26 till 2024-02-23.\n", - "Took 0.1 sec.\n", + "Processed PM2_5 for TACOMA - ALEXANDER AVE since 2024-02-08 till 2024-03-08.\n", + "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Tacoma-S 36th St since 2024-01-26 till 2024-02-23.\n", - "Took 0.1 sec.\n", + "Processed PM2_5 for TACOMA - L STREET since 2024-02-08 till 2024-03-08.\n", + "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Tukwila Allentown since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Tacoma-S 36th St since 2024-02-08 till 2024-03-08.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Tulalip-Totem Beach Rd since 2024-01-26 till 2024-02-23.\n", + "Processed PM2_5 for Tukwila Allentown since 2024-02-08 till 
2024-03-08.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for Tulalip-Totem Beach Rd since 2024-02-08 till 2024-03-08.\n", "Took 0.1 sec.\n", "\n", "----------------------------------------------------------------\n", - "Parsed new PM2.5 data for ALL locations up to 2024-02-23.\n", - "Took 5.04 sec.\n", + "Parsed new PM2.5 data for ALL locations up to 2024-03-08.\n", + "Took 5.05 sec.\n", "\n" ] } @@ -425,7 +427,7 @@ { "cell_type": "code", "execution_count": 11, - "id": "c47cfcb6", + "id": "93a60ead", "metadata": {}, "outputs": [ { @@ -456,22 +458,22 @@ " \n", " \n", " \n", - " 1302\n", + " 1347\n", " Tulalip-Totem Beach Rd\n", - " 2024-02-21\n", - " 8.4\n", + " 2024-03-06\n", + " 6.3\n", " \n", " \n", - " 1303\n", + " 1348\n", " Tulalip-Totem Beach Rd\n", - " 2024-02-22\n", - " 3.4\n", + " 2024-03-07\n", + " 8.8\n", " \n", " \n", - " 1304\n", + " 1349\n", " Tulalip-Totem Beach Rd\n", - " 2024-02-23\n", - " 8.7\n", + " 2024-03-08\n", + " 11.0\n", " \n", " \n", "\n", @@ -479,9 +481,9 @@ ], "text/plain": [ " city_name date pm2_5\n", - "1302 Tulalip-Totem Beach Rd 2024-02-21 8.4\n", - "1303 Tulalip-Totem Beach Rd 2024-02-22 3.4\n", - "1304 Tulalip-Totem Beach Rd 2024-02-23 8.7" + "1347 Tulalip-Totem Beach Rd 2024-03-06 6.3\n", + "1348 Tulalip-Totem Beach Rd 2024-03-07 8.8\n", + "1349 Tulalip-Totem Beach Rd 2024-03-08 11.0" ] }, "execution_count": 11, @@ -495,7 +497,7 @@ }, { "cell_type": "markdown", - "id": "9054f8d7", + "id": "77529eea", "metadata": { "tags": [] }, @@ -506,7 +508,7 @@ { "cell_type": "code", "execution_count": 12, - "id": "bc1b957c", + "id": "528c475f", "metadata": { "tags": [] }, @@ -519,7 +521,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "be43a5b0", + "id": "da05c767", "metadata": { "tags": [] }, @@ -570,74 +572,74 @@ " \n", " \n", " \n", - " 1302\n", - " Krakรณw\n", - " 2024-02-23\n", - " 22.2\n", - " 18.7\n", - " 23.8\n", - " 19.2\n", - " 26.7\n", - " 27.7\n", - " 26.2\n", - " 65.4\n", + " 1347\n", + " London\n", + " 
2024-03-08\n", + " 24.3\n", + " 25.7\n", + " 20.6\n", + " 12.3\n", + " 12.2\n", + " 19.4\n", + " 7.0\n", + " 6.5\n", " ...\n", - " 17.511933\n", + " 6.179256\n", " 2024\n", - " 23\n", - " 2\n", + " 8\n", + " 3\n", " 4\n", " 0\n", - " 0.801361\n", - " 0.598181\n", + " 0.920971\n", + " 0.38963\n", " -0.433884\n", " -0.900969\n", " \n", " \n", - " 1303\n", - " Columbus\n", - " 2024-02-23\n", - " 23.3\n", - " 17.0\n", - " 10.3\n", - " 22.8\n", - " 17.8\n", - " 9.5\n", - " 5.6\n", - " 5.8\n", + " 1348\n", + " Milano\n", + " 2024-03-08\n", + " 27.9\n", + " 39.8\n", + " 15.0\n", + " 25.2\n", + " 13.1\n", + " 11.2\n", + " 18.4\n", + " 27.3\n", " ...\n", - " 5.525128\n", + " 29.069146\n", " 2024\n", - " 23\n", - " 2\n", + " 8\n", + " 3\n", " 4\n", " 0\n", - " 0.801361\n", - " 0.598181\n", + " 0.920971\n", + " 0.38963\n", " -0.433884\n", " -0.900969\n", " \n", " \n", - " 1304\n", - " Milano\n", - " 2024-02-23\n", - " 24.4\n", - " 49.2\n", - " 62.8\n", - " 79.5\n", - " 92.2\n", - " 119.4\n", - " 114.2\n", - " 110.1\n", + " 1349\n", + " Krakรณw\n", + " 2024-03-08\n", + " 35.4\n", + " 21.7\n", + " 27.2\n", + " 38.0\n", + " 48.3\n", + " 55.4\n", + " 54.4\n", + " 41.3\n", " ...\n", - " 31.152944\n", + " 15.398092\n", " 2024\n", - " 23\n", - " 2\n", + " 8\n", + " 3\n", " 4\n", " 0\n", - " 0.801361\n", - " 0.598181\n", + " 0.920971\n", + " 0.38963\n", " -0.433884\n", " -0.900969\n", " \n", @@ -648,34 +650,34 @@ ], "text/plain": [ " city_name date pm2_5 pm_2_5_previous_1_day \\\n", - "1302 Krakรณw 2024-02-23 22.2 18.7 \n", - "1303 Columbus 2024-02-23 23.3 17.0 \n", - "1304 Milano 2024-02-23 24.4 49.2 \n", + "1347 London 2024-03-08 24.3 25.7 \n", + "1348 Milano 2024-03-08 27.9 39.8 \n", + "1349 Krakรณw 2024-03-08 35.4 21.7 \n", "\n", " pm_2_5_previous_2_day pm_2_5_previous_3_day pm_2_5_previous_4_day \\\n", - "1302 23.8 19.2 26.7 \n", - "1303 10.3 22.8 17.8 \n", - "1304 62.8 79.5 92.2 \n", + "1347 20.6 12.3 12.2 \n", + "1348 15.0 25.2 13.1 \n", + "1349 27.2 38.0 48.3 \n", "\n", " 
pm_2_5_previous_5_day pm_2_5_previous_6_day pm_2_5_previous_7_day \\\n", - "1302 27.7 26.2 65.4 \n", - "1303 9.5 5.6 5.8 \n", - "1304 119.4 114.2 110.1 \n", + "1347 19.4 7.0 6.5 \n", + "1348 11.2 18.4 27.3 \n", + "1349 55.4 54.4 41.3 \n", "\n", " ... exp_std_28_days year day_of_month month day_of_week \\\n", - "1302 ... 17.511933 2024 23 2 4 \n", - "1303 ... 5.525128 2024 23 2 4 \n", - "1304 ... 31.152944 2024 23 2 4 \n", + "1347 ... 6.179256 2024 8 3 4 \n", + "1348 ... 29.069146 2024 8 3 4 \n", + "1349 ... 15.398092 2024 8 3 4 \n", "\n", " is_weekend sin_day_of_year cos_day_of_year sin_day_of_week \\\n", - "1302 0 0.801361 0.598181 -0.433884 \n", - "1303 0 0.801361 0.598181 -0.433884 \n", - "1304 0 0.801361 0.598181 -0.433884 \n", + "1347 0 0.920971 0.38963 -0.433884 \n", + "1348 0 0.920971 0.38963 -0.433884 \n", + "1349 0 0.920971 0.38963 -0.433884 \n", "\n", " cos_day_of_week \n", - "1302 -0.900969 \n", - "1303 -0.900969 \n", - "1304 -0.900969 \n", + "1347 -0.900969 \n", + "1348 -0.900969 \n", + "1349 -0.900969 \n", "\n", "[3 rows x 31 columns]" ] @@ -698,7 +700,7 @@ { "cell_type": "code", "execution_count": 14, - "id": "8b046956", + "id": "1b5a24e7", "metadata": {}, "outputs": [ { @@ -720,7 +722,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "6ae477ab", + "id": "5c34d3e1", "metadata": { "tags": [] }, @@ -728,7 +730,7 @@ { "data": { "text/plain": [ - "(45, 31)" + "(90, 31)" ] }, "execution_count": 15, @@ -743,7 +745,7 @@ }, { "cell_type": "markdown", - "id": "12f33e25", + "id": "834bef82", "metadata": {}, "source": [ "## ๐ŸŒฆ Filling gaps in Weather data" @@ -752,7 +754,7 @@ { "cell_type": "code", "execution_count": 16, - "id": "32eeb729", + "id": "bf43c662", "metadata": { "tags": [] }, @@ -771,7 +773,7 @@ }, { "cell_type": "markdown", - "id": "fb144e4a", + "id": "00ec9b25", "metadata": { "tags": [] }, @@ -782,7 +784,7 @@ { "cell_type": "code", "execution_count": 17, - "id": "3ed0d92c", + "id": "6835d984", "metadata": { "scrolled": true, "tags": [] 
@@ -792,144 +794,144 @@ "name": "stdout", "output_type": "stream", "text": [ - "Parsed weather for Amsterdam since 2024-02-23 till 2024-02-23.\n", - "Took 2.1 sec.\n", + "Parsed weather for Amsterdam since 2024-03-07 till 2024-03-08.\n", + "Took 2.11 sec.\n", "\n", - "Parsed weather for Athina since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Athina since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Berlin since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Berlin since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Gdansk since 2024-02-23 till 2024-02-23.\n", - "Took 2.11 sec.\n", + "Parsed weather for Gdansk since 2024-03-07 till 2024-03-08.\n", + "Took 2.1 sec.\n", "\n", - "Parsed weather for Krakรณw since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Krakรณw since 2024-03-07 till 2024-03-08.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for London since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for London since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Madrid since 2024-02-23 till 2024-02-23.\n", - "Took 2.11 sec.\n", + "Parsed weather for Madrid since 2024-03-07 till 2024-03-08.\n", + "Took 2.1 sec.\n", "\n", - "Parsed weather for Marseille since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Marseille since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Milano since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Milano since 2024-03-07 till 2024-03-08.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for Mรผnchen since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Mรผnchen since 2024-03-07 till 2024-03-08.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for Napoli since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Napoli since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Paris since 2024-02-23 till 2024-02-23.\n", - "Took 2.1 
sec.\n", - "\n", - "Parsed weather for Sevilla since 2024-02-23 till 2024-02-23.\n", - "Took 2.1 sec.\n", - "\n", - "Parsed weather for Stockholm since 2024-02-23 till 2024-02-23.\n", - "Took 2.1 sec.\n", + "Parsed weather for Paris since 2024-03-07 till 2024-03-08.\n", + "Took 2.11 sec.\n", "\n", - "Parsed weather for Tallinn since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Sevilla since 2024-03-07 till 2024-03-08.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for Varna since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Stockholm since 2024-03-07 till 2024-03-08.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for Wien since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Tallinn since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Albuquerque since 2024-02-23 till 2024-02-23.\n", - "Took 2.1 sec.\n", + "Parsed weather for Varna since 2024-03-07 till 2024-03-08.\n", + "Took 2.12 sec.\n", "\n", - "Parsed weather for Atlanta since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Wien since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Chicago since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Albuquerque since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Columbus since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Atlanta since 2024-03-07 till 2024-03-08.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for Dallas since 2024-02-23 till 2024-02-23.\n", - "Took 2.1 sec.\n", - "\n", - "Parsed weather for Denver since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Chicago since 2024-03-07 till 2024-03-08.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for Houston since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Columbus since 2024-03-07 till 2024-03-08.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for Los Angeles since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Dallas since 2024-03-07 till 
2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for New York since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Denver since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Phoenix-Mesa since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Houston since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Salt Lake City since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Los Angeles since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for San Francisco since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for New York since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Tampa since 2024-02-23 till 2024-02-23.\n", - "Took 2.11 sec.\n", + "Parsed weather for Phoenix-Mesa since 2024-03-07 till 2024-03-08.\n", + "Took 2.1 sec.\n", "\n", - "Parsed weather for Bellevue-SE 12th St since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Salt Lake City since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for DARRINGTON - FIR ST (Darrington High School) since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for San Francisco since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for KENT - JAMES & CENTRAL since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Tampa since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for LAKE FOREST PARK TOWNE CENTER since 2024-02-23 till 2024-02-23.\n", - "Took 2.11 sec.\n", + "Parsed weather for Bellevue-SE 12th St since 2024-03-07 till 2024-03-08.\n", + "Took 2.12 sec.\n", "\n", - "Parsed weather for MARYSVILLE - 7TH AVE (Marysville Junior High) since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for DARRINGTON - FIR ST (Darrington High School) since 2024-03-07 till 2024-03-08.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for NORTH BEND - NORTH BEND WAY since 2024-02-23 till 2024-02-23.\n", 
+ "Parsed weather for KENT - JAMES & CENTRAL since 2024-03-07 till 2024-03-08.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for SEATTLE - BEACON HILL since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for LAKE FOREST PARK TOWNE CENTER since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for SEATTLE - DUWAMISH since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for MARYSVILLE - 7TH AVE (Marysville Junior High) since 2024-03-07 till 2024-03-08.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for SEATTLE - SOUTH PARK #2 since 2024-02-23 till 2024-02-23.\n", - "Took 2.1 sec.\n", + "Parsed weather for NORTH BEND - NORTH BEND WAY since 2024-03-07 till 2024-03-08.\n", + "Took 2.11 sec.\n", "\n", - "Parsed weather for Seattle-10th & Weller since 2024-02-23 till 2024-02-23.\n", - "Took 2.1 sec.\n", + "Parsed weather for SEATTLE - BEACON HILL since 2024-03-07 till 2024-03-08.\n", + "Took 2.11 sec.\n", "\n", - "Parsed weather for TACOMA - ALEXANDER AVE since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for SEATTLE - DUWAMISH since 2024-03-07 till 2024-03-08.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for TACOMA - L STREET since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for SEATTLE - SOUTH PARK #2 since 2024-03-07 till 2024-03-08.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for Seattle-10th & Weller since 2024-03-07 till 2024-03-08.\n", + "Took 2.1 sec.\n", + "\n", + "Parsed weather for TACOMA - ALEXANDER AVE since 2024-03-07 till 2024-03-08.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for Tacoma-S 36th St since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for TACOMA - L STREET since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Tukwila Allentown since 2024-02-23 till 2024-02-23.\n", + "Parsed weather for Tacoma-S 36th St since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Tulalip-Totem Beach Rd since 2024-02-23 till 2024-02-23.\n", + 
"Parsed weather for Tukwila Allentown since 2024-03-07 till 2024-03-08.\n", "Took 2.1 sec.\n", "\n", + "Parsed weather for Tulalip-Totem Beach Rd since 2024-03-07 till 2024-03-08.\n", + "Took 2.11 sec.\n", + "\n", "----------------------------------------------------------------\n", - "Parsed new weather data for ALL cities up to 2024-02-23.\n", - "Took 94.76 sec.\n", + "Parsed new weather data for ALL cities up to 2024-03-08.\n", + "Took 94.82 sec.\n", "\n" ] } @@ -973,7 +975,7 @@ { "cell_type": "code", "execution_count": 18, - "id": "a149c4d6", + "id": "594213e0", "metadata": { "tags": [] }, @@ -995,7 +997,7 @@ { "cell_type": "code", "execution_count": 19, - "id": "7960e1b4", + "id": "6c799b26", "metadata": { "tags": [] }, @@ -1037,49 +1039,49 @@ " \n", " \n", " \n", - " 42\n", - " Tacoma-S 36th St\n", - " 2024-02-23\n", + " 87\n", + " Tukwila Allentown\n", + " 2024-03-08\n", " 12.1\n", - " 2.8\n", + " 1.8\n", " 0.0\n", " 0.0\n", " 0.0\n", " 0.0\n", - " 8.0\n", - " 12.2\n", - " 5\n", - " 1708646400000\n", + " 9.4\n", + " 15.1\n", + " 217\n", + " 1709856000000\n", " \n", " \n", - " 43\n", - " Tukwila Allentown\n", - " 2024-02-23\n", - " 13.2\n", - " 3.3\n", + " 88\n", + " Tulalip-Totem Beach Rd\n", + " 2024-03-07\n", + " 8.2\n", + " 0.4\n", " 0.0\n", " 0.0\n", " 0.0\n", " 0.0\n", - " 6.3\n", - " 12.6\n", - " 329\n", - " 1708646400000\n", + " 9.6\n", + " 12.2\n", + " 70\n", + " 1709769600000\n", " \n", " \n", - " 44\n", + " 89\n", " Tulalip-Totem Beach Rd\n", - " 2024-02-23\n", - " 12.7\n", - " 3.7\n", + " 2024-03-08\n", + " 10.7\n", + " 3.0\n", " 0.0\n", " 0.0\n", " 0.0\n", " 0.0\n", - " 7.9\n", " 11.5\n", - " 357\n", - " 1708646400000\n", + " 25.6\n", + " 145\n", + " 1709856000000\n", " \n", " \n", "\n", @@ -1087,19 +1089,19 @@ ], "text/plain": [ " city_name date temperature_max temperature_min \\\n", - "42 Tacoma-S 36th St 2024-02-23 12.1 2.8 \n", - "43 Tukwila Allentown 2024-02-23 13.2 3.3 \n", - "44 Tulalip-Totem Beach Rd 2024-02-23 12.7 3.7 \n", + "87 Tukwila 
Allentown 2024-03-08 12.1 1.8 \n", + "88 Tulalip-Totem Beach Rd 2024-03-07 8.2 0.4 \n", + "89 Tulalip-Totem Beach Rd 2024-03-08 10.7 3.0 \n", "\n", " precipitation_sum rain_sum snowfall_sum precipitation_hours \\\n", - "42 0.0 0.0 0.0 0.0 \n", - "43 0.0 0.0 0.0 0.0 \n", - "44 0.0 0.0 0.0 0.0 \n", + "87 0.0 0.0 0.0 0.0 \n", + "88 0.0 0.0 0.0 0.0 \n", + "89 0.0 0.0 0.0 0.0 \n", "\n", " wind_speed_max wind_gusts_max wind_direction_dominant unix_time \n", - "42 8.0 12.2 5 1708646400000 \n", - "43 6.3 12.6 329 1708646400000 \n", - "44 7.9 11.5 357 1708646400000 " + "87 9.4 15.1 217 1709856000000 \n", + "88 9.6 12.2 70 1709769600000 \n", + "89 11.5 25.6 145 1709856000000 " ] }, "execution_count": 19, @@ -1121,7 +1123,7 @@ }, { "cell_type": "markdown", - "id": "72b1db92", + "id": "4f534824", "metadata": { "tags": [] }, @@ -1132,18 +1134,18 @@ { "cell_type": "code", "execution_count": 20, - "id": "403a8f41", + "id": "2179deb0", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "13836ef8e3384504b27063ea496c122f", + "model_id": "7b77a556603345b999ca05a825ccb1f1", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Uploading Dataframe: 0.00% | | Rows 0/45 | Elapsed Time: 00:00 | Remaining Time: ?" + "Uploading Dataframe: 0.00% | | Rows 0/90 | Elapsed Time: 00:00 | Remaining Time: ?" ] }, "metadata": {}, @@ -1161,7 +1163,7 @@ { "data": { "text/plain": [ - "(, None)" + "(, None)" ] }, "execution_count": 20, @@ -1177,18 +1179,18 @@ { "cell_type": "code", "execution_count": 21, - "id": "fc2cb1d5", + "id": "8eb1a6dd", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "01811207264a4dc8a78130e301d43f16", + "model_id": "37402c20f64f4d3bab027087f7f83eaf", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Uploading Dataframe: 0.00% | | Rows 0/45 | Elapsed Time: 00:00 | Remaining Time: ?" 
+ "Uploading Dataframe: 0.00% | | Rows 0/90 | Elapsed Time: 00:00 | Remaining Time: ?" ] }, "metadata": {}, @@ -1206,7 +1208,7 @@ { "data": { "text/plain": [ - "(, None)" + "(, None)" ] }, "execution_count": 21, @@ -1221,7 +1223,7 @@ }, { "cell_type": "markdown", - "id": "309617db", + "id": "7e88d41c", "metadata": {}, "source": [ "---\n", diff --git a/advanced_tutorials/air_quality/3_air_quality_training_pipeline.ipynb b/advanced_tutorials/air_quality/3_air_quality_training_pipeline.ipynb index 1f54825c..f10eeac3 100644 --- a/advanced_tutorials/air_quality/3_air_quality_training_pipeline.ipynb +++ b/advanced_tutorials/air_quality/3_air_quality_training_pipeline.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "77d695b6", + "id": "0468fa97", "metadata": { "tags": [] }, @@ -24,7 +24,7 @@ }, { "cell_type": "markdown", - "id": "a4e180e5", + "id": "0b486b05", "metadata": {}, "source": [ "### ๐Ÿ“ Imports" @@ -33,7 +33,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "dca2f8a6", + "id": "a8c7d91f", "metadata": {}, "outputs": [ { @@ -52,7 +52,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "e07a7998", + "id": "13c54813", "metadata": { "tags": [] }, @@ -67,14 +67,9 @@ ], "source": [ "import os\n", - "import datetime\n", - "import time\n", - "import json\n", - "import pickle\n", "import joblib\n", "\n", "import pandas as pd\n", - "import numpy as np\n", "\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", @@ -91,7 +86,7 @@ }, { "cell_type": "markdown", - "id": "4046f61d", + "id": "361fb860", "metadata": {}, "source": [ "## ๐Ÿ“ก Connecting to Hopsworks Feature Store " @@ -100,7 +95,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "74e9b3c5", + "id": "cfdf990e", "metadata": {}, "outputs": [ { @@ -125,7 +120,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "90851146", + "id": "2e8b9506", "metadata": {}, "outputs": [], "source": [ @@ -142,7 +137,7 @@ }, { "cell_type": "markdown", - "id": "58c4715e", + 
"id": "ade435c2", "metadata": {}, "source": [ "## ๐Ÿ– Feature View Creation and Retrieval " @@ -151,7 +146,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "dce6c4ac", + "id": "b01f3e2e", "metadata": {}, "outputs": [], "source": [ @@ -165,7 +160,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "2e63d523", + "id": "a75142b3", "metadata": { "scrolled": true, "tags": [] @@ -178,7 +173,7 @@ }, { "cell_type": "markdown", - "id": "eb62cf36", + "id": "c4ed7fa2", "metadata": {}, "source": [ "`Feature Views` stands between **Feature Groups** and **Training Dataset**. ะกombining **Feature Groups** we can create **Feature Views** which store a metadata of our data. Having **Feature Views** we can create **Training Dataset**.\n", @@ -203,7 +198,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "5394c761", + "id": "2ce37fac", "metadata": {}, "outputs": [ { @@ -226,7 +221,7 @@ }, { "cell_type": "markdown", - "id": "a71e815b", + "id": "5442ee40", "metadata": {}, "source": [ "For now, your `Feature View` is saved in Hopsworks and you can retrieve it using `FeatureStore.get_feature_view()`." 
@@ -234,7 +229,7 @@ }, { "cell_type": "markdown", - "id": "409936f9", + "id": "7fa4af05", "metadata": {}, "source": [ "## ๐Ÿ‹๏ธ Training Dataset Creation\n", @@ -263,7 +258,7 @@ { "cell_type": "code", "execution_count": 8, - "id": "dcd1c91f", + "id": "2ad3def9", "metadata": {}, "outputs": [ { @@ -289,7 +284,7 @@ }, { "cell_type": "markdown", - "id": "3ab95b51", + "id": "ff5fbe9b", "metadata": {}, "source": [ "## ๐Ÿงฌ Modeling" @@ -298,7 +293,7 @@ { "cell_type": "code", "execution_count": 9, - "id": "9ebd4ae2", + "id": "7106677e", "metadata": {}, "outputs": [], "source": [ @@ -315,7 +310,7 @@ { "cell_type": "code", "execution_count": 10, - "id": "b50de188", + "id": "d0ae6dc3", "metadata": { "tags": [] }, @@ -334,7 +329,7 @@ { "cell_type": "code", "execution_count": 11, - "id": "02e2f796", + "id": "9fb26419", "metadata": { "tags": [] }, @@ -347,7 +342,7 @@ { "cell_type": "code", "execution_count": 12, - "id": "43cdecb5", + "id": "4f67fee0", "metadata": { "tags": [] }, @@ -367,7 +362,7 @@ { "cell_type": "code", "execution_count": null, - "id": "de9eb5ff", + "id": "2e564df8", "metadata": {}, "outputs": [], "source": [ @@ -376,7 +371,7 @@ }, { "cell_type": "markdown", - "id": "9bd07554", + "id": "ea5adfa8", "metadata": {}, "source": [ "## ๐Ÿƒ๐Ÿปโ€โ™‚๏ธ Model Training" @@ -385,7 +380,7 @@ { "cell_type": "code", "execution_count": 17, - "id": "5c73f5b9", + "id": "192e9cf5", "metadata": {}, "outputs": [ { @@ -442,7 +437,7 @@ }, { "cell_type": "markdown", - "id": "6f78ae54", + "id": "4837c932", "metadata": {}, "source": [ "## โš–๏ธ Model Validation" @@ -451,7 +446,7 @@ { "cell_type": "code", "execution_count": 19, - "id": "431e1863", + "id": "dfda89cd", "metadata": { "tags": [] }, @@ -486,7 +481,7 @@ { "cell_type": "code", "execution_count": 20, - "id": "d23df9c5", + "id": "b12a4140", "metadata": { "tags": [] }, @@ -502,14 +497,14 @@ { "cell_type": "code", "execution_count": 21, - "id": "9ca2eee4", + "id": "5e9b2016", "metadata": { "tags": [] }, "outputs": [ { 
"data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkcAAAHHCAYAAAC1G/yyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAACIS0lEQVR4nO3deXhU5dk/8O+ZNZM9rCEsAXEDFaQuFO1YRBSU2rq0dWvVat1bN9TW2uIuVt9qW9RS2yq2b6vVVttXq1AUwZ8KqCwKCCiI7GEL2Wc/z++P+5yZM5NJMpNMMjPJ93NdXCEzJ2fOnEnm3HM/93M/mlJKgYiIiIgAALZsHwARERFRLmFwRERERGTB4IiIiIjIgsERERERkQWDIyIiIiILBkdEREREFgyOiIiIiCwYHBERERFZMDgiIiIismBwRERZp2ka7rnnnrR/7ssvv4SmaZg3b17GjykT5s2bB03T8OWXX3a47ciRI3H55Zd36/FcfvnlGDlyZLc+BlFvwOCIiADELuSapuHdd99tdb9SCsOHD4emafjGN76RhSPsvMWLF0efm6ZpsNvtGDRoEL797W9j/fr12T48IsoxDI6IKE5BQQH+9re/tbp9yZIl2LFjB9xudxaOKjNuvPFG/OUvf8Ef//hHXHLJJfjPf/4Dr9eLmpqabnm873//+/D5fKiuru6W/RNR92BwRERxzjrrLLz00ksIh8Nxt//tb3/Dcccdh8rKyiwdWdd5vV5873vfww9+8AM8/vjjePzxx3HgwAH8+c9/7pbHs9vtKCgogKZp3bJ/IuoeDI6IKM5FF12EAwcOYOHChdHbgsEg/vGPf+Diiy9O+jPNzc2YOXMmhg8fDrfbjSOOOAL/8z//A6VU3HaBQAC33HILBg4ciJKSEnzzm9/Ejh07ku5z586duOKKKzB48GC43W4cddRReOaZZzL3RCHBEgBs3ry5U489Z84cHHXUUSgsLERFRQWOP/74uKxbspojpRQeeOABDBs2DIWFhTj11FOxbt26Vvu+5557kgZVyfb573//GzNmzEBVVRXcbjdGjx6N+++/H5FIpMNz8MILL+C4445DSUkJSktLccwxx+A3v/lNhz9H1Js5sn0ARJRbRo4ciUmTJuH555/HmWeeCQB44403UF9fjwsvvBC//e1v47ZXSuGb3/wm3n77bVx55ZU49thjsWDBAtx+++3YuXMnHn/88ei2P/zhD/G///u/uPjii3HSSSdh0aJFmDFjRqtj2LNnD7761a9C0zT86Ec/wsCBA/HGG2/gyiuvRENDA26++eaMPFczwKioqEj7sf/whz/gxhtvxLe//W3cdNNN8Pv9+OSTT7B8+fI2g0gAmDVrFh544AGcddZZOOuss7By5UqcccYZCAaDnX4e8+bNQ3FxMW699VYUFxdj0aJFmDVrFhoaGvDoo4+2+XMLFy7ERRddhNNOOw2//OUvAQDr16/He++9h5tuuqnTx0OU9xQRkVLq2WefVQDUhx9+qJ544glVUlKiWlpalFJKfec731GnnnqqUkqp6upqNWPGjOjP/etf/1IA1AMPPBC3v29/+9tK0zS1adMmpZRSq1evVgDU9ddfH7fdxRdfrACou+++O3rblVdeqYYMGaL2798ft+2FF16oysrKose1ZcsWBUA9++yz7T63t99+WwFQzzzzjNq3b5/atWuXmj9/vjr00EOVpmnqgw8+SPuxv/Wtb6mjjjqq3cc1z+mWLVuUUkrt3btXuVwuNWPGDKXrenS7n/3sZwqAuuyyy6K33X333SrZW3TiPpVS0WOyuuaaa1RhYaHy+/3R2y677DJVXV0d/f6mm25SpaWlKhwOt/s8iPoaDqsRUSvf/e534fP58Nprr6GxsRGvvfZam9mQ119/HXa7HTfeeGPc7TNnzoRSCm+88UZ0OwCttkvMAiml8M9//hNnn302lFLYv39/9N+0adNQX1+PlStXdu
p5XXHFFRg4cCCqqqowffp01NfX4y9/+QtOOOGEtB+7vLwcO3bswIcffpjy47/55psIBoP48Y9/HDdk1tVMmMfjif6/sbER+/fvh9frRUtLCzZs2NDmz5WXl6O5uTluCJWIOKxGREkMHDgQU6dOxd/+9je0tLQgEong29/+dtJtt27diqqqKpSUlMTdPmbMmOj95lebzYbRo0fHbXfEEUfEfb9v3z7U1dXh6aefxtNPP530Mffu3dup5zVr1ix4vV40NTXhlVdewQsvvACbLfYZMZ3H/slPfoI333wTJ554Ig499FCcccYZuPjii3HyySe3+fjmuTjssMPibh84cGDc0F661q1bh5///OdYtGgRGhoa4u6rr69v8+euv/56vPjiizjzzDMxdOhQnHHGGfjud7+L6dOnd/pYiHoDBkdElNTFF1+Mq666CjU1NTjzzDNRXl7eI4+r6zoA4Hvf+x4uu+yypNuMGzeuU/s+5phjMHXqVADAOeecg5aWFlx11VX42te+huHDh6f12GPGjMHGjRvx2muvYf78+fjnP/+Jp556CrNmzcK9997bqeOzamuGW2KRdV1dHb7+9a+jtLQU9913H0aPHo2CggKsXLkSP/nJT6LPKZlBgwZh9erVWLBgAd544w288cYbePbZZ3HppZfiueee6/JzIMpXDI6IKKlzzz0X11xzDZYtW4a///3vbW5XXV2NN998E42NjXHZI3M4x+zxU11dDV3XsXnz5rhs0caNG+P2Z85ki0Qi0UCmuzz88MN45ZVX8OCDD2Lu3LlpP3ZRUREuuOACXHDBBQgGgzjvvPPw4IMP4s4770RBQUGr7c1z8fnnn+OQQw6J3r5v3z4cPHgwblszk1RXVxcXmJrZJ9PixYtx4MABvPzyyzjllFOit2/ZsqXjEwDA5XLh7LPPxtlnnw1d13H99dfj97//PX7xi1/g0EMPTWkfRL0Na46IKKni4mL87ne/wz333IOzzz67ze3OOussRCIRPPHEE3G3P/7449A0LTrjzfyaONvt17/+ddz3drsd559/Pv75z39i7dq1rR5v3759nXk6SY0ePRrnn38+5s2bh5qamrQe+8CBA3H3uVwujB07FkophEKhpI83depUOJ1OzJkzJ67NQeI5MI8NAN55553obc3Nza0yOna7HQDi9hcMBvHUU0+19bTbfA42my2aGQsEAh3+PFFvxcwREbWpraElq7PPPhunnnoq7rrrLnz55ZcYP348/vvf/+Lf//43br755uhF/thjj8VFF12Ep556CvX19TjppJPw1ltvYdOmTa32+fDDD+Ptt9/GxIkTcdVVV2Hs2LGora3FypUr8eabb6K2tjZjz/H222/Hiy++iF//+td4+OGHU37sM844A5WVlTj55JMxePBgrF+/Hk888QRmzJjRqv7KNHDgQNx2222YPXs2vvGNb+Css87CqlWr8MYbb2DAgAFx255xxhkYMWIErrzyStx+++2w2+145plnMHDgQGzbti263UknnYSKigpcdtlluPHGG6FpGv7yl7+06jGVzA9/+EPU1tZiypQpGDZsGLZu3Yo5c+bg2GOPjdaMEfVJWZsnR0Q5xTqVvz2JU/mVUqqxsVHdcsstqqqqSjmdTnXYYYepRx99NG66ulJK+Xw+deONN6r+/furoqIidfbZZ6vt27e3msqvlFJ79uxRN9xwgxo+fLhyOp2qsrJSnXbaaerpp5+ObpPuVP6XXnop6f2TJ09WpaWlqq6uLuXH/v3vf69OOeUU1b9/f+V2u9Xo0aPV7bffrurr66PbJJt2H4lE1L333quGDBmiPB6Pmjx5slq7dq2qrq6Om8qvlFIrVqxQEydOVC6XS40YMUI99thjSff53nvvqa9+9avK4/Goqqoqdccdd6gFCxYoAOrtt9+Obpc4lf8f//iHOuOMM9SgQYOij3HNNdeo3bt3t3s+iXo7TakUPl4QERER9RGsOSIiIiKyYHBEREREZMHgiI
iIiMiCwRERERGRBYMjIiIiIgsGR0REREQWbAKZJl3XsWvXLpSUlLS59hERERHlFqUUGhsbUVVVFbfgdDIMjtK0a9cuDB8+PNuHQURERJ2wfft2DBs2rN1tGBylyVwWYPv27SgtLc3y0RAREVEqGhoaMHz48DaX97FicJQmcyittLSUwREREVGeSaUkhgXZRERERBYMjoiIiIgsGBwRERERWeRVcPTOO+/g7LPPRlVVFTRNw7/+9a+4+5VSmDVrFoYMGQKPx4OpU6fi888/j9umtrYWl1xyCUpLS1FeXo4rr7wSTU1NPfgsiIiIKJflVXDU3NyM8ePH48knn0x6/yOPPILf/va3mDt3LpYvX46ioiJMmzYNfr8/us0ll1yCdevWYeHChXjttdfwzjvv4Oqrr+6pp0BEREQ5TlNKqWwfRGdomoZXXnkF55xzDgDJGlVVVWHmzJm47bbbAAD19fUYPHgw5s2bhwsvvBDr16/H2LFj8eGHH+L4448HAMyfPx9nnXUWduzYgaqqqg4ft6GhAWVlZaivr+dsNSIiojyRzvU7rzJH7dmyZQtqamowderU6G1lZWWYOHEili5dCgBYunQpysvLo4ERAEydOhU2mw3Lly9Put9AIICGhoa4f0RERNR79ZrgqKamBgAwePDguNsHDx4cva+mpgaDBg2Ku9/hcKBfv37RbRLNnj0bZWVl0X/sjk1ERNS79ZrgqLvceeedqK+vj/7bvn17tg+JiIiIulGv6ZBdWVkJANizZw+GDBkSvX3Pnj049thjo9vs3bs37ufC4TBqa2ujP5/I7XbD7XZ3z0ETmZQOHFwF+PcDBQOAigmAxs8uRETZ0GvefUeNGoXKykq89dZb0dsaGhqwfPlyTJo0CQAwadIk1NXVYcWKFdFtFi1aBF3XMXHixB4/ZiIAQM0i4O3pwDvnAcsul69vT5fbiYiox+VV5qipqQmbNm2Kfr9lyxasXr0a/fr1w4gRI3DzzTfjgQcewGGHHYZRo0bhF7/4BaqqqqIz2saMGYPp06fjqquuwty5cxEKhfCjH/0IF154YUoz1YgyrmYR8OE1QLARcPcH7G4gEgDqPpHbT/g9UDkl20dJRNSn5FVw9NFHH+HUU0+Nfn/rrbcCAC677DLMmzcPd9xxB5qbm3H11Vejrq4OX/va1zB//nwUFBREf+avf/0rfvSjH+G0006DzWbD+eefj9/+9rc9/lyIoHTg04clMCocCpiLITo8gH0o4Nsp9w+ezCE2IqIelLd9jrKFfY4oY2pXyBCao1gCokThFiDcDJzyMtDvuJ4/PiKiXqRP9jkiyjv+/YAelKG0ZOwFcr9/f88eFxFRH8fgiChbCgYANpfUGCUT8cv9BQN69riIiPo4BkdE2VIxASg9AggeABJHt5UCgrVyf8WE7BwfEVEfxeCIKFs0GzD2p4CzRIqvwy1SpB1uke+dpXI/i7GJiHoU33WJsqlyikzXLx8nxde+3fK1fBxwwlxO4yciyoK8mspP1CtVTpHp+uyQTUSUExgcEeUCzcbp+kREOYIfTYmIiIgsGBwRERERWTA4IiIiIrJgcERERERkweCIiIiIyILBEREREZEFgyMiIiIiCwZHRERERBYMjoiIiIgsGBwRERERWTA4IiIiIrJgcERERERkwYVniXKN0oGDqwD/fqBgAFAxQRamJSKiHsHgiCiX1CwCPn0YaNgI6EHA5gJKjwDG/hSonJLtoyMi6hP4cZQoV9QsAj68Bjj4CeAoBjxD5GvdJ3J7zaJsHyERUZ/A4IgoFyhdMkbBRqBwKODwyFCawwN4hgKhRrlf6dk+UiKiXo/BEVEuOLhKhtLc/QFNi79P0wBXP7n/4KrsHB8RUR/C4IgoF/j3S42R3Z38fnuB3O/f37PHRUTUBzE4IsoFBQOk+DoSSH5/xC/3Fwzo2eMiIuqDGBwR5YKKCTIrLXgAUCr+PqWAYK3cXzEhO8dHRNSHMDgiygWaTabrO0sA30
4g3CLF1+EW+d5ZKvez3xERUbfjOy1RrqicApzwe6B8HBBuBny75Wv5OOCEuexzRETUQ9gEkiiXVE4BBk9mh2wioixicESUazQb0O+4bB8FEVGfxY+jRERERBYMjoiIiIgsOKxGyXFleCIi6qMYHFFrXBmeiIj6MKYCKB5Xhicioj6OwRHFcGV4IiIiBkdkwZXhKRuUDtSuAHYtkK8Mvokoy1hzRDGprAwfPMiV4SlzWN9GRDmImSOK4crw1JNY30ZEOYrBEcVwZXjqKaxvI6IcxuCIYrgyPPUU1rcRUQ7jVY7icWV46gmp1LfpQda3EVFWsCCbWuPK8NTdrPVtDk/r+1nfRkRZxOCIkuPK8NSdzPq2uk8A+9D4oTWzvq18HOvbiCgrmAogop7H+jYiymF85yGi7GB9GxHlKA6rUX5ROmuhehPWtxFRDmJwRPmD3ZR7J9a3EVGO4cczyg/spkxERD2EwRHlPnZTJiKiHsTgiHIfuykTEVEPYnBEuc+/X5oFqggQrJcZTda139hNmYiIMogF2ZT7mrcAoTpZEBeQbJGtAPBUSj8cdlMmIqIMYuaIclvNImD9rwAoyRZpTgB2IOIDmrcBwQbpplx6BLspExFRRjA4otQoHahdAexaIF97ovjZLMQONQKF1YDNAaiQ3Kc5AD0MtGwFHCXspkxERBnDYTXqWLb6C1kLsR0eQKuWLsp6QAInTQNgA46cyT5HRESUMQyOqH1mf6FgowQpdrcUR5v9hU74ffcFJv79EozZ3fK9s0T+hVska6TZgFA9UDyqex6fiIj6JI5DUNuy3V+oYIBkqSKB+NsdhYCrVI6FhdhERJRhDI6obdnuL1QxQYbvggfip+4D8j0LsYmIqBswOOqNMlU8nTislai7+wtpNqlrcpYAvp0ynKZ0+erbKdP4WYhNREQZxpqj3iaTxdPWYS2Hp/X9PdFfqHKK1DWZzyl4UB6zfBwXnCUiom7B4Kg3yXTxtDmsVfcJYB8aP7RmDmuVj+v+Ya3KKcDgyTJ8598vwVjFBGaMiIioW/Sqq8s999wDTdPi/h155JHR+/1+P2644Qb0798fxcXFOP/887Fnz54sHnEGdUfxdC4Na2k2oN9xQNU0+crAiIiIukmvu8IcddRR2L17d/Tfu+++G73vlltuwauvvoqXXnoJS5Yswa5du3Deeedl8WgzqLuKp81hrfJxsqaZb7d8LR8HnDCXw1pERNTr9LphNYfDgcrKyla319fX409/+hP+9re/YcoUuaA/++yzGDNmDJYtW4avfvWrPX2omZVK8XTwYOrF00qPH8aa/DpQ9zGHtYiIqNfrdcHR559/jqqqKhQUFGDSpEmYPXs2RowYgRUrViAUCmHq1KnRbY888kiMGDECS5cuzf/gKJPF0+0VdVdNy/yxmxIDsmwHYLl2PERE1CN6VXA0ceJEzJs3D0cccQR2796Ne++9F16vF2vXrkVNTQ1cLhfKy8vjfmbw4MGoqalpc5+BQACBQKwJYUNDQ3cdftdkqnjaWtTtKATsRQD07u+Ina0lSvLleIiIqMf0quDozDPPjP5/3LhxmDhxIqqrq/Hiiy/C40mSTUnB7Nmzce+992bqELuPWTz94TVSLO3qJ0NpEb8ERonF08myIoAEBP4DgB4CQgeNNcxsgOaSIOHTh2XmWCYzKNlcoiQfjoeIiHpUrx4jKC8vx+GHH45NmzahsrISwWAQdXV1cdvs2bMnaY2S6c4770R9fX303/bt27v5qLsg1eLpmkXA29OBd84Dll0uX9+eDnz+NHDwYyDcBOh+ADbA5pSvuh8INcn9meyIne0lSnL9eIiIqMf1qsxRoqamJmzevBnf//73cdxxx8HpdOKtt97C+eefDwDYuHEjtm3bhkmTJrW5D7fbDbe7jSLnXNRRT6D2siJ1ayXLpFR8YbemSeYoEgBCdYB/b+aON51Zdv2Oy9zj5svxEBElw5rIbtWrgqPbbrsNZ599Nqqrq7Fr1y
7cfffdsNvtuOiii1BWVoYrr7wSt956K/r164fS0lL8+Mc/xqRJk/K/GDuR2RMoUWJWxLz4OzxSp9S4CVBhCYSS7tcOqAgQOJC5Y830LLvedjy5im/MRNnDmshu16uCox07duCiiy7CgQMHMHDgQHzta1/DsmXLMHDgQADA448/DpvNhvPPPx+BQADTpk3DU089leWj7kEdZUWcpUC4QQIgZW9d1A1dAiRX/849vnlB9e2VxWTd/SXQ0pzZXaLEKp1Zf301QOAbM1H2sCayR2hKJS53Tu1paGhAWVkZ6uvrUVpamu3DSc+uBVJj5BmS/CIeagIaP5MACAA0BwANgJKMEjTAVQGc+nr6Q0rmBfXgxzI0pyLyOM5y2bdSQPGo1gGZb6fUTJ06v2cCD6VL/VXdJ1Jj1NbxjLkDWP9IbgcI3RG8tfXGHDwgndT70htzXw2O81Fvea3M96eDn8Rn/4HsvF/mmXSu370qc9SrdMcfc8EAydIE6yQwsTkAe6HlD0yTi7zNDUBJEbZScr/dI8FSxfj011IzL6j+A1LsDQXADugRGaIyL7BNWwBPZfuz7DrS1fOWyqy/ytOB5VdIcbarQoLNXPvk1h3ZnY6GZX07u2c2Yy5i9ix/9KbXijWRPYbBUS7qrgvb/uWStQk3G9PzNcBWIAGJo0Sm7pcdDYTqgVCD8Qdo1BmFWwBXWfprqZkX1ECDkX1SEqBpGgCHPD8FwFEst4WaJGCyueQTUDrPOVPnzZz1Z+7LejyDTzcChDoAGhBpAQL7JUDy5EiA0F1pd74xCw5r5I/e9lqxJrLHMDjKNd3xx1yzCFh1hyz/oSKQYTIdgAOI+ICmLyU4KRgATHhEfsYMDMwam4rxnQvOzAuqswjwHZTsk/XCqjkAFQRcAyV4Gv9QrBbJ1V8CMrPXUkfPMZPnLdmsv8BBYPkPjMybA7DZJbMW8QHNW4Gi6uwHCN2Z3eEbM7Nn+aQ3vlaZXAmB2sXgKJd0xx9zzSLgg6uB5u1yIbe5AejS5FGFEW11pWnA8U/FAoj22gGkw7ygOopiQ3RWmgbouhyHHgJadgJb/px69kfpQO0KYNVtgL9WAhSbcZxdfRO0zvozx/pDTUbWzQ5Ai7U50IPSV6rk0OwGCN2Z3eEbM7Nn+aQ3vlaZWgmBOpQn4XIfkc4fcyqiQ1oHAShp6KjZJOth98gF3l4AFI2Q4MVdYXk8IzComiZfO/vJyrygKt14Tgn1/0oZ+9Zlm8+fkGJDR7EMVTmKY9mfmkXxz+3zucD8E4C3zzSaVzYATZtkSLAr5y0Z87VxVSR/HpoD0AOSVcpmgJBKdkcPdi54M9+YgweM2YsW5htz6RG9+425O88vZVZvfK3MmkhniXzoC7fIe2G4Rb5Pt0aT2sQzmEsy/cccHdIqhlzMEwMup2SPbG7J2nTHm4R5QQ03S32TOTPNZPZVCrfI/8PBjjtT1ywC5p8IfPRj4OBqo/5Hl+cX8QHN2+IDJPO8+fZKlmnXAvmaTpdr87VxliV/HppmFJjXZTdAsGZ3kulKdifdN2Yzq9eZ852ruvP8Umb11tcq1ZUQqEs4rJZLMj1sYb2gR7Md1nofY0gr7Gt7v5mc/eUPyeOrEKBsxnR+G2B3AnYXEFGAe0D7WbNNTwPr/8cyTOiCDBNGjEDLKfv11UiRuabJeVM6sPZeeSNJt1hb6UBgn+w3VA8UVAIt2+R5wGh3YNZyOYuz+8mtu9Pu7RWrW89lb5ohZMVhjfzRm1+rjlZCoC5jn6M0dWufo1R77KTaw6J2haybZi8CfDskqxKdKWY8norIJ/5+X2m930xe4Nrqc+QoAopGAQNOAra9BBRWWTIPSmaD6WHZNnAQKBomBeThRrnN3Dbih2SPbEbAFAGKR0urgqYt0pbAUZp+b57oOdgA+PfIsTiK5JyFGmLtDpQOuMqBk/8ODJma3rnJNLM4PdSYvBVBJj5dthc09/ZeSD1xftPVW/r4ZFouvlaUNelcvxkcpa
nbm0Bm8o/ZGmw5SmVGlZmtgU0yHzYHUDgCODHhgpXJC1z0jXuvzEJzVgD73gV2LwD8u2OF2sGDkpUpGCCBh68mFnwARpapyBjW2ZUQ6EUkgDNbBUDF+g+Fm2ToMN0mk4nnQA8a5zAsNUaFIwAYQ2nOEmDiM9kPjEzZytz0lSZ1uZQZy6VjyUU8P2RgcNSNeqRDdqYzNmYDxkhLLIAweUYAX/1T/EU9kxe4ZM/FPVCCIj0SH3g1bZIgx10JBPcbgZwxbKUHY0ODBUOAwB4A9vjHjwZINmO7QTJ7rWmLDNclG6oMt8h4/Skvx89YaeschBqBll1yLm0OwD0YKDsyN99os5FNMLOVjuL0zne2j7szevo4kz3ensW9O0uXKfnyO0Xdih2y810mx5MrpwCjrwXW/EKKrs3lQEz+XcDqnwLaI7E30UxMgVU68PnTwLr7gHAAKBgs+4sEgPo1EhgVHxK7gDo8QGE10LRZjgk2mV0HxDJc7kESmAVqpSg64gNgyR5BA2CXnkrFo4ET50qDxmVXpN+bp61z4CwByo6QYwg3Acc+BIy8ODffaNtagLg7dbUXUj59yu/J85vsvJQcLueyN/Xx6S7Z+FugvNbH/2JyWKam0isdqFkgw2qeoUadjl2CC1uBZILq10gvJHOqfFdnzdUsAt6eBqy8BfDtkfog347Y0iFmstJfEz/jy1Uq2R4zeFNhALq0HSiqBjyDjMf2y5psmt0o7tZlP3pIehy5BwITHgX6nyD768yMlY7Ogct4fPfA7rnw5OtMr67MEDKznKm0cuhL2jovB1dKY1d7QWZafxBRFIOj3s6aAQnVQfoduY1+R0Z2Rin5BGpOlc/EBa52ZWz2mGaPTbEPNhjH4JAgJ9IS//OOIgA2WdKkaJRkgEoPl4wNAHiq5I0/3CjZKLtHHkcPyO1lx8TXT3W2N082pwHXLJIhvXfOk4WC3zlPvs+H4KCz5zuxAWp7rRx6o7aC4fbOi6ufMZPyQPJ95mMfH6IcweCotzMzICpiBBCO2OwqFYn9314I1K0BPntKvi89vGsXOGc/uc1mBGHmFPvQQcjUd2NfejhhHxEJcmwuySQ5CuPvtzllWZHSI+R7e6HUE1WMB46bA0z/IH74JZ3ePNYLVGfPQUc6ygjle/aks03qMt0ANZ+0Fwy3d15sDjmPul/Ob6J87eNDlANYc5QvzIvq/uXy/YCJqQ23mRmQsE/2ocEISBTiao8C+2RYas0s6Q/kHiDbN22RISRXefysuY4ucDA6X5sz0cyFZlVIApxIwMhcWX4FlZI3eWeZZJRUBZL2JqkYD0x+XYYUUqnJiuvNs0FmzWk2qXka94Dc31bhuOaQi3qymYPp9jPqqJ6mt6wFlWovJKu+um5bR2sCHvLDts+LvVAyp+FmIBKKfzfP9z4+RFnG4CjXmYXNG34ljQfNTIvNBZQdJQvFtleoag5z1K40skQhtGoGCUhWCTYpeo74gfq1Rn8hmyzL4dspdT7mArSDJ0uwZg1OrBc4zSbDdxGfdMAG5DGVMgKtPXLxV8ZzNIMOVxkw+hpg89z2gxKbI70CSzP4+OTnQNMX8v+WXcD6R6T30ua58ReosB9o3iI/6x4s9VLRi/wxwPDzJZisXZFasXwqC+O6ynrPWlDpTiroi+u2pRIM73gl9mEi8bxomixnE/FJRtbu7HoAT0QAGBzltppFwKo7jKEEc/hFk2yGHpKlM5ZeCkz6c9sBkjnMsfxqecNMXBMsfmMZ1vLXWIa7NAmK9KB0sR5zh9z89vTWGZBh58Vf4DxDpC9QJCB1R2YxdsQnNUUFlZKxCtW1ziz0m5Be5iGVc/nRdUZwMjAWnBz8WHou2QpifZDMHksRvzEU2Az0Gy/Pz1kGbP8nsG526rOpUs0IHXFL78qepDNDqDd3M25LKkOJLbukMWrzl8nPix4AysfLuogNn2Xmb4WIGBzlrJpFwAfXSHARDYwACTDC8gaohyW4+HR2+0MtlVOAMb
cCK24yZoAZ+4Etft+aJs0V9VDsdqVLzyHY5f+rfypLaISSZECavpDAw7dD3sidxvCcbzeggvGPc8QtwJhb2s4sZLKdQXvBieon0/3NTJp/n3G8utFKwAYgAjRuBj6bI+ctsT+TNfuT7GKUaj1N8ED+ZU8y1T/GusxMpoYxc12qQ4nDzgW++GPb52XCI1xKgijDGBzloujFvNZSDKwh1qPInLbukmCnbm37Qy01i4DNfzSKf7UkG9iMZosRo3dQsuxSBIg0SbbKVSY9iXSfBEk2B1BQJf2JnOVSs+TbKcce2Be/P80lt2+eK9mh9j7ZZqo3SXvBiQrLbZEAUL9BnpN5vLoyZvNBnpdvu/y/7CgpNAdSqwdK9SLo7p9f2ZNM9yTqTK1SPkt1KHHIVGlL0dF5yfWhVqI8wuAoF5kXc3sxgLqEO80LphEkman19noOfXgN4K+Vn9Xsxo9HYt9rRrNF3Vg8tRVr48gIoGzSzdpc2kPTZFjKVSHB0FF3Ap//AahbadmHUXytdAmo9ADw4Q3ApHnyxh/X6TrD3WzbC05sRgduhOX5wBqM6kZnbrv835xZpvsAW5Hl9HRQD5R4EQwb68XZHDIbL5oRGtR+9sRRAgw/D9i9MLXz0p1dgVOpoepsgNRXsiDpDCVqtr5zXohyAIOjXGRezJ1lsSnmrSgAujETzCVBya4F8W+a1uEkVz8ZDlMh4+eNi7+yyW2aESipSBsHZQmQwsZ0fM2B6JIeER/gD8oMmsLhRoG30bHaZo/tW+nyuJEw0LgBWOiVC4A5NLDpaWDTH2R4D8hMh+Rkn9DNRW0jIctzNoKgaJbOPM/KOH4jEExsPwC0Xw8ULYr/yHi8oDFz0Cavnd0J9Ds+lhE6Yiaw+WljeE9J4OEZJvelWuvUnZ2mu3tWXV/pZpzuUGJfOS9EOYDBUS4yL+ZmJ+uI0VkaQOyirUmmRzOCm49/ZtQKQRolHnqVvJE2bJQ3XN/2hAcx92cGAzZjWnAQrSX2+dHluOKW7XAa/ZSagFU/ARrWGz8XBnQd0HSj3ilxXxGgbjXw7nfl4tC8JRY42AokK9HVbETiJ/Rwo2VRW91yTGaQpCxfjUDJ7L8ExLcfMLVXD6TZgMppsg5WtDGmkUXTmwHdEbvfGtAAEnwM9AK7XgNCTallaTqb1Uk105SJ5WX6qsRzPHhy3xpKJMoTDI5ykfVi7hkiM1WiU/CtwYWRtVFGcXWoSS74/r3ARz8GikbKEJYKy8Xd5oIMFSXsy+YCPMON+qBUaMkvimaxeMvWhO0tQ1KtduWU5xY8YMymsxnHCeO51ACFI2QGWWezEdZP6E1bYsuYwGap6bIh/twa99uM5pWwxR7XllAfouuAf48scaL0WHAX3ZVuLOFSLOdH98dqnezFEihtfR7YNLd1obtvp2SRrLPpgLazNJ3N6qSTaUq1hsq3t3W7h1Reu966SGh75/jU+b3zORPlKQZHuch6MQ81SCbIv8eoiTE5jKUEHDIjrGUboqvYa5AAqPlL40JtDG+ZWSK72+h5FDYu1iEjMHGmeIB2oxbHGFYz92UGF85yubArIH6mXasnivgCcWP4KjqM4DKWP6gBCoell41I9gn9+N8B710Qq7eyQQIdPWCcn4g8vll7ZS6tAsiMPUeJHJt/V2wIJHDAWCNOl5mF/+/brYMKM9PiGWIMm1hqjuyFUoNUv1aWTikamTCbrkJm0CVrZp8sS9OZrE66maZUComVDqy9V4YG0xnWy6eFZ9PRXTVaRNQt+NEkV5kzd8rHATAaJ7oHAyVHAof/GPjqn+Q2zxDj4mxkhjQbooXWyizajkidix6Qf5GAcZtRO6Npsq9UfxtcpTIEF81C6dLwETZ5XGexBAGy8w521lYRuPnjjtjyCOFGYN/StrNQpraWY2jcZAQg1dIdu3i0XHjtHjkOzSHPpXCIFEqb581sUmnO9tNDEhQ1fRmrjfIMlf0mW+ojrjmmJsfgKpOvZsZNDxlZpMTZdE
Ygpwdbr0MHtF4/y/pYSkn35GC9fFWq9fadWdOso/XTfDXyWjV9md4SKPm+dEpb+vq6cUR5iJmjXNbezJ1dC+SCal0zDTBuS1LbAyAWqFiGuTSnbOs3mh62y8iuRAKy7ljEF8uARIJSL2QvlH9mA0g9WQ2TSVkKxNvZRg8DLTvk/+seBHb+X/uFyB9eAwQaAGeRBCBKl2aP9Wvl2Iuq44csoscaMsqvXHLRMtcGcxRKQbSjQH7eb7QnsBfIz7gHAQ537IKXOHxlzbQkyxyZQU+yLIy5fpZ5HhIl1jqZjxWolWxgshmF1u07k2kyM5sfXC3ZSUeRPA9oxmMG5LHSGdZLDCCA2HlyVsh+82HplGRYo0WUdxgc5bq2Zqi0WjNNa6dHkSnJfdHZY8byIa2GwYxaG3PhWKUk6DBn1ziLgWCdNFI03+g1zWgAOVC2S5k5Zd64mKuIEVwZx+0okgtlW0MR5gXWf0ACRN/B+MAgAglmEoeDnCUSMLXskAt7sC5WdO0oBEoOi13UVMjIZFmK5Fua5Dw5iiTQcpbEX/DMTMuBjyw1R5bjAmIF+IkchcbyEf5YABx9vkl6H1VMkPNetxpJZxT6WoDyY2Pbd2VNM2c50LJTzhcgx1k0Sv7vHpBeIGANIOIK5o3zpDklwM3HAKKvrhtHlMfy7CMYRZkX3IgxXKIH0H5g1BZjqMjman3xNe9XEeNCFZQA4KhfyPpigQNA/TqpdzKDK/9uINggxxSqR1q/YgWVxnBgyJjJZQmMNJtR86Ok4WSyoYiDq+QCGm4yAkWzh5Ndvg/75Ln497QeDnIUS1BTfLjRiiAc20/DZ5ItatktQ0XhFrQ+17oxlLTFaIzplqGsXUahbeXpsr9wE6CMoEVpxmP45TH9e4BAffwK6+ZUfrtbAqFUV7lXiD1HzbKvxMO2ZrWSSTYLz8zOteyQ4cmSw4zMWrHRLiLcfiBgHdYzmQGEHgSat7V+/fSgDOPtfjP5fnNZZ84xEWUVg6N8ZQ5tuCrQftFzRyxT+m02SPYm2TbGdhEfsOZuyc5AGX2NRgBlY6VwXA8DTZtluY24GhmjXUCbbIDeAhQMlhqg6Ow84z7oQGCv7LtpkwQfZgbC5Nsr67RBGdPlbUbWwch8acZzsbtjQ2bWQENzAOF6KSJ2FBmBmiatFFq2SSG2CiE25T/ZqQrJEF2jsc7Vp78E3p4hgZzdDdiLjEmGYflqL5Lb/TWSgWn+Amj8HKjfKAGDb6dkYY65Xxb9DTfL8YWbJWN0wtz47NnBVTLrsHCYZJ2sdWGOQrk9sC923jqqHwrWyv1mpilZ/YyzGPAMlOybHpDzmW4gUDBAXiPfbmNiQcLrB6OGbscr+Vebk+45JqKs47BaPqucAoyZCXx0Ywq1O+3RjcyTAx1mn1TYmHZ/AIAGFB8qBdqhRmPGm7G/cH3iD7axb6OI216AaGsCe6HcFmpAdIq95ozdb204aS0srv1IgjPzomqlaYCyAYgAoy6VKeZxfWWOAQIHJRtSONQI7BTaDYTaolsCg3CLkQmKAHAAJaOMInOj5kiFJRulItIhO9wszy/SAvj8sqjohEeM1/qWjqd7mxkYzxDZJrEbt9IlADHPW7qNCDuqn3EPAsJbYq0NUl0CpWKCLLDq22lkMRML+SNGv65dmRla68l2AX1x3TiiPMfgKJel8gZeNErebHUdCKXap6gtSQp+2z9AyahoI4CmrUYheLqf6nXJzjjL5dvxD0nNjH+PFPzq4YSLpaXhJFoAd7/Y9O+DqxAtNo/oUgNjreNREenWPegUYPyDEiDtXy73OUuBT34eu+jbPEYs15mhSoNZR6QbfaYQBpq3S5bNYTyfhs8Q7YvkKJbsm1mIHKyVzODgycZTb6P+zCpxmr2jMP7+ZJmbdNY066h+xuExivLd6QUCmk0WWK39SDJdNmd8mwjNLucm3NT12pxstAvoa+vGEeU5Bke5KtU38IIBxhTxENCV5FFn6QGpEVHtzUrrSE
SGlexFEhhVTQMOfIhosYymGUMpEYlVzAJjKAlu1j8imSt7oVHLYwRp5nmDZlxgbRKEFQyS8/vJz4GmL2JT9MMNQKEbgEfWT9O0FGIj81gSb3YYx2JpmQBI7VbDBhne0uzGTEO71FLZHLGp/oAECI2fpZcpSWe9LiAWgOshCRgBmenWVjCeSo8jZ4msr7f95fQCgSFTgQ2PGXVZZmd1mwyzeobIeepqbU42+w31pXXjiPIcg6Nco3RZX2zt/XKhKahs/w3cvBjuez97x2wdRuosFQLCdVJTpHQJemwOY3ZZQn+faJlUEFhxU+yGUINxhxbb0GxWaSuQNcwqxgO7FwFr75Zzqhm1UJrdaJy5FdBGWVoduDqXETMDI2tRuSnil8dxDzCWgDEyVfaELE+6s5jMQGfwabJ8S8sOIwBoI3PTXgDeVjCWavB16NXyL51AoHy8dAFv2Ag4B8nrb3Maw4FKMlHJhuRS1d1rwqWC66MR5QUGR7mkZpEsLLrv/xk9d4wLtqdSLrb2EqmL+XR2/Bv4sPMk09KJ8pics+oO4LM5EhSEW9BxUGK9PzGDYwypFQySAMdZKrPG1txtDC+5Ec38mNkdFQJadgFFw2KBk81tBFlGUXBcfZf5mAkZJE2T3k/R9dms29pk2CxQa9xmM17jhDqbdGYxJQY6SpfnFDggvyfWzM3gycBnc4F19wHhgBTBu/unlkFJt34mWSCQbLjYXFeuaYvM+gs1yH49VfJ7kInaHPYbIqIUMTjKFWa6319r1MYYBciRFunsbK2d2fsu8PnT0ohx3WxpbqhnY0ytG0SaZdhJK0CX6n1kZ/IlVA+UHA6MvhLY8pyRMXJZLrJGHZNSkJ5PzUDYL9kmc1q9zSEFxlDS26dVt2pzNp6xDpuuIxYYJQZOxv9VUGZ6wS5tCqzaK15O1NZQUfCABEWH3SBDVmYQ8vY0YO87sQDcZwTgztLUMiiVU2QpFuuwpL0wtfqZZNkq90BpAaFHJJvmLJWi8YhPGou6+kvGr6u1OfnQb6i3ritHlGcYHOUCa7rf3U9qX8zp6+aq8UqXN3XdqKX55C7ZJtSAWPakjfqXfKQ66tadhnCLZCTW3i+ZFMBoW2ChacZaakYGKdKMaANFzSbDm44iyZA4io2GlGVyMVXGTKryo6UwvfkLxHcpT3xNjD5HNhcw5qfAlmdiWRibW9oRBOukdmfMHe1fHFMZKtrzFjD2dgmMll5q9HkyM2Vhoz+THygeKYFJRxmUmkVS5+XbFSsmL6ySY+0oMEoWxNWvkcCo+BCjjskjxxFukVq04lHA5NdjjTk7K1m9lFKxAngVllmR2eo31FvXlSPKQwyOcoE13Q89VnCsm71+NESbMZo9X4L1iK4HpjkQLTqm5PSQBBtm0GIu0quSZHgcFbJ+HSCZkYOr5SLdsksu6GYWI1lx7Z7FwNLvA/69bXQucCNaC6VCssaaOYvp4McSGKlIrPh4/SPymrd1cUx1qKh2hQxZ+moQPxQI43yEJCNWWtJ+BiUxwHEPkGCj+Uvgo+vaHo5rK4iDivX+8dfIa6Rp8s9ZBGhDJItU93HXh7oS66USO3ErXdYYDBzs2uN0BhemJcopzNfmAmu6314owzlm4z4A0YuZHpQ3cs0cvgEkC2FjYNQem12GsDQbYmvQBY214fxSUK77jYukLtt+NgdY9wDw5V+Ahk9lm8KhMgvr1PltX6gGT5YO4qVHIf6zh7EEhh4yirSNYPfjn8tjjrkj9voXVBodp4s6XnQ1laEiPQjsXyrdzAFjBh/QulDcZzRwbKPWqSsLqLYVxOlGsGozFhhOHK5sq6N2Z5j1Us4SySQ2fSnPWRktAzS7HMtH1/XsIrdcmJYo5zBzlAsS0/2uCmkG2Bbr7DAVAiK9pN6ouyhApvabi+SGrHckbhgLnKAgS1dEgFCT1L9s+JU0vmzaBGz6gwwtAbHaGUA6UOtBmWVlDpGay6JEabLvUD2w7HIJiEJNRqH2vthwle
aSfbVVA1QwQIYDg3XGUJ0jvreRGei07DCKyp0Jzzvh/+EmOc5ktU5dKWhuK4gzF9Y1e0olLq6b6aU1zHqp9y4wek9p8hHRXCzZUdwzs9asWChOlHMYHOUCM91f+5FcuHU/ks/S6kU1RT1JhQBoUvSrUpjSZxa3a3ZjppfDCJhCsmzK/zsvtr6aZpNMn6NIht8A6WHkGSLbhhsRNxvOHL6zOQHY5NhC9TIMJwdrBDnGzDgzs9XWoquBgxJIB+uM/dpl6M680JtF3Z5h5pNrHYDE7W8/UDQ8+aywrhQ0t9UfyVForEPXEjvXpnSK0tPhrojVjCULKHs6GMmHQnGiPobDarlAswGV06S4OtLUxhAZ49iuUakFRgBkyDIiw2t6QLIXmk0ClVBjLOAxF+vV/bLuG5R80g/WAjBrZox6MCgATgkEbO7YEI6mSdZChYxZiuYsOi02BV/pUosUDaAMNYtkCMgcEjKbVpoF6M1bYtPfB04yaqysdWyJS3RAHu/4p2LDhkqXeqVdCySjZXN2bgHV9tYXK6iUr+bxp7Kwblf490sA7CqXpW8Su4hncigvFVyYlijn8IqbC5QObH2+9UUjDmuK4pkX9+6swzCDB2OGoPU2M6sEADC7cmuxYCnSItkJe4EMVUV/1Cg2NpfFsHsk8wSgzc8qml0CJ3OmHRBfp1I8Kr64GFpsyMgMdJQus8EaNyR5jsbvneaSAnF3hXyfOHtKcxptDppk2ZpU100D2u+PFG6QVgIFlRKAheq6d2mNVLp892Qwkm5XcyLqdgyOckHtCulVBM3o72MUBVM7FLp/iLGd/SvEAhDrlH2zV1KwPrboq3m/mR2KZo3sEhSEffE/n3hxhC7buvrHbk+sU3GWSq8k67T0SFhmgO1aIBf6UZdKbyLoiH9uZruCQZJR8e9ve/ZUuEn+NW8xurensG6aKdn6YoAUHR96FTD6hzIrrbt7/ORaMMKFaYlyDoOjXLB/uWWxTRsAm1zYujUrQp1jBhUh4+WxDk8pQBlDI/49iL1+Nsv/jd5VNo904XaUGF2sncYuQpA/S2vnbk3WhPMMih1GsjoV67pswXoguAtYeWusLYBniFxo9WCsi7Y1SPLXSOarcTOw81/JeycVj5IhO2gSJKW7gKq5vtjnTwObn5Zp+i07pZnp9pdlH1XT2t8HgC41S8zFYIQL0xLlFAZHucS60oTNmZk1y6ibJctgJQ736VJnpCKx6dg2BwB77OJcNMroNh2O9d3RNBl20xzSW8maybAODdkLYhkjm0Nm1zVvlcdzFEttjdmLSA/IMJ6rvwRD0GOBmR4CNF2Wp9GD0sMo2ewpjzGzbvxDMkMv3eBkz2Jg468639MnE80SczEYaWthWkCyy+yaTdRjGBzlggET5Y1ZDyG6Hhhr5fNYkuE4zQ4UjpCAJOKTGWahutjFGDCGsRqMoTKjzijcInVAiZkMc2jowEfxARUQG4qzF0nHdSDWMVsPSobE32RksFyIZqhsDjnGwD4JtgqHJn965uwp98DUsjxxpyaFjt7tTaPPZLPEtoKRbAYeiQvTsms2UVbwCpwL+h0HlB0l/48EjIudr/2fofyiB2SafenhQPFoCXiOuivWUNLMZFSMN2ZrNcvXivHACXPjL4TmkFLhyNjsOWUUgytAZtspyRhZaVqsTsj8XoVke7sHKBohs7dc5RKYBeuSP5euFCyn09MnUaabJXY0NGedqVe7ouebMJqB4MFPJANotmfoqDEoEXUZM0e5QLMBEx6JrXulsxi711ER6ZPkgHFBLzGm11suxqlkMqKZhA1GXZOx0K0KWxJWRh1UqB7wDI4/DjMwchQBBYON9gEOaSdgBivOckDbJcGRqyKzBctd6emTyWaJHWVksp2x6WqGjYi6hMFRrqicAnx1nnTuDdaBxdi9UUQWDvbvAYqq5QKoh40ZWnulMNvVXwqvh5ze+qJnHVJyFMb6G0X7Yhn9kczmlHpAhuVadcw2AhPNDjiLWx
+mHpAAye7OfMFyV6bRZ6pZYkdDc6OvBTbPze46Z+yaTZRVDI5yibtC6kSKygD/QSBSl+0jokwKNcVWsm/eCiz+hjGEatQBKV2GxlzlscVtrc0YrZmEUCOiM9/i2CENLMOAssV3w45mfY6RLFP9mransleMl/Xe1j+S2YLlrkyjz0R/oo4yMi07gE8fkiL1bGZs2DWbKKsYHOUS/35pshdoMZr5Ue7pwhIuoYNGb6OhMpTV9CViCwgbVFAChAMfxWcpEjMJNrtlKr6llYDNBiiX0SdLN3or6QlZnztl846mspu1UJksWO7KNPpM9CfqKCPjKARatkthejYzNrnWqJKoj+FgdS5p2iJDagyMcpdW0PE2ydgLZS2vsqMBu8voFdTGciYqLENbwYZYgXFHmYRoSwFltAkw644apZdQuFkCB7O42ywALx8n9yXbBojNnqqaJl8zkS1J9bETmYGVs0QCq3BL+kuNdHQeNWNNu7beGntqaZH2llsxA8HSI9g1m6ibMHOUK5QObPsHWGuU45QPqWePjO1sbpll5B4gjRObvkSHr3PEB7gHxbIUiZmEUKPlGCzHoocBBGO3qQjgGQGMvho47Or0C8C7S2cfu6v9iTrKyKiIkTFq4/XpqYxNLjaqJOpDGBzlioOrjCVEKLfZIRdOa9fr9jYvBKovAXa9KtmKxu1IbQFco/eQHgL2LZXp/54hRiPHUmOh22SMfZsL3roqpM7p04dk9tqQqfFBSGJfnejDd6EDdaraeuyOdCWo62hoLtwiGb5IC6AyPFMvXbnYqJKoj9CUane1U0rQ0NCAsrIy1NfXo7S0NHM73rUAeOccDqnlDZsEO3owebBjK5Ci5nEPSKH9O+fJkI1vpwQeKtz6ZxI5yyWD5DIWglW69DTSQ7FGj9GskRb/f80uvYuc5UBgv9FnyQG4BwNlR7Z/cc32NPbuZs5WCzUmz8iMvkZmq7V1f3tDf92hJwJVoj4gnes3/8Jyhbsf+xvlFV0Cl1aBkQbALkNo4x6Qi6jS5fvAflnaI1Whekgzx36SNXIPkFlUKmyUFFn/fO2W/xtBkh4EfNuNhqK6EVSF2m8i2BcaD3ZU8zR2ZudqorpLd9R9EVG7mDlKU7dljvYvB/771cztj7LH5pFsQ/EhkvVp/MzoZN2Q4pCaSYt10zYF6oHmzZIV8lRJRihw0Fg+xJqNMof/zNlsxp+55gSKRsqxlI+TDt3mxVbpwNvTJTAqTDLk5NvZ+mfyWSodspmxIeo10rl+s+YoF9QsAj64JttHQZmiByRrc3Al4KyQhVrd/QHffsC/PY0daZaO6WZwYwdgN4a7HBI4uQfKkE/LdllGBGHED7dZgxxd1ncrHNZ6SnpfazzYUc1TZ2uiiCjvMTjKtppFsmyIb2e2j4QyxlKobXPHZkW5igC/0aQxVWFj7bQoI2hRiDV4NAOXwAGZDQdI4KQHEcsaGdPTNYcEb0pvPSWdjQeJiAAwOMoupQOr7pCaBuqdAjXSmNFTKcGMpgEw6oY6bAeQOBsuoYWAfw9gc8YKhjWHZDuUbumNY8kg2ZzGsiIhIOxrPSW9o2nuwToZFgzsM7p52zj0lA6eK6K8weAom2pXAPXrsn0U1K2UTKW3eySbo2mQeRB2wF4MhOs6v2vNIUuSmFO8+x8PlI4FNs01apvMYMpmBEZmV21NOrH3+0r8lPS2prmHGoGWXTK93eYAVv8M2PJnoHIaULOg985qy6TePgOQqJdhcJRN+5fLG2X0okW9kxEglRwhU/zDzQB0IFyf/n6ibNJp+9iHpObIzEQAEnAfXCmPZWZ5oMWG0jSbLJMx5o74zEWyxoN6SNaBU2EJxgpHyIX9wEfAnsWxGW3ZWJw1X5g1hcFaCYidZfI339a5YoaJKOsYHOUCzdbp5booT+gBmVLvKLXUEHVhnTZAAh33QJnibXXUnbE+Pp4hklmKtFhmytkkkFn/iPzuWS/McY0HN8jQnYoYQVCl9PlRRoNKs0
mlOQTX04uz5gNz6Lxlm5y3UIOcE5sbKKiU763nqjdlmBjkUR7jb2o2DZgowx16Cg0BKb8pGENg+yBZn0J0LSLW217GwtrHBwBgN2b0O+SCXHa09Exqq3dR5RSZrj/+IclyFFUDJYdJYARIoKX7jZ5LQWONMyUZsVADoGzA/g+B1T8FDnzYdlZU6TK0vGuBfO3qdrno86eBuo+lv5Vml7932KRHVss2ye6ZMwB7U4+pmkXSFuKd84Bll8vXt6fn13OgPq3PBkdPPvkkRo4ciYKCAkycOBEffPBBzx9Ev+PkQpXO7CXKEwlT4c2mkbABhcOB0iMBz/Cu7beouu1lLMwAx/sPoGSUXGQLq6Xvks0uWR7PUMkumYvbxj2MTbJSmh1wlcdP7dfDEgyZBdnBBqDxc6BxE9C0CQjsllqq9f8DLDwFmH9i64tiqhfPfL7IKh3Y/LR8tbmMrIlmZI5cRnH7AaNZ5155HYKN0mPK4TGGPzt4nXJRbwryqM/qk8HR3//+d9x66624++67sXLlSowfPx7Tpk3D3r1trVfVTTQbUH1Bzz4m9ZCErFDxIcDRsyTQcPeTYCPZjLCU92sDhp3X/jCFZgMOrADq10sBdstWoGkz0PCZBDSRFrlI162RjEwi6+w1K7Ow3LxQB/Ya2aPEDKgy+j2tlnYV5kUx1Ytnvl9kD66SmaiaDUmzhJojtlxQ8EDqPaZymdJ7T5BHfVqfDI4ee+wxXHXVVfjBD36AsWPHYu7cuSgsLMQzzzyT8j58Ph+szcVDoRB8Ph+CwWCr7Xw+H3Q99mYQDofh8/kQ8PuAz+bEtg3Z4AvZoFveR8MRuT0Qjn/D9Ic0+EI2RCzvMWFdtvWHur5t2LJtJAPbBsLGtpHObaur2PmxChrbhiJap7ZVlm2tveJDkfS3DSa8RnGv59BvAINORki54PMFEAzDqAGytd7W0OZrH3HDpw1EZFCsBiX6OxUIxIah1v0S/pWz4AuEENFjQzrhYDN8B7+Av3azXLz9e4H3LoJ/3RPwtTQjEjFOfMUEhIsOg69hL/xN9RIAAYC9EP6IB75gGOGI2UMp0v7vSeN+hNc8JFmnTx9GxN8In2Mo/Hr8xTPga4Rv9WyEg/7oRVYvGAqf7pHX03KRDXw8G76WZoTDsaBM1/Xo31zcax8MwufzIRQKWV5P1altU/67r9sFpSuZqajCCIUVfEEt/vdE6fDZq+BTJdAjsR5T4QjgCwIB8xDsBYAehK9uV9vvJ4H4INbv98Pn88VeT8u2fr+/y9taz3skEpFta5bHBXn+kDyPcATRIC9StxG+Xcta7TcQCLS93xS2Tee17+zvSbLXvsu/JwmvZ4fbRsLRYebw3g/ga2lO67XPid+TLmybzmufbNtU9bngKBgMYsWKFZg6dWr0NpvNhqlTp2Lp0qWttg8EAmhoaIj7BwDTp09HXV1ddLs///nP8Hq9eOSRR+J+/vTTT4fX60VNTU30thdffBFerxf3/+IW6WpsOPuPR8M7ZwK2HCiI3vbqugHwzpmAn/3nkLj9fue5o+CdMwEb9hZGb1u4sR+8cybg1n8fGrftpX8bA++cCVi1szh627tflME7ZwKu/+fhcdte/eIR8M6ZgGVfxlqrf7i9BN45E3DFC0fGbXvjK4fBO2cCFm8qj962ZncRvHMm4KK/jI3b9o5XR8M7ZwLe2NA/etum/R5450zAuc8cHbftrDdGwTtnAl5eMzB62446N7xzJuDMp4+J2/ahN0fAO2cCnl85KHrb/iYnvHMmYPKTx8Zt+/iSYfDOmYBnlldGb2sK2OGdMwHeORPigsen3q2Cd84EPPVuVfS2iI7otk2B2FpmzyyvhHfOBDy+ZFjc401+8lh450zA/qYCYO87gB7G8x+PgPchDx76t5IMjJFROPPpY+CdMwE76mINGF9eMxDeORMw641Rxi12ABrO/dMR8P5qGDbt90TfJN/4x+/g9Xpxxw0XAvNPABbPANbMwkV/HATvnG
OxZpfb6H8UweJNpfDOORY3vjzKCNB0oGkzrrjtd/Ce8nV8+N8/yMPtWYxl6+rgfawaVz/rkKGz+o1A4ACuf3E4vHMm4N0vSqLHu2pnMbxzJuDSv42xnAUdt/77EHh/ezQWLv0C2PoC0LARG2oHwHu/hu/81rKppuFn/66E9+cH8epzd0cvslv2a/DeB5z9P7Ht4OqH+/+2G95Tvo4XX3wxuouamhp4vV6cfvrpca/FI488Aq/Xiz//+bnoOavbsgRerxderzdu2zlz5sDr9eLpp5+O3ub3+6PbWt+sn376aXi9XsyZMyduH16vF97v3ou6QIFkfTQ7/vxBBby/ORKPvDnYmDkYAjQbTn/ECe9370dNY0E0S/ficsB7H3D/K8YOI37A5sLZV/0KXq8XW7ZsiT7Wq6++Cq/Xi5/97Gdxx/Cd73wHXq8XGzZsiN62cOFCeL1e3HrrrXHbXnrppfB6vVi1KpaZevfdd+H1enH99dfHbXv11VfD6/Vi2bJl0ds+/PBDeL1eXHHjg3GNRG98Tp7H4vXGhvYCrNkOeL91Iy666KK4/d5xxx3wer144403ordt2rQJXq8X5557bty2s2bNgtfrxcsvvxy9bceOHfB6vTjzzDPjtn3ooYfg9Xrx/PPPR2/bv38/vF4vJk+eHLft448/Dq/XG/dBuampKfraW4OCp556Cl6vF0899VT0tkgkEt22qakpevszzzwDr9eLxx9/PO7xJk+eDK/Xi/37Y41Vn3/+eXi9Xjz00ENx25555pnwer3Y8Y9p0WHml3/7fXhP+Tpm3XFt3LbnnnsuvF4vNm3aFL3tjTfekPeIO+6I2/aiiy6C1+vFmjVrorctXrwYXq8XN954Y9y2V1xxBbxeLz788MPobcuWLYPX68XVV18dt+31118Pr9eLd999N3rbqlWr4PV6cemll8Zte+utt8Lr9WLhwoXR2zZs2ACv14vvfOc7cdv+7Gc/g9frxauvvhq9bcuWLfB6vTj77LPjtr3//vvh9Xrj3iNS1edmq+3fvx+RSASDBw+Ou33w4MFxbyKm2bNn49577+2eg2lOZykJyl8KOPgx8M45QGSo3BRuAcL+dn+qNWOtNPMT6aLTgUqjGeOnTgCDgPq1wMHNkM891lq2iMyYS3Zsif//9JdAdbOsTB8IARhgLFsCGYrz+QH7ePlecwAdLs9o3K/C0hbAnIWVjPk4Zi1Oe926ke75A/DFc8A79xt1Pm4ASQraM6nkUCC81miBYGYCJEiFzQ6UHSPnEGEZeg2ukxl/cUu+KGkDUD4O0HJ8OEpztN9INOI3PhRQ2sxh64aNwJAy+duwGbcdXCnDzPk2ozGH9bmFZ3ft2oWhQ4fi/fffx6RJk6K333HHHViyZAmWL18et30gEIhLQzY0NGD48OGoqanBoEGDoBn1AaFQCOFwGHa7HS5X7I3fTPO53W7YbJKoC4fDCIVCsK28Fe6tc2PbGsNAbocOm/HeGI4AId0Gm6bgdsReKn9Ig4IGl12H3cj/hXUgFLFBg0KBs2vbOu06HMa2ER0IdnHbQFiDrjQ4bToc9vS31RUQCMuDeJyxC0QwrCGiNDhsCk67SntbpQC/sW2BQ4/1PYxoCOvpbWvXFFyW1yju9bTZAc9QhEIBhHUH7IWVcAW3AYH9AFQar70NfnslVGA/XLYQ7DbJJoUjofhtNQegwim+9prxesLYFrC7igCbB+GCYQjpmmxra5FhsWAt/MUToI65B87lF8Oh6YCvBhE90v7vSUEZHCf+Fvj4LkRsxQjCAw1AgSVOCvhaoIea4ZxwHxzrZgGOYug2DwLGNcBjbhtuQcDfDH3S83AOPhEOh1xwdV2P/r16PLGLc3D7fxH54AY4Io1wFvYH7G6ocAD+5lrAWQzPybFeQ8FgEJFIBA6HA06n03jtVTRjVFBQkPLffUHd+9A+uhYINSJkq0A4omBXLXChWTJKJ/4evjJ5H3IffA+2FdcBoUaE7f0QQgFsuh
9uVSszBU+YG9s22fuJzQa3OxZM+v1+KKXgcrlgt9vjttU0DQUFBV3a1ul0Rs97JBJBMBiU137pOVIX5hkKf1iDUoDTDjhssnhxpPQYBE/6FzSbPW6/gUAAuq4n32/CMSTbts3XPsnrmc62Hb32mfg9sb6eSbdVOnwLpgEH18JdWgWbXfYbjgChsIItsBPuAbFFodt7PXPi96QL26bz2idum87Cs30uOAoGgygsLMQ//vEPnHPOOdHbL7vsMtTV1eHf//53uz+fzsnt0Bd/BpZd1rV9UI4zexlpkj3QHNKYsegQYPg5wGdPyCfpcLPMZmtV1AxjynxYsipFI6X3kDnzTQVbb9/p40TsWKGAolFSPJ4o3CLH6/0H8PFdciEMNAKqpf2HcPUDzt0NLPlG9OIZV3ys5OKJ8nHA5NeBxWd1vJ1xMWiX0mWG28FPpEi4K/vqjHR6F/WGPkdmIX2oUV5zc3mbYCzIy5vnkitqV8hQmqM4eUbO/Js85WUultyOdK7ffS6/6XK5cNxxx+Gtt96KBke6ruOtt97Cj370o549mJEXAx/9KGFhUepdLGuctWyLfV+/GmjeLMFSJAIUHypNIvWw/D4E6gEVgNQZ6VLUWzjMWBvNH1tANuPHaS5vEpGamGTMBWgDtbGO2uEWoN2aR4ccc/2a1l24Ey+eY38qAWMq26USzBxclfpMsO64sFROkSaPqTRETGfbXBXXSHRjbHmb8nH5FeTlEi4K3eP6XHAESPHXZZddhuOPPx4nnngifv3rX6O5uRk/+MEPevZAbA7g6LuB1bf17ONSliQkacONkOAHQPOXRgfqEplVppS8EQ4+Hdj5b6BohFwgg/VGh2q99f4yQkM0i9RWc1KjOBgFAySYOOH3wKrbpK4q2WK5dg/gqQLCTfLmXTUttYtnOhfZ9rox58KFRbOlHnils22u6g1BXi7paFFo698kZUSfDI4uuOAC7Nu3D7NmzUJNTQ2OPfZYzJ8/v1WRdo8YO1PSoWvv7vnHphxgTOdX4fhFZCvGSwDgKgP2vBl7U0zsMZRx5n41CSiUaj0MZRYHmw0oK6cAJ/4eWPJNGQK02WNLlTiK5F+4Jf7NO9WLZyrbdTQUxQtLdvSGIC9XtLUoNJD8b5K6rM/VHHVVRmuOrP5WDKA5c/ujHNDR2mnm/XYJek58GigYHB8AmPUyZu0NIFPqw00d7LuLCowGfuEU60YSj7O9uh4gcxkFs74l2CjDZuYCuMEDkoU74fcSXKV6bMxsUK5iLVeXpXP95jtBrpj8z2wfAWVUKovKWup89JCsS1Y1TT5tmxdpzSYZEGeJXMQDB4xaoO78TGOTIb4TfidTzYMHgaYv5WvZMcnfhBOPM9wiAVO4Rb43a4T2LM7cciCpdmMGUjs2BkaUy6xrJoabpYFruFm+Z2CUcXw3yBUFA2Q2E/UC6f5ZddC91XxT9AyTi7meqRlqyWiyKG1gn6yTpsVujvva3nG29eYNZHY5kHQKrXlhod7AXDPxlJeBr86Tr6fO5+9vN+iTNUc5qWICUHI44K/p5osfdS8zMEo3s6O3vsBbDZ4si8Y6SiWdHtibfNp/l9mAkLFUyNr7ZTV563BV3RoJZE74ffI35LZqhADJEJlZHvO5OjxSQ+HbKVmewZNTz+CkW2jNImHqDVjL1SMYHOUKzQYMPx/YuxiA3eiaqwOqjenUlKO6UCi97mGgdEzyoOPgKqDxM6lJatnaTYERjBYBIQBBIOKSvkrpBjLJ3rxrV2R+On1nCq0zeWFpb4YcEeU1/iXnkuJRUv9g9wCasaCnzQXYC9H+eAb1PBsy+5rYZJinrRXLzSwJdKMBZGePwYZo+4BkortT8ruYqRXiU8ny6MH0ptObM3iCB1ovYWLO4Ck9ontm8NQsylztFBHlnLSDo1AoBIfDgbVr13bH8fRtBQOkBqNoBFA8WjoUF48GysYA/b4CuAej3Qsb9aBM9hkyZqu5yoGGDcCXfwN2LZBsixkomV
mSYF0nH9cIcpzlbaxrZvY2CsX+H26WZpThhM7XnQlkrFmeZDoznT7VIvBMZ3PMWUOZqp2irlF6dDHhuL8Zoi5Ie1jN6XRixIgRcasTU4ZYe1l4hgKOhCnHGmQoI9wI1H0qHZI57Jb/NA2wueUi7tsDrLxVlgqx9usZPFn+v39Zh7uz7Bix5UAAwCaPVVQtxdYKkCDPGuhpiA4NBvbJP9gk4+MeCBQM7Fwg0119WjLZjTk6TLZXZgW6+gOeQfHDZYkz5DJRO0Wd1xuWW6Gc1Kk+R3/605/w8ssv4y9/+Qv69Uuy9lIv1m19jkyp9LKoXQWs+YXxKdwYfqMckcoU/sQfcUotkX+PNE8sqpYsUmK/HgBYehng29GJx9NkiRIoaRkQbjKG6ewAwkYMpRnDU+188LF5AEcB0O/49PsCdWeflq7W/5gX2YMfA6E6eR00u2TazIaclVO4xlUuSaXHFQMksuj2hWcnTJiATZs2IRQKobq6GkVFRXH3r1y5Mt1d5o1uD46A9j8NAfKG4D8g9+lBoxsxU8l5S3NIgKT75aJbclgsI5G4GOump4GVM2XbtNiBimOB6ouAmgUSBJi1Oja3xFiRIKKBkeZou+hbcwLjZ0t393Tl4id98yLrPyBBo9IlMIIOQJPXpKC/XGz1kNQYeYYkD76ULm0CvjpPelZR98j2YsKUl7p94VnravbUDVKZDl08Sr6PtMgaWJGAtAEA5BOU0o3AyQyaGDzlLBWWf5pDmi9a3+jNAuiDHwMLvyYXXocHCBrF2bAZb/7memuakQEysz82Wb7D1U8yTpvnAsf/DnBXALvfBHa8Iuu6+fchuq6a5mxnNpyxVlrNAmDMLelfeHJtOr11mMxsrmkWjSslw9YqDAQbZLvxD3ZuKRLObMusbC8mTL1ep4Kju+/mOmDdLtXp0A5L1s5RJEMCR90FDJwk3Yy3vQg0b5WhmkAtsOp2o06JQ3HZ08ZQWFtJXD0kWZ6GoCzg6u4POGoB3y656NoKAGcxUHoUMOI8YNMfgabNUiNkd8psR3PIzLcTWP+IZKEAoGIcUPM2sPkP8vvj32MZXjMvOuZxGdkUeyFQv6HzF55c6tNiXmQdhUDooNFCw6BpABySpXP3l+2A9GuncjFblu9yYTFh6tW61OdoxYoVWL9+PQDgqKOOwoQJXPSuW3X0huDwSHBUcljs4jPqe7H7lQ7selVqlpzlEjR11J2ZusgaYJhBkUq436j7gQJ8NYCjJP6i69slF96Cyli2omCAXLDNwPfEufKaH1wFfPpLoHBY68xGsiyU2XA04pMLipmFiivkNhnZqOB+2W73m7Hfs3zNjET/poqMQDNxNqgRKGp2yQoFaiWo+fAaCTST1U5ZZ8i1VRdjzmxjXUzncDFh6madCo727t2LCy+8EIsXL0Z5eTkAoK6uDqeeeipeeOEFDBw4MJPHSKauviFoNmDsnbGi2MKhQMsuRC/MlGHGEJQeBlQQrYMiGLdZCqAjfhkqNTOCoebY66qHpNjXzARpGlAwCAgYwYpm6ziATsxCRYtYa2Vmmua0dGhv63fCJs/p8yeB/ifITfmaGTH/pqAbC/2qhGEaFRumNP+2+h2X2gw5zmzrPlylnrpZp/4if/zjH6OxsRHr1q1DbW0tamtrsXbtWjQ0NODGG2/M9DGSKRNN76xrTEEBrhLj4sAmk5mnjNqfxHOrJdymEG3OqCISAEX79ewAoEtQ07JVhssaP5cZZ0DrnkPt9RNSqnUWylyotbBaDkMPof36NE1qcOyF8rir7gA+yOOeP+bfVKQF0FzxtVZKyfe2AnktrH9bqaxxlU5dDKUnWz2uqM/oVOZo/vz5ePPNNzFmzJjobWPHjsWTTz6JM844I2MHRwnMN4RUU/ptSSyKdfcDDnwIfHK3DJlQF1h7Cykj2Ei8L1n7BSV1QxGf/Ixvt1FUbzRl1OzGDCol2zRvk2ahmiM+W5j4iTrikyyPzSEX+4hffmec8TNM4So1Mo
k70D7j2O0FEjTUr5UsV1vLjHz8MyByd+t+QbnC+jelB6V3WCQQP1tNcwCustZ/Wx3VTvWluphsDKtmsscVUYJOBUe6rsPpdLa63el0Qtc5K6pbZeoNIfGNvf8JsvDt/ztPmkxSJyUJelIaslSy4GvFsVI/FNgPrLkXaNxizEj0G8XCGgCnFNX7amTYrmJ8LKNhXuyXXiqBi9JbDxV5qpIfgt0cqrWhw9mNwQPyu4cIoApbZ0bCTTJ0W/sRsPT7EkDl6lCb9W+qoz5H6egrdTHZLDjPtdmP1Gt0qs/Rt771LdTV1eH5559HVZW80e7cuROXXHIJKioq8Morr2T8QHNFj/Q5SkV3fVLb/Sbw/kWyTIWKoNWF3VEiQzJNn3f9sXo9DRJoRBJuS6w9snSnLh4NnPw3+fb/fVuGqFRYCq9VxOiJpAF6RG4vHCrDOtaLUM0iCUj8+9E6yFGAZ1jyC3LLDpmt1hn2QikCd5ZIUNS8VTJWZkdumzv3m/Ol0iE73f29PT3W8b439uJhI0bKI93eBHL79u345je/iXXr1mH48OHR244++mj83//9H4YNG9a5I88DOREcdXcKu2YRsG62ZB7CxiKn7v7AiO8C4+8H9r4jXYLD9Zl7zF7Jjna7TSfS7EY90CDJ7jRtAYpHymsbajRmlwXi+xkdcx9w1E9i+7A2x/NUAbplWE0rABo/le3Kjo5drEONUpgfaUbnC/M1yRgUjZCMVsRnZLoiEvA5inpPQJCO7uwKnm1sxEh5ptuDIwBQSuHNN9/Ehg0bAABjxozB1KlTO7OrvJL14KinUtgdBWC73wSWfEMu1tSas0xmmqXcKsFYfFYPS5Yh0izDbJ6h8VmesNH002wcecor8cOjbS1vYf5cuFGG7JxlEoTpISMrFTaCs3RbO1gyX5pd6pD0IKIZM7snvuN3X1xeo7f2OeJSKpRnurVDdigUgsfjwerVq3H66afj9NNP7/SBUpq6o2dKW0FQR8WmQ6YCX38NeP9iGYLISAfuTqxLlpM0yQ5Ez0liM8VENmPGoJI+O84ieS3q1wL+3ZJxsBmBqaMw/lN54szExCLgZBknQPYZboqt5+YoBlwVxmy2FAMkm/EYutmmwHzeCtCMGXiucplZZ3PI0Ju9QHoF7Vvad2pEemtdTF8qOKc+J+3gyOl0YsSIEYhE0hguoK7rjp4pXf1EO2QqcPILwKezgbo1EiQlq1OKsgE2p2W6uDGlvfgwoN+xwLaX5Bg0m2XNuHykjIxaR0GREQxG64hCkmlxFMrdBUPkdW3ZKgvTpjIz0VoEbNYq6UZWSLMBug4gIkXeI78PfPGnWGCkaXIxixjTotsLVG1uY0YXEL8Om2WGl2aTJW3MgnBbgTy/UD2w7kFjP70ki9KRXOoKnil9peCc+qROfXS566678LOf/Qy1tbWZPh5qS6Z7pphZqK72p6mcApy6AJj8H2DcAxK42cshv1oJv142pzH04gJgl8frfwLwjbXAwK/JNko3Luy9YdZjR1kwFfuqB+R19FTG7nb3kyCoqFqGJ3y75Wv5uLZrVay9sFp2xdYLUyEjwxMGYJP91CyQ18NVHvud8gwxAhs7pGYKaNWnyeaKBUbKnNpfJIXe7v5Sd6Sbj2eXxpKwyzBLYK8Eva5++dcTieJlou8aUY7q1FT+J554Aps2bUJVVRWqq6tRVBTfM2XlypUZOTiyyGQKO9NZKPNTcb/jJNj5dDaw993YxdFcINVcikEPyfBRwWBg/EMy5NL/BKNDsdkXqLc2pUwydKiMXkbQpJgZmsz0ifgleDjx97Hu1x0NyZhT+ZddDkT2tn4sU7gZaPoyNjRrfvJ3lkgw5tstRdXKJlklV4VkfEL1MlPOZvRcMmuVPEOAcANQfqxcFJu3Gxkj61M2MoE2l7FUidb+71y+LkmSSbl8DjLVd40oB3UqODrnnHMyfBjUoUymsLtzRWuzvuLzp4F198nx2o0FcSM+ebPXbLIo7oRHZPuaRc
DahxKG0bpSe2SXi77mTH9Gnea0BGjpMoYNld7OPpI9L7PmCEaDx61A4Qgj2Bgnr0E6F5jKKcCh1wGf3GW50YhSbE55PBWSmqPikVJnZF2CwVkiQVnium17FktH7LqPjUyXTYbJXBVyrM5SYPj5EhwXDpMLpB4whvKMY9Ds8tjWJVKS/c711iLmdOTDOWAjRuql0g6OwuEwNE3DFVdc0aun7OecTK4l1N2FlJoNOPxaoPTw2JumvVAuhp4hwOirgcOulu3M4T1/LbpckK05JaiwG8tANG9P/+fNi3eHzGDFbPJoBGQqkuaML5tllpgxFGkuFVI4ovOfvIuGGz9nBCTQ4vejjNlkQ6YB2/+R/JO/ux8w4dHY+mmVU4DpH0jgu/lpo4u38XqZF0NzOM0zRAJ1c5ac7pftzfokPeEcWX/nuFhrfp2D3lpwTn1a2sGRw+HAo48+iksvvbQ7jofakskUdk8VUnb0pmkd3nP3k+wDzOLeTtQcOcsBd7nRhHBb6wtwm4xAU7MnZNISC6odkJodDbCXAE4P4CiThpg240/JrPFpj81pHJsRWLnKZQYXgrFt9DBQNaPzF0B3f3k+0SGwBMq4fdApQOXpqX/yNwPfw65O/rrWroj/3TKLy8NGgbbSjeLshLce83fO3Q/4+K7ML9aay8NTifJxwdreWHBOfVqnhtWmTJmCJUuWYOTIkRk+HGpXplLYPbmidXtvmtbhPRgXTdgAzW3UK6U6W83SiVrXpdOzHjKGyDqYdQVAsj5OIBJM2Nb6f5t8rzllDbRj7gMGTpIg5s2vGzPCgJSCOmXdtzKW4QCguWQfug4gDGz9KzD0GzIrMF0FgyRYDB6Uc2nOiDMXU9Vscn/BIHl90v3k39brWj5eskYNn0lNmbmGm6NQnp/eDNiLJZMYPR+W3zkg80O++TA8ZdWdw95ElJJOBUdnnnkmfvrTn2LNmjU47rjjWhVkf/Ob38zIwVESmUhh50ohZdzwnjHVO+KTAMTmlKxHh5wyXFcyWmpn6tfGhsWUtUGlMcQUDbiMGhxHkdTN6KHYIq8KxvCPJQtk1tcUDJZanZLD5MKkdOk2Xbc69VqlVi0KVGxILtrmANIa4b0LgJP/nn6AVDFB1gSr/QiIhAAVlKBLsxlT6p2t12Tr6oXWDEKatkgWsLFefq88VfJ62p2A7pBzG/El/50L1GZ2yDefhqdM7B9ElHWdCo6uv/56AMBjjz3W6j5N09gDqbtl4kKWC4WUicN7nkoZDovO3uqIBjgLgYKBQPVFwMbH0XaWKKEhY/EhwJjbgNE/lAJj/36geQvwxbMyNATdWHLDCTgrZNV6R6HU0FiHHDWbFJYv/T7gS3VdMnP2njGsptmNwMiaubIBsMsad8t/AHz1ufReE2sAHGoE7AMRXVA20pL5ADguCBkg+zdnvDVvkXXK+h0PVE6TFgJt/c4lDsslSmfINx+HpwD2DyLKAZ0KjnQ9haEDyn3ZLqRMHN5zlsbW5jLXdGuTUWBceqT0V1r/iAxtFY8GmjbLJnENKY1gw8wcDfs2UDRKAiPrcx79Q2Dh1+TiXVBpDAcZF9W2hhwrpwBH3AqsvVeW5+iQTYaYlKWzdFxgZJ1VpoBQU+cu4okBsO7rngA4aRDikdcz3CKNIItHAZNfl4BzzC1t/85lcsg3X4enenLYm4iSSusqeNZZZ6G+PjY1+uGHH0ZdXV30+wMHDmDs2LEZOzjqAWYWqmpa+lPGM/HYY38qU8d9O+VC6iiW9cScJXIhd5QC5RMkkPFUAUUjgZIjpF6m3/HA6e8C7orYRdBZLMNf0cDIHE5TluemgM9+Lb2A3jlPFs80GxDaHNJ7qWAAEDoYaz8QbpFjdJYCY+6QC+quBZLp2P0msHkuoBVI0NP+kzaG7IKyvpnmNArQLUFctMmi0VnaVZ5eg0+T0gFXGXDkLcD4B4GJz8g6V6fOz2xmsK0gRNOk5sgzRLJIdR8bt7fzO5fsdyLx/Kea8UpleE
oPAr698jqar6cejv++pxuSpnsOlJ7d4yXqhdLKHC1YsACBQKyO46GHHsJ3v/tdlJeXA5Bp/hs3bszoAVIv19bwXn9jCGbzXMC/Sz7lu8pjNSoFA2INJBMvgq4KSwbHnGoPS6bGJsGTs0yCkMT6k/aGHCunSZbKLO6FMi5eAEoOBSLlQOMXaLOYXDOG02wOYMztwJ5FwIHl0pTRzBZZC6ftHgkE/TXp1Zi0V4Sc6QA40zUymRryTWV4SumS8fPtNpas0Y2CdWN2XbaKt1M9B/lWbE6UJ9IKjlRCi/jE74k6pb3hvX4TOr5AJF4EzXW/lCUwAhCd1WYzsjVKNzJNSepPkh1T4CDw0XUyfGQvkOGuiA/RQKhhA1A0DCiskhlz1se2F0otjs1tPHYYqDoDGDBRMljh5lhH6egF2i51WHogvRqTni5C7o4amUwM+XY0POWrkf5LTV/KedKD0vjSDI6KquW4s1W83dE5yMdic6I80amaI6KMa6vIPJWLZOJF0GasDabZYhkYMyCSufLxvXbaqj+xHpPSZfgt2CjDGi3bJPtkFlMDUtPTZHSVdhRLRkkz6pw8VTLEpZQEYmbNiGaT4a73LpDia/PY7EaBuqMkfvuOZKMIubtqZLo68aC9WZmBA0bQWRA7Tw3bIVm9ApkU4N8jsxI9WSzebusc5GuxOVGeSOuvRtM0aAmFjYnfE2VcR3VRiTUagNFTJwQggmjzQRVGdAFW83uTWX/i35+8hsNaV+OvMZoouhDtQi0PKrf7a2TKv81hBE5Gl+q2akaGTJXp+oVVsZqr4tGSvUi3zibTCxSnItUaGaDna2PM4anycfGL9xaPii24rGlGJ+9ArB+U5pCsUqSl+85bV2TjdSbqQ9IeVrv88svhdkttgd/vx7XXXhvtc2StRyLqUYk1GnaXXNyAJI0gjYtJ8zbJ8piLvNpc0qPn7emtazgGn2YEVZHYRRSwBEfmtHybPK7NIct/tGwFYJMFW61DgoMnS4BgZsMqp8h0ffP4/TXp19koHdi3VOqt7EmGt4Cu98hpq9N0RzUyQPLz2hO1Mcmyj/69wLIrYnVSujHMGu0mbtR9mV3Wc623EHshEXWrtIKjyy67LO77733ve6224bIilDWJF8GmzcDHP5fAxLo8vOaU4EUPSibBUSxDP55hwIb/kVqixBqOhvVGNsSXcBGF0bDS+GBgdr8ONxtrjA0Hqr8jj1FUDVRfCOx9p+1A4dT5nauzMQtz69YAwXqjt5FHMiPOkth2XemR01Hxb1tDoHsWZ782JnF4KrGfks1c3sSYIWjOFDSHXnOttxB7IRF1K02xqjotDQ0NKCsrQ319PUpLS7N9ONSe2hXAknNjNUh6QDIGKmJMp1cAjFlrrgr52rIjvoYDkO1adsRqlkINsZom837dsi4alCzL4Rkq3wb2xYIJ90DJCunh+EAheECCmM4ECtbCXFc/wLdDhokAee5mdsxa73Tq/PRqUdoq/u3ouM1arYOfJD+vnT2erjKPq+4TeZ00TZY8MTu0q5AElyWHyfbZOs62JDv+6H1ZPK9EOSyd6zf/aqj38u+Xi5yrXIqhCwZJk0m7BzLDLCIXmaJqYMxMCWLaquFw95eAymwKqYeMYm9dHsPmkF5MjmLpvj3yMhnWaNkRq22xFwH1a2SWlLNUPvFrNqM7+FDJ9nz6cHq1OImFuc5CKeS2GbUzehho2dW5PkFtPUY6x52rtTHJ6qQKBgPQjOFYTb6P+Dp/3rpTJvtBEVEr/Muh3ss69GBylko2oHi0BCwFg4AT50pg01ENh2YDDrsBKDvGCDwCll5EZYBvGxBplmzKZ7+RwMgaBGnKyFZBskdWnQ0UkgUfZqdxu0dui7RIoFY+DjhhbnqZKaUDX/4NOLhaAsNEHR13qo0Ys1Ebk1isHW6SDKL5L9wkt3fmvPWEtorNc/V4ifIIp/JT79XWFHNNk75DZsDQ7zi5sKdSwzFkKjD2duDzp4HNTxvrhwWkZk
mzSSbFXgCENhvDcdsAbYQELLrRCVtzSGAVbokPODpTRNtW8OEslTYA4WYZSjzqLuDw69MfSvv0YQmMAgek1UBgv2SmnJaUdHvHneu1McnqpMrHx9bb6+klddKV7SWAiHopBkfUe7XX58a6ErxmS69Xj2YDDr8WOOxqqWv64Fqg+UtZ2kTTgGCD/JzZW8lXI4GKOdQFSEbGnAll6kyg0F7woRnrzzlLgIGTOl9j5Cg2ejBpxkKy2yQzZQZI7R13PqwTlqyXUC6ttdbWDEFTJhaiJqI4/HhBvVuqQw+dqeHQbPIvsF8WqTUv/ObMJyC+X469UBoM6kZht83y2cQMFEqPSC9QMIOP4IHYkF1X95lYY+Q2gkroRrFyRAI+pTp+DNbGdE3NIim8fue85GsB5jqu+0Z5ipkj6v1SHXrozJpeyYa1HIWyTEh05pPRL8ehSRDV/EV8BilZJitV6WTHUpWsjskzxFhaIwTAFtt/xNfxY2RqrbS+Jt+XB+G6b5THOJU/TZzK3wd0NIxhVbtCPs07iuOHtUKNEkzoYQkwikfLPoK1kk0qqIyf4t/Vi0YmL0S7FkiWwjMk/nmHGo0aK79kj9z9gYpj02tSydqY1ORqC4RUdbb1A1E3Suf6zcwRUaJ0ajjaqqlxllg6ZGtSs2N3x3fIzmSgkMnC3LbqmJwl8i9QKzO5vvIYMPLi1B+DtTGpS6cFQq6dU677Rr0AgyOirmhvWCvcIAHSmJnSKiAxYMn0RS1TwUdHRdSRFskYpRMYUXryeXmQfA7siAx8ZyPqqvaKvk/8PXDYtW0vmpuLWESdnu4oOk7Wo8sq2y0Q2pPLva2IUsTMEVEm9LZ+MyyiTk13FR3nQwuEtuR6byuiFDA4IsqU3lZT09sCvkzrztlk3TELsafkc2BHZMjBvywiyhlmwJdPw4I9oSvrzaUqX5cH4bAs9QLMHBERpaunio7zNXvHYVnKcwyOiIjS1ZOzyfJ1uDZfAzsiMDgioq7oq40dWXScmnwN7KjPY3BERJ3Tl5eHYNExUa/WBz7iEVHGmTO1Dn4iS6d4hshXc6ZWviyM2lksOibq1fiXS5RN+bhqeU/M1MoH+TqbjIg6xGE1omzJ12EpLg8Rw6Jjol6JwRFRNnRnA8Huls/rfnUHFh0T9Tr8eEPU0/J9WCqf1/0iIkoBgyOinpbOsFQuMmdqBQ/IzCwrc6ZW6RGcqUVEeYvBEVFPy/dVyzlTi4h6Ob57EfW03jAsxZlaRNSL9argaOTIkdA0Le7fww8/HLfNJ598Aq/Xi4KCAgwfPhyPPPJIlo6W+qzeMixVOQU4dT5wysvAV+fJ11PnMzAiorzX62ar3Xfffbjqqqui35eUlET/39DQgDPOOANTp07F3LlzsWbNGlxxxRUoLy/H1VdfnY3Dpb7IHJb68BoZhnL1k6G0iF8Co3waluJMLSLqhXpdcFRSUoLKysqk9/31r39FMBjEM888A5fLhaOOOgqrV6/GY489xuCIehZXLSciylmaUol5/fw1cuRI+P1+hEIhjBgxAhdffDFuueUWOBwSA1566aVoaGjAv/71r+jPvP3225gyZQpqa2tRUVHRap+BQACBQKw2pKGhAcOHD0d9fT1KS0u7/TlRL9dXF24lIuphDQ0NKCsrS+n63asyRzfeeCO+8pWvoF+/fnj//fdx5513Yvfu3XjssccAADU1NRg1alTczwwePDh6X7LgaPbs2bj33nu7/+Cpb+KwFBFRzsn5j6g//elPWxVZJ/7bsGEDAODWW2/F5MmTMW7cOFx77bX41a9+hTlz5sRlftJ15513or6+Pvpv+/btmXpqRERElINyPnM0c+ZMXH755e1uc8ghhyS9feLEiQiHw/jyyy9xxBFHoLKyEnv27Inbxvy+rTolt9sNt7uNfjRERESUOTlSapDzwdHAgQMxcODATv3s6tWrYbPZMGjQIADApEmTcNdddyEUCsHpdAIAFi5ciCOOOCLpkBoRERH1kB
xajDvnh9VStXTpUvz617/Gxx9/jC+++AJ//etfccstt+B73/teNPC5+OKL4XK5cOWVV2LdunX4+9//jt/85je49dZbs3z0REREfZi5GPfBTwBHMeAZIl/NxbhrFvXo4eR85ihVbrcbL7zwAu655x4EAgGMGjUKt9xyS1zgU1ZWhv/+97+44YYbcNxxx2HAgAGYNWsWp/ETERFlS+Ji3Oaakw4PYB8q/eA+fRgYPLnHhth61VT+npDOVEAiIiLqQO0K4J3zJFPk8LS+P9wiyxOd8nKXZvemc/3uNcNqRERElIdycDFuBkdERESUPTm4GDeDI6K+SumSzt61QL4qPdtHRER9UQ4uxt1rCrKJKA05NGWWiPq4HFyMm5kjor4mx6bMEhFFF+MuHyfF177d8rV8HHDC3B7/0MbMEVFfkoNTZomIAEgANHgyO2QTUQ87uEqG0tz9Y4GRSdMknd2wUbbjgrhE1NNyZDFufjQk6ktycMosEVGuYXBE1Jfk4JRZIqJcw+CIqC/JwSmzRES5hsERUV9iTpl1lkjxdbhFirTDLfJ9FqbMEhHlGr4DEvU1OTZllogo13C2GlFflENTZomIcg2DI6K+KkemzBIR5Rp+TCQiIiKyYHBEREREZMHgiIiIiMiCwRERERGRBYMjIiIiIgsGR0REREQWDI6IiIiILBgcEREREVkwOCIiIiKyYHBEREREZMHgiIiIiMiCwRERERGRBYMjIiIiIgsGR0REREQWDI6IiIiILBgcEREREVkwOCIiIiKyYHBEREREZMHgiIiIiMiCwRERERGRBYMjIiIiIgsGR0REREQWDI6IiIiILBgcEREREVkwOCIiIiKyYHBEREREZMHgiIiIiMiCwRERERGRBYMjIiIiIgsGR0REREQWDI6IiIiILBgcEREREVkwOCIiIiKyYHBEREREZMHgiIiIiMiCwRERERGRBYMjIiIiIgsGR0REREQWDI6IiIiILBgcEREREVkwOCIiIiKyYHBEREREZMHgiIiIiMiCwRERERGRBYMjIiIiIgsGR0REREQWDI6IiIiILBgcEREREVkwOCIiIiKyyJvg6MEHH8RJJ52EwsJClJeXJ91m27ZtmDFjBgoLCzFo0CDcfvvtCIfDcdssXrwYX/nKV+B2u3HooYdi3rx53X/wRERElDfyJjgKBoP4zne+g+uuuy7p/ZFIBDNmzEAwGMT777+P5557DvPmzcOsWbOi22zZsgUzZszAqaeeitWrV+Pmm2/GD3/4QyxYsKCnngYRERHlOE0ppbJ9EOmYN28ebr75ZtTV1cXd/sYbb+Ab3/gGdu3ahcGDBwMA5s6di5/85CfYt28fXC4XfvKTn+A///kP1q5dG/25Cy+8EHV1dZg/f35Kj9/Q0ICysjLU19ejtLQ0Y8+LiIiIuk861++8yRx1ZOnSpTjmmGOigREATJs2DQ0NDVi3bl10m6lTp8b93LRp07B06dI29xsIBNDQ0BD3j4iIiHqvXhMc1dTUxAVGAKLf19TUtLtNQ0MDfD5f0v3Onj0bZWVl0X/Dhw/vhqMnIiKiXJHV4OinP/0pNE1r99+GDRuyeYi48847UV9fH/23ffv2rB4PERERdS9HNh985syZuPzyy9vd5pBDDklpX5WVlfjggw/ibtuzZ0/0PvOreZt1m9LSUng8nqT7dbvdcLvdKR0DERER5b+sBkcDBw7EwIEDM7KvSZMm4cEHH8TevXsxaNAgAMDChQtRWlqKsWPHRrd5/fXX435u4cKFmDRpUkaOgYiIiPJf3tQcbdu2DatXr8a2bdsQiUSwevVqrF69Gk1NTQCAM844A2PHjsX3v/99fPzxx1iwYAF+/vOf44Ybbohmfq699lp88cUXuOOOO7BhwwY89dRTePHFF3HLLbdk86kRERFRDsmbqfyXX345nnvuuVa3v/3225g8eTIAYOvWrbjuuuuwePFiFBUV4bLLLsPDDz8MhyOWIFu8eDFuueUWfPrppx
g2bBh+8YtfdDi0Z8Wp/ERERPknnet33gRHuYLBERERUf7pk32OiIiIiDKBwRERERGRBYMjIiIiIgsGR0REREQWDI6IiIiILBgcEREREVkwOCIiIiKyYHBEREREZMHgiIiIiMiCwRERERGRBYMjIiIiIgsGR0REREQWDI6IiIiILBgcEREREVkwOCIiIiKyYHBEREREZMHgiIiIiMiCwRERERGRBYMjIiIiIgsGR0REREQWDI6IiIiILBgcEREREVkwOCIiIiKyYHBEREREZMHgiIiIiMiCwRERERGRBYMjIiIiIgsGR0REREQWDI6IiIiILBgcEREREVkwOCIiIiKyYHBEREREZMHgiIiIiMiCwRERERGRhSPbB0BERNQjlA4cXAX49wMFA4CKCYDGHAG1xuCIiIh6v5pFwKcPAw0bAT0I2FxA6RHA2J8ClVOyfXSUYxgyExFR71azCPjwGuDgJ4CjGPAMka91n8jtNYuyfYSUYxgcERFR76V0yRgFG4HCoYDDI0NpDg/gGQqEGuV+pWf7SCmHMDgiIqLe6+AqGUpz9wc0Lf4+TQNc/eT+g6uyc3yUkxgcERFR7+XfLzVGdnfy++0Fcr9/f88eF+U0BkdERNR7FQyQ4utIIPn9Eb/cXzCgZ4+LchqDIyIi6r0qJsistOABQKn4+5QCgrVyf8WE7Bwf5SQGR0RE1HtpNpmu7ywBfDuBcIsUX4db5HtnqdzPfkdkwd8GIiLq3SqnACf8HigfB4SbAd9u+Vo+DjhhLvscUStsAklERL1f5RRg8GR2yKaUMDgiIqK+QbMB/Y7L9lFQHmDITERERGTB4IiIiIjIgsERERERkQWDIyIiIiILBkdEREREFgyOiIiIiCwYHBERERFZMDgiIiIismBwRERERGTBDtlpUsaqzg0NDVk+EiIiIkqVed02r+PtYXCUpsbGRgDA8OHDs3wkRERElK7GxkaUlZW1u42mUgmhKErXdezatQslJSXQNK1bHqOhoQHDhw/H9u3bUVpa2i2PQcnx3GcXz3/28NxnF89/91NKobGxEVVVVbDZ2q8qYuYoTTabDcOGDeuRxyotLeUfSZbw3GcXz3/28NxnF89/9+ooY2RiQTYRERGRBYMjIiIiIgsGRznI7Xbj7rvvhtvtzvah9Dk899nF8589PPfZxfOfW1iQTURERGTBzBERERGRBYMjIiIiIgsGR0REREQWDI6IiIiILBgc5Zgnn3wSI0eOREFBASZOnIgPPvgg24fUK91zzz3QNC3u35FHHhm93+/344YbbkD//v1RXFyM888/H3v27MniEeevd955B2effTaqqqqgaRr+9a9/xd2vlMKsWbMwZMgQeDweTJ06FZ9//nncNrW1tbjkkktQWlqK8vJyXHnllWhqaurBZ5G/Ojr/l19+eau/henTp8dtw/PfObNnz8YJJ5yAkpISDBo0COeccw42btwYt00q7zXbtm3DjBkzUFhYiEGDBuH2229HOBzuyafS5zA4yiF///vfceutt+Luu+/GypUrMX78eEybNg179+7N9qH1SkcddRR2794d/ffuu+9G77vlllvw6quv4qWXXsKSJUuwa9cunHfeeVk82vzV3NyM8ePH48knn0x6/yOPPILf/va3mDt3LpYvX46ioiJMmzYNfr8/us0ll1yCdevWYeHChXjttdfwzjvv4Oqrr+6pp5DXOjr/ADB9+vS4v4Xnn38+7n6e/85ZsmQJbrjhBixbtgwLFy5EKBTCGWecgebm5ug2Hb3XRCIRzJgxA8FgEO+//z6ee+45zJs3D7NmzcrGU+o7FOWME088Ud1www3R7yORiKqqqlKzZ8/O4lH1TnfffbcaP3580vvq6uqU0+lUL730UvS29evXKwBq6dKlPXSEvRMA9corr0S/13VdVVZWqkcffTR6W11dnXK73er5559XSin16aefKgDqww8/jG7zxhtvKE3T1M6dO3vs2HuDxPOvlFKXXXaZ+ta3vt
Xmz/D8Z87evXsVALVkyRKlVGrvNa+//rqy2WyqpqYmus3vfvc7VVpaqgKBQM8+gT6EmaMcEQwGsWLFCkydOjV6m81mw9SpU7F06dIsHlnv9fnnn6OqqgqHHHIILrnkEmzbtg0AsGLFCoRCobjX4sgjj8SIESP4WmTYli1bUFNTE3euy8rKMHHixOi5Xrp0KcrLy3H88cdHt5k6dSpsNhuWL1/e48fcGy1evBiDBg3CEUccgeuuuw4HDhyI3sfznzn19fUAgH79+gFI7b1m6dKlOOaYYzB48ODoNtOmTUNDQwPWrVvXg0fftzA4yhH79+9HJBKJ+wMAgMGDB6OmpiZLR9V7TZw4EfPmzcP8+fPxu9/9Dlu2bIHX60VjYyNqamrgcrlQXl4e9zN8LTLPPJ/t/d7X1NRg0KBBcfc7HA7069ePr0cGTJ8+HX/+85/x1ltv4Ze//CWWLFmCM888E5FIBADPf6bouo6bb74ZJ598Mo4++mgASOm9pqamJunfh3kfdQ9Htg+AKBvOPPPM6P/HjRuHiRMnorq6Gi+++CI8Hk8Wj4yoZ1144YXR/x9zzDEYN24cRo8ejcWLF+O0007L4pH1LjfccAPWrl0bV9tIuYuZoxwxYMAA2O32VrMU9uzZg8rKyiwdVd9RXl6Oww8/HJs2bUJlZSWCwSDq6urituFrkXnm+Wzv976ysrLVpIRwOIza2lq+Ht3gkEMOwYABA7Bp0yYAPP+Z8KMf/QivvfYa3n77bQwbNix6eyrvNZWVlUn/Psz7qHswOMoRLpcLxx13HN56663obbqu46233sKkSZOyeGR9Q1NTEzZv3owhQ4bguOOOg9PpjHstNm7ciG3btvG1yLBRo0ahsrIy7lw3NDRg+fLl0XM9adIk1NXVYcWKFdFtFi1aBF3XMXHixB4/5t5ux44dOHDgAIYMGQKA578rlFL40Y9+hFdeeQWLFi3CqFGj4u5P5b1m0qRJWLNmTVyAunDhQpSWlmLs2LE980T6omxXhFPMCy+8oNxut5o3b5769NNP1dVXX63Ky8vjZilQZsycOVMtXrxYbdmyRb333ntq6tSpasCAAWrv3r1KKaWuvfZaNWLECLVo0SL10UcfqUmTJqlJkyZl+ajzU2Njo1q1apVatWqVAqAee+wxtWrVKrV161allFIPP/ywKi8vV//+97/VJ598or71rW+pUaNGKZ/PF93H9OnT1YQJE9Ty5cvVu+++qw477DB10UUXZesp5ZX2zn9jY6O67bbb1NKlS9WWLVvUm2++qb7yla+oww47TPn9/ug+eP4757rrrlNlZWVq8eLFavfu3dF/LS0t0W06eq8Jh8Pq6KOPVmeccYZavXq1mj9/vho4cKC68847s/GU+gwGRzlmzpw5asSIEcrlcqkTTzxRLVu2LNuH1CtdcMEFasiQIcrlcqmhQ4eqCy64QG3atCl6v8/nU9dff72qqKhQhYWF6txzz1W7d+/O4hHnr7ffflsBaPXvsssuU0rJdP5f/OIXavDgwcrtdqvTTjtNbdy4MW4fBw4cUBdddJEqLi5WpaWl6gc/+IFqbGzMwrPJP+2d/5aWFnXGGWeogQMHKqfTqaqrq9VVV13V6gMZz3/nJDvvANSzzz4b3SaV95ovv/xSnXnmmcrj8agBAwaomTNnqlAo1MPPpm/RlFKqp7NVRERERLmKNUdEREREFgyOiIiIiCwYHBERERFZMDgiIiIismBwRERERGTB4IiIiIjIgsERERERkQWDIyLKipEjR+LXv/51tg8jYxYvXgxN01qtk0VE+YfBERFl1Pbt23HFFVegqqoKLpcL1dXVuOmmm3DgwIFsH1rGTJ48GTfffHPcbSeddBJ2796NsrKyHjuOffv2weVyobm5GaFQCEVFRdi2bVuPPT5Rb8XgiIgy5osvvsDxxx+Pzz//HM8//zw2bdqEuXPnRhdQrq2tzdqxRSIR6Lrebft3uVyorKyEpmnd9hiJli5divHjx6
OoqAgrV65Ev379MGLEiB57fKLeisEREWXMDTfcAJfLhf/+97/4+te/jhEjRuDMM8/Em2++iZ07d+Kuu+6K276xsREXXXQRioqKMHToUDz55JPR+5RSuOeeezBixAi43W5UVVXhxhtvjN4fCARw2223YejQoSgqKsLEiROxePHi6P3z5s1DeXk5/u///g9jx46F2+3GH//4RxQUFLQa+rrpppswZcoUAMCBAwdw0UUXYejQoSgsLMQxxxyD559/Prrt5ZdfjiVLluA3v/kNNE2Dpmn48ssvkw6r/fOf/8RRRx0Ft9uNkSNH4le/+lXc444cORIPPfQQrrjiCpSUlGDEiBF4+umnUz7f77//Pk4++WQAwLvvvhv9PxF1UZbXdiOiXuLAgQNK0zT10EMPJb3/qquuUhUVFUrXdaWUUtXV1aqkpETNnj1bbdy4Uf32t79Vdrtd/fe//1VKKfXSSy+p0tJS9frrr6utW7eq5cuXq6effjq6vx/+8IfqpJNOUu+8847atGmTevTRR5Xb7VafffaZUkqpZ599VjmdTnXSSSep9957T23YsEE1NTWpwYMHqz/+8Y/R/YTD4bjbduzYoR599FG1atUqtXnz5uhxLV++XCmlVF1dnZo0aZK66qqroqush8Ph6AKvBw8eVEop9dFHHymbzabuu+8+tXHjRvXss88qj8cTt+hodXW16tevn3ryySfV559/rmbPnq1sNpvasGFDm+d569atqqysTJWVlSmn06kKCgpUWVmZcrlcyu12q7KyMnXdddel+eoRkRWDIyLKiGXLlikA6pVXXkl6/2OPPaYAqD179iilJDCYPn163DYXXHCBOvPMM5VSSv3qV79Shx9+uAoGg632tXXrVmW329XOnTvjbj/ttNPUnXfeqZSS4AiAWr16ddw2N910k5oyZUr0+wULFii32x0NapKZMWOGmjlzZvT7r3/96+qmm26K2yYxOLr44ovV6aefHrfN7bffrsaOHRv9vrq6Wn3ve9+Lfq/ruho0aJD63e9+1+axhEIhtWXLFvXxxx8rp9OpPv74Y7Vp0yZVXFyslixZorZs2aL27dvX5s8TUcc4rEZEGaWUSnnbSZMmtfp+/fr1AIDvfOc78Pl8OOSQQ3DVVVfhlVdeQTgcBgCsWbMGkUgEhx9+OIqLi6P/lixZgs2bN0f353K5MG7cuLjHuOSSS7B48WLs2rULAPDXv/4VM2bMQHl5OQCpTbr//vtxzDHHoF+/figuLsaCBQvSLnRev359q2Guk08+GZ9//jkikUj0NuvxaZqGyspK7N27t839OhwOjBw5Ehs2bMAJJ5yAcePGoaamBoMHD8Ypp5yCkSNHYsCAAWkdKxHFc2T7AIiodzj00EOhaRrWr1+Pc889t9X969evR0VFBQYOHJjS/oYPH46NGzfizTffxMKFC3H99dfj0UcfxZIlS9DU1AS73Y4VK1bAbrfH/VxxcXH0/x6Pp1WB9AknnIDRo0fjhRdewHXXXYdXXnkF8+bNi97/6KOP4je/+Q1+/etf45hjjkFRURFuvvlmBIPBNM5G6pxOZ9z3mqa1Wzh+1FFHYevWrQiFQtB1HcXFxQiHwwiHwyguLkZ1dTXWrVvXLcdK1FcwOCKijOjfvz9OP/10PPXUU7jlllvg8Xii99XU1OCvf/0rLr300rhgZdmyZXH7WLZsGcaMGRP93uPx4Oyzz8bZZ5+NG264AUceeSTWrFmDCRMmIBKJYO/evfB6vWkf6yWXXIK//vWvGDZsGGw2G2bMmBG977333sO3vvUtfO973wMA6LqOzz77DGPHjo1u43K54rI/yYwZMwbvvfde3G3vvfceDj/88FYBXTpef/11hEIhnHbaaXjkkUdw3HHH4cILL8Tll1+O6dOntwq2iCh9HFYjoox54oknEAgEMG3aNLzzzjvYvn075s+fj9NPPx1Dhw7Fgw8+GLf9e++9h0ceeQSfffYZnnzySbz00ku46aabAMhssz
/96U9Yu3YtvvjiC/zv//4vPB4Pqqurcfjhh+OSSy7BpZdeipdffhlbtmzBBx98gNmzZ+M///lPh8d5ySWXYOXKlXjwwQfx7W9/G263O3rfYYcdhoULF+L999/H+vXrcc0112DPnj1xPz9y5EgsX74cX375Jfbv35800zNz5ky89dZbuP/++/HZZ5/hueeewxNPPIHbbrutM6c2qrq6GsXFxdizZw++9a1vYfjw4Vi3bh3OP/98HHrooaiuru7S/omIwRERZdBhhx2Gjz76CIcccgi++93vYvTo0bj66qtx6qmnYunSpejXr1/c9jNnzsRHH32ECRMm4IEHHsBjjz2GadOmAQDKy8vxhz/8ASeffDLGjRuHN998E6+++ir69+8PAHj22Wdx6aWXYubMmTjiiCNwzjnn4MMPP0ypz8+hhx6KE088EZ988gkuueSSuPt+/vOf4ytf+QqmTZuGyZMno7KyEuecc07cNrfddhvsdjvGjh2LgQMHJq1H+spXvoIXX3wRL7zwAo4++mjMmjUL9913Hy6//PI0zmhyixcvxgknnICCggJ88MEHGDZsGIYMGdLl/RKR0FQ61ZNEREREvRwzR0REREQWDI6IiIiILBgcEREREVkwOCIiIiKyYHBEREREZMHgiIiIiMiCwRERERGRBYMjIiIiIgsGR0REREQWDI6IiIiILBgcEREREVkwOCIiIiKy+P+EtA0HSyJcHAAAAABJRU5ErkJggg==\n", + "image/png": "", "text/plain": [ "
" ] @@ -538,14 +533,14 @@ { "cell_type": "code", "execution_count": 24, - "id": "5596efc5", + "id": "9250db79", "metadata": { "tags": [] }, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "", "text/plain": [ "
" ] @@ -565,7 +560,7 @@ }, { "cell_type": "markdown", - "id": "4d7cf0fc", + "id": "0b5d055e", "metadata": { "tags": [] }, @@ -578,7 +573,7 @@ { "cell_type": "code", "execution_count": 25, - "id": "680b8270", + "id": "90317a39", "metadata": { "tags": [] }, @@ -598,7 +593,7 @@ }, { "cell_type": "markdown", - "id": "97bb4f4f", + "id": "6b5046b8", "metadata": {}, "source": [ "### โš™๏ธ Model Schema" @@ -606,7 +601,7 @@ }, { "cell_type": "markdown", - "id": "0b0bd961", + "id": "cddd14b0", "metadata": {}, "source": [ "The model needs to be set up with a [Model Schema](https://docs.hopsworks.ai/machine-learning-api/latest/generated/model_schema/), which describes the inputs and outputs for a model.\n", @@ -617,7 +612,7 @@ { "cell_type": "code", "execution_count": 26, - "id": "8314a355", + "id": "a908d1ce", "metadata": { "scrolled": true }, @@ -640,7 +635,7 @@ { "cell_type": "code", "execution_count": 27, - "id": "ca017146", + "id": "41315b00", "metadata": { "scrolled": true }, @@ -662,7 +657,7 @@ { "cell_type": "code", "execution_count": 28, - "id": "ebff7e6e", + "id": "6db1aa2b", "metadata": {}, "outputs": [ { @@ -717,7 +712,7 @@ }, { "cell_type": "markdown", - "id": "f637cbc5", + "id": "2abf182a", "metadata": {}, "source": [ "---\n", @@ -729,7 +724,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, diff --git a/advanced_tutorials/air_quality/4_air_quality_batch_inference.ipynb b/advanced_tutorials/air_quality/4_air_quality_batch_inference.ipynb index b883ca25..7d95e23d 100644 --- a/advanced_tutorials/air_quality/4_air_quality_batch_inference.ipynb +++ b/advanced_tutorials/air_quality/4_air_quality_batch_inference.ipynb @@ -32,7 +32,6 @@ "source": [ "import joblib\n", "import datetime\n", - "import time\n", "import pandas as pd" ] }, @@ -691,7 +690,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": 
"3.9.12" } }, "nbformat": 4, diff --git a/advanced_tutorials/air_quality/5_function_calling.ipynb b/advanced_tutorials/air_quality/5_function_calling.ipynb index 35b0d26e..a5b6001e 100644 --- a/advanced_tutorials/air_quality/5_function_calling.ipynb +++ b/advanced_tutorials/air_quality/5_function_calling.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "ee392cdb", + "id": "c97f7197", "metadata": {}, "source": [ "## ๐Ÿ“ Imports" @@ -11,7 +11,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "c89ab053", + "id": "8c52088b", "metadata": {}, "outputs": [ { @@ -30,24 +30,18 @@ { "cell_type": "code", "execution_count": 2, - "id": "6ef71c85", + "id": "06b34ba8", "metadata": {}, "outputs": [], "source": [ - "import datetime\n", - "import transformers\n", - "import torch\n", - "\n", "import joblib\n", - "import inspect\n", - "import json\n", - "from typing import get_type_hints\n", - "import sys" + "\n", + "from functions.llm_chain import load_model, get_llm_chain, generate_response" ] }, { "cell_type": "markdown", - "id": "f6a870a7", + "id": "38df497c", "metadata": {}, "source": [ "## ๐Ÿ”ฎ Connect to Hopsworks Feature Store " @@ -56,7 +50,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "d4b907de", + "id": "44d7c288", "metadata": {}, "outputs": [ { @@ -80,7 +74,7 @@ }, { "cell_type": "markdown", - "id": "abc6094c", + "id": "7b011ef9", "metadata": {}, "source": [ "## โš™๏ธ Feature View Retrieval" @@ -89,7 +83,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "4fb6ceee", + "id": "298a3146", "metadata": {}, "outputs": [], "source": [ @@ -105,16 +99,16 @@ }, { "cell_type": "markdown", - "id": "afaa9b97", + "id": "82c8d3a2", "metadata": {}, "source": [ - "## ๐Ÿช Retrieve model from Model Registry" + "## ๐Ÿช Retrieve AirQuality Model from Model Registry" ] }, { "cell_type": "code", "execution_count": 5, - "id": "dd7b9905", + "id": "53caf38f", "metadata": {}, "outputs": [ { @@ -143,7 +137,7 @@ { "cell_type": "code", 
"execution_count": 6, - "id": "20cb0255", + "id": "151c27c9", "metadata": {}, "outputs": [ { @@ -201,1084 +195,622 @@ }, { "cell_type": "markdown", - "id": "656f0db7", + "id": "a94279c0", "metadata": {}, "source": [ - "## ๐Ÿ—„๏ธ Functions\n" + "## โฌ‡๏ธ LLM Loading" ] }, { "cell_type": "code", "execution_count": 7, - "id": "98381fc5", - "metadata": {}, - "outputs": [], - "source": [ - "def transform_data(data, encoder):\n", - " \"\"\"\n", - " Transform the input data by encoding the 'city_name' column and dropping unnecessary columns.\n", - " \n", - " Args:\n", - " - data (DataFrame): Input data to be transformed.\n", - " - encoder (LabelEncoder): Label encoder object to encode 'city_name'.\n", - " \n", - " Returns:\n", - " - data_transformed (DataFrame): Transformed data with 'city_name_encoded' and dropped columns.\n", - " \"\"\"\n", - " \n", - " # Create a copy of the input data to avoid modifying the original data\n", - " data_transformed = data.copy()\n", - " \n", - " # Transform the 'city_name' column in the batch data using the retrieved label encoder\n", - " data_transformed['city_name_encoded'] = encoder.transform(data_transformed['city_name'])\n", - " \n", - " # Drop unnecessary columns from the batch data\n", - " data_transformed = data_transformed.drop(columns=['unix_time', 'pm2_5', 'city_name', 'date'])\n", - "\n", - " return data_transformed" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "fcfecdfe", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "from typing import Any, Dict, List\n", - "\n", - "def get_data_for_date(date: str, city_name: str, feature_view, model) -> pd.DataFrame:\n", - " \"\"\"\n", - " Retrieve data for a specific date and city from a feature view.\n", - "\n", - " Args:\n", - " date (str): The date in the format \"%Y-%m-%d\".\n", - " city_name (str): The name of the city to retrieve data for.\n", - " feature_view: The feature view object.\n", - " model: The machine learning model 
used for prediction.\n", - "\n", - " Returns:\n", - " pd.DataFrame: A DataFrame containing data for the specified date and city.\n", - " \"\"\"\n", - " # Convert date string to datetime object\n", - " date_datetime = datetime.datetime.strptime(date, \"%Y-%m-%d\").date()\n", - " \n", - " # Retrieve batch data for the specified date range\n", - " batch_data = feature_view.get_batch_data(\n", - " start_time=date_datetime,\n", - " end_time=date_datetime + datetime.timedelta(days=1),\n", - " )\n", - " \n", - " # Filter batch data for the specified city\n", - " batch_data_filtered = batch_data[batch_data['city_name'] == city_name]\n", - " \n", - " return batch_data_filtered[['date', 'pm2_5']].sort_values('date').reset_index(drop=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "abd770b1", - "metadata": {}, - "outputs": [], - "source": [ - "def get_data_in_date_range(date_start: str, date_end: str, city_name: str, feature_view, model) -> pd.DataFrame:\n", - " \"\"\"\n", - " Retrieve data for a specific date range and city from a feature view.\n", - "\n", - " Args:\n", - " date_start (str): The start date in the format \"%Y-%m-%d\".\n", - " date_end (str): The end date in the format \"%Y-%m-%d\".\n", - " city_name (str): The name of the city to retrieve data for.\n", - " feature_view: The feature view object.\n", - " model: The machine learning model used for prediction.\n", - "\n", - " Returns:\n", - " pd.DataFrame: A DataFrame containing data for the specified date range and city.\n", - " \"\"\"\n", - " # Convert date strings to datetime objects\n", - " date_start_dt = datetime.datetime.strptime(date_start, \"%Y-%m-%d\").date()\n", - " date_end_dt = datetime.datetime.strptime(date_end, \"%Y-%m-%d\").date()\n", - " \n", - " # Retrieve batch data for the specified date range\n", - " batch_data = feature_view.get_batch_data(\n", - " start_time=date_start_dt,\n", - " end_time=date_end_dt + datetime.timedelta(days=1),\n", - " )\n", - "\n", - " # 
Filter batch data for the specified city\n", - " batch_data_filtered = batch_data[batch_data['city_name'] == city_name]\n", - " \n", - " return batch_data_filtered[['date', 'pm2_5']].sort_values('date').reset_index(drop=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "73f9937d", - "metadata": {}, - "outputs": [], - "source": [ - "import datetime\n", - "import pandas as pd\n", - "\n", - "def get_future_data(date: str, city_name: str, feature_view, model) -> pd.DataFrame:\n", - " \"\"\"\n", - " Predicts future PM2.5 data for a specified date and city using a given feature view and model.\n", - "\n", - " Args:\n", - " date (str): The target future date in the format 'YYYY-MM-DD'.\n", - " city_name (str): The name of the city for which the prediction is made.\n", - " feature_view: The feature view used to retrieve batch data.\n", - " model: The machine learning model used for prediction.\n", - "\n", - " Returns:\n", - " pd.DataFrame: A DataFrame containing predicted PM2.5 values for each day starting from the target date.\n", - "\n", - " \"\"\"\n", - " # Get today's date\n", - " today = datetime.date.today()\n", - "\n", - " # Convert the target date string to a datetime object\n", - " date_in_future = datetime.datetime.strptime(date, \"%Y-%m-%d\").date()\n", - "\n", - " # Calculate the difference in days between today and the target date\n", - " difference_in_days = (date_in_future - today).days\n", - "\n", - " # Retrieve batch data for the specified date range\n", - " batch_data = feature_view.get_batch_data(\n", - " start_time=today,\n", - " end_time=today + datetime.timedelta(days=1),\n", - " )\n", - " \n", - " # Filter batch data for the specified city\n", - " batch_data_filtered = batch_data[batch_data['city_name'] == city_name]\n", - " \n", - " # Transform batch data\n", - " batch_data_transformed = transform_data(batch_data_filtered, encoder)\n", - " \n", - " # Initialize a DataFrame to store predicted PM2.5 values\n", - " 
predicted_pm2_5_df = pd.DataFrame({\n", - " 'date': [today.strftime(\"%Y-%m-%d\")], \n", - " 'pm2_5': batch_data_filtered['pm2_5'].values[0],\n", - " })\n", - "\n", - " # Iterate through each day starting from tomorrow up to the target date\n", - " for day_number in range(1, difference_in_days + 1):\n", - "\n", - " # Calculate the date for the current future day\n", - " date_future_day = (today + datetime.timedelta(days=day_number)).strftime(\"%Y-%m-%d\")\n", - " \n", - " # Predict PM2.5 for the current day\n", - " predicted_pm2_5 = model.predict(batch_data_transformed)\n", - "\n", - " # Update previous day PM2.5 values in the batch data for the next prediction\n", - " batch_data_transformed['pm_2_5_previous_7_day'] = batch_data_transformed['pm_2_5_previous_6_day']\n", - " batch_data_transformed['pm_2_5_previous_6_day'] = batch_data_transformed['pm_2_5_previous_5_day']\n", - " batch_data_transformed['pm_2_5_previous_5_day'] = batch_data_transformed['pm_2_5_previous_4_day']\n", - " batch_data_transformed['pm_2_5_previous_4_day'] = batch_data_transformed['pm_2_5_previous_3_day']\n", - " batch_data_transformed['pm_2_5_previous_3_day'] = batch_data_transformed['pm_2_5_previous_2_day']\n", - " batch_data_transformed['pm_2_5_previous_2_day'] = batch_data_transformed['pm_2_5_previous_1_day']\n", - " batch_data_transformed['pm_2_5_previous_1_day'] = predicted_pm2_5\n", - " \n", - " # Append the predicted PM2.5 value for the current day to the DataFrame\n", - " predicted_pm2_5_df = predicted_pm2_5_df._append({\n", - " 'date': date_future_day, \n", - " 'pm2_5': predicted_pm2_5[0],\n", - " }, ignore_index=True)\n", - " \n", - " return predicted_pm2_5_df\n" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "9dd29714", + "id": "554e0024", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Finished: Reading data from Hopsworks, using ArrowFlight (7.54s) \n", - "โ›ณ๏ธ 2024-01-10\n" - ] - }, - { - "data": { - 
"text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datepm2_5
02024-01-1020.3
\n", - "
" - ], - "text/plain": [ - " date pm2_5\n", - "0 2024-01-10 20.3" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data_for_date = get_data_for_date(\n", - " '2024-01-10', \n", - " 'Paris',\n", - " feature_view,\n", - " model_air_quality,\n", - ")\n", - "print(f'โ›ณ๏ธ {data_for_date.date.max()}')\n", - "data_for_date.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "4b353d52", - "metadata": {}, - "outputs": [ - { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "Finished: Reading data from Hopsworks, using ArrowFlight (7.52s) \n", - "โ›ณ๏ธ ('2024-01-10', '2024-01-20')\n" + "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" ] }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datepm2_5
02024-01-1011.7
12024-01-1115.2
22024-01-1212.1
32024-01-135.4
42024-01-143.8
\n", - "
" - ], - "text/plain": [ - " date pm2_5\n", - "0 2024-01-10 11.7\n", - "1 2024-01-11 15.2\n", - "2 2024-01-12 12.1\n", - "3 2024-01-13 5.4\n", - "4 2024-01-14 3.8" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data_in_range = get_data_in_date_range(\n", - " '2024-01-10', \n", - " '2024-01-20', \n", - " 'Amsterdam',\n", - " feature_view,\n", - " model_air_quality,\n", - ")\n", - "print(f'โ›ณ๏ธ {data_in_range.date.min(), data_in_range.date.max()}')\n", - "data_in_range.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "5e896081", - "metadata": {}, - "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Finished: Reading data from Hopsworks, using ArrowFlight (7.50s) \n", - "โ›ณ๏ธ ('2024-02-23', '2024-02-25')\n" + "2024-03-08 10:10:27,333 INFO: We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).\n" ] }, { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datepm2_5
02024-02-238.100000
12024-02-247.449322
22024-02-258.308480
\n", - "
" - ], + "application/vnd.jupyter.widget-view+json": { + "model_id": "4979127c24df46df917c622b293c9ca4", + "version_major": 2, + "version_minor": 0 + }, "text/plain": [ - " date pm2_5\n", - "0 2024-02-23 8.100000\n", - "1 2024-02-24 7.449322\n", - "2 2024-02-25 8.308480" + "Loading checkpoint shards: 0%| | 0/2 [00:00โฌ‡๏ธ Model Loading" + "## โ›“๏ธ LangChain" ] }, { "cell_type": "code", - "execution_count": 14, - "id": "f817eda3", - "metadata": {}, - "outputs": [], - "source": [ - "def load_model(model_name: str):\n", - " tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)\n", - "\n", - " with torch.device(\"cuda:0\"):\n", - " model = transformers.AutoModelForCausalLM.from_pretrained(\n", - " model_name, \n", - " torch_dtype=torch.bfloat16,\n", - " ).eval()\n", - " \n", - " return tokenizer, model" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "d21b4f40", + "execution_count": 8, + "id": "4be0964b", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", - "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" + "DeprecationWarning: `np.bool8` is a deprecated alias for `np.bool_`. (Deprecated NumPy 1.24)\n" ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "cdd409643efb4d7cbe4022f55d06626c", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Loading checkpoint shards: 0%| | 0/2 [00:00โš™๏ธ Tools \n" + "## ๐Ÿงฌ Model Inference\n" ] }, { "cell_type": "code", - "execution_count": 16, - "id": "12607b0c", + "execution_count": 9, + "id": "0b7a24dd", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "๐Ÿ—“๏ธ Today's date: Friday, 2024-03-08\n", + "๐Ÿ“– \n", + "\n", + "Hello! 
How can I assist you with air quality information today?\n" + ] + } + ], "source": [ - "def get_type_name(t):\n", - " name = str(t)\n", - " if \"list\" in name or \"dict\" in name:\n", - " return name\n", - " else:\n", - " return t.__name__\n", - "\n", - "def serialize_function_to_json(func):\n", - " signature = inspect.signature(func)\n", - " type_hints = get_type_hints(func)\n", + "QUESTION7 = \"Hi!\"\n", "\n", - " function_info = {\n", - " \"name\": func.__name__,\n", - " \"description\": func.__doc__,\n", - " \"parameters\": {\n", - " \"type\": \"object\",\n", - " \"properties\": {}\n", - " },\n", - " \"returns\": type_hints.get('return', 'void').__name__\n", - " }\n", - "\n", - " for name, _ in signature.parameters.items():\n", - " param_type = get_type_name(type_hints.get(name, type(None)))\n", - " function_info[\"parameters\"][\"properties\"][name] = {\"type\": param_type}\n", + "response7 = generate_response(\n", + " QUESTION7,\n", + " feature_view,\n", + " model_llm, \n", + " tokenizer,\n", + " model_air_quality,\n", + " encoder,\n", + " llm_chain,\n", + " verbose=True,\n", + ")\n", "\n", - " return json.dumps(function_info, indent=2)" + "print(response7)" ] }, { "cell_type": "code", - "execution_count": 17, - "id": "8fb4bad0", + "execution_count": 10, + "id": "ff89cc47", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{\n", - " \"name\": \"get_data_in_date_range\",\n", - " \"description\": \"\\n Retrieve data for a specific date range and city from a feature view.\\n\\n Args:\\n date_start (str): The start date in the format \\\"%Y-%m-%d\\\".\\n date_end (str): The end date in the format \\\"%Y-%m-%d\\\".\\n city_name (str): The name of the city to retrieve data for.\\n feature_view: The feature view object.\\n model: The machine learning model used for prediction.\\n\\n Returns:\\n pd.DataFrame: A DataFrame containing data for the specified date range and city.\\n \",\n", - " \"parameters\": {\n", - " \"type\": 
\"object\",\n", - " \"properties\": {\n", - " \"date_start\": {\n", - " \"type\": \"str\"\n", - " },\n", - " \"date_end\": {\n", - " \"type\": \"str\"\n", - " },\n", - " \"city_name\": {\n", - " \"type\": \"str\"\n", - " },\n", - " \"feature_view\": {\n", - " \"type\": \"NoneType\"\n", - " },\n", - " \"model\": {\n", - " \"type\": \"NoneType\"\n", - " }\n", - " }\n", - " },\n", - " \"returns\": \"DataFrame\"\n", - "}\n" + "๐Ÿ—“๏ธ Today's date: Friday, 2024-03-08\n", + "๐Ÿ“– \n", + "\n", + "I am an AI Air Quality Assistant, here to help you with air quality information.\n" ] } ], "source": [ - "print(serialize_function_to_json(get_data_in_date_range))" - ] - }, - { - "cell_type": "markdown", - "id": "bc4cfe49", - "metadata": {}, - "source": [ - "## ๐Ÿ”ฎ Function Matching \n" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "a47290ef", - "metadata": {}, - "outputs": [], - "source": [ - "import xml.etree.ElementTree as ET\n", - "import re\n", + "QUESTION = \"Who are you?\"\n", "\n", - "def extract_function_calls(completion):\n", - " completion = completion.strip()\n", - " pattern = r\"((.*?))\"\n", - " match = re.search(pattern, completion, re.DOTALL)\n", - " if not match:\n", - " return None\n", - " \n", - " multiplefn = match.group(1)\n", - " root = ET.fromstring(multiplefn)\n", - " functions = root.findall(\"functioncall\")\n", - " return [json.loads(fn.text) for fn in functions]" + "response = generate_response(\n", + " QUESTION,\n", + " feature_view,\n", + " model_llm,\n", + " tokenizer,\n", + " model_air_quality,\n", + " encoder,\n", + " llm_chain,\n", + " verbose=True,\n", + ")\n", + "\n", + "print(response)" ] }, { "cell_type": "code", - "execution_count": 19, - "id": "501cb2b5", + "execution_count": 11, + "id": "33ae46d1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Finished: Reading data from Hopsworks, using ArrowFlight (8.92s) \n", + "๐Ÿ—“๏ธ Today's date: Friday, 
2024-03-08\n", + "๐Ÿ“– Air Quality Measurements for New York:\n", + "Date: 2024-01-10; Air Quality: 7.2\n", + "Date: 2024-01-11; Air Quality: 5.9\n", + "Date: 2024-01-12; Air Quality: 10.8\n", + "Date: 2024-01-13; Air Quality: 5.9\n", + "Date: 2024-01-14; Air Quality: 5.1\n", + "\n", + "The average air quality from January 10 to January 14 in New York was 6.7. This indicates that the air quality was generally moderate, and it is safe to go outside for most activities.\n" + ] + } + ], "source": [ - "def generate_hermes(prompt, model_llm, tokenizer):\n", - " fn = \"\"\"{\"name\": \"function_name\", \"arguments\": {\"arg_1\": \"value_1\", \"arg_2\": value_2, ...}}\"\"\"\n", - " prompt = f\"\"\"<|im_start|>system\n", - "You are a helpful assistant with access to the following functions:\n", - "\n", - "{serialize_function_to_json(get_data_for_date)}\n", - "\n", - "{serialize_function_to_json(get_data_in_date_range)}\n", - "\n", - "{serialize_function_to_json(get_future_data)}\n", - "\n", - "You need to choose what function to use and retrieve paramenters for this function from the user input.\n", - "IMPORTANT: Today is {datetime.date.today().strftime(\"%A\")}, {datetime.date.today()}.\n", - "IMPORTANT: If the user query contains 'will', it is very likely that you will need to use the get_future_data function\n", - "NOTE: Ignore the Feature View and Model parameters.\n", - "NOTE: Dates should be provided in the format YYYY-MM-DD.\n", - "\n", - "To use these functions respond with:\n", - "\n", - " {fn} \n", - " {fn} \n", - " ...\n", - "\n", - "\n", - "Edge cases you must handle:\n", - "- If there are no functions that match the user request, you will respond politely that you cannot help.<|im_end|>\n", - "<|im_start|>user\n", - "{prompt}<|im_end|>\n", - "<|im_start|>assistant\"\"\"\n", - " \n", - " tokens = tokenizer(prompt, return_tensors=\"pt\").to(model_llm.device)\n", - " input_size = tokens.input_ids.numel()\n", - " with torch.inference_mode():\n", - " 
generated_tokens = model_llm.generate(\n", - " **tokens, \n", - " use_cache=True, \n", - " do_sample=True, \n", - " temperature=0.2, \n", - " top_p=1.0, \n", - " top_k=0, \n", - " max_new_tokens=512, \n", - " eos_token_id=tokenizer.eos_token_id, \n", - " pad_token_id=tokenizer.eos_token_id,\n", - " )\n", + "QUESTION1 = \"What was the average air quality from 2024-01-10 till 2024-01-14 in New York?\"\n", + "\n", + "response1 = generate_response(\n", + " QUESTION1, \n", + " feature_view, \n", + " model_llm, \n", + " tokenizer, \n", + " model_air_quality, \n", + " encoder,\n", + " llm_chain,\n", + " verbose=True,\n", + ")\n", "\n", - " return tokenizer.decode(\n", - " generated_tokens.squeeze()[input_size:], \n", - " skip_special_tokens=True,\n", - " )" + "print(response1)" ] }, { "cell_type": "code", - "execution_count": 20, - "id": "d6c0c4ba", + "execution_count": 12, + "id": "1ddc2b2b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "I am a helpful assistant designed to retrieve data related to air pollution. 
How can I help you with that?\n", - "====================================================================================================\n", - "[{'name': 'get_data_for_date', 'arguments': {'date': '2024-02-23', 'city_name': 'Paris'}}]\n", - "====================================================================================================\n", - "[{'name': 'get_data_for_date', 'arguments': {'date': '2024-02-22', 'city_name': 'New York'}}]\n", - "====================================================================================================\n", - "[{'name': 'get_data_in_date_range', 'arguments': {'date_start': '2024-01-10', 'date_end': '2024-01-14', 'city_name': 'London'}}]\n", - "====================================================================================================\n", - "[{'name': 'get_future_data', 'arguments': {'date': '2024-02-26', 'city_name': 'London'}}]\n", - "====================================================================================================\n", - "[{'name': 'get_future_data', 'arguments': {'date': '2024-02-25', 'city_name': 'London'}}]\n", - "====================================================================================================\n", - "[{'name': 'get_future_data', 'arguments': {'date': '2024-03-01', 'city_name': 'London'}}]\n", - "====================================================================================================\n", - "[{'name': 'get_future_data', 'arguments': {'date': '2024-03-01', 'city_name': 'Amsterdam'}}]\n", - "====================================================================================================\n" + "Finished: Reading data from Hopsworks, using ArrowFlight (8.57s) \n", + "๐Ÿ—“๏ธ Today's date: Friday, 2024-03-08\n", + "๐Ÿ“– Air Quality Measurements for New York:\n", + "Date: 2024-01-10; Air Quality: 7.2\n", + "Date: 2024-01-11; Air Quality: 5.9\n", + "Date: 2024-01-12; Air Quality: 10.8\n", + "Date: 2024-01-13; Air Quality: 5.9\n", + "Date: 2024-01-14; Air Quality: 
5.1\n", + "\n", + "The maximum air quality from January 10 to January 14 in New York was on January 12, with an air quality of 10.8. This indicates that the air quality was poor on that day, and it is recommended to limit outdoor activities, especially for sensitive groups such as children, the elderly, and those with respiratory issues.\n" ] } ], "source": [ - "prompts = [\n", - " \"How are you?\",\n", - " \"What's the air quality today in Paris?\",\n", - " \"What was the air quality yesterday in New York?\",\n", - " \"What was the air quality from 2024-01-10 till 2024-01-14 in London?\",\n", - " \"What will the air quality be like in London in 2024-02-26?\",\n", - " \"What will the air quality be like in London the day after tomorrow?\",\n", - " \"What will the air quality be like in London next Friday?\",\n", - " \"What will the air quality be like on March 1 in Amsterdam?\",\n", - "]\n", + "QUESTION11 = \"When and what was the maximum air quality from 2024-01-10 till 2024-01-14 in New York?\"\n", "\n", - "for prompt in prompts:\n", - " completion = generate_hermes(prompt, model_llm, tokenizer)\n", - " functions = extract_function_calls(completion)\n", + "response11 = generate_response(\n", + " QUESTION11, \n", + " feature_view, \n", + " model_llm,\n", + " tokenizer,\n", + " model_air_quality,\n", + " encoder,\n", + " llm_chain,\n", + " verbose=True,\n", + ")\n", "\n", - " if functions:\n", - " print(functions)\n", - " else:\n", - " print(completion.strip())\n", - " print(\"=\"*100)" - ] - }, - { - "cell_type": "markdown", - "id": "fcd2d41b", - "metadata": {}, - "source": [ - "## ๐Ÿš€ Function Calling" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "a8318c11", - "metadata": {}, - "outputs": [], - "source": [ - "def invoke_function(function, feature_view, model):\n", - " # Extract function name and arguments from input_data\n", - " function_name = function['name']\n", - " arguments = function['arguments']\n", - " \n", - " # Using Python's 
getattr function to dynamically call the function by its name and passing the arguments\n", - " function_output = getattr(sys.modules[__name__], function_name)(**arguments, feature_view=feature_view, model=model)\n", - " \n", - " # Round the 'pm2_5' value to 2 decimal places\n", - " function_output['pm2_5'] = function_output['pm2_5'].apply(round, ndigits=2)\n", - " return function_output" + "print(response11)" ] }, { "cell_type": "code", - "execution_count": 22, - "id": "b1921e19", + "execution_count": 13, + "id": "4d5900a4", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "[{'name': 'get_future_data',\n", - " 'arguments': {'date': '2024-03-01', 'city_name': 'Amsterdam'}}]" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Finished: Reading data from Hopsworks, using ArrowFlight (8.65s) \n", + "๐Ÿ—“๏ธ Today's date: Friday, 2024-03-08\n", + "๐Ÿ“– Air Quality Measurements for New York:\n", + "Date: 2024-01-10; Air Quality: 7.2\n", + "Date: 2024-01-11; Air Quality: 5.9\n", + "Date: 2024-01-12; Air Quality: 10.8\n", + "Date: 2024-01-13; Air Quality: 5.9\n", + "Date: 2024-01-14; Air Quality: 5.1\n", + "\n", + "The minimum air quality from January 10 to January 14 in New York was on January 11, with an air quality of 5.9. 
This indicates that the air quality was generally good on that day, and it is safe to go outside for most activities.\n" + ] } ], "source": [ - "functions" + "QUESTION12 = \"When and what was the minimum air quality from 2024-01-10 till 2024-01-14 in New York?\"\n", + "\n", + "response12 = generate_response(\n", + " QUESTION12, \n", + " feature_view, \n", + " model_llm, \n", + " tokenizer, \n", + " model_air_quality, \n", + " encoder,\n", + " llm_chain,\n", + " verbose=True,\n", + ")\n", + "\n", + "print(response12)" ] }, { "cell_type": "code", - "execution_count": 23, - "id": "ff8cfac2", + "execution_count": 14, + "id": "bd86f511", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Finished: Reading data from Hopsworks, using ArrowFlight (7.39s) \n" + "Finished: Reading data from Hopsworks, using ArrowFlight (8.70s) \n", + "๐Ÿ—“๏ธ Today's date: Friday, 2024-03-08\n", + "๐Ÿ“– Air Quality Measurements for London:\n", + "Date: 2024-03-07; Air Quality: 25.7\n", + "\n", + "Yesterday in London, the air quality was 25.7, which indicates that the air quality was poor. It is recommended to limit outdoor activities, especially for sensitive groups such as children, the elderly, and those with respiratory issues.\n" ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datepm2_5
02024-02-236.70
12024-02-246.40
22024-02-256.32
32024-02-266.58
42024-02-276.58
\n", - "
" - ], - "text/plain": [ - " date pm2_5\n", - "0 2024-02-23 6.70\n", - "1 2024-02-24 6.40\n", - "2 2024-02-25 6.32\n", - "3 2024-02-26 6.58\n", - "4 2024-02-27 6.58" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "data_batch = invoke_function(functions[0], feature_view, model_air_quality)\n", - "data_batch.head()" - ] - }, - { - "cell_type": "markdown", - "id": "bbbb16e4", - "metadata": {}, - "source": [ - "## ๐Ÿงฌ Context Retrieval" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "5fb16d4d", - "metadata": {}, - "outputs": [], - "source": [ - "def get_context_data(user_query, model_llm, tokenizer, model_air_quality, encoder):\n", - " completion = generate_hermes(user_query, model_llm, tokenizer)\n", - " \n", - " functions = extract_function_calls(completion)\n", - " print(functions)\n", - " \n", - " if functions:\n", - " data = invoke_function(functions[0], feature_view, model_air_quality)\n", - " return '\\n'.join([f'Date: {row[1][\"date\"]}; Air Quality: {row[1][\"pm2_5\"]}' for row in data.iterrows()])\n", + "QUESTION2 = \"What was the air quality yesterday in London?\"\n", + "\n", + "response2 = generate_response(\n", + " QUESTION2, \n", + " feature_view, \n", + " model_llm, \n", + " tokenizer, \n", + " model_air_quality, \n", + " encoder,\n", + " llm_chain,\n", + " verbose=True,\n", + ")\n", "\n", - " return completion" + "print(response2)" ] }, { "cell_type": "code", - "execution_count": 25, - "id": "71bd91c1", + "execution_count": 15, + "id": "f62b1eec", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[{'name': 'get_data_in_date_range', 'arguments': {'date_start': '2024-01-10', 'date_end': '2024-01-14', 'city_name': 'New York'}}]\n", - "Finished: Reading data from Hopsworks, using ArrowFlight (7.53s) \n", - "Date: 2024-01-10; Air Quality: 7.2\n", - "Date: 2024-01-11; Air Quality: 5.9\n", - "Date: 2024-01-12; Air Quality: 10.8\n", - "Date: 
2024-01-13; Air Quality: 5.9\n", - "Date: 2024-01-14; Air Quality: 5.1\n" + "Finished: Reading data from Hopsworks, using ArrowFlight (8.77s) \n", + "๐Ÿ—“๏ธ Today's date: Friday, 2024-03-08\n", + "๐Ÿ“– Air Quality Measurements for London:\n", + "Date: 2024-03-08; Air Quality: 24.3\n", + "Date: 2024-03-09; Air Quality: 16.71\n", + "Date: 2024-03-10; Air Quality: 11.18\n", + "\n", + "The air quality in London on March 10, 2024, was 11.18, which indicates that the air quality was unhealthy for sensitive groups such as children, the elderly, and those with respiratory issues. It is recommended to limit outdoor activities on that day for these groups.\n" ] } ], "source": [ - "QUESTION1 = \"What was the air quality from 2024-01-10 till 2024-01-14 in New York?\"\n", + "QUESTION3 = \"What will the air quality be like in London in 2024-03-10?\"\n", "\n", - "data_pred_q1 = get_context_data(QUESTION1, model_llm, tokenizer, model_air_quality, encoder)\n", - "print(data_pred_q1)" + "response3 = generate_response(\n", + " QUESTION3, \n", + " feature_view, \n", + " model_llm, \n", + " tokenizer,\n", + " model_air_quality,\n", + " encoder,\n", + " llm_chain,\n", + " verbose=True,\n", + ")\n", + "\n", + "print(response3)" ] }, { "cell_type": "code", - "execution_count": 26, - "id": "7c2c755d", + "execution_count": 16, + "id": "926721f3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[{'name': 'get_data_for_date', 'arguments': {'date': '2024-02-22', 'city_name': 'Amsterdam'}}]\n", - "Finished: Reading data from Hopsworks, using ArrowFlight (7.49s) \n", - "Date: 2024-02-22; Air Quality: 5.2\n" + "Finished: Reading data from Hopsworks, using ArrowFlight (8.76s) \n", + "๐Ÿ—“๏ธ Today's date: Friday, 2024-03-08\n", + "๐Ÿ“– Air Quality Measurements for Chicago:\n", + "Date: 2024-03-08; Air Quality: 10.0\n", + "Date: 2024-03-09; Air Quality: 8.19\n", + "Date: 2024-03-10; Air Quality: 8.61\n", + "\n", + "The air quality in Chicago the day after 
tomorrow, on March 10, 2024, was 8.61. This indicates that the air quality was unhealthy for sensitive groups such as children, the elderly, and those with respiratory issues. It is recommended to limit outdoor activities on that day for these groups.\n" ] } ], "source": [ - "QUESTION2 = \"What was the air quality yesterday in Amsterdam?\"\n", + "QUESTION4 = \"What will the air quality be like in Chicago the day after tomorrow?\"\n", + "\n", + "response4 = generate_response(\n", + " QUESTION4, \n", + " feature_view, \n", + " model_llm, \n", + " tokenizer, \n", + " model_air_quality, \n", + " encoder,\n", + " llm_chain,\n", + " verbose=True,\n", + ")\n", "\n", - "data_pred_q2 = get_context_data(QUESTION2, model_llm, tokenizer, model_air_quality, encoder)\n", - "print(data_pred_q2)" + "print(response4)" ] }, { "cell_type": "code", - "execution_count": 27, - "id": "e3221e42", + "execution_count": 17, + "id": "5f57ada0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[{'name': 'get_future_data', 'arguments': {'date': '2024-02-27', 'city_name': 'London'}}]\n", - "Finished: Reading data from Hopsworks, using ArrowFlight (7.86s) \n", - "Date: 2024-02-23; Air Quality: 8.1\n", - "Date: 2024-02-24; Air Quality: 7.45\n", - "Date: 2024-02-25; Air Quality: 8.31\n", - "Date: 2024-02-26; Air Quality: 8.57\n", - "Date: 2024-02-27; Air Quality: 8.15\n" + "Finished: Reading data from Hopsworks, using ArrowFlight (8.83s) \n", + "๐Ÿ—“๏ธ Today's date: Friday, 2024-03-08\n", + "๐Ÿ“– Air Quality Measurements for London:\n", + "Date: 2024-03-08; Air Quality: 24.3\n", + "Date: 2024-03-09; Air Quality: 16.71\n", + "Date: 2024-03-10; Air Quality: 11.18\n", + "\n", + "On Sunday, the air quality in London is expected to be 16.71, which indicates that the air quality is unhealthy for sensitive groups such as children, the elderly, and those with respiratory issues. 
It is recommended to limit outdoor activities on that day for these groups.\n" ] } ], "source": [ - "QUESTION3 = \"What will the air quality be like in London in 2024-02-27?\"\n", + "QUESTION5 = \"What will the air quality be like in London on Sunday?\"\n", + "\n", + "response5 = generate_response(\n", + " QUESTION5, \n", + " feature_view, \n", + " model_llm, \n", + " tokenizer, \n", + " model_air_quality, \n", + " encoder,\n", + " llm_chain,\n", + " verbose=True,\n", + ")\n", "\n", - "data_pred_q3 = get_context_data(QUESTION3, model_llm, tokenizer, model_air_quality, encoder)\n", - "print(data_pred_q3)" + "print(response5)" ] }, { "cell_type": "code", - "execution_count": 28, - "id": "5ed3ec9d", + "execution_count": 18, + "id": "b4f264b5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[{'name': 'get_future_data', 'arguments': {'date': '2024-02-25', 'city_name': 'Chicago'}}]\n", - "Finished: Reading data from Hopsworks, using ArrowFlight (7.63s) \n", - "Date: 2024-02-23; Air Quality: 14.1\n", - "Date: 2024-02-24; Air Quality: 12.87\n", - "Date: 2024-02-25; Air Quality: 9.85\n" + "Finished: Reading data from Hopsworks, using ArrowFlight (8.80s) \n", + "๐Ÿ—“๏ธ Today's date: Friday, 2024-03-08\n", + "๐Ÿ“– Air Quality Measurements for London:\n", + "Date: 2024-03-08; Air Quality: 24.3\n", + "Date: 2024-03-09; Air Quality: 16.71\n", + "\n", + "On March 9 in London, the air quality was 16.71, which indicates that the air quality was unhealthy for sensitive groups such as children, the elderly, and those with respiratory issues. 
It is recommended to limit outdoor activities on that day for these groups.\n" ] } ], "source": [ - "QUESTION4 = \"What will the air quality be like in Chicago the day after tomorrow?\"\n", + "QUESTION7 = \"What will the air quality be like on March 9 in London?\"\n", "\n", - "data_pred_q4 = get_context_data(QUESTION4, model_llm, tokenizer, model_air_quality, encoder)\n", - "print(data_pred_q4)" + "response7 = generate_response(\n", + " QUESTION7, \n", + " feature_view,\n", + " model_llm,\n", + " tokenizer, \n", + " model_air_quality, \n", + " encoder,\n", + " llm_chain,\n", + " verbose=True,\n", + ")\n", + "\n", + "print(response7)" ] }, { "cell_type": "code", - "execution_count": 29, - "id": "39665b4d", + "execution_count": 19, + "id": "4b47ef5d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[{'name': 'get_future_data', 'arguments': {'date': '2024-03-01', 'city_name': 'London'}}]\n", - "Finished: Reading data from Hopsworks, using ArrowFlight (7.81s) \n", - "Date: 2024-02-23; Air Quality: 8.1\n", - "Date: 2024-02-24; Air Quality: 7.45\n", - "Date: 2024-02-25; Air Quality: 8.31\n", - "Date: 2024-02-26; Air Quality: 8.57\n", - "Date: 2024-02-27; Air Quality: 8.15\n", - "Date: 2024-02-28; Air Quality: 7.97\n", - "Date: 2024-02-29; Air Quality: 7.97\n", - "Date: 2024-03-01; Air Quality: 8.32\n" + "Finished: Reading data from Hopsworks, using ArrowFlight (8.76s) \n", + "๐Ÿ—“๏ธ Today's date: Friday, 2024-03-08\n", + "๐Ÿ“– Air Quality Measurements for London:\n", + "Date: 2024-03-08; Air Quality: 24.3\n", + "Date: 2024-03-09; Air Quality: 16.71\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "UserWarning: You seem to be using the pipelines sequentially on GPU. 
In order to maximize efficiency please use a dataset\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "On March 9 in London, the air quality was 16.71, which indicates that the air quality was unhealthy for sensitive groups such as children, the elderly, and those with respiratory issues. It is recommended to limit outdoor activities on that day for these groups.\n" ] } ], "source": [ - "QUESTION5 = \"What will the air quality be like in London next Friday?\"\n", + "QUESTION = \"Is this level safe or not?\"\n", + "\n", + "response = generate_response(\n", + " QUESTION, \n", + " feature_view, \n", + " model_llm, \n", + " tokenizer,\n", + " model_air_quality,\n", + " encoder,\n", + " llm_chain,\n", + " verbose=True,\n", + ")\n", "\n", - "data_pred_q5 = get_context_data(QUESTION5, model_llm, tokenizer, model_air_quality, encoder)\n", - "print(data_pred_q5)" + "print(response)" ] }, { "cell_type": "code", - "execution_count": 30, - "id": "5b3d8312", + "execution_count": 20, + "id": "b011464a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "None\n", + "๐Ÿ—“๏ธ Today's date: Friday, 2024-03-08\n", + "๐Ÿ“– \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "\n", - "I am a machine learning model and I don't have feelings, but I am here to help you with your queries. How can I assist you today?\n" + "The air quality level of 16.71 in London is considered unhealthy for sensitive groups such as children, the elderly, and those with respiratory issues. 
It is not dangerous for everyone, but it is recommended to limit outdoor activities for sensitive groups.\n" ] } ], "source": [ - "QUESTION6 = \"How are you?\"\n", + "QUESTION = \"Is this air quality level dangerous?\"\n", + "\n", + "response = generate_response(\n", + " QUESTION, \n", + " feature_view, \n", + " model_llm, \n", + " tokenizer,\n", + " model_air_quality, \n", + " encoder,\n", + " llm_chain,\n", + " verbose=True,\n", + ")\n", "\n", - "data_pred_q6 = get_context_data(QUESTION6, model_llm, tokenizer, model_air_quality, encoder)\n", - "print(data_pred_q6)" + "print(response)" ] }, { "cell_type": "code", - "execution_count": 31, - "id": "d1af9d23", + "execution_count": 21, + "id": "d9004f08", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[{'name': 'get_future_data', 'arguments': {'date': '2024-03-01', 'city_name': 'Amsterdam'}}]\n", - "Finished: Reading data from Hopsworks, using ArrowFlight (7.76s) \n", - "Date: 2024-02-23; Air Quality: 6.7\n", - "Date: 2024-02-24; Air Quality: 6.4\n", - "Date: 2024-02-25; Air Quality: 6.32\n", - "Date: 2024-02-26; Air Quality: 6.58\n", - "Date: 2024-02-27; Air Quality: 6.58\n", - "Date: 2024-02-28; Air Quality: 6.58\n", - "Date: 2024-02-29; Air Quality: 6.54\n", - "Date: 2024-03-01; Air Quality: 6.58\n" + "๐Ÿ—“๏ธ Today's date: Friday, 2024-03-08\n", + "๐Ÿ“– \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Certainly! Air quality levels are usually measured using an index, such as the Air Quality Index (AQI) or the Pollution Standards Index (PSI). These indices provide a numerical value that represents the air quality at a particular location. 
The levels are usually categorized into different color-coded ranges, with each range representing a different level of air quality. Here's a general breakdown of the air quality levels:\n", + "\n", + "1. Good (AQI 0-50): The air quality is considered good, and it is safe for everyone to breathe.\n", + "2. Moderate (AQI 51-100): The air quality is generally fine, but sensitive groups might experience some discomfort.\n", + "3. Unhealthy for Sensitive Groups (AQI 101-150): The air quality is unhealthy for sensitive groups such as children, the elderly, and those with respiratory issues. It is recommended to limit outdoor activities for these groups.\n", + "4. Unhealthy (AQI 151-200): The air quality is unhealthy for everyone, and sensitive groups should avoid prolonged outdoor activities.\n", + "5. Very Unhealthy (AQI 201-300): The air quality is very unhealthy, and everyone should limit their outdoor activities, especially children, the elderly, and those with respiratory issues.\n", + "6. 
Hazardous (AQI >300): The air quality is hazardous, and everyone should avoid all outdoor activities.\n", + "\n", + "These categories may vary slightly depending on the specific air quality index being used, but the general idea remains the same.\n" ] } ], "source": [ - "QUESTION7 = \"What will the air quality be like on March 1 in Amsterdam?\"\n", + "QUESTION = \"Can you please explain different air quality levels?\"\n", + "\n", + "response = generate_response(\n", + " QUESTION, \n", + " feature_view, \n", + " model_llm, \n", + " tokenizer,\n", + " model_air_quality, \n", + " encoder,\n", + " llm_chain,\n", + " verbose=True,\n", + ")\n", "\n", - "data_pred_q7 = get_context_data(QUESTION7, model_llm, tokenizer, model_air_quality, encoder)\n", - "print(data_pred_q7)" + "print(response)" ] }, { "cell_type": "markdown", - "id": "0bf1e6de", + "id": "cbc3a09f", "metadata": {}, "source": [ "---" diff --git a/advanced_tutorials/air_quality/app.py b/advanced_tutorials/air_quality/app.py new file mode 100644 index 00000000..480ad366 --- /dev/null +++ b/advanced_tutorials/air_quality/app.py @@ -0,0 +1,100 @@ +import streamlit as st +import hopsworks +import joblib +from functions.llm_chain import load_model, get_llm_chain, generate_response +import warnings +warnings.filterwarnings('ignore') + +st.title("๐ŸŒค๏ธ AirQuality AI assistant ๐Ÿ’ฌ") + + +@st.cache_resource() +def connect_to_hopsworks(): + # Initialize Hopsworks feature store connection + project = hopsworks.login() + fs = project.get_feature_store() + + # Retrieve the model registry + mr = project.get_model_registry() + + # Retrieve the 'air_quality_fv' feature view + feature_view = fs.get_feature_view( + name="air_quality_fv", + version=1, + ) + + # Initialize batch scoring + feature_view.init_batch_scoring(1) + + # Retrieve the 'air_quality_xgboost_model' from the model registry + retrieved_model = mr.get_model( + name="air_quality_xgboost_model", + version=1, + ) + + # Download the saved model artifacts to a 
local directory + saved_model_dir = retrieved_model.download() + + # Load the XGBoost regressor model and label encoder from the saved model directory + model_air_quality = joblib.load(saved_model_dir + "/xgboost_regressor.pkl") + encoder = joblib.load(saved_model_dir + "/label_encoder.pkl") + + return feature_view, model_air_quality, encoder + + +@st.cache_resource() +def retrieve_llm_chain(): + + # Load the LLM and its corresponding tokenizer. + model_llm, tokenizer = load_model() + + # Create and configure a language model chain. + llm_chain = get_llm_chain( + model_llm, + tokenizer, + ) + + return model_llm, tokenizer, llm_chain + + +# Retrieve the feature view, air quality model and encoder for the city_name column +feature_view, model_air_quality, encoder = connect_to_hopsworks() + +# Load the LLM and its corresponding tokenizer and configure a language model chain +model_llm, tokenizer, llm_chain = retrieve_llm_chain() + +# Initialize chat history +if "messages" not in st.session_state: + st.session_state.messages = [] + +# Display chat messages from history on app rerun +for message in st.session_state.messages: + with st.chat_message(message["role"]): + st.markdown(message["content"]) + +# React to user input +if user_query := st.chat_input("How can I help you?"): + # Display user message in chat message container + st.chat_message("user").markdown(user_query) + # Add user message to chat history + st.session_state.messages.append({"role": "user", "content": user_query}) + + st.write('โš™๏ธ Generating Response...') + + # Generate a response to the user query + response = generate_response( + user_query, + feature_view, + model_llm, + tokenizer, + model_air_quality, + encoder, + llm_chain, + verbose=False, + ) + + # Display assistant response in chat message container + with st.chat_message("assistant"): + st.markdown(response) + # Add assistant response to chat history + st.session_state.messages.append({"role": "assistant", "content": response}) diff 
--git a/advanced_tutorials/air_quality/app_voice.py b/advanced_tutorials/air_quality/app_voice.py new file mode 100644 index 00000000..c6eb56a8 --- /dev/null +++ b/advanced_tutorials/air_quality/app_voice.py @@ -0,0 +1,46 @@ +import streamlit as st +import whisper +import tempfile +import os + +# Function to load the model (adjust the model size as needed) +# Cached as a Streamlit resource so the weights are loaded once, not on every script rerun +@st.cache_resource() +def load_whisper_model(model_name='base'): + model = whisper.load_model(model_name) + return model + +# Function to transcribe and translate audio +def transcribe_and_translate_audio(model, audio_file): + # Save the uploaded file to a temporary file + with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file: + tmp_file.write(audio_file.getvalue()) + tmp_filename = tmp_file.name + + try: + # Transcribe and translate the audio file + result = model.transcribe(tmp_filename, task="translate") + finally: + # delete=False keeps the file on disk after the with-block, so remove it explicitly + os.remove(tmp_filename) + return result['text'] + +# Streamlit app +def main(): + st.title('Speech to Text Translation with Whisper') + st.write("Upload an audio file, and the app will transcribe and translate the speech to English text using OpenAIโ€™s Whisper model.") + + # Load the Whisper model + model = load_whisper_model() + + # Audio file uploader + audio_file = st.file_uploader("Choose an audio file...", type=['wav', 'mp3', 'ogg']) + + if audio_file is not None: + # Display a message while the model is transcribing and translating the audio + with st.spinner('Transcribing and translating the audio...'): + transcription = transcribe_and_translate_audio(model, audio_file) + st.text_area("Transcribed and Translated Text", transcription, height=300) + +if __name__ == "__main__": + main() diff --git a/advanced_tutorials/air_quality/feature_pipeline.py b/advanced_tutorials/air_quality/feature_pipeline.py deleted file mode 100644 index 0cee9c43..00000000 --- a/advanced_tutorials/air_quality/feature_pipeline.py +++ /dev/null @@ -1,158 +0,0 @@ -import datetime -import time -import requests -import pandas as pd -import json -import hopsworks - -from
functions import * - -import warnings -warnings.filterwarnings("ignore") - -from dotenv import load_dotenv -load_dotenv() - -import os - -# Get the value of the PARAMETER environment variable -continent = os.environ.get('CONTINENT') - - -file_path = os.path.join(os.getcwd(), 'advanced_tutorials', 'air_quality', 'target_cities.json') -with open(file_path) as json_file: - target_cities = json.load(json_file) - - -def get_batch_data_from_fs(td_version, date_threshold): - print(f"Retrieving the Batch data since {date_threshold}") - feature_view.init_batch_scoring(training_dataset_version=td_version) - - batch_data = feature_view.get_batch_data(start_time=date_threshold) - return batch_data - - -def parse_aq_data(last_dates_dict, today): - start_of_cell = time.time() - df_aq_raw = pd.DataFrame() - - print("Parsing started...") - # for continent in target_cities: - for city_name, coords in target_cities[continent].items(): - df_ = get_aqi_data_from_open_meteo(city_name=city_name, - coordinates=coords, - start_date=last_dates_dict[city_name], - end_date=str(today)) - df_aq_raw = pd.concat([df_aq_raw, df_]).reset_index(drop=True) - end_of_cell = time.time() - print("-" * 64) - print(f"Parsed new PM2.5 data for ALL locations up to {str(today)}.") - print(f"Took {round(end_of_cell - start_of_cell, 2)} sec.\n") - return df_aq_raw - - -def parse_weather(last_dates_dict, today): - df_weather_update = pd.DataFrame() - start_of_cell = time.time() - - print("Parsing started...") - # for continent in target_cities: - for city_name, coords in target_cities[continent].items(): - df_ = get_weather_data_from_open_meteo(city_name=city_name, - coordinates=coords, - start_date=last_dates_dict[city_name], - end_date=str(today), - forecast=True) - df_weather_update = pd.concat([df_weather_update, df_]).reset_index(drop=True) - - end_of_cell = time.time() - print(f"Parsed new weather data for ALL cities up to {str(today)}.") - print(f"Took {round(end_of_cell - start_of_cell, 2)} sec.\n") - 
return df_weather_update - - - -if __name__=="__main__": - project = hopsworks.login() - fs = project.get_feature_store() - print("โœ… Logged in successfully!") - - feature_view = fs.get_feature_view( - name='air_quality_fv', - version=1 - ) - - # I am going to load data for of last 60 days (for feature engineering) - today = datetime.date.today() - date_threshold = today - datetime.timedelta(days=60) - - print("Getting the batch data...") - batch_data = get_batch_data_from_fs(td_version=1, - date_threshold=date_threshold) - - print("Retreived batch data.") - - - last_dates_dict = batch_data[["date", "city_name"]].groupby("city_name").max() - last_dates_dict.date = last_dates_dict.date.astype(str) - # here is a dictionary with city names as keys and last updated date as values - last_dates_dict = last_dates_dict.to_dict()["date"] - - df_aq_raw = parse_aq_data(last_dates_dict, today) - - # we need the previous data to calculate aggregation functions - df_aq_update = pd.concat([ - batch_data[df_aq_raw.columns], - df_aq_raw - ]).reset_index(drop=True) - df_aq_update = df_aq_update.drop_duplicates(subset=['city_name', 'date']) - - print(df_aq_update.tail(7)) - - print('\n๐Ÿ›  Feature Engineering the PM2.5') - - ### - df_aq_update['date'] = pd.to_datetime(df_aq_update['date']) - df_aq_update = feature_engineer_aq(df_aq_update) - df_aq_update = df_aq_update.dropna() - - print(df_aq_update.groupby("city_name").max().tail(7)) - print("โœ… Success!") - ### - - print(3 * "-") - print('\n๐ŸŒค๐Ÿ“† Parsing Weather data') - - df_weather_update = parse_weather(last_dates_dict, today) - print(df_weather_update.groupby("city_name").max().tail(7)) - print("โœ… Successfully parsed!") - - df_aq_update.date = df_aq_update.date.astype(str) - df_weather_update.date = df_weather_update.date.astype(str) - - print("Connecting to feature groups...") - air_quality_fg = fs.get_or_create_feature_group( - name = 'air_quality', - version = 1 - ) - weather_fg = fs.get_or_create_feature_group( - 
name = 'weather', - version = 1 - ) - - df_aq_update.date = pd.to_datetime(df_aq_update.date) - df_weather_update.date = pd.to_datetime(df_weather_update.date) - - df_aq_update["unix_time"] = df_aq_update["date"].apply(convert_date_to_unix) - df_weather_update["unix_time"] = df_weather_update["date"].apply(convert_date_to_unix) - - df_aq_update.date = df_aq_update.date.astype(str) - df_weather_update.date = df_weather_update.date.astype(str) - - air_quality_fg.insert(df_aq_update) - print("Created job to insert parsed PM2.5 data into FS...") - print("Inserting into air_quality fg.") - - weather_fg.insert(df_weather_update) - print("Created job to insert parsed weather data into FS...") - print("Inserting into weather fg.") diff --git a/advanced_tutorials/air_quality/features/__init__.py b/advanced_tutorials/air_quality/features/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/advanced_tutorials/air_quality/features/air_quality.py b/advanced_tutorials/air_quality/features/air_quality.py index 40fc8d1e..4cd4fed0 100644 --- a/advanced_tutorials/air_quality/features/air_quality.py +++ b/advanced_tutorials/air_quality/features/air_quality.py @@ -18,7 +18,6 @@ def shift_pm_2_5(df: pd.DataFrame, days: int = 5) -> pd.DataFrame: """ for shift_value in range(1, days + 1): df[f'pm_2_5_previous_{shift_value}_day'] = df.groupby('city_name')['pm2_5'].shift(shift_value) - df = df.dropna() return df @@ -227,8 +226,9 @@ def feature_engineer_aq(df: pd.DataFrame) -> pd.DataFrame: for i in [7, 14, 28]: for func in [moving_std, exponential_moving_average, exponential_moving_std]: df_res = func(df_res, i) - - df_res = df_res.sort_values(by=["date", "pm2_5"]).dropna() + + + df_res = df_res.sort_values(by=["date", "pm2_5"]) df_res = df_res.reset_index(drop=True) df_res['year'] = year(df_res['date']) diff --git a/advanced_tutorials/air_quality/functions.py b/advanced_tutorials/air_quality/functions.py deleted file mode 100644 index e0d46205..00000000 --- 
a/advanced_tutorials/air_quality/functions.py +++ /dev/null @@ -1,392 +0,0 @@ -import os -import datetime -import time -import requests -import pandas as pd -import json - -from geopy.geocoders import Nominatim - - -def convert_date_to_unix(x): - """ - Convert datetime to unix time in milliseconds. - """ - dt_obj = datetime.datetime.strptime(str(x), '%Y-%m-%d %H:%M:%S') - dt_obj = int(dt_obj.timestamp() * 1000) - return dt_obj - - -def get_city_coordinates(city_name: str): - """ - Takes city name and returns its latitude and longitude (rounded to 2 digits after dot). - """ - # Initialize Nominatim API (for getting lat and long of the city) - geolocator = Nominatim(user_agent="MyApp") - city = geolocator.geocode(city_name) - - latitude = round(city.latitude, 2) - longitude = round(city.longitude, 2) - - return latitude, longitude - - -##################################### EEA -def convert_to_daily(df, pollutant: str): - """ - Returns DataFrame where pollutant column is resampled to days and rounded. - """ - res_df = df.copy() - # convert dates in 'time' column - res_df["date"] = pd.to_datetime(res_df["date"]) - - # I want data daily, not hourly (mean per each day = 1 datarow per 1 day) - res_df = res_df.set_index('date') - res_df = res_df[pollutant].resample('1d').mean().reset_index() - res_df[pollutant] = res_df[pollutant].fillna(res_df[pollutant].median()) - res_df[pollutant] = res_df[pollutant].apply(lambda x: round(x, 0)) - - return res_df - - -def find_fullest_csv(csv_links: list, year: str): - candidates = [link for link in csv_links if str(year) in link] - biggest_df = pd.read_csv(candidates[0]) - for link in candidates[1:]: - _df = pd.read_csv(link) - if len(biggest_df) < len(_df): - biggest_df = _df - return biggest_df - - -def get_air_quality_from_eea( - city_name: str, - pollutant: str, - start_year: str, - end_year: str, - ): - """ - Takes city name, daterange and returns pandas DataFrame with daily air quality data. 
- It parses data by 1-year batches, so please specify years, not dates. (example: "2014", "2022"...) - - EEA means European Environmental Agency. So it has data for Europe Union countries ONLY. - """ - start_of_cell = time.time() - - params = { - 'CountryCode': '', - 'CityName': city_name, - 'Pollutant': pollutant.upper(), - 'Year_from': start_year, - 'Year_to': end_year, - 'Station': '', - 'Source': 'All', - 'Samplingpoint': '', - 'Output': 'TEXT', - 'UpdateDate': '', - 'TimeCoverage': 'Year' - } - - # observations endpoint - base_url = "https://fme.discomap.eea.europa.eu/fmedatastreaming/AirQualityDownload/AQData_Extract.fmw?" - try: - response = requests.get(base_url, params=params) - except ConnectionError: - response = requests.get(base_url, params=params) - - response.encoding = response.apparent_encoding - csv_links = response.text.split("\r\n") - - res_df = pd.DataFrame() - target_year = int(start_year) - - for year in range(int(start_year), int(end_year) + 1): - try: - # find the fullest, the biggest csv file with observations for this particular year - _df = find_fullest_csv(csv_links, year) - # append it to res_df - res_df = pd.concat([res_df, _df]) - except IndexError: - print(f"!! 
Missing data for {year} for {city} city.") - pass - - pollutant = pollutant.lower() - if pollutant == "pm2.5": - pollutant = "pm2_5" - - res_df = res_df.rename(columns={ - 'DatetimeBegin': 'date', - 'Concentration': pollutant - }) - - # cut timezones info - res_df['date'] = res_df['date'].apply(lambda x: x[:-6]) - # convert dates in 'time' column - res_df['date'] = pd.to_datetime(res_df['date']) - - res_df = convert_to_daily(res_df, pollutant) - - res_df['city_name'] = city_name - res_df = res_df[['city_name', 'date', pollutant.lower()]] - - end_of_cell = time.time() - - print(f"Processed {pollutant.upper()} for {city_name} since {start_year} till {end_year}.") - print(f"Took {round(end_of_cell - start_of_cell, 2)} sec.\n") - - return res_df - - - -##################################### USEPA -city_code_dict = {} -pollutant_dict = { - 'CO': '42101', - 'SO2': '42401', - 'NO2': '42602', - 'O3': '44201', - 'PM10': '81102', - 'PM2.5': '88101' -} - -def get_city_code(city_name: str): - "Encodes city name to be used later for data parsing using USEPA." - if city_code_dict: - city_full = [i for i in city_code_dict.keys() if city_name in i][0] - return city_code_dict[city_full] - else: - params = { - "email": "test@aqs.api", - "key": "test" - } - response = requests.get("https://aqs.epa.gov/data/api/list/cbsas?", params) - response_json = response.json() - data = response_json["Data"] - for item in data: - city_code_dict[item['value_represented']] = item['code'] - - return get_city_code(city_name) - - -def get_air_quality_from_usepa( - city_name: str, - pollutant: str, - start_date: str, - end_date: str - ): - """ - Takes city name, daterange and returns pandas DataFrame with daily air quality data. - - USEPA means United States Environmental Protection Agency. So it has data for US ONLY. 
- """ - start_of_cell = time.time() - res_df = pd.DataFrame() - - for start_date_, end_date_ in make_date_intervals(start_date, end_date): - params = { - "email": "test@aqs.api", - "key": "test", - "param": pollutant_dict[pollutant.upper().replace("_", ".")], # encoded pollutant - "bdate": start_date_, - "edate": end_date_, - "cbsa": get_city_code(city_name) # Core-based statistical area - } - - # observations endpoint - base_url = "https://aqs.epa.gov/data/api/dailyData/byCBSA?" - - response = requests.get(base_url, params=params) - response_json = response.json() - - df_ = pd.DataFrame(response_json["Data"]) - - pollutant = pollutant.lower() - if pollutant == "pm2.5": - pollutant = "pm2_5" - df_ = df_.rename(columns={ - 'date_local': 'date', - 'arithmetic_mean': pollutant - }) - - # convert dates in 'date' column - df_['date'] = pd.to_datetime(df_['date']) - df_['city_name'] = city_name - df_ = df_[['city_name', 'date', pollutant]] - res_df = pd.concat([res_df, df_]) - - # there are duplicated rows (several records for the same day and station). get rid of it. 
- res_df = res_df.groupby(['date', 'city_name'], as_index=False)[pollutant].mean() - res_df[pollutant] = round(res_df[pollutant], 1) - - end_of_cell = time.time() - print(f"Processed {pollutant.upper()} for {city_name} since {start_date} till {end_date}.") - print(f"Took {round(end_of_cell - start_of_cell, 2)} sec.\n") - - return res_df - - -def make_date_intervals(start_date, end_date): - start_dt = datetime.datetime.strptime(start_date, '%Y-%m-%d') - end_dt = datetime.datetime.strptime(end_date, '%Y-%m-%d') - date_intervals = [] - for year in range(start_dt.year, end_dt.year + 1): - year_start = datetime.datetime(year, 1, 1) - year_end = datetime.datetime(year, 12, 31) - interval_start = max(start_dt, year_start) - interval_end = min(end_dt, year_end) - if interval_start < interval_end: - date_intervals.append((interval_start.strftime('%Y%m%d'), interval_end.strftime('%Y%m%d'))) - return date_intervals - -##################################### Weather Open Meteo -def get_weather_data_from_open_meteo( - city_name: str, - start_date: str, - end_date: str, - coordinates: list = None, - forecast: bool = False, - ): - """ - Takes [city name OR coordinates] and returns pandas DataFrame with weather data. 
- - Examples of arguments: - coordinates=(47.755, -122.2806), start_date="2023-01-01" - """ - start_of_cell = time.time() - - if coordinates: - latitude, longitude = coordinates - else: - latitude, longitude = get_city_coordinates(city_name=city_name) - - params = { - 'latitude': latitude, - 'longitude': longitude, - 'daily': ["temperature_2m_max", "temperature_2m_min", - "precipitation_sum", "rain_sum", "snowfall_sum", - "precipitation_hours", "windspeed_10m_max", - "windgusts_10m_max", "winddirection_10m_dominant"], - 'timezone': "Europe/London", - 'start_date': start_date, - 'end_date': end_date, - } - - if forecast: - # historical forecast endpoint - base_url = 'https://api.open-meteo.com/v1/forecast' - else: - # historical observations endpoint - base_url = 'https://archive-api.open-meteo.com/v1/archive' - - try: - response = requests.get(base_url, params=params) - time.sleep(2) - except ConnectionError: - response = requests.get(base_url, params=params) - - response_json = response.json() - - res_df = pd.DataFrame(response_json["daily"]) - res_df["city_name"] = city_name - - # rename columns - res_df = res_df.rename(columns={ - "time": "date", - "temperature_2m_max": "temperature_max", - "temperature_2m_min": "temperature_min", - "windspeed_10m_max": "wind_speed_max", - "winddirection_10m_dominant": "wind_direction_dominant", - "windgusts_10m_max": "wind_gusts_max" - }) - - # change columns order - res_df = res_df[ - ['city_name', 'date', 'temperature_max', 'temperature_min', - 'precipitation_sum', 'rain_sum', 'snowfall_sum', - 'precipitation_hours', 'wind_speed_max', - 'wind_gusts_max', 'wind_direction_dominant'] - ] - - # convert dates in 'date' column - res_df["date"] = pd.to_datetime(res_df["date"]) - end_of_cell = time.time() - print(f"Parsed weather for {city_name} since {start_date} till {end_date}.") - print(f"Took {round(end_of_cell - start_of_cell, 2)} sec.\n") - - return res_df - - -##################################### Air Quality data from Open 
Meteo -def get_aqi_data_from_open_meteo( - city_name: str, - start_date: str, - end_date: str, - coordinates: list = None, - pollutant: str = "pm2_5" - ): - """ - Takes [city name OR coordinates] and returns pandas DataFrame with AQI data. - - Examples of arguments: - ... - coordinates=(47.755, -122.2806), - start_date="2023-01-01", - pollutant="no2" - ... - """ - start_of_cell = time.time() - - if coordinates: - latitude, longitude = coordinates - else: - latitude, longitude = get_city_coordinates(city_name=city_name) - - pollutant = pollutant.lower() - if pollutant == "pm2.5": - pollutant = "pm2_5" - - # make it work with both "no2" and "nitrogen_dioxide" passed. - if pollutant == "no2": - pollutant = "nitrogen_dioxide" - - params = { - 'latitude': latitude, - 'longitude': longitude, - 'hourly': [pollutant], - 'start_date': start_date, - 'end_date': end_date, - 'timezone': "Europe/London" - } - - # base endpoint - base_url = "https://air-quality-api.open-meteo.com/v1/air-quality" - try: - response = requests.get(base_url, params=params) - except ConnectionError: - response = requests.get(base_url, params=params) - response_json = response.json() - res_df = pd.DataFrame(response_json["hourly"]) - - # convert dates - res_df["time"] = pd.to_datetime(res_df["time"]) - - # resample to days - res_df = res_df.groupby(res_df['time'].dt.date).mean(numeric_only=True).reset_index() - res_df[pollutant] = round(res_df[pollutant], 1) - - # rename columns - res_df = res_df.rename(columns={ - "time": "date" - }) - - res_df["city_name"] = city_name - - # change columns order - res_df = res_df[ - ['city_name', 'date', pollutant] - ] - end_of_cell = time.time() - print(f"Processed {pollutant.upper()} for {city_name} since {start_date} till {end_date}.") - print(f"Took {round(end_of_cell - start_of_cell, 2)} sec.\n") - - return res_df \ No newline at end of file diff --git a/advanced_tutorials/air_quality/functions/air_quality_data_retrieval.py 
b/advanced_tutorials/air_quality/functions/air_quality_data_retrieval.py new file mode 100644 index 00000000..8f9afd6c --- /dev/null +++ b/advanced_tutorials/air_quality/functions/air_quality_data_retrieval.py @@ -0,0 +1,164 @@ +import pandas as pd +from typing import Any, Dict, List +import datetime +import pandas as pd + + +def transform_data(data, encoder): + """ + Transform the input data by encoding the 'city_name' column and dropping unnecessary columns. + + Args: + - data (DataFrame): Input data to be transformed. + - encoder (LabelEncoder): Label encoder object to encode 'city_name'. + + Returns: + - data_transformed (DataFrame): Transformed data with 'city_name_encoded' and dropped columns. + """ + + # Create a copy of the input data to avoid modifying the original data + data_transformed = data.copy() + + # Transform the 'city_name' column in the batch data using the retrieved label encoder + data_transformed['city_name_encoded'] = encoder.transform(data_transformed['city_name']) + + # Drop unnecessary columns from the batch data + data_transformed = data_transformed.drop(columns=['unix_time', 'pm2_5', 'city_name', 'date']) + + return data_transformed + + +def get_data_for_date(date: str, city_name: str, feature_view, model, encoder) -> pd.DataFrame: + """ + Retrieve data for a specific date and city from a feature view. + + Args: + date (str): The date in the format "%Y-%m-%d". + city_name (str): The name of the city to retrieve data for. + feature_view: The feature view object. + model: The machine learning model used for prediction. + encoder (LabelEncoder): Label encoder object to encode 'city_name'. + + Returns: + pd.DataFrame: A DataFrame containing data for the specified date and city. 
+ """ + # Convert date string to datetime object + date_datetime = datetime.datetime.strptime(date, "%Y-%m-%d").date() + + # Retrieve batch data for the specified date range + batch_data = feature_view.get_batch_data( + start_time=date_datetime, + end_time=date_datetime + datetime.timedelta(days=1), + ) + + # Filter batch data for the specified city + batch_data_filtered = batch_data[batch_data['city_name'] == city_name] + + return batch_data_filtered[['date', 'pm2_5']].sort_values('date').reset_index(drop=True) + + +def get_data_in_date_range(date_start: str, date_end: str, city_name: str, feature_view, model, encoder) -> pd.DataFrame: + """ + Retrieve data for a specific date range and city from a feature view. + + Args: + date_start (str): The start date in the format "%Y-%m-%d". + date_end (str): The end date in the format "%Y-%m-%d". + city_name (str): The name of the city to retrieve data for. + feature_view: The feature view object. + model: The machine learning model used for prediction. + encoder (LabelEncoder): Label encoder object to encode 'city_name'. + + Returns: + pd.DataFrame: A DataFrame containing data for the specified date range and city. + """ + # Convert date strings to datetime objects + date_start_dt = datetime.datetime.strptime(date_start, "%Y-%m-%d").date() + date_end_dt = datetime.datetime.strptime(date_end, "%Y-%m-%d").date() + + # Retrieve batch data for the specified date range + batch_data = feature_view.get_batch_data( + start_time=date_start_dt, + end_time=date_end_dt + datetime.timedelta(days=1), + ) + + # Filter batch data for the specified city + batch_data_filtered = batch_data[batch_data['city_name'] == city_name] + + return batch_data_filtered[['date', 'pm2_5']].sort_values('date').reset_index(drop=True) + + +def get_future_data(date: str, city_name: str, feature_view, model, encoder) -> pd.DataFrame: + """ + Predicts future PM2.5 data for a specified date and city using a given feature view and model. 
+ + Args: + date (str): The target future date in the format 'YYYY-MM-DD'. + city_name (str): The name of the city for which the prediction is made. + feature_view: The feature view used to retrieve batch data. + model: The machine learning model used for prediction. + encoder (LabelEncoder): Label encoder object to encode 'city_name'. + + Returns: + pd.DataFrame: A DataFrame containing predicted PM2.5 values for each day starting from the target date. + + """ + # Get today's date + today = datetime.date.today() + + # Convert the target date string to a datetime object + date_in_future = datetime.datetime.strptime(date, "%Y-%m-%d").date() + + # Calculate the difference in days between today and the target date + difference_in_days = (date_in_future - today).days + + # Retrieve batch data for the specified date range + batch_data = feature_view.get_batch_data( + start_time=today, + end_time=today + datetime.timedelta(days=1), + ) + + # Filter batch data for the specified city + batch_data_filtered = batch_data[batch_data['city_name'] == city_name] + + # Transform batch data + batch_data_transformed = transform_data(batch_data_filtered, encoder) + + # Initialize a DataFrame to store predicted PM2.5 values + try: + pm2_5_value = batch_data_filtered['pm2_5'].values[0] + except (IndexError, TypeError): + # If accessing pm2_5 values fails, return a message indicating the feature pipeline needs updating + return "Data is not available. Ask user to run the feature pipeline to update data." 
+ else: + # Initialize a DataFrame to store predicted PM2.5 values + predicted_pm2_5_df = pd.DataFrame({ + 'date': [today.strftime("%Y-%m-%d")], + 'pm2_5': pm2_5_value, + }) + + # Iterate through each day starting from tomorrow up to the target date + for day_number in range(1, difference_in_days + 1): + + # Calculate the date for the current future day + date_future_day = (today + datetime.timedelta(days=day_number)).strftime("%Y-%m-%d") + + # Predict PM2.5 for the current day + predicted_pm2_5 = model.predict(batch_data_transformed) + + # Update previous day PM2.5 values in the batch data for the next prediction + batch_data_transformed['pm_2_5_previous_7_day'] = batch_data_transformed['pm_2_5_previous_6_day'] + batch_data_transformed['pm_2_5_previous_6_day'] = batch_data_transformed['pm_2_5_previous_5_day'] + batch_data_transformed['pm_2_5_previous_5_day'] = batch_data_transformed['pm_2_5_previous_4_day'] + batch_data_transformed['pm_2_5_previous_4_day'] = batch_data_transformed['pm_2_5_previous_3_day'] + batch_data_transformed['pm_2_5_previous_3_day'] = batch_data_transformed['pm_2_5_previous_2_day'] + batch_data_transformed['pm_2_5_previous_2_day'] = batch_data_transformed['pm_2_5_previous_1_day'] + batch_data_transformed['pm_2_5_previous_1_day'] = predicted_pm2_5 + + # Append the predicted PM2.5 value for the current day to the DataFrame + predicted_pm2_5_df = predicted_pm2_5_df._append({ + 'date': date_future_day, + 'pm2_5': predicted_pm2_5[0], + }, ignore_index=True) + + return predicted_pm2_5_df diff --git a/advanced_tutorials/air_quality/functions/common_functions.py b/advanced_tutorials/air_quality/functions/common_functions.py new file mode 100644 index 00000000..98767a09 --- /dev/null +++ b/advanced_tutorials/air_quality/functions/common_functions.py @@ -0,0 +1,25 @@ +import datetime +from geopy.geocoders import Nominatim + + +def convert_date_to_unix(x): + """ + Convert datetime to unix time in milliseconds. 
+ """ + dt_obj = datetime.datetime.strptime(str(x), '%Y-%m-%d %H:%M:%S') + dt_obj = int(dt_obj.timestamp() * 1000) + return dt_obj + + +def get_city_coordinates(city_name: str): + """ + Takes city name and returns its latitude and longitude (rounded to 2 digits after dot). + """ + # Initialize Nominatim API (for getting lat and long of the city) + geolocator = Nominatim(user_agent="MyApp") + city = geolocator.geocode(city_name) + + latitude = round(city.latitude, 2) + longitude = round(city.longitude, 2) + + return latitude, longitude \ No newline at end of file diff --git a/advanced_tutorials/air_quality/functions/context_engineering.py b/advanced_tutorials/air_quality/functions/context_engineering.py new file mode 100644 index 00000000..4b3bd4dc --- /dev/null +++ b/advanced_tutorials/air_quality/functions/context_engineering.py @@ -0,0 +1,191 @@ +import xml.etree.ElementTree as ET +import re +import inspect +from typing import get_type_hints +import json +import datetime +import torch +import sys +import pandas as pd +from functions.air_quality_data_retrieval import get_data_for_date, get_data_in_date_range, get_future_data +from typing import Any, Dict, List + + +def get_type_name(t: Any) -> str: + """Get the name of the type.""" + name = str(t) + if "list" in name or "dict" in name: + return name + else: + return t.__name__ + + +def serialize_function_to_json(func: Any) -> str: + """Serialize a function to JSON.""" + signature = inspect.signature(func) + type_hints = get_type_hints(func) + + function_info = { + "name": func.__name__, + "description": func.__doc__, + "parameters": { + "type": "object", + "properties": {} + }, + "returns": type_hints.get('return', 'void').__name__ + } + + for name, _ in signature.parameters.items(): + param_type = get_type_name(type_hints.get(name, type(None))) + function_info["parameters"]["properties"][name] = {"type": param_type} + + return json.dumps(function_info, indent=2) + + +def generate_hermes(prompt: str, model_llm, 
tokenizer) -> str: + """Retrieves a function name and extracts function parameters based on the user query.""" + fn = """{"name": "function_name", "arguments": {"arg_1": "value_1", "arg_2": value_2, ...}}""" + example = """{"name": "get_data_in_date_range", "arguments": {"date_start": "2024-01-10", "date_end": "2024-01-14", "city_name": "New York"}}""" + + prompt = f"""<|im_start|>system +You are a helpful assistant with access to the following functions: + +{serialize_function_to_json(get_data_for_date)} + +{serialize_function_to_json(get_data_in_date_range)} + +{serialize_function_to_json(get_future_data)} + +###INSTRUCTIONS: +- You need to choose one function to use and retrieve paramenters for this function from the user input. +- If the user query contains 'will', it is very likely that you will need to use the get_future_data function. +- Do not include feature_view, model and encoder parameters. +- Dates should be provided in the format YYYY-MM-DD. +- Generate an 'No Function needed' string if the user query does not require function calling. + +IMPORTANT: Today is {datetime.date.today().strftime("%A")}, {datetime.date.today()}. + +To use one of there functions respond STRICTLY with: + + {fn} + + +###EXAMPLES + +EXAMPLE 1: +- User: Hi! +- AI Assiatant: No Function needed. + +EXAMPLE 2: +- User: Is it good or bad? +- AI Assiatant: No Function needed. + +EXAMPLE 3: +- User: When and what was the minimum air quality from 2024-01-10 till 2024-01-14 in New York? 
+- AI Assistant: + + {example} + + +<|im_end|> +<|im_start|>user +{prompt}<|im_end|> +<|im_start|>assistant""" + + tokens = tokenizer(prompt, return_tensors="pt").to(model_llm.device) + input_size = tokens.input_ids.numel() + with torch.inference_mode(): + generated_tokens = model_llm.generate( + **tokens, + use_cache=True, + do_sample=True, + temperature=0.2, + top_p=1.0, + top_k=0, + max_new_tokens=512, + eos_token_id=tokenizer.eos_token_id, + pad_token_id=tokenizer.eos_token_id, + ) + + return tokenizer.decode( + generated_tokens.squeeze()[input_size:], + skip_special_tokens=True, + ) + + +def extract_function_calls(completion: str) -> List[Dict[str, Any]]: + """Extract function calls from completion.""" + completion = completion.strip() + pattern = r"((.*?))" + match = re.search(pattern, completion, re.DOTALL) + if not match: + return None + + multiplefn = match.group(1) + root = ET.fromstring(multiplefn) + functions = root.findall("functioncall") + + return [json.loads(fn.text) for fn in functions] + + +def invoke_function(function, feature_view, model, encoder) -> pd.DataFrame: + """Invoke a function with given arguments.""" + # Extract function name and arguments from input_data + function_name = function['name'] + arguments = function['arguments'] + + # Using Python's getattr function to dynamically call the function by its name and passing the arguments + function_output = getattr(sys.modules[__name__], function_name)( + **arguments, + feature_view=feature_view, + model=model, + encoder=encoder, + ) + + if type(function_output) == str: + return function_output + + # Round the 'pm2_5' value to 2 decimal places + function_output['pm2_5'] = function_output['pm2_5'].apply(round, ndigits=2) + return function_output + + +def get_context_data(user_query: str, feature_view, model_llm, tokenizer, model_air_quality, encoder) -> str: + """ + Retrieve context data based on user query. + + Args: + user_query (str): The user query. 
+ feature_view: Feature View for data retrieval. + model_llm: The language model. + tokenizer: The tokenizer. + model_air_quality: The air quality model. + encoder: The encoder. + + Returns: + str: The context data. + """ + # Generate a response using LLM + completion = generate_hermes( + user_query, + model_llm, + tokenizer, + ) + + # Extract function calls from the completion + functions = extract_function_calls(completion) + + # If function calls were found + if functions: + # Invoke the function with provided arguments + data = invoke_function(functions[0], feature_view, model_air_quality, encoder) + # Return formatted data as string + if isinstance(data, pd.DataFrame): + return f'Air Quality Measurements for {functions[0]["arguments"]["city_name"]}:\n' + '\n'.join( + [f'Date: {row["date"]}; Air Quality: {row["pm2_5"]}' for _, row in data.iterrows()] + ) + # Return message if data is not updated + return data + + # If no function calls were found, return an empty string + return '' diff --git a/advanced_tutorials/air_quality/functions/llm_chain.py b/advanced_tutorials/air_quality/functions/llm_chain.py new file mode 100644 index 00000000..6d0833ac --- /dev/null +++ b/advanced_tutorials/air_quality/functions/llm_chain.py @@ -0,0 +1,202 @@ +import transformers +from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig +from langchain.llms import HuggingFacePipeline +from langchain.prompts import PromptTemplate +from langchain.chains.llm import LLMChain +from langchain.memory import ConversationBufferWindowMemory +import torch +import datetime +from typing import Any, Dict, Union +from functions.context_engineering import get_context_data + + +def load_model(model_id: str = "teknium/OpenHermes-2.5-Mistral-7B") -> tuple: + """ + Load the LLM and its corresponding tokenizer. + + Args: + model_id (str, optional): Identifier for the pre-trained model. Defaults to "teknium/OpenHermes-2.5-Mistral-7B". 
+ + Returns: + tuple: A tuple containing the loaded model and tokenizer. + """ + + # Load the tokenizer for Mistral-7B-Instruct model + tokenizer = AutoTokenizer.from_pretrained( + model_id, + ) + + # Set the pad token to the unknown token to handle padding + tokenizer.pad_token = tokenizer.unk_token + + # Set the padding side to "right" to prevent warnings during tokenization + tokenizer.padding_side = "right" + + # BitsAndBytesConfig int-4 config + bnb_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.bfloat16, + ) + + # Load the Mistral-7B-Instruct model with quantization configuration + model_llm = AutoModelForCausalLM.from_pretrained( + model_id, + device_map="auto", + quantization_config=bnb_config, + ) + + # Configure the pad token ID in the model to match the tokenizer's pad token ID + model_llm.config.pad_token_id = tokenizer.pad_token_id + + return model_llm, tokenizer + + +def get_prompt_template(): + """ + Retrieve a template for generating prompts in a conversational AI system. + + Returns: + str: A string representing the template for generating prompts. + This template includes placeholders for system information, + instructions, previous conversation, context, date and user query. + """ + prompt_template = """<|im_start|>system +You are a helpful Air Quality assistant. +Provide your answers based on the provided context table which consists of the dates and air quality indicators for the city provided by user. + +INSTRUCTIONS: +- If you don't know the answer, you will respond politely that you cannot help. +- Use the provided table with air quality indicators for city provided by user to generate your answer. +- You answer should be at least one sentence. +- Do not show any calculations to the user. +- If the user asks for the air quality level in specific range, you can calculate an average air quality level. 
+- Make sure that you use correct air quality indicators for the required date. +- Add a description of the air quality level, such as whether it is safe, whether to go for a walk, etc. +- If user asks more general question, use your last responses in the chat history as a context. +<|im_end|> + +Previous conversation: +{chat_history} + +### CONTEXT: +{context} + +IMPORTANT: Today is {date_today}. + +<|im_start|>user +{question}<|im_end|> +<|im_start|>assistant""" + return prompt_template + + +def get_llm_chain(model_llm, tokenizer): + """ + Create and configure a language model chain. + + Args: + model_llm: The pre-trained language model for text generation. + tokenizer: The tokenizer corresponding to the language model. + + Returns: + LLMChain: The configured language model chain. + """ + # Create a text generation pipeline using the loaded model and tokenizer + text_generation_pipeline = transformers.pipeline( + model=model_llm, # The pre-trained language model for text generation + tokenizer=tokenizer, # The tokenizer corresponding to the language model + task="text-generation", # Specify the task as text generation + use_cache=True, + do_sample=True, + temperature=0.4, + top_p=1.0, + top_k=0, + max_new_tokens=512, + eos_token_id=tokenizer.eos_token_id, + pad_token_id=tokenizer.eos_token_id, + ) + + # Create a Hugging Face pipeline for Mistral LLM using the text generation pipeline + mistral_llm = HuggingFacePipeline( + pipeline=text_generation_pipeline, + ) + + # Create prompt from prompt template + prompt = PromptTemplate( + input_variables=["context", "question", "date_today", "chat_history"], + template=get_prompt_template(), + ) + + # Create a ConversationBufferWindowMemory with specified configuration + memory = ConversationBufferWindowMemory( + k=3, # Number of turns to remember in the conversation buffer + memory_key="chat_history", # Key to store the conversation history in memory + input_key="question", # Key to access the input question in the 
conversation + ) + + # Create LLM chain + llm_chain = LLMChain( + llm=mistral_llm, + prompt=prompt, + verbose=False, + memory=memory, + ) + + return llm_chain + + +def generate_response( + user_query: str, + feature_view, + model_llm, + tokenizer, + model_air_quality, + encoder, + llm_chain, + verbose: bool = False, +) -> str: + """ + Generate response to user query using LLM chain and context data. + + Args: + user_query (str): The user's query. + feature_view: Feature view for data retrieval. + model_llm: Language model for text generation. + tokenizer: Tokenizer for processing text. + model_air_quality: Model for predicting air quality. + encoder: Label Encoder for the city_name column. + llm_chain: LLM Chain. + verbose (bool): Whether to print verbose information. Defaults to False. + + Returns: + str: Generated response to the user query. + """ + + # Get context data based on user query + context = get_context_data( + user_query, + feature_view, + model_llm, + tokenizer, + model_air_quality, + encoder, + ) + + # Get today's date in a readable format + date_today = f'{datetime.date.today().strftime("%A")}, {datetime.date.today()}' + + # Print today's date and context information if verbose mode is enabled + if verbose: + print(f"๐Ÿ—“๏ธ Today's date: {date_today}") + print(f'๐Ÿ“– {context}') + + # Invoke the language model chain with relevant context + model_output = llm_chain.invoke({ + "context": context, + "date_today": date_today, + "question": user_query, + }) + + # Return the generated text from the model output + return model_output['text'] diff --git a/advanced_tutorials/air_quality/functions/parse_air_quality.py b/advanced_tutorials/air_quality/functions/parse_air_quality.py new file mode 100644 index 00000000..06dd41fe --- /dev/null +++ b/advanced_tutorials/air_quality/functions/parse_air_quality.py @@ -0,0 +1,79 @@ +import time +from functions.common_functions import * +import requests +import pandas as pd + + +def get_aqi_data_from_open_meteo( + 
city_name: str, + start_date: str, + end_date: str, + coordinates: list = None, + pollutant: str = "pm2_5" + ): + """ + Takes [city name OR coordinates] and returns pandas DataFrame with AQI data. + + Examples of arguments: + ... + coordinates=(47.755, -122.2806), + start_date="2023-01-01", + pollutant="no2" + ... + """ + start_of_cell = time.time() + + if coordinates: + latitude, longitude = coordinates + else: + latitude, longitude = get_city_coordinates(city_name=city_name) + + pollutant = pollutant.lower() + if pollutant == "pm2.5": + pollutant = "pm2_5" + + # make it work with both "no2" and "nitrogen_dioxide" passed. + if pollutant == "no2": + pollutant = "nitrogen_dioxide" + + params = { + 'latitude': latitude, + 'longitude': longitude, + 'hourly': [pollutant], + 'start_date': start_date, + 'end_date': end_date, + 'timezone': "Europe/London" + } + + # base endpoint + base_url = "https://air-quality-api.open-meteo.com/v1/air-quality" + try: + response = requests.get(base_url, params=params) + except ConnectionError: + response = requests.get(base_url, params=params) + response_json = response.json() + res_df = pd.DataFrame(response_json["hourly"]) + + # convert dates + res_df["time"] = pd.to_datetime(res_df["time"]) + + # resample to days + res_df = res_df.groupby(res_df['time'].dt.date).mean(numeric_only=True).reset_index() + res_df[pollutant] = round(res_df[pollutant], 1) + + # rename columns + res_df = res_df.rename(columns={ + "time": "date" + }) + + res_df["city_name"] = city_name + + # change columns order + res_df = res_df[ + ['city_name', 'date', pollutant] + ] + end_of_cell = time.time() + print(f"Processed {pollutant.upper()} for {city_name} since {start_date} till {end_date}.") + print(f"Took {round(end_of_cell - start_of_cell, 2)} sec.\n") + + return res_df \ No newline at end of file diff --git a/advanced_tutorials/air_quality/functions/parse_weather.py b/advanced_tutorials/air_quality/functions/parse_weather.py new file mode 100644 index 
00000000..bbebc34c --- /dev/null +++ b/advanced_tutorials/air_quality/functions/parse_weather.py @@ -0,0 +1,81 @@ +import time +from functions.common_functions import * +import requests +import pandas as pd + + +def get_weather_data_from_open_meteo( + city_name: str, + start_date: str, + end_date: str, + coordinates: list = None, + forecast: bool = False, + ): + """ + Takes [city name OR coordinates] and returns pandas DataFrame with weather data. + + Examples of arguments: + coordinates=(47.755, -122.2806), start_date="2023-01-01" + """ + start_of_cell = time.time() + + if coordinates: + latitude, longitude = coordinates + else: + latitude, longitude = get_city_coordinates(city_name=city_name) + + params = { + 'latitude': latitude, + 'longitude': longitude, + 'daily': ["temperature_2m_max", "temperature_2m_min", + "precipitation_sum", "rain_sum", "snowfall_sum", + "precipitation_hours", "windspeed_10m_max", + "windgusts_10m_max", "winddirection_10m_dominant"], + 'timezone': "Europe/London", + 'start_date': start_date, + 'end_date': end_date, + } + + if forecast: + # historical forecast endpoint + base_url = 'https://api.open-meteo.com/v1/forecast' + else: + # historical observations endpoint + base_url = 'https://archive-api.open-meteo.com/v1/archive' + + try: + response = requests.get(base_url, params=params) + time.sleep(2) + except ConnectionError: + response = requests.get(base_url, params=params) + + response_json = response.json() + + res_df = pd.DataFrame(response_json["daily"]) + res_df["city_name"] = city_name + + # rename columns + res_df = res_df.rename(columns={ + "time": "date", + "temperature_2m_max": "temperature_max", + "temperature_2m_min": "temperature_min", + "windspeed_10m_max": "wind_speed_max", + "winddirection_10m_dominant": "wind_direction_dominant", + "windgusts_10m_max": "wind_gusts_max" + }) + + # change columns order + res_df = res_df[ + ['city_name', 'date', 'temperature_max', 'temperature_min', + 'precipitation_sum', 'rain_sum', 
'snowfall_sum', + 'precipitation_hours', 'wind_speed_max', + 'wind_gusts_max', 'wind_direction_dominant'] + ] + + # convert dates in 'date' column + res_df["date"] = pd.to_datetime(res_df["date"]) + end_of_cell = time.time() + print(f"Parsed weather for {city_name} since {start_date} till {end_date}.") + print(f"Took {round(end_of_cell - start_of_cell, 2)} sec.\n") + + return res_df \ No newline at end of file diff --git a/advanced_tutorials/air_quality/requirements.txt b/advanced_tutorials/air_quality/requirements.txt index 9d933db1..d85ffe99 100644 --- a/advanced_tutorials/air_quality/requirements.txt +++ b/advanced_tutorials/air_quality/requirements.txt @@ -1,7 +1,8 @@ -hopsworks geopy -pandas -numpy -streamlit -streamlit-folium -joblib \ No newline at end of file +transformers +sentencepiece +protobuf==3.20.0 +langchain +flask-sqlalchemy==3.1.1 +bitsandbytes==0.42.0 +accelerate==0.27.2 From ecfc5bd3f89c608ec05523e6ed549fe49b492119 Mon Sep 17 00:00:00 2001 From: Maksym Zhytnikov <63515947+Maxxx-zh@users.noreply.github.com> Date: Sun, 17 Mar 2024 16:08:29 +0200 Subject: [PATCH 3/3] AirQuality Apps --- .../1_air_quality_feature_backfill.ipynb | 6 +- .../2_air_quality_feature_pipeline.ipynb | 594 +++++++++--------- .../3_air_quality_training_pipeline.ipynb | 345 +++++++--- .../4_air_quality_batch_inference.ipynb | 348 +++------- .../air_quality/5_function_calling.ipynb | 239 +++---- advanced_tutorials/air_quality/app_gradio.py | 104 +++ .../air_quality/{app.py => app_streamlit.py} | 1 - advanced_tutorials/air_quality/app_voice.py | 39 -- .../air_quality/requirements.txt | 12 +- 9 files changed, 845 insertions(+), 843 deletions(-) create mode 100644 advanced_tutorials/air_quality/app_gradio.py rename advanced_tutorials/air_quality/{app.py => app_streamlit.py} (99%) delete mode 100644 advanced_tutorials/air_quality/app_voice.py diff --git a/advanced_tutorials/air_quality/1_air_quality_feature_backfill.ipynb 
b/advanced_tutorials/air_quality/1_air_quality_feature_backfill.ipynb index 75f1b875..1ce0301e 100644 --- a/advanced_tutorials/air_quality/1_air_quality_feature_backfill.ipynb +++ b/advanced_tutorials/air_quality/1_air_quality_feature_backfill.ipynb @@ -49,8 +49,8 @@ } ], "source": [ - "!pip install -U hopsworks --quiet\n", - "!pip install geopy folium streamlit-folium --q" + "!pip install -r requirements.txt --quiet\n", + "!pip install -U hopsworks --quiet" ] }, { @@ -1145,7 +1145,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/advanced_tutorials/air_quality/2_air_quality_feature_pipeline.ipynb b/advanced_tutorials/air_quality/2_air_quality_feature_pipeline.ipynb index 472718b5..8f159c75 100644 --- a/advanced_tutorials/air_quality/2_air_quality_feature_pipeline.ipynb +++ b/advanced_tutorials/air_quality/2_air_quality_feature_pipeline.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "fca940b7", + "id": "f16a717d", "metadata": {}, "source": [ "# **Hopsworks Feature Store** - Part 02: Feature Pipeline\n", @@ -16,7 +16,7 @@ }, { "cell_type": "markdown", - "id": "092aa908", + "id": "37facd6e", "metadata": {}, "source": [ "### ๐Ÿ“ Imports" @@ -25,7 +25,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "1856e6c3", + "id": "77d2fbe5", "metadata": {}, "outputs": [], "source": [ @@ -47,7 +47,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "7f50400a", + "id": "c14c97e6", "metadata": { "tags": [] }, @@ -62,7 +62,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "32ca6a86", + "id": "5b67e039", "metadata": { "tags": [] }, @@ -70,7 +70,7 @@ { "data": { "text/plain": [ - "(datetime.date(2024, 3, 8), '2024-03-08')" + "(datetime.date(2024, 3, 17), '2024-03-17')" ] }, "execution_count": 3, @@ -88,7 +88,7 @@ }, { "cell_type": "markdown", - "id": "dcfc09fc", + "id": "0c5ebe2a", "metadata": {}, "source": [ "### ๐Ÿ”ฎ Connecting 
to Hopsworks Feature Store " @@ -97,7 +97,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "541fe4e1", + "id": "730eb857", "metadata": {}, "outputs": [ { @@ -121,7 +121,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "53669d5f", + "id": "ddd400ad", "metadata": {}, "outputs": [], "source": [ @@ -138,7 +138,7 @@ }, { "cell_type": "markdown", - "id": "c060b74c", + "id": "f992009d", "metadata": {}, "source": [ "## ๐ŸŒซ Filling gaps in Air Quality data (PM2.5)" @@ -147,7 +147,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "4166b886", + "id": "37058d7f", "metadata": { "tags": [] }, @@ -156,8 +156,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Finished: Reading data from Hopsworks, using ArrowFlight (2.89s) \n", - "Finished: Reading data from Hopsworks, using ArrowFlight (2.31s) \n" + "Finished: Reading data from Hopsworks, using ArrowFlight (2.30s) \n", + "Finished: Reading data from Hopsworks, using ArrowFlight (1.48s) \n" ] } ], @@ -170,7 +170,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "4097adfc", + "id": "cee48adb", "metadata": { "tags": [] }, @@ -190,7 +190,7 @@ { "cell_type": "code", "execution_count": 8, - "id": "22967ce2", + "id": "49b9e259", "metadata": { "tags": [] }, @@ -199,8 +199,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "โ›ณ๏ธ Last update for Paris: 2024-03-07\n", - "โ›ณ๏ธ Last update for Columbus: 2024-03-07\n" + "โ›ณ๏ธ Last update for Paris: 2024-03-15\n", + "โ›ณ๏ธ Last update for Columbus: 2024-03-15\n" ] } ], @@ -219,7 +219,7 @@ { "cell_type": "code", "execution_count": 9, - "id": "b4829636", + "id": "f658d581", "metadata": {}, "outputs": [], "source": [ @@ -231,7 +231,7 @@ }, { "cell_type": "markdown", - "id": "301fa83a", + "id": "f6df0f7f", "metadata": {}, "source": [ "### ๐Ÿง™๐Ÿผโ€โ™‚๏ธ Parsing PM2.5 data" @@ -240,7 +240,7 @@ { "cell_type": "code", "execution_count": 10, - "id": "ed244952", + "id": "cc68ab56", "metadata": { "scrolled": true, "tags": [] @@ -250,144 
+250,144 @@ "name": "stdout", "output_type": "stream", "text": [ - "Processed PM2_5 for Amsterdam since 2024-02-08 till 2024-03-08.\n", - "Took 0.12 sec.\n", - "\n", - "Processed PM2_5 for Athina since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Amsterdam since 2024-02-16 till 2024-03-17.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Berlin since 2024-02-08 till 2024-03-08.\n", - "Took 0.1 sec.\n", - "\n", - "Processed PM2_5 for Gdansk since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Athina since 2024-02-16 till 2024-03-17.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Krakรณw since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Berlin since 2024-02-16 till 2024-03-17.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for London since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Gdansk since 2024-02-16 till 2024-03-17.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Madrid since 2024-02-08 till 2024-03-08.\n", - "Took 0.1 sec.\n", + "Processed PM2_5 for Krakรณw since 2024-02-16 till 2024-03-17.\n", + "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Marseille since 2024-02-08 till 2024-03-08.\n", - "Took 0.1 sec.\n", + "Processed PM2_5 for London since 2024-02-16 till 2024-03-17.\n", + "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Milano since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Madrid since 2024-02-16 till 2024-03-17.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Mรผnchen since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Marseille since 2024-02-16 till 2024-03-17.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Napoli since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Milano since 2024-02-16 till 2024-03-17.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Paris since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Mรผnchen since 2024-02-16 till 2024-03-17.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Sevilla since 2024-02-08 till 
2024-03-08.\n", + "Processed PM2_5 for Napoli since 2024-02-16 till 2024-03-17.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Stockholm since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Paris since 2024-02-16 till 2024-03-17.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Tallinn since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Sevilla since 2024-02-16 till 2024-03-17.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Varna since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Stockholm since 2024-02-16 till 2024-03-17.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Wien since 2024-02-08 till 2024-03-08.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for Tallinn since 2024-02-16 till 2024-03-17.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Albuquerque since 2024-02-08 till 2024-03-08.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for Varna since 2024-02-16 till 2024-03-17.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for Wien since 2024-02-16 till 2024-03-17.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for Albuquerque since 2024-02-16 till 2024-03-17.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Atlanta since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Atlanta since 2024-02-16 till 2024-03-17.\n", + "Took 0.1 sec.\n", + "\n", + "Processed PM2_5 for Chicago since 2024-02-16 till 2024-03-17.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Chicago since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Columbus since 2024-02-16 till 2024-03-17.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Columbus since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Dallas since 2024-02-16 till 2024-03-17.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Dallas since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Denver since 2024-02-16 till 2024-03-17.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Denver since 2024-02-08 till 2024-03-08.\n", + 
"Processed PM2_5 for Houston since 2024-02-16 till 2024-03-17.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Houston since 2024-02-08 till 2024-03-08.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for Los Angeles since 2024-02-16 till 2024-03-17.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Los Angeles since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for New York since 2024-02-16 till 2024-03-17.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for New York since 2024-02-08 till 2024-03-08.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for Phoenix-Mesa since 2024-02-16 till 2024-03-17.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Phoenix-Mesa since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Salt Lake City since 2024-02-16 till 2024-03-17.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Salt Lake City since 2024-02-08 till 2024-03-08.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for San Francisco since 2024-02-16 till 2024-03-17.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for San Francisco since 2024-02-08 till 2024-03-08.\n", - "Took 0.26 sec.\n", + "Processed PM2_5 for Tampa since 2024-02-16 till 2024-03-17.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Tampa since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Bellevue-SE 12th St since 2024-02-16 till 2024-03-17.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Bellevue-SE 12th St since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for DARRINGTON - FIR ST (Darrington High School) since 2024-02-16 till 2024-03-17.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for DARRINGTON - FIR ST (Darrington High School) since 2024-02-08 till 2024-03-08.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for KENT - JAMES & CENTRAL since 2024-02-16 till 2024-03-17.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for KENT - JAMES & CENTRAL since 2024-02-08 till 2024-03-08.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for LAKE FOREST PARK TOWNE CENTER since 
2024-02-16 till 2024-03-17.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for LAKE FOREST PARK TOWNE CENTER since 2024-02-08 till 2024-03-08.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for MARYSVILLE - 7TH AVE (Marysville Junior High) since 2024-02-16 till 2024-03-17.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for MARYSVILLE - 7TH AVE (Marysville Junior High) since 2024-02-08 till 2024-03-08.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for NORTH BEND - NORTH BEND WAY since 2024-02-16 till 2024-03-17.\n", + "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for NORTH BEND - NORTH BEND WAY since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for SEATTLE - BEACON HILL since 2024-02-16 till 2024-03-17.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for SEATTLE - BEACON HILL since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for SEATTLE - DUWAMISH since 2024-02-16 till 2024-03-17.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for SEATTLE - DUWAMISH since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for SEATTLE - SOUTH PARK #2 since 2024-02-16 till 2024-03-17.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for SEATTLE - SOUTH PARK #2 since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Seattle-10th & Weller since 2024-02-16 till 2024-03-17.\n", + "Took 0.13 sec.\n", + "\n", + "Processed PM2_5 for TACOMA - ALEXANDER AVE since 2024-02-16 till 2024-03-17.\n", "Took 0.1 sec.\n", "\n", - "Processed PM2_5 for Seattle-10th & Weller since 2024-02-08 till 2024-03-08.\n", - "Took 0.11 sec.\n", + "Processed PM2_5 for TACOMA - L STREET since 2024-02-16 till 2024-03-17.\n", + "Took 0.66 sec.\n", "\n", - "Processed PM2_5 for TACOMA - ALEXANDER AVE since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Tacoma-S 36th St since 2024-02-16 till 2024-03-17.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for TACOMA - L STREET since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Tukwila Allentown since 2024-02-16 till 2024-03-17.\n", "Took 
0.11 sec.\n", "\n", - "Processed PM2_5 for Tacoma-S 36th St since 2024-02-08 till 2024-03-08.\n", + "Processed PM2_5 for Tulalip-Totem Beach Rd since 2024-02-16 till 2024-03-17.\n", "Took 0.11 sec.\n", "\n", - "Processed PM2_5 for Tukwila Allentown since 2024-02-08 till 2024-03-08.\n", - "Took 0.1 sec.\n", - "\n", - "Processed PM2_5 for Tulalip-Totem Beach Rd since 2024-02-08 till 2024-03-08.\n", - "Took 0.1 sec.\n", - "\n", "----------------------------------------------------------------\n", - "Parsed new PM2.5 data for ALL locations up to 2024-03-08.\n", - "Took 5.05 sec.\n", + "Parsed new PM2.5 data for ALL locations up to 2024-03-17.\n", + "Took 5.44 sec.\n", "\n" ] } @@ -427,7 +427,7 @@ { "cell_type": "code", "execution_count": 11, - "id": "93a60ead", + "id": "09db1460", "metadata": {}, "outputs": [ { @@ -458,22 +458,22 @@ " \n", " \n", " \n", - " 1347\n", + " 1392\n", " Tulalip-Totem Beach Rd\n", - " 2024-03-06\n", - " 6.3\n", + " 2024-03-15\n", + " 14.5\n", " \n", " \n", - " 1348\n", + " 1393\n", " Tulalip-Totem Beach Rd\n", - " 2024-03-07\n", - " 8.8\n", + " 2024-03-16\n", + " 13.7\n", " \n", " \n", - " 1349\n", + " 1394\n", " Tulalip-Totem Beach Rd\n", - " 2024-03-08\n", - " 11.0\n", + " 2024-03-17\n", + " 15.1\n", " \n", " \n", "\n", @@ -481,9 +481,9 @@ ], "text/plain": [ " city_name date pm2_5\n", - "1347 Tulalip-Totem Beach Rd 2024-03-06 6.3\n", - "1348 Tulalip-Totem Beach Rd 2024-03-07 8.8\n", - "1349 Tulalip-Totem Beach Rd 2024-03-08 11.0" + "1392 Tulalip-Totem Beach Rd 2024-03-15 14.5\n", + "1393 Tulalip-Totem Beach Rd 2024-03-16 13.7\n", + "1394 Tulalip-Totem Beach Rd 2024-03-17 15.1" ] }, "execution_count": 11, @@ -497,7 +497,7 @@ }, { "cell_type": "markdown", - "id": "77529eea", + "id": "7cbabf34", "metadata": { "tags": [] }, @@ -508,7 +508,7 @@ { "cell_type": "code", "execution_count": 12, - "id": "528c475f", + "id": "100f0f2d", "metadata": { "tags": [] }, @@ -521,7 +521,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "da05c767", + 
"id": "91ded83e", "metadata": { "tags": [] }, @@ -572,76 +572,76 @@ " \n", " \n", " \n", - " 1347\n", - " London\n", - " 2024-03-08\n", - " 24.3\n", - " 25.7\n", - " 20.6\n", - " 12.3\n", - " 12.2\n", - " 19.4\n", - " 7.0\n", - " 6.5\n", + " 1392\n", + " Athina\n", + " 2024-03-17\n", + " 25.6\n", + " 25.8\n", + " 20.2\n", + " 14.8\n", + " 8.4\n", + " 10.3\n", + " 9.6\n", + " 17.2\n", " ...\n", - " 6.179256\n", + " 5.376253\n", " 2024\n", - " 8\n", + " 17\n", " 3\n", - " 4\n", - " 0\n", - " 0.920971\n", - " 0.38963\n", - " -0.433884\n", - " -0.900969\n", + " 6\n", + " 1\n", + " 0.970064\n", + " 0.24285\n", + " -0.781831\n", + " 0.62349\n", " \n", " \n", - " 1348\n", - " Milano\n", - " 2024-03-08\n", - " 27.9\n", - " 39.8\n", - " 15.0\n", - " 25.2\n", - " 13.1\n", - " 11.2\n", - " 18.4\n", - " 27.3\n", + " 1393\n", + " Los Angeles\n", + " 2024-03-17\n", + " 28.1\n", + " 21.5\n", + " 15.1\n", + " 12.7\n", + " 16.4\n", + " 11.0\n", + " 27.0\n", + " 36.3\n", " ...\n", - " 29.069146\n", + " 7.696575\n", " 2024\n", - " 8\n", + " 17\n", " 3\n", - " 4\n", - " 0\n", - " 0.920971\n", - " 0.38963\n", - " -0.433884\n", - " -0.900969\n", + " 6\n", + " 1\n", + " 0.970064\n", + " 0.24285\n", + " -0.781831\n", + " 0.62349\n", " \n", " \n", - " 1349\n", - " Krakรณw\n", - " 2024-03-08\n", - " 35.4\n", - " 21.7\n", - " 27.2\n", - " 38.0\n", - " 48.3\n", - " 55.4\n", - " 54.4\n", - " 41.3\n", + " 1394\n", + " Milano\n", + " 2024-03-17\n", + " 43.4\n", + " 23.6\n", + " 16.8\n", + " 46.6\n", + " 32.4\n", + " 17.0\n", + " 21.2\n", + " 10.4\n", " ...\n", - " 15.398092\n", + " 22.044394\n", " 2024\n", - " 8\n", + " 17\n", " 3\n", - " 4\n", - " 0\n", - " 0.920971\n", - " 0.38963\n", - " -0.433884\n", - " -0.900969\n", + " 6\n", + " 1\n", + " 0.970064\n", + " 0.24285\n", + " -0.781831\n", + " 0.62349\n", " \n", " \n", "\n", @@ -649,35 +649,35 @@ "" ], "text/plain": [ - " city_name date pm2_5 pm_2_5_previous_1_day \\\n", - "1347 London 2024-03-08 24.3 25.7 \n", - "1348 Milano 2024-03-08 27.9 
39.8 \n", - "1349 Krakรณw 2024-03-08 35.4 21.7 \n", + " city_name date pm2_5 pm_2_5_previous_1_day \\\n", + "1392 Athina 2024-03-17 25.6 25.8 \n", + "1393 Los Angeles 2024-03-17 28.1 21.5 \n", + "1394 Milano 2024-03-17 43.4 23.6 \n", "\n", " pm_2_5_previous_2_day pm_2_5_previous_3_day pm_2_5_previous_4_day \\\n", - "1347 20.6 12.3 12.2 \n", - "1348 15.0 25.2 13.1 \n", - "1349 27.2 38.0 48.3 \n", + "1392 20.2 14.8 8.4 \n", + "1393 15.1 12.7 16.4 \n", + "1394 16.8 46.6 32.4 \n", "\n", " pm_2_5_previous_5_day pm_2_5_previous_6_day pm_2_5_previous_7_day \\\n", - "1347 19.4 7.0 6.5 \n", - "1348 11.2 18.4 27.3 \n", - "1349 55.4 54.4 41.3 \n", + "1392 10.3 9.6 17.2 \n", + "1393 11.0 27.0 36.3 \n", + "1394 17.0 21.2 10.4 \n", "\n", " ... exp_std_28_days year day_of_month month day_of_week \\\n", - "1347 ... 6.179256 2024 8 3 4 \n", - "1348 ... 29.069146 2024 8 3 4 \n", - "1349 ... 15.398092 2024 8 3 4 \n", + "1392 ... 5.376253 2024 17 3 6 \n", + "1393 ... 7.696575 2024 17 3 6 \n", + "1394 ... 22.044394 2024 17 3 6 \n", "\n", " is_weekend sin_day_of_year cos_day_of_year sin_day_of_week \\\n", - "1347 0 0.920971 0.38963 -0.433884 \n", - "1348 0 0.920971 0.38963 -0.433884 \n", - "1349 0 0.920971 0.38963 -0.433884 \n", + "1392 1 0.970064 0.24285 -0.781831 \n", + "1393 1 0.970064 0.24285 -0.781831 \n", + "1394 1 0.970064 0.24285 -0.781831 \n", "\n", " cos_day_of_week \n", - "1347 -0.900969 \n", - "1348 -0.900969 \n", - "1349 -0.900969 \n", + "1392 0.62349 \n", + "1393 0.62349 \n", + "1394 0.62349 \n", "\n", "[3 rows x 31 columns]" ] @@ -700,7 +700,7 @@ { "cell_type": "code", "execution_count": 14, - "id": "1b5a24e7", + "id": "a387ac8a", "metadata": {}, "outputs": [ { @@ -722,7 +722,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "5c34d3e1", + "id": "bb4b8914", "metadata": { "tags": [] }, @@ -730,7 +730,7 @@ { "data": { "text/plain": [ - "(90, 31)" + "(135, 31)" ] }, "execution_count": 15, @@ -745,7 +745,7 @@ }, { "cell_type": "markdown", - "id": "834bef82", + "id": 
"fe63cf55", "metadata": {}, "source": [ "## ๐ŸŒฆ Filling gaps in Weather data" @@ -754,7 +754,7 @@ { "cell_type": "code", "execution_count": 16, - "id": "bf43c662", + "id": "36a7388e", "metadata": { "tags": [] }, @@ -773,7 +773,7 @@ }, { "cell_type": "markdown", - "id": "00ec9b25", + "id": "6c868dae", "metadata": { "tags": [] }, @@ -784,7 +784,7 @@ { "cell_type": "code", "execution_count": 17, - "id": "6835d984", + "id": "0ad03c2d", "metadata": { "scrolled": true, "tags": [] @@ -794,144 +794,144 @@ "name": "stdout", "output_type": "stream", "text": [ - "Parsed weather for Amsterdam since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Amsterdam since 2024-03-15 till 2024-03-17.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for Athina since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Athina since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Berlin since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Berlin since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Gdansk since 2024-03-07 till 2024-03-08.\n", - "Took 2.1 sec.\n", - "\n", - "Parsed weather for Krakรณw since 2024-03-07 till 2024-03-08.\n", - "Took 2.11 sec.\n", - "\n", - "Parsed weather for London since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Gdansk since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Madrid since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Krakรณw since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Marseille since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for London since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Milano since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Madrid since 2024-03-15 till 2024-03-17.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for Mรผnchen since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Marseille since 2024-03-15 till 
2024-03-17.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for Napoli since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Milano since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Paris since 2024-03-07 till 2024-03-08.\n", - "Took 2.11 sec.\n", + "Parsed weather for Mรผnchen since 2024-03-15 till 2024-03-17.\n", + "Took 2.1 sec.\n", "\n", - "Parsed weather for Sevilla since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Napoli since 2024-03-15 till 2024-03-17.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for Stockholm since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Paris since 2024-03-15 till 2024-03-17.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for Tallinn since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Sevilla since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Varna since 2024-03-07 till 2024-03-08.\n", - "Took 2.12 sec.\n", - "\n", - "Parsed weather for Wien since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Stockholm since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Albuquerque since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Tallinn since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Atlanta since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Varna since 2024-03-15 till 2024-03-17.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for Chicago since 2024-03-07 till 2024-03-08.\n", - "Took 2.11 sec.\n", - "\n", - "Parsed weather for Columbus since 2024-03-07 till 2024-03-08.\n", - "Took 2.11 sec.\n", + "Parsed weather for Wien since 2024-03-15 till 2024-03-17.\n", + "Took 2.1 sec.\n", "\n", - "Parsed weather for Dallas since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Albuquerque since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Denver since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Atlanta since 
2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Houston since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Chicago since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Los Angeles since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Columbus since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for New York since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Dallas since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Phoenix-Mesa since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Denver since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Salt Lake City since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Houston since 2024-03-15 till 2024-03-17.\n", + "Took 2.11 sec.\n", + "\n", + "Parsed weather for Los Angeles since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for San Francisco since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for New York since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Tampa since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Phoenix-Mesa since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Bellevue-SE 12th St since 2024-03-07 till 2024-03-08.\n", - "Took 2.12 sec.\n", + "Parsed weather for Salt Lake City since 2024-03-15 till 2024-03-17.\n", + "Took 2.1 sec.\n", "\n", - "Parsed weather for DARRINGTON - FIR ST (Darrington High School) since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for San Francisco since 2024-03-15 till 2024-03-17.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for KENT - JAMES & CENTRAL since 2024-03-07 till 2024-03-08.\n", - "Took 2.11 sec.\n", + "Parsed weather for Tampa since 2024-03-15 till 2024-03-17.\n", + "Took 2.1 sec.\n", "\n", - "Parsed weather for LAKE FOREST PARK TOWNE CENTER since 2024-03-07 till 
2024-03-08.\n", + "Parsed weather for Bellevue-SE 12th St since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for MARYSVILLE - 7TH AVE (Marysville Junior High) since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for DARRINGTON - FIR ST (Darrington High School) since 2024-03-15 till 2024-03-17.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for NORTH BEND - NORTH BEND WAY since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for KENT - JAMES & CENTRAL since 2024-03-15 till 2024-03-17.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for SEATTLE - BEACON HILL since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for LAKE FOREST PARK TOWNE CENTER since 2024-03-15 till 2024-03-17.\n", + "Took 2.12 sec.\n", + "\n", + "Parsed weather for MARYSVILLE - 7TH AVE (Marysville Junior High) since 2024-03-15 till 2024-03-17.\n", + "Took 2.12 sec.\n", + "\n", + "Parsed weather for NORTH BEND - NORTH BEND WAY since 2024-03-15 till 2024-03-17.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for SEATTLE - DUWAMISH since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for SEATTLE - BEACON HILL since 2024-03-15 till 2024-03-17.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for SEATTLE - SOUTH PARK #2 since 2024-03-07 till 2024-03-08.\n", - "Took 2.1 sec.\n", + "Parsed weather for SEATTLE - DUWAMISH since 2024-03-15 till 2024-03-17.\n", + "Took 2.11 sec.\n", "\n", - "Parsed weather for Seattle-10th & Weller since 2024-03-07 till 2024-03-08.\n", - "Took 2.1 sec.\n", + "Parsed weather for SEATTLE - SOUTH PARK #2 since 2024-03-15 till 2024-03-17.\n", + "Took 2.11 sec.\n", "\n", - "Parsed weather for TACOMA - ALEXANDER AVE since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Seattle-10th & Weller since 2024-03-15 till 2024-03-17.\n", "Took 2.11 sec.\n", "\n", - "Parsed weather for TACOMA - L STREET since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for TACOMA - ALEXANDER AVE since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - 
"Parsed weather for Tacoma-S 36th St since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for TACOMA - L STREET since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Tukwila Allentown since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Tacoma-S 36th St since 2024-03-15 till 2024-03-17.\n", "Took 2.1 sec.\n", "\n", - "Parsed weather for Tulalip-Totem Beach Rd since 2024-03-07 till 2024-03-08.\n", + "Parsed weather for Tukwila Allentown since 2024-03-15 till 2024-03-17.\n", "Took 2.11 sec.\n", "\n", + "Parsed weather for Tulalip-Totem Beach Rd since 2024-03-15 till 2024-03-17.\n", + "Took 2.1 sec.\n", + "\n", "----------------------------------------------------------------\n", - "Parsed new weather data for ALL cities up to 2024-03-08.\n", - "Took 94.82 sec.\n", + "Parsed new weather data for ALL cities up to 2024-03-17.\n", + "Took 94.79 sec.\n", "\n" ] } @@ -975,7 +975,7 @@ { "cell_type": "code", "execution_count": 18, - "id": "594213e0", + "id": "de4d4870", "metadata": { "tags": [] }, @@ -997,7 +997,7 @@ { "cell_type": "code", "execution_count": 19, - "id": "6c799b26", + "id": "82d7bc88", "metadata": { "tags": [] }, @@ -1039,69 +1039,69 @@ " \n", " \n", " \n", - " 87\n", - " Tukwila Allentown\n", - " 2024-03-08\n", - " 12.1\n", - " 1.8\n", + " 132\n", + " Tulalip-Totem Beach Rd\n", + " 2024-03-15\n", + " 15.3\n", + " 4.2\n", " 0.0\n", " 0.0\n", " 0.0\n", " 0.0\n", - " 9.4\n", - " 15.1\n", - " 217\n", - " 1709856000000\n", + " 10.8\n", + " 22.3\n", + " 343\n", + " 1710460800000\n", " \n", " \n", - " 88\n", + " 133\n", " Tulalip-Totem Beach Rd\n", - " 2024-03-07\n", - " 8.2\n", - " 0.4\n", + " 2024-03-16\n", + " 21.3\n", + " 4.8\n", " 0.0\n", " 0.0\n", " 0.0\n", " 0.0\n", - " 9.6\n", - " 12.2\n", - " 70\n", - " 1709769600000\n", + " 9.8\n", + " 25.9\n", + " 336\n", + " 1710547200000\n", " \n", " \n", - " 89\n", + " 134\n", " Tulalip-Totem Beach Rd\n", - " 2024-03-08\n", - " 10.7\n", - " 3.0\n", + " 2024-03-17\n", + " 
22.0\n", + " 9.2\n", " 0.0\n", " 0.0\n", " 0.0\n", " 0.0\n", - " 11.5\n", - " 25.6\n", - " 145\n", - " 1709856000000\n", + " 11.4\n", + " 14.0\n", + " 93\n", + " 1710633600000\n", " \n", " \n", "\n", "" ], "text/plain": [ - " city_name date temperature_max temperature_min \\\n", - "87 Tukwila Allentown 2024-03-08 12.1 1.8 \n", - "88 Tulalip-Totem Beach Rd 2024-03-07 8.2 0.4 \n", - "89 Tulalip-Totem Beach Rd 2024-03-08 10.7 3.0 \n", + " city_name date temperature_max temperature_min \\\n", + "132 Tulalip-Totem Beach Rd 2024-03-15 15.3 4.2 \n", + "133 Tulalip-Totem Beach Rd 2024-03-16 21.3 4.8 \n", + "134 Tulalip-Totem Beach Rd 2024-03-17 22.0 9.2 \n", "\n", - " precipitation_sum rain_sum snowfall_sum precipitation_hours \\\n", - "87 0.0 0.0 0.0 0.0 \n", - "88 0.0 0.0 0.0 0.0 \n", - "89 0.0 0.0 0.0 0.0 \n", + " precipitation_sum rain_sum snowfall_sum precipitation_hours \\\n", + "132 0.0 0.0 0.0 0.0 \n", + "133 0.0 0.0 0.0 0.0 \n", + "134 0.0 0.0 0.0 0.0 \n", "\n", - " wind_speed_max wind_gusts_max wind_direction_dominant unix_time \n", - "87 9.4 15.1 217 1709856000000 \n", - "88 9.6 12.2 70 1709769600000 \n", - "89 11.5 25.6 145 1709856000000 " + " wind_speed_max wind_gusts_max wind_direction_dominant unix_time \n", + "132 10.8 22.3 343 1710460800000 \n", + "133 9.8 25.9 336 1710547200000 \n", + "134 11.4 14.0 93 1710633600000 " ] }, "execution_count": 19, @@ -1123,7 +1123,7 @@ }, { "cell_type": "markdown", - "id": "4f534824", + "id": "7b5640f9", "metadata": { "tags": [] }, @@ -1134,18 +1134,18 @@ { "cell_type": "code", "execution_count": 20, - "id": "2179deb0", + "id": "fd72be07", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "7b77a556603345b999ca05a825ccb1f1", + "model_id": "ad45db9921ea44f392976d59e79f3999", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Uploading Dataframe: 0.00% | | Rows 0/90 | Elapsed Time: 00:00 | Remaining Time: ?" 
+ "Uploading Dataframe: 0.00% | | Rows 0/135 | Elapsed Time: 00:00 | Remaining Time: ?" ] }, "metadata": {}, @@ -1163,7 +1163,7 @@ { "data": { "text/plain": [ - "(, None)" + "(, None)" ] }, "execution_count": 20, @@ -1179,18 +1179,18 @@ { "cell_type": "code", "execution_count": 21, - "id": "8eb1a6dd", + "id": "cdffe4a9", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "37402c20f64f4d3bab027087f7f83eaf", + "model_id": "afe7919348224c17b0f680c8c8067507", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "Uploading Dataframe: 0.00% | | Rows 0/90 | Elapsed Time: 00:00 | Remaining Time: ?" + "Uploading Dataframe: 0.00% | | Rows 0/135 | Elapsed Time: 00:00 | Remaining Time: ?" ] }, "metadata": {}, @@ -1208,7 +1208,7 @@ { "data": { "text/plain": [ - "(, None)" + "(, None)" ] }, "execution_count": 21, @@ -1223,7 +1223,7 @@ }, { "cell_type": "markdown", - "id": "7e88d41c", + "id": "d03605ea", "metadata": {}, "source": [ "---\n", diff --git a/advanced_tutorials/air_quality/3_air_quality_training_pipeline.ipynb b/advanced_tutorials/air_quality/3_air_quality_training_pipeline.ipynb index f10eeac3..2dc9af7b 100644 --- a/advanced_tutorials/air_quality/3_air_quality_training_pipeline.ipynb +++ b/advanced_tutorials/air_quality/3_air_quality_training_pipeline.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "0468fa97", + "id": "3d8d5c4a", "metadata": { "tags": [] }, @@ -24,7 +24,7 @@ }, { "cell_type": "markdown", - "id": "0b486b05", + "id": "e89e0ed8", "metadata": {}, "source": [ "### ๐Ÿ“ Imports" @@ -33,26 +33,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "a8c7d91f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0m" - ] - } - ], - "source": [ - "!pip install xgboost --q" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "13c54813", + "id": "7e858f8a", "metadata": { "tags": [] }, @@ -61,7 +42,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "2024-02-20 21:27:58,492 INFO: generated new fontManager\n" + "2024-03-12 15:53:54,685 INFO: generated new fontManager\n" ] } ], @@ -86,7 +67,7 @@ }, { "cell_type": "markdown", - "id": "361fb860", + "id": "e4d834bc", "metadata": {}, "source": [ "## ๐Ÿ“ก Connecting to Hopsworks Feature Store " @@ -94,8 +75,8 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "cfdf990e", + "execution_count": 2, + "id": "817cdef7", "metadata": {}, "outputs": [ { @@ -119,8 +100,8 @@ }, { "cell_type": "code", - "execution_count": 4, - "id": "2e8b9506", + "execution_count": 3, + "id": "dff51a06", "metadata": {}, "outputs": [], "source": [ @@ -137,7 +118,7 @@ }, { "cell_type": "markdown", - "id": "ade435c2", + "id": "45881fbc", "metadata": {}, "source": [ "## ๐Ÿ– Feature View Creation and Retrieval " @@ -145,8 +126,8 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "b01f3e2e", + "execution_count": 4, + "id": "0d4e6eba", "metadata": {}, "outputs": [], "source": [ @@ -159,8 +140,8 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "a75142b3", + "execution_count": 5, + "id": "1d5cf648", "metadata": { "scrolled": true, "tags": [] @@ -173,7 +154,7 @@ }, { "cell_type": "markdown", - "id": "c4ed7fa2", + "id": "82c5b7be", "metadata": {}, "source": [ "`Feature Views` stands between **Feature Groups** and **Training Dataset**. ะกombining **Feature Groups** we can create **Feature Views** which store a metadata of our data. 
Having **Feature Views** we can create **Training Dataset**.\n", @@ -197,19 +178,10 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "2ce37fac", + "execution_count": 6, + "id": "c0d7fec3", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Feature view created successfully, explore it at \n", - "https://snurran.hops.works/p/5242/fs/5190/fv/air_quality_fv/version/1\n" - ] - } - ], + "outputs": [], "source": [ "# Get or create the 'air_quality_fv' feature view\n", "feature_view = fs.get_or_create_feature_view(\n", @@ -221,7 +193,7 @@ }, { "cell_type": "markdown", - "id": "5442ee40", + "id": "8f12f3ac", "metadata": {}, "source": [ "For now, your `Feature View` is saved in Hopsworks and you can retrieve it using `FeatureStore.get_feature_view()`." @@ -229,7 +201,7 @@ }, { "cell_type": "markdown", - "id": "7fa4af05", + "id": "72aeb854", "metadata": {}, "source": [ "## ๐Ÿ‹๏ธ Training Dataset Creation\n", @@ -257,22 +229,22 @@ }, { "cell_type": "code", - "execution_count": 8, - "id": "2ad3def9", + "execution_count": 7, + "id": "317668a8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Finished: Reading data from Hopsworks, using ArrowFlight (8.70s) \n" + "Finished: Reading data from Hopsworks, using ArrowFlight (12.56s) \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "VersionWarning: Incremented version to `1`.\n" + "VersionWarning: Incremented version to `2`.\n" ] } ], @@ -284,7 +256,7 @@ }, { "cell_type": "markdown", - "id": "ff5fbe9b", + "id": "a18b3733", "metadata": {}, "source": [ "## ๐Ÿงฌ Modeling" @@ -292,8 +264,8 @@ }, { "cell_type": "code", - "execution_count": 9, - "id": "7106677e", + "execution_count": 8, + "id": "16c721ea", "metadata": {}, "outputs": [], "source": [ @@ -309,8 +281,8 @@ }, { "cell_type": "code", - "execution_count": 10, - "id": "d0ae6dc3", + "execution_count": 9, + "id": "ee1a5c8c", "metadata": { "tags": [] }, @@ -328,8 +300,8 
@@ }, { "cell_type": "code", - "execution_count": 11, - "id": "9fb26419", + "execution_count": 10, + "id": "612ef824", "metadata": { "tags": [] }, @@ -341,12 +313,173 @@ }, { "cell_type": "code", - "execution_count": 12, - "id": "4f67fee0", + "execution_count": 11, + "id": "02950e70", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pm_2_5_previous_1_daypm_2_5_previous_2_daypm_2_5_previous_3_daypm_2_5_previous_4_daypm_2_5_previous_5_daypm_2_5_previous_6_daypm_2_5_previous_7_daymean_7_daysmean_14_daysmean_28_days...temperature_maxtemperature_minprecipitation_sumrain_sumsnowfall_sumprecipitation_hourswind_speed_maxwind_gusts_maxwind_direction_dominantcity_name_encoded
1058180.00.02.03.06.06.07.03.4285716.8571435.142857...5.13.71.61.60.06.027.247.9639
669794.73.73.98.015.314.210.68.6285718.8500009.428571...0.0-6.80.00.00.00.019.345.06036
12222310.012.04.010.03.04.04.06.71428610.00000010.964286...1.5-1.90.00.00.00.013.524.127911
\n", + "

3 rows ร— 38 columns

\n", + "
" + ], + "text/plain": [ + " pm_2_5_previous_1_day pm_2_5_previous_2_day pm_2_5_previous_3_day \\\n", + "105818 0.0 0.0 2.0 \n", + "66979 4.7 3.7 3.9 \n", + "122223 10.0 12.0 4.0 \n", + "\n", + " pm_2_5_previous_4_day pm_2_5_previous_5_day pm_2_5_previous_6_day \\\n", + "105818 3.0 6.0 6.0 \n", + "66979 8.0 15.3 14.2 \n", + "122223 10.0 3.0 4.0 \n", + "\n", + " pm_2_5_previous_7_day mean_7_days mean_14_days mean_28_days ... \\\n", + "105818 7.0 3.428571 6.857143 5.142857 ... \n", + "66979 10.6 8.628571 8.850000 9.428571 ... \n", + "122223 4.0 6.714286 10.000000 10.964286 ... \n", + "\n", + " temperature_max temperature_min precipitation_sum rain_sum \\\n", + "105818 5.1 3.7 1.6 1.6 \n", + "66979 0.0 -6.8 0.0 0.0 \n", + "122223 1.5 -1.9 0.0 0.0 \n", + "\n", + " snowfall_sum precipitation_hours wind_speed_max wind_gusts_max \\\n", + "105818 0.0 6.0 27.2 47.9 \n", + "66979 0.0 0.0 19.3 45.0 \n", + "122223 0.0 0.0 13.5 24.1 \n", + "\n", + " wind_direction_dominant city_name_encoded \n", + "105818 6 39 \n", + "66979 60 36 \n", + "122223 279 11 \n", + "\n", + "[3 rows x 38 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Split the data into training and testing sets using the train_test_split function\n", "X_train, X_test, y_train, y_test = train_test_split(\n", @@ -361,17 +494,31 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "2e564df8", + "execution_count": 12, + "id": "b4ddfaeb", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "105818 2.0\n", + "66979 9.8\n", + "122223 11.0\n", + "Name: pm2_5, dtype: float64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "y_train.head(3)" ] }, { "cell_type": "markdown", - "id": "ea5adfa8", + "id": "59e85ea3", "metadata": {}, "source": [ "## ๐Ÿƒ๐Ÿปโ€โ™‚๏ธ Model Training" @@ -379,14 +526,14 @@ }, { "cell_type": "code", - "execution_count": 17, - "id": 
"192e9cf5", + "execution_count": 13, + "id": "44a6893f", "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
+       "
XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
        "             colsample_bylevel=None, colsample_bynode=None,\n",
        "             colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
        "             enable_categorical=False, eval_metric=None, feature_types=None,\n",
@@ -396,7 +543,7 @@
        "             max_delta_step=None, max_depth=None, max_leaves=None,\n",
        "             min_child_weight=None, missing=nan, monotone_constraints=None,\n",
        "             multi_strategy=None, n_estimators=None, n_jobs=None,\n",
-       "             num_parallel_tree=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.