diff --git a/.github/workflows/test_tutorials.yml b/.github/workflows/test_tutorials.yml
index e1fcbf71..b7abd4ed 100644
--- a/.github/workflows/test_tutorials.yml
+++ b/.github/workflows/test_tutorials.yml
@@ -26,7 +26,7 @@ jobs:
     - name: Execute Python workflows from bash script
       env:
        HOPSWORKS_API_KEY: ${{ secrets.HOPSWORKS_API_KEY_38 }}
-       WEATHER_API_KEY: ${{ secrets.WEATHER_API_KEY }}
+       WEATHER_API_KEY: ${{ secrets.WEATHER_API_KEY38 }}
       run: ./scripts/test-notebooks.sh
 
   test_tutorials39:
@@ -49,7 +49,7 @@ jobs:
     - name: Execute Python workflows from bash script
       env:
        HOPSWORKS_API_KEY: ${{ secrets.HOPSWORKS_API_KEY_39 }}
-       WEATHER_API_KEY: ${{ secrets.WEATHER_API_KEY }}
+       WEATHER_API_KEY: ${{ secrets.WEATHER_API_KEY39 }}
       run: ./scripts/test-notebooks.sh
 
   test_tutorials310:
@@ -72,5 +72,5 @@ jobs:
     - name: execute python workflows from bash script
       env:
        HOPSWORKS_API_KEY: ${{ secrets.HOPSWORKS_API_KEY_310 }}
-       WEATHER_API_KEY: ${{ secrets.WEATHER_API_KEY }}
+       WEATHER_API_KEY: ${{ secrets.WEATHER_API_KEY310 }}
       run: ./scripts/test-notebooks.sh
\ No newline at end of file
diff --git a/advanced_tutorials/citibike/1_citibike_feature_backfill.ipynb b/advanced_tutorials/citibike/1_citibike_feature_backfill.ipynb
index c6de6709..4811d8b1 100644
--- a/advanced_tutorials/citibike/1_citibike_feature_backfill.ipynb
+++ b/advanced_tutorials/citibike/1_citibike_feature_backfill.ipynb
@@ -514,7 +514,10 @@
    },
    "outputs": [],
    "source": [
-    "citibike_usage_fg.insert(df_enhanced)"
+    "citibike_usage_fg.insert(\n",
+    "    df_enhanced,\n",
+    "    write_options={\"wait_for_job\": True},\n",
+    ")"
    ]
   },
   {
@@ -543,7 +546,10 @@
    },
    "outputs": [],
    "source": [
-    "citibike_stations_info_fg.insert(df_stations_info)"
+    "citibike_stations_info_fg.insert(\n",
+    "    df_stations_info,\n",
+    "    write_options={\"wait_for_job\": True},\n",
+    ")"
    ]
   },
   {
@@ -569,7 +575,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "us_holidays_fg.insert(df_holidays)"
+    "us_holidays_fg.insert(\n",
+    "    df_holidays,\n",
+    "    write_options={\"wait_for_job\": True},\n",
+    ")"
    ]
   },
   {
@@ -634,7 +643,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.18"
+   "version": "3.9.12"
   }
  },
  "nbformat": 4,
diff --git a/advanced_tutorials/credit_scores/1_credit_scores_feature_backfill.ipynb b/advanced_tutorials/credit_scores/1_credit_scores_feature_backfill.ipynb
index b703f6fb..5bc95161 100644
--- a/advanced_tutorials/credit_scores/1_credit_scores_feature_backfill.ipynb
+++ b/advanced_tutorials/credit_scores/1_credit_scores_feature_backfill.ipynb
@@ -80,8 +80,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "applications_df = pd.read_csv(\"https://repo.hops.works/dev/davit/credit_scores/applications.csv\")\n",
-    "applications_df.head()"
+    "applications_df = pd.read_csv(\n",
+    "    \"https://repo.hops.works/dev/davit/credit_scores/applications.csv\",\n",
+    "    parse_dates=['datetime'],\n",
+    ")\n",
+    "applications_df.head(3)"
    ]
   },
   {
@@ -111,7 +114,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "bureau_balances_df = pd.read_csv('https://repo.hops.works/dev/davit/credit_scores/bureau_balances.csv')\n",
+    "bureau_balances_df = pd.read_csv(\n",
+    "    'https://repo.hops.works/dev/davit/credit_scores/bureau_balances.csv',\n",
+    ")[:5_000]\n",
     "bureau_balances_df.head(3)"
    ]
   },
@@ -142,7 +147,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "bureaus_df = pd.read_csv('https://repo.hops.works/dev/davit/credit_scores/bureaus.csv')\n",
+    "bureaus_df = pd.read_csv(\n",
+    "    'https://repo.hops.works/dev/davit/credit_scores/bureaus.csv',\n",
+ " parse_dates=['datetime'],\n", + ")[:5_000]\n", "bureaus_df.head(3)" ] }, @@ -173,7 +181,9 @@ "metadata": {}, "outputs": [], "source": [ - "credit_card_balances_df = pd.read_csv('https://repo.hops.works/dev/davit/credit_scores/credit_card_balances.csv')\n", + "credit_card_balances_df = pd.read_csv(\n", + " 'https://repo.hops.works/dev/davit/credit_scores/credit_card_balances.csv',\n", + ")[:5_000]\n", "credit_card_balances_df.head(3)" ] }, @@ -204,7 +214,10 @@ "metadata": {}, "outputs": [], "source": [ - "installment_payments_df = pd.read_csv('https://repo.hops.works/dev/davit/credit_scores/installment_payments.csv')\n", + "installment_payments_df = pd.read_csv(\n", + " 'https://repo.hops.works/dev/davit/credit_scores/installment_payments.csv',\n", + " parse_dates=['datetime'],\n", + ")[:5_000]\n", "installment_payments_df.head(3)" ] }, @@ -237,7 +250,9 @@ "metadata": {}, "outputs": [], "source": [ - "pos_cash_balances_df = pd.read_csv('https://repo.hops.works/dev/davit/credit_scores/pos_cash_balances.csv')\n", + "pos_cash_balances_df = pd.read_csv(\n", + " 'https://repo.hops.works/dev/davit/credit_scores/pos_cash_balances.csv'\n", + ")[:5_000]\n", "pos_cash_balances_df.head(3)" ] }, @@ -270,7 +285,10 @@ "metadata": {}, "outputs": [], "source": [ - "previous_applications_df = pd.read_csv('https://repo.hops.works/dev/davit/credit_scores/previous_applications.csv')\n", + "previous_applications_df = pd.read_csv(\n", + " 'https://repo.hops.works/dev/davit/credit_scores/previous_applications.csv',\n", + " parse_dates=['datetime'],\n", + ")[:5_000]\n", "previous_applications_df.head(3)" ] }, @@ -890,7 +908,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.9.12" } }, "nbformat": 4, diff --git a/advanced_tutorials/credit_scores/3_credit_scores_training_pipeline.ipynb b/advanced_tutorials/credit_scores/3_credit_scores_training_pipeline.ipynb index 87a91c89..7ae0dddb 100644 --- a/advanced_tutorials/credit_scores/3_credit_scores_training_pipeline.ipynb +++ b/advanced_tutorials/credit_scores/3_credit_scores_training_pipeline.ipynb @@ -37,7 +37,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install xgboost" + "!pip install xgboost --quiet" ] }, { @@ -619,7 +619,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.9.12" } }, "nbformat": 4, diff --git a/advanced_tutorials/electricity/1_electricity_feature_backfill.ipynb b/advanced_tutorials/electricity/1_electricity_feature_backfill.ipynb index 7edc82cb..65b9036c 100644 --- a/advanced_tutorials/electricity/1_electricity_feature_backfill.ipynb +++ b/advanced_tutorials/electricity/1_electricity_feature_backfill.ipynb @@ -275,7 +275,10 @@ }, "outputs": [], "source": [ - "meteorological_measurements_fg.insert(meteorological_measurements_df)" + "meteorological_measurements_fg.insert(\n", + " meteorological_measurements_df,\n", + " write_options={\"wait_for_job\": True},\n", + ")" ] }, { @@ -306,7 +309,10 @@ }, "outputs": [], "source": [ - "electricity_prices_fg.insert(electricity_prices_df)" + "electricity_prices_fg.insert(\n", + " electricity_prices_df,\n", + " write_options={\"wait_for_job\": True},\n", + ")" ] }, { @@ -376,7 +382,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.9.12" } }, "nbformat": 4, diff --git a/advanced_tutorials/nyc_taxi_fares/1_nyc_taxi_fares_feature_backfill.ipynb 
b/advanced_tutorials/nyc_taxi_fares/1_nyc_taxi_fares_feature_backfill.ipynb index feb24eb1..c8bd62c5 100644 --- a/advanced_tutorials/nyc_taxi_fares/1_nyc_taxi_fares_feature_backfill.ipynb +++ b/advanced_tutorials/nyc_taxi_fares/1_nyc_taxi_fares_feature_backfill.ipynb @@ -181,7 +181,10 @@ " statistics_config=True,\n", ")\n", "\n", - "rides_fg.insert(df_rides)" + "rides_fg.insert(\n", + " df_rides,\n", + " write_options={\"wait_for_job\": True},\n", + ")" ] }, { @@ -243,7 +246,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.9.12" } }, "nbformat": 4, diff --git a/advanced_tutorials/transformation_functions/custom/custom_transformation_functions.ipynb b/advanced_tutorials/transformation_functions/custom/custom_transformation_functions.ipynb index 44427218..7920c66b 100644 --- a/advanced_tutorials/transformation_functions/custom/custom_transformation_functions.ipynb +++ b/advanced_tutorials/transformation_functions/custom/custom_transformation_functions.ipynb @@ -170,7 +170,7 @@ ") \n", "feature_group.insert(\n", " df_original, \n", - " wait=True,\n", + " write_options={\"wait_for_job\": True},\n", ")" ] }, diff --git a/advanced_tutorials/transformation_functions/keras/keras_transformation_functions.ipynb b/advanced_tutorials/transformation_functions/keras/keras_transformation_functions.ipynb index d9ec88fe..6a06260f 100644 --- a/advanced_tutorials/transformation_functions/keras/keras_transformation_functions.ipynb +++ b/advanced_tutorials/transformation_functions/keras/keras_transformation_functions.ipynb @@ -172,7 +172,10 @@ " primary_key=['city_name', 'date'],\n", " online_enabled=True,\n", ") \n", - "feature_group.insert(df_original)" + "feature_group.insert(\n", + " df_original,\n", + " write_options={\"wait_for_job\": True},\n", + ")" ] }, { @@ -409,6 +412,9 @@ " # Create a new DataFrame with the encoded values\n", " encoded_df = pd.DataFrame(city_encoded, columns=one_hot_encoder.categories_[0])\n", "\n", + " # Reset the index of the original DataFrame\n", + " data = data.reset_index(drop=True)\n", + "\n", " # Concatenate the encoded DataFrame with the original DataFrame\n", " data = pd.concat([data.drop('city_name', axis=1), encoded_df], axis=1)\n", " \n", @@ -789,7 +795,7 @@ "outputs": [], "source": [ "# Initialise feature view to retrieve batch data\n", - "feature_view.init_batch_scoring(training_dataset_version=td_version)\n", + "feature_view.init_batch_scoring(1)\n", "\n", "# Retrieve batch data\n", "batch_data = feature_view.get_batch_data()\n", diff --git a/advanced_tutorials/transformation_functions/pytorch/pytorch_transformation_functions.ipynb b/advanced_tutorials/transformation_functions/pytorch/pytorch_transformation_functions.ipynb index 8e5a7fda..92ed644f 100644 --- a/advanced_tutorials/transformation_functions/pytorch/pytorch_transformation_functions.ipynb +++ b/advanced_tutorials/transformation_functions/pytorch/pytorch_transformation_functions.ipynb @@ -174,7 +174,10 @@ " primary_key=['city_name', 'date'],\n", " online_enabled=True,\n", ") \n", - "feature_group.insert(df_original)" + "feature_group.insert(\n", + " df_original,\n", + " write_options={\"wait_for_job\": True},\n", + ")" ] }, { @@ -411,6 +414,9 @@ " # Create a new DataFrame with the encoded values\n", " encoded_df = pd.DataFrame(city_encoded, columns=one_hot_encoder.categories_[0])\n", "\n", + " # Reset the index of the original DataFrame\n", + " data = data.reset_index(drop=True)\n", + "\n", " # Concatenate the encoded DataFrame with 
the original DataFrame\n", " data = pd.concat([data.drop('city_name', axis=1), encoded_df], axis=1)\n", " \n", @@ -869,7 +875,7 @@ "outputs": [], "source": [ "# Initialise feature view to retrieve batch data\n", - "feature_view.init_batch_scoring(training_dataset_version=td_version)\n", + "feature_view.init_batch_scoring(1)\n", "\n", "# Retrieve batch data\n", "batch_data = feature_view.get_batch_data()\n", diff --git a/advanced_tutorials/transformation_functions/sklearn/sklearn_transformation_functions.ipynb b/advanced_tutorials/transformation_functions/sklearn/sklearn_transformation_functions.ipynb index b53737aa..13853b98 100644 --- a/advanced_tutorials/transformation_functions/sklearn/sklearn_transformation_functions.ipynb +++ b/advanced_tutorials/transformation_functions/sklearn/sklearn_transformation_functions.ipynb @@ -183,7 +183,10 @@ " primary_key=['city_name', 'date'],\n", " online_enabled=True,\n", ") \n", - "feature_group.insert(df_original)" + "feature_group.insert(\n", + " df_original,\n", + " write_options={\"wait_for_job\": True},\n", + ")" ] }, { @@ -619,7 +622,7 @@ "outputs": [], "source": [ "# Initialise feature view to retrieve batch data\n", - "feature_view.init_batch_scoring(training_dataset_version=td_version)\n", + "feature_view.init_batch_scoring(1)\n", "\n", "# Retrieve batch data\n", "batch_data = feature_view.get_batch_data()\n", diff --git a/churn/1_churn_feature_pipeline.ipynb b/churn/1_churn_feature_pipeline.ipynb index 9288d9c9..36752b7e 100644 --- a/churn/1_churn_feature_pipeline.ipynb +++ b/churn/1_churn_feature_pipeline.ipynb @@ -86,8 +86,14 @@ "outputs": [], "source": [ "demography_df = pd.read_csv(\"https://repo.hops.works/dev/davit/churn/demography.csv\")\n", - "customer_info_df = pd.read_csv(\"https://repo.hops.works/dev/davit/churn/customer_info.csv\")\n", - "subscriptions_df = pd.read_csv(\"https://repo.hops.works/dev/davit/churn/subscriptions.csv\")" + "customer_info_df = pd.read_csv(\n", + " \"https://repo.hops.works/dev/davit/churn/customer_info.csv\",\n", + " parse_dates=['datetime'],\n", + ")\n", + "subscriptions_df = pd.read_csv(\n", + " \"https://repo.hops.works/dev/davit/churn/subscriptions.csv\",\n", + " parse_dates=['datetime'],\n", + ")" ] }, { @@ -226,7 +232,10 @@ "outputs": [], "source": [ "# Insert data into feature group\n", - "customer_info_fg.insert(customer_info_df)" + "customer_info_fg.insert(\n", + " customer_info_df,\n", + " write_options={\"wait_for_job\": True},\n", + ")" ] }, { @@ -268,7 +277,10 @@ " primary_key=['customerID'],\n", ")\n", "# Insert data into feature group\n", - "demography_fg.insert(demography_df)" + "demography_fg.insert(\n", + " demography_df,\n", + " write_options={\"wait_for_job\": True},\n", + ")" ] }, { @@ -382,7 +394,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.9.12" } }, "nbformat": 4, diff --git a/churn/2_churn_training_pipeline.ipynb b/churn/2_churn_training_pipeline.ipynb index e4e324b6..b046f1f0 100644 --- a/churn/2_churn_training_pipeline.ipynb +++ b/churn/2_churn_training_pipeline.ipynb @@ -436,7 +436,6 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "---\n", "## ⏭️ **Next:** Part 03 \n", "\n", "In the following notebook you will use your model for batch inference.\n", @@ -464,7 +463,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.9.12" } }, "nbformat": 4, diff --git a/fraud_batch/1_fraud_batch_feature_pipeline.ipynb 
b/fraud_batch/1_fraud_batch_feature_pipeline.ipynb index d4cd4dc2..3fb87b2c 100755 --- a/fraud_batch/1_fraud_batch_feature_pipeline.ipynb +++ b/fraud_batch/1_fraud_batch_feature_pipeline.ipynb @@ -268,6 +268,76 @@ "window_aggs_df.datetime = window_aggs_df.datetime.values.astype(np.int64) // 10 ** 6" ] }, + { + "cell_type": "markdown", + "id": "70f69c73", + "metadata": {}, + "source": [ + "## 👮🏻‍♂️ Great Expectations " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5bf25c13", + "metadata": {}, + "outputs": [], + "source": [ + "import great_expectations as ge\n", + "from great_expectations.core import ExpectationSuite, ExpectationConfiguration\n", + "\n", + "# Convert the 'trans_df' DataFrame to a Great Expectations DataFrame\n", + "ge_trans_df = ge.from_pandas(trans_df)\n", + "\n", + "# Retrieve the expectation suite associated with the ge DataFrame\n", + "expectation_suite_transactions = ge_trans_df.get_expectation_suite()\n", + "\n", + "# Set the expectation suite name to \"transactions_suite\"\n", + "expectation_suite_transactions.expectation_suite_name = \"transactions_suite\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e420315", + "metadata": {}, + "outputs": [], + "source": [ + "# Check binary fraud_label column to be in set [0,1]\n", + "expectation_suite_transactions.add_expectation(\n", + " ExpectationConfiguration(\n", + " expectation_type=\"expect_column_distinct_values_to_be_in_set\",\n", + " kwargs={\n", + " \"column\": \"fraud_label\",\n", + " \"value_set\": [0, 1],\n", + " }\n", + " )\n", + ")\n", + "\n", + "# Check amount column to be not negative\n", + "expectation_suite_transactions.add_expectation(\n", + " ExpectationConfiguration(\n", + " expectation_type=\"expect_column_values_to_be_between\",\n", + " kwargs={\n", + " \"column\": \"amount\",\n", + " \"min_value\": 0.0,\n", + " }\n", + " )\n", + ")\n", + "\n", + "# Loop through specified columns ('tid', 'datetime', 'cc_num') and add expectations for null values\n", + "for column in ['tid', 'datetime', 'cc_num']:\n", + " expectation_suite_transactions.add_expectation(\n", + " ExpectationConfiguration(\n", + " expectation_type=\"expect_column_values_to_be_null\",\n", + " kwargs={\n", + " \"column\": column,\n", + " \"mostly\": 0.0,\n", + " }\n", + " )\n", + " )" + ] + }, { "cell_type": "markdown", "id": "21be72c5", @@ -336,6 +406,7 @@ " description=\"Transaction data\",\n", " primary_key=[\"cc_num\"],\n", " event_time=\"datetime\",\n", + " expectation_suite=expectation_suite_transactions,\n", ")" ] }, @@ -357,7 +428,10 @@ "outputs": [], "source": [ "# Insert data into feature group\n", - "trans_fg.insert(trans_df)" + "trans_fg.insert(\n", + " trans_df,\n", + " write_options={\"wait_for_job\": True},\n", + ")" ] }, { @@ -431,7 +505,10 @@ "outputs": [], "source": [ "# Insert data into feature group\n", - "window_aggs_fg.insert(window_aggs_df)" + "window_aggs_fg.insert(\n", + " window_aggs_df,\n", + " write_options={\"wait_for_job\": True},\n", + ")" ] }, { @@ -496,7 +573,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/fraud_online/1_fraud_online_feature_pipeline.ipynb b/fraud_online/1_fraud_online_feature_pipeline.ipynb index 63a98420..13edcf35 100755 --- a/fraud_online/1_fraud_online_feature_pipeline.ipynb +++ b/fraud_online/1_fraud_online_feature_pipeline.ipynb @@ -200,6 +200,122 @@ "trans_df.head(3)" ] }, + { + "cell_type": "markdown", + "id": "95d84907", 
+ "metadata": {}, + "source": [ + "## 👮🏻‍♂️ Great Expectations " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f02da7b", + "metadata": {}, + "outputs": [], + "source": [ + "import great_expectations as ge\n", + "from great_expectations.core import ExpectationSuite, ExpectationConfiguration\n", + "\n", + "# Convert the 'trans_df' DataFrame to a Great Expectations DataFrame\n", + "ge_trans_df = ge.from_pandas(trans_df)\n", + "\n", + "# Retrieve the expectation suite associated with the ge DataFrame\n", + "expectation_suite_transactions = ge_trans_df.get_expectation_suite()\n", + "\n", + "# Set the expectation suite name to \"transactions_suite\"\n", + "expectation_suite_transactions.expectation_suite_name = \"transactions_suite\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0dfd28b6", + "metadata": {}, + "outputs": [], + "source": [ + "# Check binary fraud_label column to be in set [0,1]\n", + "expectation_suite_transactions.add_expectation(\n", + " ExpectationConfiguration(\n", + " expectation_type=\"expect_column_distinct_values_to_be_in_set\",\n", + " kwargs={\n", + " \"column\": \"fraud_label\",\n", + " \"value_set\": [0, 1],\n", + " }\n", + " )\n", + ")\n", + "\n", + "# Check amount column to be not negative\n", + "expectation_suite_transactions.add_expectation(\n", + " ExpectationConfiguration(\n", + " expectation_type=\"expect_column_values_to_be_between\",\n", + " kwargs={\n", + " \"column\": \"amount\",\n", + " \"min_value\": 0.0,\n", + " }\n", + " )\n", + ")\n", + "\n", + "# Loop through specified columns ('tid', 'datetime', 'cc_num') and add expectations for null values\n", + "for column in ['tid', 'datetime', 'cc_num']:\n", + " expectation_suite_transactions.add_expectation(\n", + " ExpectationConfiguration(\n", + " expectation_type=\"expect_column_values_to_be_null\",\n", + " kwargs={\n", + " \"column\": column,\n", + " \"mostly\": 0.0,\n", + " }\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd0eeba4", + "metadata": {}, + "outputs": [], + "source": [ + "# Convert the 'profiles_df' DataFrame to a Great Expectations DataFrame\n", + "ge_profiles_df = ge.from_pandas(profiles_df)\n", + "\n", + "# Retrieve the expectation suite associated with the ge DataFrame\n", + "expectation_suite_profiles = ge_profiles_df.get_expectation_suite()\n", + "\n", + "# Set the expectation suite name to \"profiles_suite\"\n", + "expectation_suite_profiles.expectation_suite_name = \"profiles_suite\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "08eddf29", + "metadata": {}, + "outputs": [], + "source": [ + "# Check binary gender column to be in set ['M', 'F']\n", + "expectation_suite_profiles.add_expectation(\n", + " ExpectationConfiguration(\n", + " expectation_type=\"expect_column_distinct_values_to_be_in_set\",\n", + " kwargs={\n", + " \"column\": \"gender\",\n", + " \"value_set\": ['M', 'F'],\n", + " }\n", + " )\n", + ")\n", + "# Check for Nulls\n", + "expectation_suite_profiles.add_expectation(\n", + " ExpectationConfiguration(\n", + " expectation_type=\"expect_column_values_to_be_null\",\n", + " kwargs={\n", + " \"column\": 'cc_num',\n", + " \"mostly\": 0.0,\n", + " }\n", + " )\n", + " )" + ] + }, { "cell_type": "markdown", "id": "1a7e126d", @@ -277,6 +393,7 @@ " primary_key=['cc_num'],\n", " event_time='datetime',\n", " online_enabled=True,\n", + " expectation_suite=expectation_suite_transactions,\n", ")" ] }, @@ -346,6 +463,7 @@ " description=\"Credit card holder 
demographic data\",\n", " primary_key=['cc_num'],\n", " online_enabled=True,\n", + " expectation_suite=expectation_suite_profiles,\n", ")\n", "# Insert data into feature group\n", "profile_fg.insert(\n", @@ -463,7 +581,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/loan_approval/1-loan-approval-feature-pipeline.ipynb b/loan_approval/1-loan-approval-feature-pipeline.ipynb index 7654c220..f111f11c 100644 --- a/loan_approval/1-loan-approval-feature-pipeline.ipynb +++ b/loan_approval/1-loan-approval-feature-pipeline.ipynb @@ -914,8 +914,8 @@ " expectation_type=\"expect_column_values_to_be_between\",\n", " kwargs={\n", " \"column\":\"int_rate\", \n", - " \"min_value\":\"-2.0\",\n", - " \"max_value\":\"2000.0\",\n", + " \"min_value\":-2.0,\n", + " \"max_value\":2000.0,\n", " }\n", " )\n", ")" @@ -1017,7 +1017,10 @@ "metadata": {}, "outputs": [], "source": [ - "loans_fg.insert(loans_df)" + "loans_fg.insert(\n", + " loans_df,\n", + " write_options={\"wait_for_job\": True},\n", + ")" ] }, { @@ -1027,7 +1030,10 @@ "metadata": {}, "outputs": [], "source": [ - "applicants_fg.insert(applicants_df)" + "applicants_fg.insert(\n", + " applicants_df,\n", + " write_options={\"wait_for_job\": True},\n", + ")" ] }, { @@ -1109,7 +1115,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.10.13" }, "papermill": { "default_parameters": {}, diff --git a/quickstart.ipynb b/quickstart.ipynb index 0bba9504..d9cb9546 100644 --- a/quickstart.ipynb +++ b/quickstart.ipynb @@ -258,7 +258,7 @@ "df_4h_mavg = df_4h_mavg.sort_index()\n", "\n", "# Moving standard deviation of transaction volume.\n", - "df_4h_std = pd.DataFrame(cc_group.mean())\n", + "df_4h_std = pd.DataFrame(cc_group.std())\n", "df_4h_std.columns = [\"trans_volume_mstd\", \"datetime\"]\n", "df_4h_std = df_4h_std.reset_index(level=[\"cc_num\"])\n", "df_4h_std = df_4h_std.drop(columns=[\"cc_num\", \"datetime\"])\n", @@ -266,8 +266,8 @@ "df_4h_std = df_4h_std.sort_index()\n", "window_aggs_df = df_4h_std.merge(df_4h_mavg, left_index=True, right_index=True)\n", "\n", - "# Moving average of transaction frequency.\n", - "df_4h_count = pd.DataFrame(cc_group.mean())\n", + "# Moving transaction frequency.\n", + "df_4h_count = pd.DataFrame(cc_group.count())\n", "df_4h_count.columns = [\"trans_freq\", \"datetime\"]\n", "df_4h_count = df_4h_count.reset_index(level=[\"cc_num\"])\n", "df_4h_count = df_4h_count.drop(columns=[\"cc_num\", \"datetime\"])\n", @@ -1137,7 +1137,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.18" + "version": "3.9.12" }, "widgets": { "application/vnd.jupyter.widget-state+json": { diff --git a/scripts/cleanup-tutorials.ipynb b/scripts/cleanup-tutorials.ipynb index c0272338..805c040e 100644 --- a/scripts/cleanup-tutorials.ipynb +++ b/scripts/cleanup-tutorials.ipynb @@ -233,52 +233,6 @@ " print(f\"Couldn't delete {fg} FG\")" ] }, - { - "cell_type": "markdown", - "id": "b10b786c", - "metadata": {}, - "source": [ - "## Cleanup Citibike" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "eec9cdda", - "metadata": {}, - "outputs": [], - "source": [ - " \n", - "# Delete a model\n", - "models=[\"citibike_xgb_model\"]\n", - "for model in models:\n", - " try:\n", - " model = mr.get_model(f\"{model}\", version=1)\n", - " model.delete()\n", - " except:\n", - " print(f\"Couldn't delete {model} model\")\n", - 
"\n", - "# Delete the feature_views before the feature groups\n", - "\n", - "fvs=[\"citibike_fv\"]\n", - "\n", - "for fv in fvs:\n", - " try:\n", - " feature_view = fs.get_feature_view(name=f\"{fv}\", version=1)\n", - " feature_view.delete()\n", - " except:\n", - " print(f\"Couldn't delete {fv} feature view\")\n", - "\n", - "fgs=[\"citibike_usage\", \"citibike_stations_info\", \"us_holidays\", \"meteorological_measurements\"]\n", - "\n", - "for fg in fgs:\n", - " try:\n", - " fg = fs.get_feature_group(name=f\"{fg}\", version=1)\n", - " fg.delete()\n", - " except:\n", - " print(f\"Couldn't delete {fg} FG\")" - ] - }, { "cell_type": "markdown", "id": "5c68d708", diff --git a/scripts/test-notebooks.sh b/scripts/test-notebooks.sh index 1a5d1e98..99e544ea 100755 --- a/scripts/test-notebooks.sh +++ b/scripts/test-notebooks.sh @@ -6,7 +6,6 @@ set -e jupyter nbconvert --to notebook --execute scripts/cleanup-tutorials.ipynb # Loan Approval -jupyter nbconvert --to notebook --execute loan_approval/0-loan-approval-eda.ipynb jupyter nbconvert --to notebook --execute loan_approval/1-loan-approval-feature-pipeline.ipynb jupyter nbconvert --to notebook --execute loan_approval/2-loan-approval-training-pipeline.ipynb jupyter nbconvert --to notebook --execute loan_approval/3-loan-approval-batch-inference.ipynb @@ -32,9 +31,11 @@ jupyter nbconvert --to notebook --execute churn/1_churn_feature_pipeline.ipynb jupyter nbconvert --to notebook --execute churn/2_churn_training_pipeline.ipynb jupyter nbconvert --to notebook --execute churn/3_churn_batch_inference.ipynb +# Remove any FGs, FVs, Models, Deployments +jupyter nbconvert --to notebook --execute scripts/cleanup-tutorials.ipynb + # Great Expectations jupyter nbconvert --to notebook --execute integrations/great_expectations/Great_Expectations_Hopsworks_Concepts.ipynb -jupyter nbconvert --to notebook --execute integrations/great_expectations/fraud_batch_data_validation.ipynb # Remove any FGs, FVs, Models, Deployments jupyter nbconvert --to notebook --execute scripts/cleanup-tutorials.ipynb @@ -42,15 +43,6 @@ jupyter nbconvert --to notebook --execute scripts/cleanup-tutorials.ipynb # Advanced Tutorials cd advanced_tutorials -# Citibike -jupyter nbconvert --to notebook --execute citibike/1_citibike_feature_backfill.ipynb -jupyter nbconvert --to notebook --execute citibike/2_citibike_feature_pipeline.ipynb -jupyter nbconvert --to notebook --execute citibike/3_citibike_training_pipeline.ipynb -jupyter nbconvert --to notebook --execute citibike/4_citibike_batch_inference.ipynb - -# Remove any FGs, FVs, Models, Deployments -jupyter nbconvert --to notebook --execute ../scripts/cleanup-tutorials.ipynb - # Credit Scores jupyter nbconvert --to notebook --execute credit_scores/1_credit_scores_feature_backfill.ipynb jupyter nbconvert --to notebook --execute credit_scores/2_credit_scores_feature_pipeline.ipynb @@ -60,12 +52,6 @@ jupyter nbconvert --to notebook --execute credit_scores/4_credit_scores_batch_in # Remove any FGs, FVs, Models, Deployments jupyter nbconvert --to notebook --execute ../scripts/cleanup-tutorials.ipynb -# Electricity -jupyter nbconvert --to notebook --execute electricity/1_electricity_feature_backfill.ipynb -jupyter nbconvert --to notebook --execute electricity/2_electricity_feature_pipeline.ipynb -jupyter nbconvert --to notebook --execute electricity/3_electricity_training_pipeline.ipynb -jupyter nbconvert --to notebook --execute electricity/4_electricity_batch_inference.ipynb - # Nyc Taxi Fares jupyter nbconvert --to notebook --execute 
nyc_taxi_fares/1_nyc_taxi_fares_feature_backfill.ipynb jupyter nbconvert --to notebook --execute nyc_taxi_fares/2_nyc_taxi_fares_feature_pipeline.ipynb @@ -75,6 +61,15 @@ jupyter nbconvert --to notebook --execute nyc_taxi_fares/4_nyc_taxi_fares_batch_ # Remove any FGs, FVs, Models, Deployments jupyter nbconvert --to notebook --execute ../scripts/cleanup-tutorials.ipynb +# Electricity +jupyter nbconvert --to notebook --execute electricity/1_electricity_feature_backfill.ipynb +jupyter nbconvert --to notebook --execute electricity/2_electricity_feature_pipeline.ipynb +jupyter nbconvert --to notebook --execute electricity/3_electricity_training_pipeline.ipynb +jupyter nbconvert --to notebook --execute electricity/4_electricity_batch_inference.ipynb + +# Remove any FGs, FVs, Models, Deployments +jupyter nbconvert --to notebook --execute ../scripts/cleanup-tutorials.ipynb + # Go to transformation_functions folder cd transformation_functions