diff --git a/.github/workflows/test_tutorials.yml b/.github/workflows/test_tutorials.yml
index e1fcbf71..b7abd4ed 100644
--- a/.github/workflows/test_tutorials.yml
+++ b/.github/workflows/test_tutorials.yml
@@ -26,7 +26,7 @@ jobs:
- name: Execute Python workflows from bash script
env:
HOPSWORKS_API_KEY: ${{ secrets.HOPSWORKS_API_KEY_38 }}
- WEATHER_API_KEY: ${{ secrets.WEATHER_API_KEY }}
+ WEATHER_API_KEY: ${{ secrets.WEATHER_API_KEY38 }}
run: ./scripts/test-notebooks.sh
test_tutorials39:
@@ -49,7 +49,7 @@ jobs:
- name: Execute Python workflows from bash script
env:
HOPSWORKS_API_KEY: ${{ secrets.HOPSWORKS_API_KEY_39 }}
- WEATHER_API_KEY: ${{ secrets.WEATHER_API_KEY }}
+ WEATHER_API_KEY: ${{ secrets.WEATHER_API_KEY39 }}
run: ./scripts/test-notebooks.sh
test_tutorials310:
@@ -72,5 +72,5 @@ jobs:
- name: execute python workflows from bash script
env:
HOPSWORKS_API_KEY: ${{ secrets.HOPSWORKS_API_KEY_310 }}
- WEATHER_API_KEY: ${{ secrets.WEATHER_API_KEY }}
+ WEATHER_API_KEY: ${{ secrets.WEATHER_API_KEY310 }}
run: ./scripts/test-notebooks.sh
\ No newline at end of file
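Note on the workflow change above: each matrix job now reads a Python-version-specific `WEATHER_API_KEY*` secret, mirroring the existing `HOPSWORKS_API_KEY_*` pattern. For reference, a minimal sketch of how the tested notebooks would consume these variables on the Python side (variable names taken from the `env:` block above; the exact lookup code in the notebooks may differ):

```python
import os

# The workflow exports the secrets as plain environment variables,
# so downstream code can read them with os.environ. A KeyError here
# usually means the secret is missing or misnamed in the repo settings.
hopsworks_api_key = os.environ["HOPSWORKS_API_KEY"]
weather_api_key = os.environ["WEATHER_API_KEY"]
```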
diff --git a/advanced_tutorials/citibike/1_citibike_feature_backfill.ipynb b/advanced_tutorials/citibike/1_citibike_feature_backfill.ipynb
index c6de6709..4811d8b1 100644
--- a/advanced_tutorials/citibike/1_citibike_feature_backfill.ipynb
+++ b/advanced_tutorials/citibike/1_citibike_feature_backfill.ipynb
@@ -514,7 +514,10 @@
},
"outputs": [],
"source": [
- "citibike_usage_fg.insert(df_enhanced)"
+ "citibike_usage_fg.insert(\n",
+ " df_enhanced,\n",
+ " write_options={\"wait_for_job\": True},\n",
+ ")"
]
},
{
@@ -543,7 +546,10 @@
},
"outputs": [],
"source": [
- "citibike_stations_info_fg.insert(df_stations_info)"
+ "citibike_stations_info_fg.insert(\n",
+ " df_stations_info,\n",
+ " write_options={\"wait_for_job\": True},\n",
+ ")"
]
},
{
@@ -569,7 +575,10 @@
"metadata": {},
"outputs": [],
"source": [
- "us_holidays_fg.insert(df_holidays)"
+ "us_holidays_fg.insert(\n",
+ " df_holidays,\n",
+ " write_options={\"wait_for_job\": True},\n",
+ ")"
]
},
{
@@ -634,7 +643,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.18"
+ "version": "3.9.12"
}
},
"nbformat": 4,
diff --git a/advanced_tutorials/credit_scores/1_credit_scores_feature_backfill.ipynb b/advanced_tutorials/credit_scores/1_credit_scores_feature_backfill.ipynb
index b703f6fb..5bc95161 100644
--- a/advanced_tutorials/credit_scores/1_credit_scores_feature_backfill.ipynb
+++ b/advanced_tutorials/credit_scores/1_credit_scores_feature_backfill.ipynb
@@ -80,8 +80,11 @@
"metadata": {},
"outputs": [],
"source": [
- "applications_df = pd.read_csv(\"https://repo.hops.works/dev/davit/credit_scores/applications.csv\")\n",
- "applications_df.head()"
+ "applications_df = pd.read_csv(\n",
+ " \"https://repo.hops.works/dev/davit/credit_scores/applications.csv\",\n",
+ " parse_dates=['datetime'],\n",
+ ")\n",
+ "applications_df.head(3)"
]
},
{
@@ -111,7 +114,9 @@
"metadata": {},
"outputs": [],
"source": [
- "bureau_balances_df = pd.read_csv('https://repo.hops.works/dev/davit/credit_scores/bureau_balances.csv')\n",
+ "bureau_balances_df = pd.read_csv(\n",
+ " 'https://repo.hops.works/dev/davit/credit_scores/bureau_balances.csv',\n",
+ ")[:5_000]\n",
"bureau_balances_df.head(3)"
]
},
@@ -142,7 +147,10 @@
"metadata": {},
"outputs": [],
"source": [
- "bureaus_df = pd.read_csv('https://repo.hops.works/dev/davit/credit_scores/bureaus.csv')\n",
+ "bureaus_df = pd.read_csv(\n",
+ " 'https://repo.hops.works/dev/davit/credit_scores/bureaus.csv',\n",
+ " parse_dates=['datetime'],\n",
+ ")[:5_000]\n",
"bureaus_df.head(3)"
]
},
@@ -173,7 +181,9 @@
"metadata": {},
"outputs": [],
"source": [
- "credit_card_balances_df = pd.read_csv('https://repo.hops.works/dev/davit/credit_scores/credit_card_balances.csv')\n",
+ "credit_card_balances_df = pd.read_csv(\n",
+ " 'https://repo.hops.works/dev/davit/credit_scores/credit_card_balances.csv',\n",
+ ")[:5_000]\n",
"credit_card_balances_df.head(3)"
]
},
@@ -204,7 +214,10 @@
"metadata": {},
"outputs": [],
"source": [
- "installment_payments_df = pd.read_csv('https://repo.hops.works/dev/davit/credit_scores/installment_payments.csv')\n",
+ "installment_payments_df = pd.read_csv(\n",
+ " 'https://repo.hops.works/dev/davit/credit_scores/installment_payments.csv',\n",
+ " parse_dates=['datetime'],\n",
+ ")[:5_000]\n",
"installment_payments_df.head(3)"
]
},
@@ -237,7 +250,9 @@
"metadata": {},
"outputs": [],
"source": [
- "pos_cash_balances_df = pd.read_csv('https://repo.hops.works/dev/davit/credit_scores/pos_cash_balances.csv')\n",
+ "pos_cash_balances_df = pd.read_csv(\n",
+ " 'https://repo.hops.works/dev/davit/credit_scores/pos_cash_balances.csv'\n",
+ ")[:5_000]\n",
"pos_cash_balances_df.head(3)"
]
},
@@ -270,7 +285,10 @@
"metadata": {},
"outputs": [],
"source": [
- "previous_applications_df = pd.read_csv('https://repo.hops.works/dev/davit/credit_scores/previous_applications.csv')\n",
+ "previous_applications_df = pd.read_csv(\n",
+ " 'https://repo.hops.works/dev/davit/credit_scores/previous_applications.csv',\n",
+ " parse_dates=['datetime'],\n",
+ ")[:5_000]\n",
"previous_applications_df.head(3)"
]
},
@@ -890,7 +908,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.18"
+ "version": "3.9.12"
}
},
"nbformat": 4,
diff --git a/advanced_tutorials/credit_scores/3_credit_scores_training_pipeline.ipynb b/advanced_tutorials/credit_scores/3_credit_scores_training_pipeline.ipynb
index 87a91c89..7ae0dddb 100644
--- a/advanced_tutorials/credit_scores/3_credit_scores_training_pipeline.ipynb
+++ b/advanced_tutorials/credit_scores/3_credit_scores_training_pipeline.ipynb
@@ -37,7 +37,7 @@
"metadata": {},
"outputs": [],
"source": [
- "!pip install xgboost"
+ "!pip install xgboost --quiet"
]
},
{
@@ -619,7 +619,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.18"
+ "version": "3.9.12"
}
},
"nbformat": 4,
diff --git a/advanced_tutorials/electricity/1_electricity_feature_backfill.ipynb b/advanced_tutorials/electricity/1_electricity_feature_backfill.ipynb
index 7edc82cb..65b9036c 100644
--- a/advanced_tutorials/electricity/1_electricity_feature_backfill.ipynb
+++ b/advanced_tutorials/electricity/1_electricity_feature_backfill.ipynb
@@ -275,7 +275,10 @@
},
"outputs": [],
"source": [
- "meteorological_measurements_fg.insert(meteorological_measurements_df)"
+ "meteorological_measurements_fg.insert(\n",
+ " meteorological_measurements_df,\n",
+ " write_options={\"wait_for_job\": True},\n",
+ ")"
]
},
{
@@ -306,7 +309,10 @@
},
"outputs": [],
"source": [
- "electricity_prices_fg.insert(electricity_prices_df)"
+ "electricity_prices_fg.insert(\n",
+ " electricity_prices_df,\n",
+ " write_options={\"wait_for_job\": True},\n",
+ ")"
]
},
{
@@ -376,7 +382,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.18"
+ "version": "3.9.12"
}
},
"nbformat": 4,
diff --git a/advanced_tutorials/nyc_taxi_fares/1_nyc_taxi_fares_feature_backfill.ipynb b/advanced_tutorials/nyc_taxi_fares/1_nyc_taxi_fares_feature_backfill.ipynb
index feb24eb1..c8bd62c5 100644
--- a/advanced_tutorials/nyc_taxi_fares/1_nyc_taxi_fares_feature_backfill.ipynb
+++ b/advanced_tutorials/nyc_taxi_fares/1_nyc_taxi_fares_feature_backfill.ipynb
@@ -181,7 +181,10 @@
" statistics_config=True,\n",
")\n",
"\n",
- "rides_fg.insert(df_rides)"
+ "rides_fg.insert(\n",
+ " df_rides,\n",
+ " write_options={\"wait_for_job\": True},\n",
+ ")"
]
},
{
@@ -243,7 +246,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.18"
+ "version": "3.9.12"
}
},
"nbformat": 4,
diff --git a/advanced_tutorials/transformation_functions/custom/custom_transformation_functions.ipynb b/advanced_tutorials/transformation_functions/custom/custom_transformation_functions.ipynb
index 44427218..7920c66b 100644
--- a/advanced_tutorials/transformation_functions/custom/custom_transformation_functions.ipynb
+++ b/advanced_tutorials/transformation_functions/custom/custom_transformation_functions.ipynb
@@ -170,7 +170,7 @@
") \n",
"feature_group.insert(\n",
" df_original, \n",
- " wait=True,\n",
+ " write_options={\"wait_for_job\": True},\n",
")"
]
},
diff --git a/advanced_tutorials/transformation_functions/keras/keras_transformation_functions.ipynb b/advanced_tutorials/transformation_functions/keras/keras_transformation_functions.ipynb
index d9ec88fe..6a06260f 100644
--- a/advanced_tutorials/transformation_functions/keras/keras_transformation_functions.ipynb
+++ b/advanced_tutorials/transformation_functions/keras/keras_transformation_functions.ipynb
@@ -172,7 +172,10 @@
" primary_key=['city_name', 'date'],\n",
" online_enabled=True,\n",
") \n",
- "feature_group.insert(df_original)"
+ "feature_group.insert(\n",
+ " df_original,\n",
+ " write_options={\"wait_for_job\": True},\n",
+ ")"
]
},
{
@@ -409,6 +412,9 @@
" # Create a new DataFrame with the encoded values\n",
" encoded_df = pd.DataFrame(city_encoded, columns=one_hot_encoder.categories_[0])\n",
"\n",
+ " # Reset the index of the original DataFrame\n",
+ " data = data.reset_index(drop=True)\n",
+ "\n",
" # Concatenate the encoded DataFrame with the original DataFrame\n",
" data = pd.concat([data.drop('city_name', axis=1), encoded_df], axis=1)\n",
" \n",
@@ -789,7 +795,7 @@
"outputs": [],
"source": [
"# Initialise feature view to retrieve batch data\n",
- "feature_view.init_batch_scoring(training_dataset_version=td_version)\n",
+ "feature_view.init_batch_scoring(1)\n",
"\n",
"# Retrieve batch data\n",
"batch_data = feature_view.get_batch_data()\n",
diff --git a/advanced_tutorials/transformation_functions/pytorch/pytorch_transformation_functions.ipynb b/advanced_tutorials/transformation_functions/pytorch/pytorch_transformation_functions.ipynb
index 8e5a7fda..92ed644f 100644
--- a/advanced_tutorials/transformation_functions/pytorch/pytorch_transformation_functions.ipynb
+++ b/advanced_tutorials/transformation_functions/pytorch/pytorch_transformation_functions.ipynb
@@ -174,7 +174,10 @@
" primary_key=['city_name', 'date'],\n",
" online_enabled=True,\n",
") \n",
- "feature_group.insert(df_original)"
+ "feature_group.insert(\n",
+ " df_original,\n",
+ " write_options={\"wait_for_job\": True},\n",
+ ")"
]
},
{
@@ -411,6 +414,9 @@
" # Create a new DataFrame with the encoded values\n",
" encoded_df = pd.DataFrame(city_encoded, columns=one_hot_encoder.categories_[0])\n",
"\n",
+ " # Reset the index of the original DataFrame\n",
+ " data = data.reset_index(drop=True)\n",
+ "\n",
" # Concatenate the encoded DataFrame with the original DataFrame\n",
" data = pd.concat([data.drop('city_name', axis=1), encoded_df], axis=1)\n",
" \n",
@@ -869,7 +875,7 @@
"outputs": [],
"source": [
"# Initialise feature view to retrieve batch data\n",
- "feature_view.init_batch_scoring(training_dataset_version=td_version)\n",
+ "feature_view.init_batch_scoring(1)\n",
"\n",
"# Retrieve batch data\n",
"batch_data = feature_view.get_batch_data()\n",
diff --git a/advanced_tutorials/transformation_functions/sklearn/sklearn_transformation_functions.ipynb b/advanced_tutorials/transformation_functions/sklearn/sklearn_transformation_functions.ipynb
index b53737aa..13853b98 100644
--- a/advanced_tutorials/transformation_functions/sklearn/sklearn_transformation_functions.ipynb
+++ b/advanced_tutorials/transformation_functions/sklearn/sklearn_transformation_functions.ipynb
@@ -183,7 +183,10 @@
" primary_key=['city_name', 'date'],\n",
" online_enabled=True,\n",
") \n",
- "feature_group.insert(df_original)"
+ "feature_group.insert(\n",
+ " df_original,\n",
+ " write_options={\"wait_for_job\": True},\n",
+ ")"
]
},
{
@@ -619,7 +622,7 @@
"outputs": [],
"source": [
"# Initialise feature view to retrieve batch data\n",
- "feature_view.init_batch_scoring(training_dataset_version=td_version)\n",
+ "feature_view.init_batch_scoring(1)\n",
"\n",
"# Retrieve batch data\n",
"batch_data = feature_view.get_batch_data()\n",
diff --git a/churn/1_churn_feature_pipeline.ipynb b/churn/1_churn_feature_pipeline.ipynb
index 9288d9c9..36752b7e 100644
--- a/churn/1_churn_feature_pipeline.ipynb
+++ b/churn/1_churn_feature_pipeline.ipynb
@@ -86,8 +86,14 @@
"outputs": [],
"source": [
"demography_df = pd.read_csv(\"https://repo.hops.works/dev/davit/churn/demography.csv\")\n",
- "customer_info_df = pd.read_csv(\"https://repo.hops.works/dev/davit/churn/customer_info.csv\")\n",
- "subscriptions_df = pd.read_csv(\"https://repo.hops.works/dev/davit/churn/subscriptions.csv\")"
+ "customer_info_df = pd.read_csv(\n",
+ " \"https://repo.hops.works/dev/davit/churn/customer_info.csv\",\n",
+ " parse_dates=['datetime'],\n",
+ ")\n",
+ "subscriptions_df = pd.read_csv(\n",
+ " \"https://repo.hops.works/dev/davit/churn/subscriptions.csv\",\n",
+ " parse_dates=['datetime'],\n",
+ ")"
]
},
{
@@ -226,7 +232,10 @@
"outputs": [],
"source": [
"# Insert data into feature group\n",
- "customer_info_fg.insert(customer_info_df)"
+ "customer_info_fg.insert(\n",
+ " customer_info_df,\n",
+ " write_options={\"wait_for_job\": True},\n",
+ ")"
]
},
{
@@ -268,7 +277,10 @@
" primary_key=['customerID'],\n",
")\n",
"# Insert data into feature group\n",
- "demography_fg.insert(demography_df)"
+ "demography_fg.insert(\n",
+ " demography_df,\n",
+ " write_options={\"wait_for_job\": True},\n",
+ ")"
]
},
{
@@ -382,7 +394,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.18"
+ "version": "3.9.12"
}
},
"nbformat": 4,
diff --git a/churn/2_churn_training_pipeline.ipynb b/churn/2_churn_training_pipeline.ipynb
index e4e324b6..b046f1f0 100644
--- a/churn/2_churn_training_pipeline.ipynb
+++ b/churn/2_churn_training_pipeline.ipynb
@@ -436,7 +436,6 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "---\n",
"## ⏭️ **Next:** Part 03 \n",
"\n",
"In the following notebook you will use your model for batch inference.\n",
@@ -464,7 +463,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.18"
+ "version": "3.9.12"
}
},
"nbformat": 4,
diff --git a/fraud_batch/1_fraud_batch_feature_pipeline.ipynb b/fraud_batch/1_fraud_batch_feature_pipeline.ipynb
index d4cd4dc2..3fb87b2c 100755
--- a/fraud_batch/1_fraud_batch_feature_pipeline.ipynb
+++ b/fraud_batch/1_fraud_batch_feature_pipeline.ipynb
@@ -268,6 +268,76 @@
"window_aggs_df.datetime = window_aggs_df.datetime.values.astype(np.int64) // 10 ** 6"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "70f69c73",
+ "metadata": {},
+ "source": [
+ "## 👮🏻♂️ Great Expectations "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5bf25c13",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import great_expectations as ge\n",
+ "from great_expectations.core import ExpectationSuite, ExpectationConfiguration\n",
+ "\n",
+ "# Convert the 'trans_df' DataFrame to a Great Expectations DataFrame\n",
+ "ge_trans_df = ge.from_pandas(trans_df)\n",
+ "\n",
+ "# Retrieve the expectation suite associated with the ge DataFrame\n",
+ "expectation_suite_transactions = ge_trans_df.get_expectation_suite()\n",
+ "\n",
+ "# Set the expectation suite name to \"transactions_suite\"\n",
+ "expectation_suite_transactions.expectation_suite_name = \"transactions_suite\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8e420315",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Check binary fraud_label column to be in set [0,1]\n",
+ "expectation_suite_transactions.add_expectation(\n",
+ " ExpectationConfiguration(\n",
+ " expectation_type=\"expect_column_distinct_values_to_be_in_set\",\n",
+ " kwargs={\n",
+ " \"column\": \"fraud_label\",\n",
+ " \"value_set\": [0, 1],\n",
+ " }\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "# Check amount column to be not negative\n",
+ "expectation_suite_transactions.add_expectation(\n",
+ " ExpectationConfiguration(\n",
+ " expectation_type=\"expect_column_values_to_be_between\",\n",
+ " kwargs={\n",
+ " \"column\": \"amount\",\n",
+ " \"min_value\": 0.0,\n",
+ " }\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "# Loop through specified columns ('tid', 'datetime', 'cc_num') and add expectations for null values\n",
+ "for column in ['tid', 'datetime', 'cc_num']:\n",
+ " expectation_suite_transactions.add_expectation(\n",
+ " ExpectationConfiguration(\n",
+ " expectation_type=\"expect_column_values_to_be_null\",\n",
+ " kwargs={\n",
+ " \"column\": column,\n",
+ " \"mostly\": 0.0,\n",
+ " }\n",
+ " )\n",
+ " )"
+ ]
+ },
{
"cell_type": "markdown",
"id": "21be72c5",
@@ -336,6 +406,7 @@
" description=\"Transaction data\",\n",
" primary_key=[\"cc_num\"],\n",
" event_time=\"datetime\",\n",
+ " expectation_suite=expectation_suite_transactions,\n",
")"
]
},
@@ -357,7 +428,10 @@
"outputs": [],
"source": [
"# Insert data into feature group\n",
- "trans_fg.insert(trans_df)"
+ "trans_fg.insert(\n",
+ " trans_df,\n",
+ " write_options={\"wait_for_job\": True},\n",
+ ")"
]
},
{
@@ -431,7 +505,10 @@
"outputs": [],
"source": [
"# Insert data into feature group\n",
- "window_aggs_fg.insert(window_aggs_df)"
+ "window_aggs_fg.insert(\n",
+ " window_aggs_df,\n",
+ " write_options={\"wait_for_job\": True},\n",
+ ")"
]
},
{
@@ -496,7 +573,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.18"
+ "version": "3.10.13"
}
},
"nbformat": 4,
diff --git a/fraud_online/1_fraud_online_feature_pipeline.ipynb b/fraud_online/1_fraud_online_feature_pipeline.ipynb
index 63a98420..13edcf35 100755
--- a/fraud_online/1_fraud_online_feature_pipeline.ipynb
+++ b/fraud_online/1_fraud_online_feature_pipeline.ipynb
@@ -200,6 +200,122 @@
"trans_df.head(3)"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "95d84907",
+ "metadata": {},
+ "source": [
+ "## 👮🏻♂️ Great Expectations "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7f02da7b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import great_expectations as ge\n",
+ "from great_expectations.core import ExpectationSuite, ExpectationConfiguration\n",
+ "\n",
+ "# Convert the 'trans_df' DataFrame to a Great Expectations DataFrame\n",
+ "ge_trans_df = ge.from_pandas(trans_df)\n",
+ "\n",
+ "# Retrieve the expectation suite associated with the ge DataFrame\n",
+ "expectation_suite_transactions = ge_trans_df.get_expectation_suite()\n",
+ "\n",
+ "# Set the expectation suite name to \"transactions_suite\"\n",
+ "expectation_suite_transactions.expectation_suite_name = \"transactions_suite\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0dfd28b6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Check binary fraud_label column to be in set [0,1]\n",
+ "expectation_suite_transactions.add_expectation(\n",
+ " ExpectationConfiguration(\n",
+ " expectation_type=\"expect_column_distinct_values_to_be_in_set\",\n",
+ " kwargs={\n",
+ " \"column\": \"fraud_label\",\n",
+ " \"value_set\": [0, 1],\n",
+ " }\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "# Check amount column to be not negative\n",
+ "expectation_suite_transactions.add_expectation(\n",
+ " ExpectationConfiguration(\n",
+ " expectation_type=\"expect_column_values_to_be_between\",\n",
+ " kwargs={\n",
+ " \"column\": \"amount\",\n",
+ " \"min_value\": 0.0,\n",
+ " }\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "# Loop through specified columns ('tid', 'datetime', 'cc_num') and add expectations for null values\n",
+ "for column in ['tid', 'datetime', 'cc_num']:\n",
+ " expectation_suite_transactions.add_expectation(\n",
+ " ExpectationConfiguration(\n",
+ " expectation_type=\"expect_column_values_to_be_null\",\n",
+ " kwargs={\n",
+ " \"column\": column,\n",
+ " \"mostly\": 0.0,\n",
+ " }\n",
+ " )\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cd0eeba4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Convert the 'profiles_df' DataFrame to a Great Expectations DataFrame\n",
+ "ge_profiles_df = ge.from_pandas(profiles_df)\n",
+ "\n",
+ "# Retrieve the expectation suite associated with the ge DataFrame\n",
+ "expectation_suite_profiles = ge_profiles_df.get_expectation_suite()\n",
+ "\n",
+ "# Set the expectation suite name to \"profiles_suite\"\n",
+ "expectation_suite_profiles.expectation_suite_name = \"profiles_suite\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "08eddf29",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Check binary gender column to be in set ['M', 'F']\n",
+ "expectation_suite_profiles.add_expectation(\n",
+ " ExpectationConfiguration(\n",
+ " expectation_type=\"expect_column_distinct_values_to_be_in_set\",\n",
+ " kwargs={\n",
+ " \"column\": \"gender\",\n",
+ " \"value_set\": ['M', 'F'],\n",
+ " }\n",
+ " )\n",
+ ")\n",
+ "# Check for Nulls\n",
+ "expectation_suite_profiles.add_expectation(\n",
+ " ExpectationConfiguration(\n",
+ " expectation_type=\"expect_column_values_to_be_null\",\n",
+ " kwargs={\n",
+ " \"column\": 'cc_num',\n",
+ " \"mostly\": 0.0,\n",
+ " }\n",
+ " )\n",
+ " )"
+ ]
+ },
{
"cell_type": "markdown",
"id": "1a7e126d",
@@ -277,6 +393,7 @@
" primary_key=['cc_num'],\n",
" event_time='datetime',\n",
" online_enabled=True,\n",
+ " expectation_suite=expectation_suite_transactions,\n",
")"
]
},
@@ -346,6 +463,7 @@
" description=\"Credit card holder demographic data\",\n",
" primary_key=['cc_num'],\n",
" online_enabled=True,\n",
+ " expectation_suite=expectation_suite_profiles,\n",
")\n",
"# Insert data into feature group\n",
"profile_fg.insert(\n",
@@ -463,7 +581,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.18"
+ "version": "3.10.13"
}
},
"nbformat": 4,
diff --git a/loan_approval/1-loan-approval-feature-pipeline.ipynb b/loan_approval/1-loan-approval-feature-pipeline.ipynb
index 7654c220..f111f11c 100644
--- a/loan_approval/1-loan-approval-feature-pipeline.ipynb
+++ b/loan_approval/1-loan-approval-feature-pipeline.ipynb
@@ -914,8 +914,8 @@
" expectation_type=\"expect_column_values_to_be_between\",\n",
" kwargs={\n",
" \"column\":\"int_rate\", \n",
- " \"min_value\":\"-2.0\",\n",
- " \"max_value\":\"2000.0\",\n",
+ " \"min_value\":-2.0,\n",
+ " \"max_value\":2000.0,\n",
" }\n",
" )\n",
")"
@@ -1017,7 +1017,10 @@
"metadata": {},
"outputs": [],
"source": [
- "loans_fg.insert(loans_df)"
+ "loans_fg.insert(\n",
+ " loans_df,\n",
+ " write_options={\"wait_for_job\": True},\n",
+ ")"
]
},
{
@@ -1027,7 +1030,10 @@
"metadata": {},
"outputs": [],
"source": [
- "applicants_fg.insert(applicants_df)"
+ "applicants_fg.insert(\n",
+ " applicants_df,\n",
+ " write_options={\"wait_for_job\": True},\n",
+ ")"
]
},
{
@@ -1109,7 +1115,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.18"
+ "version": "3.10.13"
},
"papermill": {
"default_parameters": {},
diff --git a/quickstart.ipynb b/quickstart.ipynb
index 0bba9504..d9cb9546 100644
--- a/quickstart.ipynb
+++ b/quickstart.ipynb
@@ -258,7 +258,7 @@
"df_4h_mavg = df_4h_mavg.sort_index()\n",
"\n",
"# Moving standard deviation of transaction volume.\n",
- "df_4h_std = pd.DataFrame(cc_group.mean())\n",
+ "df_4h_std = pd.DataFrame(cc_group.std())\n",
"df_4h_std.columns = [\"trans_volume_mstd\", \"datetime\"]\n",
"df_4h_std = df_4h_std.reset_index(level=[\"cc_num\"])\n",
"df_4h_std = df_4h_std.drop(columns=[\"cc_num\", \"datetime\"])\n",
@@ -266,8 +266,8 @@
"df_4h_std = df_4h_std.sort_index()\n",
"window_aggs_df = df_4h_std.merge(df_4h_mavg, left_index=True, right_index=True)\n",
"\n",
- "# Moving average of transaction frequency.\n",
- "df_4h_count = pd.DataFrame(cc_group.mean())\n",
+ "# Moving transaction frequency.\n",
+ "df_4h_count = pd.DataFrame(cc_group.count())\n",
"df_4h_count.columns = [\"trans_freq\", \"datetime\"]\n",
"df_4h_count = df_4h_count.reset_index(level=[\"cc_num\"])\n",
"df_4h_count = df_4h_count.drop(columns=[\"cc_num\", \"datetime\"])\n",
@@ -1137,7 +1137,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.18"
+ "version": "3.9.12"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
diff --git a/scripts/cleanup-tutorials.ipynb b/scripts/cleanup-tutorials.ipynb
index c0272338..805c040e 100644
--- a/scripts/cleanup-tutorials.ipynb
+++ b/scripts/cleanup-tutorials.ipynb
@@ -233,52 +233,6 @@
" print(f\"Couldn't delete {fg} FG\")"
]
},
- {
- "cell_type": "markdown",
- "id": "b10b786c",
- "metadata": {},
- "source": [
- "## Cleanup Citibike"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "eec9cdda",
- "metadata": {},
- "outputs": [],
- "source": [
- " \n",
- "# Delete a model\n",
- "models=[\"citibike_xgb_model\"]\n",
- "for model in models:\n",
- " try:\n",
- " model = mr.get_model(f\"{model}\", version=1)\n",
- " model.delete()\n",
- " except:\n",
- " print(f\"Couldn't delete {model} model\")\n",
- "\n",
- "# Delete the feature_views before the feature groups\n",
- "\n",
- "fvs=[\"citibike_fv\"]\n",
- "\n",
- "for fv in fvs:\n",
- " try:\n",
- " feature_view = fs.get_feature_view(name=f\"{fv}\", version=1)\n",
- " feature_view.delete()\n",
- " except:\n",
- " print(f\"Couldn't delete {fv} feature view\")\n",
- "\n",
- "fgs=[\"citibike_usage\", \"citibike_stations_info\", \"us_holidays\", \"meteorological_measurements\"]\n",
- "\n",
- "for fg in fgs:\n",
- " try:\n",
- " fg = fs.get_feature_group(name=f\"{fg}\", version=1)\n",
- " fg.delete()\n",
- " except:\n",
- " print(f\"Couldn't delete {fg} FG\")"
- ]
- },
{
"cell_type": "markdown",
"id": "5c68d708",
diff --git a/scripts/test-notebooks.sh b/scripts/test-notebooks.sh
index 1a5d1e98..99e544ea 100755
--- a/scripts/test-notebooks.sh
+++ b/scripts/test-notebooks.sh
@@ -6,7 +6,6 @@ set -e
jupyter nbconvert --to notebook --execute scripts/cleanup-tutorials.ipynb
# Loan Approval
-jupyter nbconvert --to notebook --execute loan_approval/0-loan-approval-eda.ipynb
jupyter nbconvert --to notebook --execute loan_approval/1-loan-approval-feature-pipeline.ipynb
jupyter nbconvert --to notebook --execute loan_approval/2-loan-approval-training-pipeline.ipynb
jupyter nbconvert --to notebook --execute loan_approval/3-loan-approval-batch-inference.ipynb
@@ -32,9 +31,11 @@ jupyter nbconvert --to notebook --execute churn/1_churn_feature_pipeline.ipynb
jupyter nbconvert --to notebook --execute churn/2_churn_training_pipeline.ipynb
jupyter nbconvert --to notebook --execute churn/3_churn_batch_inference.ipynb
+# Remove any FGs, FVs, Models, Deployments
+jupyter nbconvert --to notebook --execute scripts/cleanup-tutorials.ipynb
+
# Great Expectations
jupyter nbconvert --to notebook --execute integrations/great_expectations/Great_Expectations_Hopsworks_Concepts.ipynb
-jupyter nbconvert --to notebook --execute integrations/great_expectations/fraud_batch_data_validation.ipynb
# Remove any FGs, FVs, Models, Deployments
jupyter nbconvert --to notebook --execute scripts/cleanup-tutorials.ipynb
@@ -42,15 +43,6 @@ jupyter nbconvert --to notebook --execute scripts/cleanup-tutorials.ipynb
# Advanced Tutorials
cd advanced_tutorials
-# Citibike
-jupyter nbconvert --to notebook --execute citibike/1_citibike_feature_backfill.ipynb
-jupyter nbconvert --to notebook --execute citibike/2_citibike_feature_pipeline.ipynb
-jupyter nbconvert --to notebook --execute citibike/3_citibike_training_pipeline.ipynb
-jupyter nbconvert --to notebook --execute citibike/4_citibike_batch_inference.ipynb
-
-# Remove any FGs, FVs, Models, Deployments
-jupyter nbconvert --to notebook --execute ../scripts/cleanup-tutorials.ipynb
-
# Credit Scores
jupyter nbconvert --to notebook --execute credit_scores/1_credit_scores_feature_backfill.ipynb
jupyter nbconvert --to notebook --execute credit_scores/2_credit_scores_feature_pipeline.ipynb
@@ -60,12 +52,6 @@ jupyter nbconvert --to notebook --execute credit_scores/4_credit_scores_batch_in
# Remove any FGs, FVs, Models, Deployments
jupyter nbconvert --to notebook --execute ../scripts/cleanup-tutorials.ipynb
-# Electricity
-jupyter nbconvert --to notebook --execute electricity/1_electricity_feature_backfill.ipynb
-jupyter nbconvert --to notebook --execute electricity/2_electricity_feature_pipeline.ipynb
-jupyter nbconvert --to notebook --execute electricity/3_electricity_training_pipeline.ipynb
-jupyter nbconvert --to notebook --execute electricity/4_electricity_batch_inference.ipynb
-
# Nyc Taxi Fares
jupyter nbconvert --to notebook --execute nyc_taxi_fares/1_nyc_taxi_fares_feature_backfill.ipynb
jupyter nbconvert --to notebook --execute nyc_taxi_fares/2_nyc_taxi_fares_feature_pipeline.ipynb
@@ -75,6 +61,15 @@ jupyter nbconvert --to notebook --execute nyc_taxi_fares/4_nyc_taxi_fares_batch_
# Remove any FGs, FVs, Models, Deployments
jupyter nbconvert --to notebook --execute ../scripts/cleanup-tutorials.ipynb
+# Electricity
+jupyter nbconvert --to notebook --execute electricity/1_electricity_feature_backfill.ipynb
+jupyter nbconvert --to notebook --execute electricity/2_electricity_feature_pipeline.ipynb
+jupyter nbconvert --to notebook --execute electricity/3_electricity_training_pipeline.ipynb
+jupyter nbconvert --to notebook --execute electricity/4_electricity_batch_inference.ipynb
+
+# Remove any FGs, FVs, Models, Deployments
+jupyter nbconvert --to notebook --execute ../scripts/cleanup-tutorials.ipynb
+
# Go to transformation_functions folder
cd transformation_functions