6 changes: 3 additions & 3 deletions .github/workflows/test_tutorials.yml
@@ -26,7 +26,7 @@ jobs:
- name: Execute Python workflows from bash script
env:
HOPSWORKS_API_KEY: ${{ secrets.HOPSWORKS_API_KEY_38 }}
WEATHER_API_KEY: ${{ secrets.WEATHER_API_KEY }}
WEATHER_API_KEY: ${{ secrets.WEATHER_API_KEY38 }}
run: ./scripts/test-notebooks.sh

test_tutorials39:
@@ -49,7 +49,7 @@ jobs:
- name: Execute Python workflows from bash script
env:
HOPSWORKS_API_KEY: ${{ secrets.HOPSWORKS_API_KEY_39 }}
WEATHER_API_KEY: ${{ secrets.WEATHER_API_KEY }}
WEATHER_API_KEY: ${{ secrets.WEATHER_API_KEY39 }}
run: ./scripts/test-notebooks.sh

test_tutorials310:
@@ -72,5 +72,5 @@ jobs:
- name: Execute Python workflows from bash script
env:
HOPSWORKS_API_KEY: ${{ secrets.HOPSWORKS_API_KEY_310 }}
WEATHER_API_KEY: ${{ secrets.WEATHER_API_KEY }}
WEATHER_API_KEY: ${{ secrets.WEATHER_API_KEY310 }}
run: ./scripts/test-notebooks.sh
17 changes: 13 additions & 4 deletions advanced_tutorials/citibike/1_citibike_feature_backfill.ipynb
@@ -514,7 +514,10 @@
},
"outputs": [],
"source": [
"citibike_usage_fg.insert(df_enhanced)"
"citibike_usage_fg.insert(\n",
" df_enhanced,\n",
" write_options={\"wait_for_job\": True},\n",
")"
]
},
{
@@ -543,7 +546,10 @@
},
"outputs": [],
"source": [
"citibike_stations_info_fg.insert(df_stations_info)"
"citibike_stations_info_fg.insert(\n",
" df_stations_info,\n",
" write_options={\"wait_for_job\": True},\n",
")"
]
},
{
@@ -569,7 +575,10 @@
"metadata": {},
"outputs": [],
"source": [
"us_holidays_fg.insert(df_holidays)"
"us_holidays_fg.insert(\n",
" df_holidays,\n",
" write_options={\"wait_for_job\": True},\n",
")"
]
},
{
@@ -634,7 +643,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
"version": "3.9.12"
}
},
"nbformat": 4,
Collaborator:
I see that you have performed array slicing on the read dataframes. From my understanding, this was done so that the query in the training pipeline would finish executing (when I tried with the entire dataframes, that query never finished). But maybe we can modify and optimize the query so that it runs on the full data instead of slicing the dataframes.
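
For reference, a minimal sketch of the trade-off being discussed, using the `bureaus.csv` read from this diff: the `[:5_000]` slice parses the whole file and then discards most rows, whereas pandas' standard `nrows` parameter caps the rows up front. This illustrates the workaround only, not the query optimization proposed above.

```python
import pandas as pd

URL = "https://repo.hops.works/dev/davit/credit_scores/bureaus.csv"

# Pattern used in this PR: parse the entire CSV, then keep 5,000 rows.
bureaus_df = pd.read_csv(URL, parse_dates=["datetime"])[:5_000]

# Lighter equivalent: stop reading after 5,000 rows instead of
# slicing after the fact.
bureaus_df = pd.read_csv(URL, parse_dates=["datetime"], nrows=5_000)
```

Either way, capping the data only hides the symptom; optimizing the training-pipeline query itself would make the slicing unnecessary.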

@@ -80,8 +80,11 @@
"metadata": {},
"outputs": [],
"source": [
"applications_df = pd.read_csv(\"https://repo.hops.works/dev/davit/credit_scores/applications.csv\")\n",
"applications_df.head()"
"applications_df = pd.read_csv(\n",
" \"https://repo.hops.works/dev/davit/credit_scores/applications.csv\",\n",
" parse_dates=['datetime'],\n",
")\n",
"applications_df.head(3)"
]
},
{
@@ -111,7 +114,9 @@
"metadata": {},
"outputs": [],
"source": [
"bureau_balances_df = pd.read_csv('https://repo.hops.works/dev/davit/credit_scores/bureau_balances.csv')\n",
"bureau_balances_df = pd.read_csv(\n",
" 'https://repo.hops.works/dev/davit/credit_scores/bureau_balances.csv',\n",
")[:5_000]\n",
"bureau_balances_df.head(3)"
]
},
@@ -142,7 +147,10 @@
"metadata": {},
"outputs": [],
"source": [
"bureaus_df = pd.read_csv('https://repo.hops.works/dev/davit/credit_scores/bureaus.csv')\n",
"bureaus_df = pd.read_csv(\n",
" 'https://repo.hops.works/dev/davit/credit_scores/bureaus.csv',\n",
" parse_dates=['datetime'],\n",
")[:5_000]\n",
"bureaus_df.head(3)"
]
},
@@ -173,7 +181,9 @@
"metadata": {},
"outputs": [],
"source": [
"credit_card_balances_df = pd.read_csv('https://repo.hops.works/dev/davit/credit_scores/credit_card_balances.csv')\n",
"credit_card_balances_df = pd.read_csv(\n",
" 'https://repo.hops.works/dev/davit/credit_scores/credit_card_balances.csv',\n",
")[:5_000]\n",
"credit_card_balances_df.head(3)"
]
},
@@ -204,7 +214,10 @@
"metadata": {},
"outputs": [],
"source": [
"installment_payments_df = pd.read_csv('https://repo.hops.works/dev/davit/credit_scores/installment_payments.csv')\n",
"installment_payments_df = pd.read_csv(\n",
" 'https://repo.hops.works/dev/davit/credit_scores/installment_payments.csv',\n",
" parse_dates=['datetime'],\n",
")[:5_000]\n",
"installment_payments_df.head(3)"
]
},
@@ -237,7 +250,9 @@
"metadata": {},
"outputs": [],
"source": [
"pos_cash_balances_df = pd.read_csv('https://repo.hops.works/dev/davit/credit_scores/pos_cash_balances.csv')\n",
"pos_cash_balances_df = pd.read_csv(\n",
" 'https://repo.hops.works/dev/davit/credit_scores/pos_cash_balances.csv'\n",
")[:5_000]\n",
"pos_cash_balances_df.head(3)"
]
},
@@ -270,7 +285,10 @@
"metadata": {},
"outputs": [],
"source": [
"previous_applications_df = pd.read_csv('https://repo.hops.works/dev/davit/credit_scores/previous_applications.csv')\n",
"previous_applications_df = pd.read_csv(\n",
" 'https://repo.hops.works/dev/davit/credit_scores/previous_applications.csv',\n",
" parse_dates=['datetime'],\n",
")[:5_000]\n",
"previous_applications_df.head(3)"
]
},
@@ -890,7 +908,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
"version": "3.9.12"
}
},
"nbformat": 4,
@@ -37,7 +37,7 @@
"metadata": {},
"outputs": [],
"source": [
"!pip install xgboost"
"!pip install xgboost --quiet"
]
},
{
@@ -619,7 +619,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
"version": "3.9.12"
}
},
"nbformat": 4,
@@ -275,7 +275,10 @@
},
"outputs": [],
"source": [
"meteorological_measurements_fg.insert(meteorological_measurements_df)"
"meteorological_measurements_fg.insert(\n",
" meteorological_measurements_df,\n",
" write_options={\"wait_for_job\": True},\n",
")"
]
},
{
@@ -306,7 +309,10 @@
},
"outputs": [],
"source": [
"electricity_prices_fg.insert(electricity_prices_df)"
"electricity_prices_fg.insert(\n",
" electricity_prices_df,\n",
" write_options={\"wait_for_job\": True},\n",
")"
]
},
{
@@ -376,7 +382,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
"version": "3.9.12"
}
},
"nbformat": 4,
@@ -181,7 +181,10 @@
" statistics_config=True,\n",
")\n",
"\n",
"rides_fg.insert(df_rides)"
"rides_fg.insert(\n",
" df_rides,\n",
" write_options={\"wait_for_job\": True},\n",
")"
]
},
{
Expand Down Expand Up @@ -243,7 +246,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
"version": "3.9.12"
}
},
"nbformat": 4,
@@ -170,7 +170,7 @@
") \n",
"feature_group.insert(\n",
" df_original, \n",
" wait=True,\n",
" write_options={\"wait_for_job\": True},\n",
")"
]
},
@@ -172,7 +172,10 @@
" primary_key=['city_name', 'date'],\n",
" online_enabled=True,\n",
") \n",
"feature_group.insert(df_original)"
"feature_group.insert(\n",
" df_original,\n",
" write_options={\"wait_for_job\": True},\n",
")"
]
},
{
@@ -409,6 +412,9 @@
" # Create a new DataFrame with the encoded values\n",
" encoded_df = pd.DataFrame(city_encoded, columns=one_hot_encoder.categories_[0])\n",
"\n",
" # Reset the index of the original DataFrame\n",
" data = data.reset_index(drop=True)\n",
"\n",
" # Concatenate the encoded DataFrame with the original DataFrame\n",
" data = pd.concat([data.drop('city_name', axis=1), encoded_df], axis=1)\n",
" \n",
Expand Down Expand Up @@ -789,7 +795,7 @@
"outputs": [],
"source": [
"# Initialise feature view to retrieve batch data\n",
"feature_view.init_batch_scoring(training_dataset_version=td_version)\n",
"feature_view.init_batch_scoring(1)\n",
"\n",
"# Retrieve batch data\n",
"batch_data = feature_view.get_batch_data()\n",
@@ -174,7 +174,10 @@
" primary_key=['city_name', 'date'],\n",
" online_enabled=True,\n",
") \n",
"feature_group.insert(df_original)"
"feature_group.insert(\n",
" df_original,\n",
" write_options={\"wait_for_job\": True},\n",
")"
]
},
{
@@ -411,6 +414,9 @@
" # Create a new DataFrame with the encoded values\n",
" encoded_df = pd.DataFrame(city_encoded, columns=one_hot_encoder.categories_[0])\n",
"\n",
" # Reset the index of the original DataFrame\n",
" data = data.reset_index(drop=True)\n",
"\n",
" # Concatenate the encoded DataFrame with the original DataFrame\n",
" data = pd.concat([data.drop('city_name', axis=1), encoded_df], axis=1)\n",
" \n",
Expand Down Expand Up @@ -869,7 +875,7 @@
"outputs": [],
"source": [
"# Initialise feature view to retrieve batch data\n",
"feature_view.init_batch_scoring(training_dataset_version=td_version)\n",
"feature_view.init_batch_scoring(1)\n",
"\n",
"# Retrieve batch data\n",
"batch_data = feature_view.get_batch_data()\n",
@@ -183,7 +183,10 @@
" primary_key=['city_name', 'date'],\n",
" online_enabled=True,\n",
") \n",
"feature_group.insert(df_original)"
"feature_group.insert(\n",
" df_original,\n",
" write_options={\"wait_for_job\": True},\n",
")"
]
},
{
@@ -619,7 +622,7 @@
"outputs": [],
"source": [
"# Initialise feature view to retrieve batch data\n",
"feature_view.init_batch_scoring(training_dataset_version=td_version)\n",
"feature_view.init_batch_scoring(1)\n",
"\n",
"# Retrieve batch data\n",
"batch_data = feature_view.get_batch_data()\n",
22 changes: 17 additions & 5 deletions churn/1_churn_feature_pipeline.ipynb
@@ -86,8 +86,14 @@
"outputs": [],
"source": [
"demography_df = pd.read_csv(\"https://repo.hops.works/dev/davit/churn/demography.csv\")\n",
"customer_info_df = pd.read_csv(\"https://repo.hops.works/dev/davit/churn/customer_info.csv\")\n",
"subscriptions_df = pd.read_csv(\"https://repo.hops.works/dev/davit/churn/subscriptions.csv\")"
"customer_info_df = pd.read_csv(\n",
" \"https://repo.hops.works/dev/davit/churn/customer_info.csv\",\n",
" parse_dates=['datetime'],\n",
")\n",
"subscriptions_df = pd.read_csv(\n",
" \"https://repo.hops.works/dev/davit/churn/subscriptions.csv\",\n",
" parse_dates=['datetime'],\n",
")"
]
},
{
@@ -226,7 +232,10 @@
"outputs": [],
"source": [
"# Insert data into feature group\n",
"customer_info_fg.insert(customer_info_df)"
"customer_info_fg.insert(\n",
" customer_info_df,\n",
" write_options={\"wait_for_job\": True},\n",
")"
]
},
{
@@ -268,7 +277,10 @@
" primary_key=['customerID'],\n",
")\n",
"# Insert data into feature group\n",
"demography_fg.insert(demography_df)"
"demography_fg.insert(\n",
" demography_df,\n",
" write_options={\"wait_for_job\": True},\n",
")"
]
},
{
@@ -382,7 +394,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
"version": "3.9.12"
}
},
"nbformat": 4,
3 changes: 1 addition & 2 deletions churn/2_churn_training_pipeline.ipynb
@@ -436,7 +436,6 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"## <span style=\"color:#ff5f27;\">⏭️ **Next:** Part 03 </span>\n",
"\n",
"In the following notebook you will use your model for batch inference.\n",
@@ -464,7 +463,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
"version": "3.9.12"
}
},
"nbformat": 4,