Skip to content

Commit

Permalink
Merge branch 'geearl/6323-large-tables' of https://github.com/microso…
Browse files Browse the repository at this point in the history
…ft/PubSec-Info-Assistant into geearl/6323-large-tables
  • Loading branch information
georearl committed Jan 17, 2024
2 parents 4fdc07d + 0f8b348 commit c6792d7
Show file tree
Hide file tree
Showing 16 changed files with 155 additions and 96 deletions.
7 changes: 6 additions & 1 deletion app/enrichment/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,8 +335,13 @@ def poll_queue() -> None:
i = 0

for chunk in chunks:
<<<<<<< HEAD
statusLog.update_document_state( blob_path, f"Indexing {i+1}/{len(chunks)}", State.INDEXING)
# statusLog.update_document_state( blob_path, f"Indexing {i+1}/{len(chunks)}", State.PROCESSING
=======

statusLog.update_document_state( blob_path, f"Indexing {i+1}/{len(chunks)}")
>>>>>>> c3dca962f39fa8834aa70895953aef3409b92870
# open the file and extract the content
blob_path_plus_sas = utilities_helper.get_blob_and_sas(
ENV["AZURE_BLOB_STORAGE_CONTAINER"] + '/' + chunk.name)
Expand Down Expand Up @@ -430,7 +435,7 @@ def poll_queue() -> None:
backoff = random.randint(
int(ENV["EMBEDDING_REQUEUE_BACKOFF"]) * requeue_count, max_seconds)
queue_client.send_message(message_string, visibility_timeout=backoff)
statusLog.upsert_document(blob_path, f'Message requeued to embeddings queue, attempt {str(requeue_count)}. Visible in {str(backoff)} seconds. Error: {str(error)}.',
statusLog.upsert_document(blob_path, f'Message requed to embeddings queue, attempt {str(requeue_count)}. Visible in {str(backoff)} seconds. Error: {str(error)}.',
StatusClassification.ERROR,
State.QUEUED, additional_info={"Queue_Item": message_string })
else:
Expand Down
6 changes: 1 addition & 5 deletions app/frontend/src/api/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,10 @@ export type GetUploadStatusRequest = {
export const enum FileState {
All = "ALL",
Processing = "PROCESSING",
Indexing = "INDEXING",
Skipped = "SKIPPED",
Queued = "QUEUED",
Complete = "COMPLETE",
Error = "ERROR",
THROTTLED = "THROTTLED",
UPLOADED = "UPLOADED"
Error = "ERROR"
}


Expand Down Expand Up @@ -138,7 +135,6 @@ export const enum StatusLogClassification {
// shared code (functions/shared_code/status_log.py)
export const enum StatusLogState {
Processing = "Processing",
Indexing = "Indexing",
Skipped = "Skipped",
Queued = "Queued",
Complete = "Complete",
Expand Down
19 changes: 15 additions & 4 deletions app/frontend/src/components/FileStatus/DocumentsDetailList.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -128,20 +128,28 @@ export const DocumentsDetailList = ({ items, onFilesSorted}: Props) => {
ariaLabel: 'Column operations for state, Press to sort by states',
onColumnClick: onColumnClick,
data: 'string',
<<<<<<< HEAD
onRender: (item: IDocument) => (
<TooltipHost content={`${item.state} `}>
<span>{item.state}</span>
{item.state === 'Error' && <a href="javascript:void(0);" onClick={() => retryErroredFile(item)}> Retry File</a>}
</TooltipHost>
),
=======
onRender: (item: IDocument) => (
<TooltipHost content={`${item.state_description} `}>
<span>{item.state}</span>
</TooltipHost>
),
>>>>>>> c3dca962f39fa8834aa70895953aef3409b92870
isPadded: true,
},
{
key: 'column4',
name: 'Submitted On',
fieldName: 'upload_timestamp',
minWidth: 90,
maxWidth: 120,
minWidth: 70,
maxWidth: 90,
isResizable: true,
isCollapsible: true,
ariaLabel: 'Column operations for submitted on date, Press to sort by submitted date',
Expand All @@ -156,8 +164,8 @@ export const DocumentsDetailList = ({ items, onFilesSorted}: Props) => {
key: 'column5',
name: 'Last Updated',
fieldName: 'modified_timestamp',
minWidth: 90,
maxWidth: 120,
minWidth: 70,
maxWidth: 90,
isResizable: true,
isSorted: true,
isSortedDescending: false,
Expand All @@ -171,6 +179,7 @@ export const DocumentsDetailList = ({ items, onFilesSorted}: Props) => {
return <span>{item.modified_timestamp}</span>;
},
},
<<<<<<< HEAD
{
key: 'column6',
name: 'Status Detail',
Expand All @@ -188,6 +197,8 @@ export const DocumentsDetailList = ({ items, onFilesSorted}: Props) => {
</TooltipHost>
),
}
=======
>>>>>>> c3dca962f39fa8834aa70895953aef3409b92870
]);

return (
Expand Down
3 changes: 0 additions & 3 deletions app/frontend/src/components/FileStatus/FileStatus.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,8 @@ const dropdownFileStateOptions = [
{ key: FileState.Complete, text: 'Completed' },
{ key: FileState.Error, text: 'Error' },
{ key: FileState.Processing, text: 'Processing' },
{ key: FileState.Indexing, text: 'Indexing' },
{ key: FileState.Queued, text: 'Queued' },
{ key: FileState.Skipped, text: 'Skipped'},
{ key: FileState.UPLOADED, text: 'Uploaded'},
{ key: FileState.THROTTLED, text: 'Throttled'},
];

interface Props {
Expand Down
4 changes: 4 additions & 0 deletions docs/costestimator.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ The Azure pricing calculator helps estimate costs by considering the amount of d
---
### Azure Services

<<<<<<< HEAD
The following list of Azure Services will be deployed for IA Accelerator, version 1.0:
=======
The following list of Azure Services will be deployed for IA Accelerator, version 0.4 delta:
>>>>>>> 3faba43c904db668697c16df33a11c67629bd490
- App Service [:link:](https://azure.microsoft.com/en-ca/pricing/details/app-service/linux/)
- Azure Function(App Service plan) [:link:](https://azure.microsoft.com/en-ca/pricing/details/functions/#pricing)
Expand Down
16 changes: 16 additions & 0 deletions docs/deployment/autoscale_sku.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,15 +104,31 @@ More information can be found [here.](https://azure.microsoft.com/en-us/pricing/
- **Capacity:** `1`

### Enrichment Message Dequeue Parameter
<<<<<<< HEAD
<<<<<<< HEAD
There exists a property that can be set in the local.env file called `DEQUEUE_MESSAGE_BATCH_SIZE` and is defaulted in the `infra/main.bicep` and `app/enrichment/app.py` to the value of **3**. This means the app will process 3 messages from the queue at a time. This is found to be the most optimal with the existing configuration but can be increased if you also increase the enrichment app service SKU. It is important to note that there will be issues if it is increased more than the app service SKU can handle.
=======
There exists a property that can be set in the local.env file called `DEQUEUE_MESSAGE_BATCH_SIZE` and is defaulted in the `infra/main.bicep` and `app/enrichment/app.py` to the value of **3**. This means the app will process 3 messages from the queue at a time. This is found to be the most optimal with the existing configuration but can be increased if you also increase the enrichment app service SKU. It is important to note that there will be issues if it is increased more than the app service SKU can handle.
>>>>>>> c3dca962f39fa8834aa70895953aef3409b92870
=======
There exists a property that can be set in the local.env file called `DEQUEUE_MESSAGE_BATCH_SIZE` and is defaulted in the `infra/main.bicep` and `app/enrichment/app.py` to the value of **3**. This means the app will process 3 messages from the queue at a time. This is found to be the most optimal with the existing configuration but can be increased if you also increase the enrichment app service SKU. It is important to note that there will be issues if it is increased more than the app service SKU can handle.
>>>>>>> 3faba43c904db668697c16df33a11c67629bd490
### Customization

To customize the App Service Plans SKU settings, modify the `sku` parameters in the specified Bicep file and run the `make deploy` or `make infrastructure` command.

This can also be adjusted in the Azure Portal.

<<<<<<< HEAD
<<<<<<< HEAD
**Note:** Adjusting the scale or Tier can cause outages until the redeployment occurs.
=======
**Note:** Adjusting the scale or Tier can cause outages until the redeployment occurs.
>>>>>>> c3dca962f39fa8834aa70895953aef3409b92870
=======
**Note:** Adjusting the scale or Tier can cause outages until the redeployment occurs.
>>>>>>> 3faba43c904db668697c16df33a11c67629bd490

### Steps to Scale Up:
Expand Down
17 changes: 17 additions & 0 deletions docs/deployment/statusdb_cosmos.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,31 @@
<<<<<<< HEAD
=======
# Investigating File Processing Errors in CosmosDB Logs

>>>>>>> c3dca962f39fa8834aa70895953aef3409b92870
## Navigating to Azure Resource Group and Opening Cosmos Account Resource

>1. Log in to the Azure portal.
>2. In the left-hand menu, click on "Resource groups".
>3. Select the desired resource group from the list.
>4. In the resource group overview, locate and click on the Cosmos account resource.
<<<<<<< HEAD
![Alt text](/docs/images/cosmos_account.png)
=======
![CosmosDB Azure Portal Blade View](/docs/images/cosmos_account.png)
>>>>>>> c3dca962f39fa8834aa70895953aef3409b92870
## Accessing Data Explorer

>1. Once you are on the Cosmos account resource page, navigate to the left-hand menu.
>2. Under the "Settings" section, click on "Data Explorer".
<<<<<<< HEAD
![Alt text](/docs/images/data_explorer.png)
=======
![CosmosDB Azure Portal Data Explorer View](/docs/images/data_explorer.png)
>>>>>>> c3dca962f39fa8834aa70895953aef3409b92870
## Expanding the Database

Expand All @@ -29,4 +42,8 @@
>1. Once you are on the "items" table page, you will see a list of items (documents) in the table.
>2. Each item represents a file being processed.
>3. Look for the "status" field to see the status of each file being processed.
<<<<<<< HEAD
>4. If there are any associated errors, they will be displayed in the "errors" field.
=======
>4. If there are any associated errors, they will be displayed in the "errors" field.
>>>>>>> c3dca962f39fa8834aa70895953aef3409b92870
19 changes: 18 additions & 1 deletion docs/deployment/troubleshooting.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,25 @@
# Troubleshooting

<<<<<<< HEAD

## Infrastructure Deployment
Please see the sections below for troubleshooting the solution, depending on which area of the process is causing the issue.

=======
## Infrastructure Deployment

Please see the sections below for troubleshooting the solution, depending on which area of the process is causing the issue.
>>>>>>> c3dca962f39fa8834aa70895953aef3409b92870
If you are having issues with infrastructure deployment then the errors should be apparent in the make deploy output.

You can also navigate to the Subscription in Azure portal, click the option for "Deployments" and find your deployment and related details and errors there.

<<<<<<< HEAD
Take the full error and logs and post them to this github repo Issues tab with your configuration used.
=======
Take the full error and logs and post them to this GitHub repo Issues tab with your configuration used.
>>>>>>> c3dca962f39fa8834aa70895953aef3409b92870
More info can be found [here](https://learn.microsoft.com/en-us/azure/azure-resource-manager/templates/deployment-history?tabs=azure-portal)

Expand All @@ -20,12 +30,19 @@ Check out this section for more details [CosmosDB Usage](/docs/deployment/status

For more information on how to use Cosmos, look [here](https://learn.microsoft.com/en-us/azure/cosmos-db/data-explorer).

<<<<<<< HEAD

=======
>>>>>>> c3dca962f39fa8834aa70895953aef3409b92870
## Log Analytics Workbook

WebApp logs, Function logs and App Service logs can be found in Log Analytics Workspace.

This solution includes a workbook with default queries that can be used to explore and troubleshoot further.
Check out the section [Workbook Usage](/docs/deployment/worbook_usage.md).

For more information on log analytics and kusto query language, look [here](https://learn.microsoft.com/en-us/azure/azure-monitor/logs/queries?tabs=groupby).
<<<<<<< HEAD
For more information on log analytics and kusto query language, look [here](https://learn.microsoft.com/en-us/azure/azure-monitor/logs/queries?tabs=groupby).
=======
For more information on log analytics and Kusto query language, look [here](https://learn.microsoft.com/en-us/azure/azure-monitor/logs/queries?tabs=groupby).
>>>>>>> c3dca962f39fa8834aa70895953aef3409b92870
Binary file removed docs/images/frontend-watch.png
Binary file not shown.
Binary file removed docs/images/vite-debug.png
Binary file not shown.
Binary file removed docs/images/webapp-backend.png
Binary file not shown.
23 changes: 17 additions & 6 deletions docs/knownissues.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@ Error: This subscription cannot create CognitiveServices until you agree to Resp
***IMPORTANT:*** In some instances, an older subscription that has already had the "Responsible AI Notice" accepted for the old "Cognitive Services multi-service account" may require this process to be repeated for the new "Azure AI Service".
<<<<<<< HEAD
=======
***IMPORTANT:*** In some instances, an older subscription that has already had the "Responsible AI Notice" accepted for the old "Cognitive Services multi-service account" may require this process to be repeated for the new "Azure AI Service".
>>>>>>> 3faba43c904db668697c16df33a11c67629bd490
---
## Error "Your administrator has configured the application infoasst_web_access_xxxxx to block users..."
Expand Down Expand Up @@ -105,25 +108,32 @@ InvalidApiSetId - The account type 'OpenAI' is either invalid or unavailable in
### Solution:
Deploy Azure OpenAI Service only in the supported regions. Review the local.env file and update the location as per supported models and [region availability](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#model-summary-table-and-region-availability)
## Error: jq parse error: Expected value before ','
If you see a jq parse error while doing deployments, it means one of the makefile scripts that extracts environment variables is failing to find a value it expects to be there. The related files are the main.parameters.json file, which holds the variables from the bicep output of the infrastructure creation, and the env file used during build and deploy time.
### Solution:
To resolve, carefully check your deployment .env file for any missing but required values. There are rare times when ARM has issues and output values are not written. In which case simply double check your configuration and rerun the ```make deploy``` and/or ```make extract-env``` command so that the bicep outputs can be written again.
## Error: Creation of new Media Service accounts are not allowed as the resource has been deprecated
## Error:
### Solution:
Media Services is scheduled for retirement on 30th June 2024. This is the [guide](https://learn.microsoft.com/en-us/azure/media-services/latest/azure-media-services-retirement). On deeper investigation Video Indexer, which is the service we use that sits on top of Media Services, will switch away from this before the end date....
If you encounter an error similar to the one below that indicates your device must be managed.
```
Is Azure Video Indexer being retired?
No, Azure Video Indexer isn't part of the Media Services retirement. Although Video Indexer currently relies on a Media Services account as part of its workflow, this dependency will be eliminated before Media Services is retired on June 30, 2024. See the following for more [impact of Media Services retirement for Video Indexer](https://aka.ms/vi-ams-retirement-announcement)
ERROR: AADSTS530003: Your device is required to be managed to access this resource.
Trace ID: xxxxxxxx-xxxx-xxxx-xxxxxxxx
Correlation ID: xxxxxxxx-xxxx-xxxx-xxxxxxxx
Timestamp: 2023-10-05 19:54:05Z
Interactive authentication is needed. Please run:
az login --scope https://graph.microsoft.com//.default
make: *** [Makefile:18: infrastructure] Error 1
```
As of today, Video Indexer still requires a Media Services service to be created, and so we can't remove it from bicep deployment. We will need to assess closer to the date if VI is working without the service and we can then remove the dependency.
### Solution
<<<<<<< HEAD
=======
The error is interesting as it seems to indicate the media service cannot be created. This is not the case, it does work in regions where VI and Media Services are available. I have updated this to an enhancement and we will add a ticket to the board to action this when VI can be deployed without this supporting service.
## Error: Token limit often exceeded with PDF files
Expand Down Expand Up @@ -185,4 +195,5 @@ make: *** [Makefile:18: infrastructure] Error 1
### Solution
>>>>>>> 3faba43c904db668697c16df33a11c67629bd490
You will need to open your GitHub Codespaces in VSCode on your managed device. Please read more about opening your [GitHub Codespaces using VSCode](/docs/deployment/developing_in_a_codespaces.md#using-github-codespaces-in-visual-studio-code).
18 changes: 5 additions & 13 deletions docs/webapp_debug.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,15 @@ The app consists of two layers, namely the frontend user interface components an

To debug the webapp, both frontend and backend, first set breakpoints in your code under the frontend and/or backend. Select the 'Run & Debug' tab from the sidebar in VS Code. Select `Python: WebApp backend` from the dropdown and hit run. This will initiate local debugging of the backend code.

Next, verify you have a virtual environment created, which should be seen as a folder called .venv under the root of your workspace. If this doesn't exist, you can create one by following these steps:
![backend debugging](/docs/images/webapp_debug_1.png)

1. Opening the command palette (Ctrl+Shift+P)
1. Select the command Python: Create Environment
1. Next select Venv
1. Now select the latest version of Python from the list
1. Finally enter check marks next to all requirements.txt files listed and hit OK
Next, you will need to initiate debugging of the frontend code. To do this select 'Vite: Debug' from the drop down and hit run.

This will initiate frontend running and debugging. A browser will open and show the web app running under localhost:5000. Next, proceed to interact with the web app by asking a question. In the VS Code interface, your code will hit the breakpoints, frontend or backend, and you will be able to view variables, trace logic, etc. You can switch between the two running debuggers by selecting frontend or backend (flask or vite) from the debug dropdown.

Now initiate debugging of the front end code by selecting 'Frontend: watch' and then hitting run
![backend debugging](/docs/images/frontend-watch.png)
![frontend debugging](/docs/images/webapp_debug_2.png)

Finally hit Vite: Debug
![backend debugging](/docs/images/vite-debug.png)
This will initiate frontend running and debugging. A browser will open and show the web app running under localhost:5000. Next, proceed to interact with the web app by asking a question. In the VS Code interface, your code will hit the breakpoints, frontend or backend, and you will be able to view variables, trace logic, etc. You can switch between the two running debuggers by selecting frontend or backend (flask or vite) from the debug dropdown.

A browser will open and show the web app running under localhost:5000. Next, proceed to interact with the web app by asking a question. In the VS Code interface, your code will hit the breakpoints, frontend or backend, and you will be able to view variables, trace logic, etc. You can switch between the two running debuggers by selecting frontend or backend (flask or vite) from the debug dropdown.
![frontend debugging](/docs/images/webapp_debug_3.png)

## Known Issues

Expand Down
2 changes: 1 addition & 1 deletion functions/TextEnrichment/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def main(msg: func.QueueMessage) -> None:

statusLog.upsert_document(
blob_path,
f"{FUNCTION_NAME} - Text enrichment is complete, message sent to embeddings queue",
f"{FUNCTION_NAME} - Text enrichment is complete",
StatusClassification.DEBUG,
State.QUEUED,
)
Expand Down

0 comments on commit c6792d7

Please sign in to comment.