Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions application/single_app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
EXECUTOR_TYPE = 'thread'
EXECUTOR_MAX_WORKERS = 30
SESSION_TYPE = 'filesystem'
VERSION = "0.229.063"
VERSION = "0.229.098"


SECRET_KEY = os.getenv('SECRET_KEY', 'dev-secret-key-change-in-production')
Expand Down Expand Up @@ -121,9 +121,9 @@
CLIENTS_LOCK = threading.Lock()

ALLOWED_EXTENSIONS = {
'txt', 'pdf', 'docx', 'xlsx', 'xls', 'csv', 'pptx', 'html', 'jpg', 'jpeg', 'png', 'bmp', 'tiff', 'tif', 'heif', 'md', 'json',
'txt', 'pdf', 'doc', 'docm', 'docx', 'xlsx', 'xls', 'xlsm','csv', 'pptx', 'html', 'jpg', 'jpeg', 'png', 'bmp', 'tiff', 'tif', 'heif', 'md', 'json',
Copy link

Copilot AI Nov 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing space after comma between 'xlsm' and 'csv' in the ALLOWED_EXTENSIONS set. Should be 'xlsm', 'csv' for consistency with other items in the set.

Suggested change
'txt', 'pdf', 'doc', 'docm', 'docx', 'xlsx', 'xls', 'xlsm','csv', 'pptx', 'html', 'jpg', 'jpeg', 'png', 'bmp', 'tiff', 'tif', 'heif', 'md', 'json',
'txt', 'pdf', 'doc', 'docm', 'docx', 'xlsx', 'xls', 'xlsm', 'csv', 'pptx', 'html', 'jpg', 'jpeg', 'png', 'bmp', 'tiff', 'tif', 'heif', 'md', 'json',

Copilot uses AI. Check for mistakes.
'mp4', 'mov', 'avi', 'mkv', 'flv', 'mxf', 'gxf', 'ts', 'ps', '3gp', '3gpp', 'mpg', 'wmv', 'asf', 'm4a', 'm4v', 'isma', 'ismv',
'dvr-ms', 'wav'
'dvr-ms', 'wav', 'xml', 'yaml', 'yml', 'log'
}
ALLOWED_EXTENSIONS_IMG = {'png', 'jpg', 'jpeg'}
MAX_CONTENT_LENGTH = 5000 * 1024 * 1024 # 5000 MB AKA 5 GB
Expand Down
25 changes: 22 additions & 3 deletions application/single_app/functions_authentication.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,15 +245,34 @@ def get_valid_access_token_for_plugins(scopes=None):

def get_video_indexer_account_token(settings, video_id=None):
    """
    Return an Azure Video Indexer access token obtained via managed identity.

    This is a thin entry point: it logs the start of the token acquisition
    and the current Azure environment, then delegates to
    get_video_indexer_managed_identity_token and returns its result unchanged.

    As described for the delegate, the flow is:
      1. Acquire an ARM access token with DefaultAzureCredential.
      2. Call the ARM generateAccessToken API to obtain the Video Indexer
         access token.
      3. Use that token for subsequent Video Indexer API operations.

    The App Service's managed identity must hold the Contributor role on the
    Video Indexer resource.

    Args:
        settings: Application settings, forwarded to the delegate untouched.
        video_id: Optional video identifier; used here only in the debug log
            message and forwarded to the delegate.

    Returns:
        Whatever get_video_indexer_managed_identity_token returns.
    """
    # Imported at call time, exactly as the original code does.
    from functions_debug import debug_print as _log

    _log(f"[VIDEO INDEXER AUTH] Starting token acquisition using managed identity for video_id: {video_id}")
    _log(f"[VIDEO INDEXER AUTH] Azure environment: {AZURE_ENVIRONMENT}")

    token = get_video_indexer_managed_identity_token(settings, video_id)
    return token

def get_video_indexer_managed_identity_token(settings, video_id=None):
"""
For ARM-based VideoIndexer accounts using managed identity:
1) Acquire an ARM token with DefaultAzureCredential
2) POST to the ARM generateAccessToken endpoint
3) Return the account-level accessToken
"""
from functions_debug import debug_print

debug_print(f"[VIDEO INDEXER AUTH] Starting token acquisition for video_id: {video_id}")
debug_print(f"[VIDEO INDEXER AUTH] Azure environment: {AZURE_ENVIRONMENT}")
debug_print(f"[VIDEO INDEXER AUTH] Using managed identity authentication")

# 1) ARM token
if AZURE_ENVIRONMENT == "usgovernment":
Expand Down
2 changes: 1 addition & 1 deletion application/single_app/functions_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def extract_table_file(file_path, file_ext):
try:
if file_ext == '.csv':
df = pandas.read_csv(file_path)
elif file_ext in ['.xls', '.xlsx']:
elif file_ext in ['.xls', '.xlsx', '.xlsm']:
df = pandas.read_excel(file_path)
else:
raise ValueError("Unsupported file extension for table extraction.")
Expand Down
1,248 changes: 1,162 additions & 86 deletions application/single_app/functions_documents.py

Large diffs are not rendered by default.

56 changes: 32 additions & 24 deletions application/single_app/functions_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,18 +46,22 @@ def hybrid_search(query, user_id, document_id=None, top_n=12, doc_scope="all", a
select=["id", "chunk_text", "chunk_id", "file_name", "user_id", "version", "chunk_sequence", "upload_date", "document_classification", "page_number", "author", "chunk_keywords", "title", "chunk_summary"]
)

group_results = search_client_group.search(
search_text=query,
vector_queries=[vector_query],
filter=(
f"(group_id eq '{active_group_id}' or shared_group_ids/any(g: g eq '{active_group_id},approved')) and document_id eq '{document_id}'"
),
query_type="semantic",
semantic_configuration_name="nexus-group-index-semantic-configuration",
query_caption="extractive",
query_answer="extractive",
select=["id", "chunk_text", "chunk_id", "file_name", "group_id", "version", "chunk_sequence", "upload_date", "document_classification", "page_number", "author", "chunk_keywords", "title", "chunk_summary"]
)
# Only search group index if active_group_id is provided
if active_group_id:
group_results = search_client_group.search(
search_text=query,
vector_queries=[vector_query],
filter=(
f"(group_id eq '{active_group_id}' or shared_group_ids/any(g: g eq '{active_group_id},approved')) and document_id eq '{document_id}'"
),
query_type="semantic",
semantic_configuration_name="nexus-group-index-semantic-configuration",
query_caption="extractive",
query_answer="extractive",
select=["id", "chunk_text", "chunk_id", "file_name", "group_id", "version", "chunk_sequence", "upload_date", "document_classification", "page_number", "author", "chunk_keywords", "title", "chunk_summary"]
)
else:
group_results = []

# Get visible public workspace IDs from user settings
visible_public_workspace_ids = get_user_visible_public_workspace_ids_from_settings(user_id)
Expand Down Expand Up @@ -97,18 +101,22 @@ def hybrid_search(query, user_id, document_id=None, top_n=12, doc_scope="all", a
select=["id", "chunk_text", "chunk_id", "file_name", "user_id", "version", "chunk_sequence", "upload_date", "document_classification", "page_number", "author", "chunk_keywords", "title", "chunk_summary"]
)

group_results = search_client_group.search(
search_text=query,
vector_queries=[vector_query],
filter=(
f"(group_id eq '{active_group_id}' or shared_group_ids/any(g: g eq '{active_group_id},approved'))"
),
query_type="semantic",
semantic_configuration_name="nexus-group-index-semantic-configuration",
query_caption="extractive",
query_answer="extractive",
select=["id", "chunk_text", "chunk_id", "file_name", "group_id", "version", "chunk_sequence", "upload_date", "document_classification", "page_number", "author", "chunk_keywords", "title", "chunk_summary"]
)
# Only search group index if active_group_id is provided
if active_group_id:
group_results = search_client_group.search(
search_text=query,
vector_queries=[vector_query],
filter=(
f"(group_id eq '{active_group_id}' or shared_group_ids/any(g: g eq '{active_group_id},approved'))"
),
query_type="semantic",
semantic_configuration_name="nexus-group-index-semantic-configuration",
query_caption="extractive",
query_answer="extractive",
select=["id", "chunk_text", "chunk_id", "file_name", "group_id", "version", "chunk_sequence", "upload_date", "document_classification", "page_number", "author", "chunk_keywords", "title", "chunk_summary"]
)
else:
group_results = []

# Get visible public workspace IDs from user settings
visible_public_workspace_ids = get_user_visible_public_workspace_ids_from_settings(user_id)
Expand Down
7 changes: 5 additions & 2 deletions application/single_app/functions_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@ def get_settings():
'number_of_historical_messages_to_summarize': 10,
'enable_summarize_content_history_beyond_conversation_history_limit': False,

# Multi-Modal Vision Analysis
'enable_multimodal_vision': False,
'multimodal_vision_model': '',

# Document Classification
'enable_document_classification': False,
'document_classification_categories': [
Expand Down Expand Up @@ -215,11 +219,10 @@ def get_settings():
'video_indexer_endpoint': video_indexer_endpoint,
'video_indexer_location': '',
'video_indexer_account_id': '',
'video_indexer_api_key': '',
'video_indexer_resource_group': '',
'video_indexer_subscription_id': '',
'video_indexer_account_name': '',
'video_indexer_arm_api_version': '2021-11-10-preview',
'video_indexer_arm_api_version': '2024-01-01',
'video_index_timeout': 600,

# Audio file settings with Azure speech service
Expand Down
Loading
Loading