In [1]:
from azure.ai.ml import MLClient
from azure.identity import ClientSecretCredential
from azure.mgmt.storage import StorageManagementClient
import os
from dotenv import load_dotenv
import subprocess
import time

# Load environment variables
load_dotenv()

print("="*60)
print("FIXING STORAGE AUTHORIZATION ISSUES")
print("="*60)

client_id = os.getenv("AZURE_CLIENT_ID")
client_secret = os.getenv("AZURE_CLIENT_SECRET")
tenant_id = os.getenv("AZURE_TENANT_ID")
subscription_id = os.getenv("SUBSCRIPTION_ID")
resource_group = os.getenv("RESOURCE_GROUP")
workspace_name = os.getenv("AZUREML_WORKSPACE_NAME")

print(f"Service Principal: {client_id}")
print(f"Workspace: {workspace_name}")
print()

# Create credential
credential = ClientSecretCredential(
    tenant_id=tenant_id,
    client_id=client_id,
    client_secret=client_secret
)

# Initialize ML Client
ml_client = MLClient(
    credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name
)


FIXING STORAGE AUTHORIZATION ISSUES
Service Principal: e77fcc8e-5551-47b6-a600-1d5633c81e31
Workspace: strategicai-mlw-uks-dev-01



In [2]:
# ==============================================================================
# FIX 1: Sync Storage Keys (Most Important)
# ==============================================================================
print("🔧 FIX 1: Syncing Storage Keys...")
print("-"*40)
print("This resolves stale SAS token issues")
print()

try:
    # Using Azure CLI (most reliable)
    result = subprocess.run(
        ["az", "ml", "workspace", "sync-keys",
         "--resource-group", resource_group,
         "--workspace-name", workspace_name],
        capture_output=True,
        text=True
    )
    
    if result.returncode == 0:
        print("✅ Storage keys synced successfully!")
        print("   Waiting 30 seconds for propagation...")
        time.sleep(30)
    else:
        print(f"⚠️ Failed to sync keys via CLI: {result.stderr}")
        print("   Try running manually:")
        print(f"   az ml workspace sync-keys --resource-group {resource_group} --workspace-name {workspace_name}")
        
except Exception as e:
    print(f"⚠️ Error syncing keys: {str(e)}")

🔧 FIX 1: Syncing Storage Keys...
----------------------------------------
This resolves stale SAS token issues

✅ Storage keys synced successfully!
   Waiting 30 seconds for propagation...


In [7]:
from azure.ai.ml import MLClient, command, Input, Output
from azure.ai.ml.dsl import pipeline
from azure.ai.ml.entities import Environment, UserIdentityConfiguration
from azure.ai.ml.constants import AssetTypes
from azure.identity import ClientSecretCredential
import os
import subprocess
import json
from dotenv import load_dotenv
from pathlib import Path

# Load environment variables
load_dotenv()

print("="*60)
print("STORAGE ACCOUNT CHECK AND PIPELINE SUBMISSION")
print("="*60)

client_id = os.getenv("AZURE_CLIENT_ID")
client_secret = os.getenv("AZURE_CLIENT_SECRET")
tenant_id = os.getenv("AZURE_TENANT_ID")
subscription_id = os.getenv("SUBSCRIPTION_ID")
resource_group = os.getenv("RESOURCE_GROUP")
workspace_name = os.getenv("AZUREML_WORKSPACE_NAME")

# Create credential
credential = ClientSecretCredential(
    tenant_id=tenant_id,
    client_id=client_id,
    client_secret=client_secret
)

# Initialize ML Client
ml_client = MLClient(
    credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name
)

# ==============================================================================
# Check Storage Account Using Azure CLI (More Reliable)
# ==============================================================================
print("🔍 Checking Workspace Storage Configuration...")
print("-"*40)

try:
    # Use Azure CLI to get workspace details
    result = subprocess.run(
        ["az", "ml", "workspace", "show",
         "--workspace-name", workspace_name,
         "--resource-group", resource_group,
         "--query", "storage_account",
         "--output", "tsv"],
        capture_output=True,
        text=True
    )
    
    if result.returncode == 0:
        storage_resource_id = result.stdout.strip()
        # Extract storage account name from resource ID
        if storage_resource_id:
            storage_account_name = storage_resource_id.split('/')[-1]
            print(f"✓ Workspace uses storage account: {storage_account_name}")
            
            if storage_account_name != "strategicaistuksdev02":
                print(f"\n⚠️ IMPORTANT: Storage account mismatch!")
                print(f"   Error message mentions: strategicaistuksdev02")
                print(f"   But workspace uses: {storage_account_name}")
                print(f"\n   Make sure your SP has permissions on '{storage_account_name}':")
                print(f"""
az role assignment create \\
    --assignee {client_id} \\
    --role "Storage Blob Data Contributor" \\
    --scope {storage_resource_id}
                """)
            else:
                print("✓ Storage account matches the error message")
    else:
        print(f"Could not get storage account via CLI: {result.stderr}")
        
except Exception as e:
    print(f"Error checking via CLI: {str(e)}")

# Alternative method: Check datastores
print("\n🔍 Checking Datastores...")
print("-"*40)

try:
    # List all datastores
    datastores = list(ml_client.datastores.list())
    print(f"Found {len(datastores)} datastores:")
    
    for ds in datastores:
        print(f"  - {ds.name} (type: {ds.type})")
        
        # Try to get more details for blob datastores
        if ds.type == "AzureBlob":
            try:
                # Get the full datastore object
                full_ds = ml_client.datastores.get(name=ds.name)
                
                # Try different attribute names (SDK versions vary)
                storage_name = None
                if hasattr(full_ds, 'account_name'):
                    storage_name = full_ds.account_name
                elif hasattr(full_ds, 'storage_account_name'):
                    storage_name = full_ds.storage_account_name
                elif hasattr(full_ds.credentials, 'account_name'):
                    storage_name = full_ds.credentials.account_name
                    
                if storage_name:
                    print(f"    Storage: {storage_name}")
                    if ds.name == "workspaceblobstore":
                        print(f"    ^ This is the default datastore")
            except:
                pass
                
except Exception as e:
    print(f"Note: Could not get detailed datastore info: {str(e)}")


STORAGE ACCOUNT CHECK AND PIPELINE SUBMISSION
🔍 Checking Workspace Storage Configuration...
----------------------------------------

🔍 Checking Datastores...
----------------------------------------
Found 7 datastores:
  - landing_pa (type: AzureBlob)
    Storage: strategicaistuksdev02
  - azureml_landing (type: AzureBlob)
    Storage: strategicaistuksdev02
  - azureml_globaldatasets (type: AzureBlob)
    Storage: mmstorageuksouth
  - workspaceworkingdirectory (type: AzureFile)
  - workspaceblobstore (type: AzureBlob)
    Storage: strategicaistuksdev02
    ^ This is the default datastore
  - workspacefilestore (type: AzureFile)
  - workspaceartifactstore (type: AzureBlob)
    Storage: strategicaistuksdev02
