# EDC-MCP-LLM - Notebook di Test

**Autore:** Lorenzo - Principal Data Architect @ NTT Data Italia

Notebook per testare interattivamente le funzionalita del progetto EDC-MCP-LLM.

**Prerequisiti:**
- Essere nella directory `lineageAI/notebooks/`
- Aver attivato il venv del progetto
- Aver configurato `.env` con le credenziali EDC

---

## 1. Setup e Import

In [1]:
# Setup path per importare i moduli del progetto
import sys
from pathlib import Path

# Aggiungi la root del progetto al path
PROJECT_ROOT = Path.cwd().parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

print(f"Project root: {PROJECT_ROOT}")
print(f"Python: {sys.executable}")

Project root: c:\Dev\ai-training\lineageAI
Python: c:\Dev\ai-training\lineageAI\venv\Scripts\python.exe


In [2]:
# Per eseguire codice async in Jupyter
import nest_asyncio
nest_asyncio.apply()

import asyncio
import warnings
warnings.filterwarnings('ignore')

print("[OK] nest_asyncio attivato")

[OK] nest_asyncio attivato


In [3]:
# Import dei moduli del progetto
from src.config.settings import settings, LLMProvider
from src.edc.client import EDCClient
from src.edc.lineage import LineageBuilder
from src.edc.models import TreeNode, LineageDirection
from src.llm.factory import LLMFactory, LLMConfig

print("[OK] Moduli importati")
print(f"    EDC URL: {settings.edc_base_url}")
print(f"    LLM Provider default: {settings.default_llm_provider.value}")

[OK] Settings loaded successfully from .env
   EDC URL: https://edc.servizi.allitude.it:9086/access
[OK] Moduli importati
    EDC URL: https://edc.servizi.allitude.it:9086/access
    LLM Provider default: gemma3


## 2. Inizializzazione Client

In [4]:
# Inizializza EDC Client
edc_client = EDCClient()
print("[OK] EDC Client inizializzato")

# Inizializza LineageBuilder
lineage_builder = LineageBuilder()
print("[OK] LineageBuilder inizializzato")

[OK] EDC Client inizializzato
[OK] LineageBuilder inizializzato


In [5]:
# Inizializza LLM Client
# Puoi cambiare provider: TINYLLAMA, CLAUDE, GEMMA3
CURRENT_LLM_PROVIDER = settings.default_llm_provider

def init_llm_client(provider: LLMProvider = None):
    """Inizializza o cambia il client LLM."""
    global llm_client, CURRENT_LLM_PROVIDER
    
    if provider:
        CURRENT_LLM_PROVIDER = provider
    
    if CURRENT_LLM_PROVIDER == LLMProvider.TINYLLAMA:
        config = LLMConfig(
            provider=LLMProvider.TINYLLAMA,
            model_name=settings.tinyllama_model,
            base_url=settings.tinyllama_base_url,
            max_tokens=settings.tinyllama_max_tokens,
            temperature=settings.tinyllama_temperature
        )
    elif CURRENT_LLM_PROVIDER == LLMProvider.GEMMA3:
        config = LLMConfig(
            provider=LLMProvider.GEMMA3,
            model_name=settings.gemma3_model,
            base_url=settings.gemma3_base_url,
            max_tokens=settings.gemma3_max_tokens,
            temperature=settings.gemma3_temperature
        )
    elif CURRENT_LLM_PROVIDER == LLMProvider.CLAUDE:
        config = LLMConfig(
            provider=LLMProvider.CLAUDE,
            model_name=settings.claude_model,
            api_key=settings.claude_api_key,
            max_tokens=settings.claude_max_tokens,
            temperature=settings.claude_temperature
        )
    else:
        raise ValueError(f"Provider non supportato: {CURRENT_LLM_PROVIDER}")
    
    llm_client = LLMFactory.create_llm_client(config)
    print(f"[OK] LLM Client: {type(llm_client).__name__} ({CURRENT_LLM_PROVIDER.value})")
    return llm_client

llm_client = init_llm_client()

[OK] LLM Client: Gemma3Client (gemma3)


---

## 3. Funzioni di Test

### 3.1 Search Assets

In [6]:
async def search_assets(
    resource_name: str,
    name_filter: str = "",
    asset_type: str = "com.infa.ldm.relational.Table",
    max_results: int = 10
):
    """
    Cerca asset nel catalogo EDC.
    
    Args:
        resource_name: Nome della risorsa EDC (es: DataPlatform)
        name_filter: Filtro sul nome (case-insensitive)
        asset_type: Tipo di asset (Table, View, Column, etc.)
        max_results: Numero massimo di risultati
    """
    print(f"\n{'='*60}")
    print(f"SEARCH ASSETS")
    print(f"{'='*60}")
    print(f"Resource: {resource_name}")
    print(f"Filter: {name_filter or '(nessuno)'}")
    print(f"Type: {asset_type}")
    print(f"{'='*60}\n")
    
    results = await edc_client.bulk_search_assets(
        resource_name=resource_name,
        name_filter=name_filter if name_filter else None,
        asset_type_filter=asset_type
    )
    
    results = results[:max_results]
    
    print(f"Trovati {len(results)} asset:\n")
    
    for i, asset in enumerate(results, 1):
        print(f"{i}. {asset.get('name', 'N/A')}")
        print(f"   Type: {asset.get('classType', 'N/A')}")
        print(f"   ID: {asset.get('id', 'N/A')}")
        print()
    
    return results

In [7]:
# TEST: Cerca tabelle con GARANZIE nel nome
results = await search_assets(
    resource_name="DataPlatform",
    name_filter="GARANZIE",
    asset_type="com.infa.ldm.relational.Table",
    max_results=10
)

INFO:edc_client:Bulk search: {'resourceName': 'DataPlatform', 'classTypes': 'com.infa.ldm.relational.Table', 'facts': 'id,core.name,core.classType', 'includeRefObjects': 'true'}



SEARCH ASSETS
Resource: DataPlatform
Filter: GARANZIE
Type: com.infa.ldm.relational.Table



INFO:edc_client:Response status: 200
INFO:edc_client:Response Content-Type: application/octet-stream;charset=UTF-8
INFO:edc_client:Received 546540 characters
INFO:edc_client:Parsed 6422 items from CSV
INFO:edc_client:Filtered to 56 items


Trovati 10 asset:

1. DIM_GARANZIE_STATALI
   Type: Table
   ID: DataPlatform://ORAP51/DATAMARTREP/DIM_GARANZIE_STATALI

2. STTB_SBS_RIPARTO_GARANZIE_SDV
   Type: Table
   ID: DataPlatform://ORAP51/DHSTAGE/STTB_SBS_RIPARTO_GARANZIE_SDV

3. STTB_SID_GARANZIE_INGRESSO_DEFAULT_STORICO
   Type: Table
   ID: DataPlatform://ORAP51/DHSTAGE/STTB_SID_GARANZIE_INGRESSO_DEFAULT_STORICO

4. STTB_SBS_MIS_COLL_GARANZIE_RAPPORTI_ANACREDIT
   Type: Table
   ID: DataPlatform://ORAP51/DHSTAGE/STTB_SBS_MIS_COLL_GARANZIE_RAPPORTI_ANACREDIT

5. STTB_SBS_ANAGRAFICA_GARANZIE_SDV
   Type: Table
   ID: DataPlatform://ORAP51/DHSTAGE/STTB_SBS_ANAGRAFICA_GARANZIE_SDV

6. STTB_SBS_MIS_GARANZIE_ANACREDIT
   Type: Table
   ID: DataPlatform://ORAP51/DHSTAGE/STTB_SBS_MIS_GARANZIE_ANACREDIT

7. STTB_SID_GARANZIE_INGRESSO_SOFFERENZA_STORICO
   Type: Table
   ID: DataPlatform://ORAP51/DHSTAGE/STTB_SID_GARANZIE_INGRESSO_SOFFERENZA_STORICO

8. CREDMTTP_FIDO_GARANZIE
   Type: Table
   ID: DataPlatform://ORAP51/DWHEVO/CREDMT

In [8]:
# TEST: Cerca View
results = await search_assets(
    resource_name="DataPlatform",
    name_filter="GARANZIE",
    asset_type="com.infa.ldm.relational.View",
    max_results=5
)

INFO:edc_client:Bulk search: {'resourceName': 'DataPlatform', 'classTypes': 'com.infa.ldm.relational.View', 'facts': 'id,core.name,core.classType', 'includeRefObjects': 'true'}



SEARCH ASSETS
Resource: DataPlatform
Filter: GARANZIE
Type: com.infa.ldm.relational.View



INFO:edc_client:Response status: 200
INFO:edc_client:Response Content-Type: application/octet-stream;charset=UTF-8
INFO:edc_client:Received 661900 characters
INFO:edc_client:Parsed 7342 items from CSV
INFO:edc_client:Filtered to 124 items


Trovati 5 asset:

1. AMLVW_AS_GARANZIE
   Type: View
   ID: DataPlatform://ORAP51/DATALAYER/AMLVW_AS_GARANZIE

2. AMLVW_AS_CODICIGARANZIE
   Type: View
   ID: DataPlatform://ORAP51/DATALAYER/AMLVW_AS_CODICIGARANZIE

3. AMLVW_GARANZIE
   Type: View
   ID: DataPlatform://ORAP51/DATALAYER/AMLVW_GARANZIE

4. AMLVW_GARANZIEFINEMESE
   Type: View
   ID: DataPlatform://ORAP51/DATALAYER/AMLVW_GARANZIEFINEMESE

5. AMLVW_GARANZIEDIRITTI
   Type: View
   ID: DataPlatform://ORAP51/DATALAYER/AMLVW_GARANZIEDIRITTI



### 3.2 Get Asset Details

In [9]:
async def get_asset_details(asset_id: str, enhance_with_ai: bool = True):
    """
    Recupera dettagli di un asset con enhancement AI opzionale.
    """
    print(f"\n{'='*60}")
    print(f"GET ASSET DETAILS")
    print(f"{'='*60}")
    print(f"Asset: {asset_id}")
    print(f"AI Enhancement: {enhance_with_ai}")
    print(f"{'='*60}\n")
    
    details = await edc_client.get_asset_details(asset_id)
    
    print(f"Nome: {details['name']}")
    print(f"Tipo: {details['classType']}")
    print(f"Descrizione: {details['description'] or '(nessuna)'}")
    print(f"\nUpstream links: {len(details['src_links'])}")
    print(f"Downstream links: {len(details['dst_links'])}")
    print(f"Facts: {len(details['facts'])} items")
    
    if details['src_links']:
        print(f"\nUpstream (prime 5):")
        for link in details['src_links'][:5]:
            print(f"  <- {link['name']} ({link['classType']})")
    
    if details['dst_links']:
        print(f"\nDownstream (prime 5):")
        for link in details['dst_links'][:5]:
            print(f"  -> {link['name']} ({link['classType']})")
    
    if enhance_with_ai and llm_client:
        print(f"\n--- AI Enhancement ({CURRENT_LLM_PROVIDER.value}) ---")
        enhanced = await llm_client.enhance_description(
            asset_name=details['name'],
            technical_desc=details['description'],
            schema_context="",
            column_info=[]
        )
        print(f"\n{enhanced}")
    
    return details

In [10]:
# TEST: Dettagli asset
asset_id = "DataPlatform://ORAP51/OSI/LDT_FULL_INSTRUMENT/IMP_ACCANTONAMENTO"

details = await get_asset_details(asset_id, enhance_with_ai=True)

INFO:edc_client:Fetching asset: DataPlatform://ORAP51/OSI/LDT_FULL_INSTRUMENT/IMP_ACCANTONAMENTO



GET ASSET DETAILS
Asset: DataPlatform://ORAP51/OSI/LDT_FULL_INSTRUMENT/IMP_ACCANTONAMENTO
AI Enhancement: True



INFO:edc_client:Response status: 200
INFO:edc_client:Asset found: IMP_ACCANTONAMENTO (Unknown) - 2 src, 4 dst


Nome: IMP_ACCANTONAMENTO
Tipo: Unknown
Descrizione: (nessuna)

Upstream links: 2
Downstream links: 4
Facts: 35 items

Upstream (prime 5):
  <- IMP_ACCANTONAMENTO (com.infa.ldm.relational.Column)
  <- ACCMLTD_IMPRMNT_INSTRMNT (com.infa.ldm.relational.Column)

Downstream (prime 5):
  -> ACCMLTD_IMPRMNT_INSTRMNT (com.infa.ldm.relational.ViewColumn)
  -> IMP_ACCANTONAMENTO (com.infa.ldm.relational.Column)
  -> IMP_ACCANTONAMENTO (com.infa.ldm.relational.ViewColumn)
  -> ACCMLTD_IMPRMNT_INSTRMNT (com.infa.ldm.relational.ViewColumn)

--- AI Enhancement (gemma3) ---

L'asset IMP_ACCANTONAMENTO rappresenta il repository centralizzato dei dati relativi all'accantonamento di risorse aziendali, inclusi investimenti, fondi di riserva e allocazioni specifiche per progetti strategici. Il suo scopo principale è garantire la visibilità e il controllo su tali risorse, supportando una gestione finanziaria accurata e trasparente.  Questo asset fornisce valore business attraverso una migliore pianificazio

### 3.3 Get Immediate Lineage

In [11]:
async def get_immediate_lineage(asset_id: str, direction: str = "both"):
    """
    Recupera lineage immediato (1 livello).
    
    Args:
        asset_id: ID dell'asset
        direction: "upstream", "downstream", "both"
    """
    print(f"\n{'='*60}")
    print(f"GET IMMEDIATE LINEAGE")
    print(f"{'='*60}")
    print(f"Asset: {asset_id}")
    print(f"Direction: {direction}")
    print(f"{'='*60}\n")
    
    lineage = await lineage_builder.get_immediate_lineage(asset_id, direction)
    
    upstream = [l for l in lineage if l['direction'] == 'upstream']
    downstream = [l for l in lineage if l['direction'] == 'downstream']
    
    if direction in ['upstream', 'both']:
        print(f"UPSTREAM: {len(upstream)} asset")
        print("-" * 40)
        for link in upstream:
            print(f"  <- {link['name']}")
            print(f"     Type: {link['classType']}")
            print(f"     ID: {link['asset_id']}")
            print()
    
    if direction in ['downstream', 'both']:
        print(f"\nDOWNSTREAM: {len(downstream)} asset")
        print("-" * 40)
        for link in downstream:
            print(f"  -> {link['name']}")
            print(f"     Type: {link['classType']}")
            print(f"     ID: {link['asset_id']}")
            print()
    
    return lineage

In [13]:
# TEST: Lineage immediato
asset_id = "DataPlatform://ORAP51/OSI/LDT_FULL_INSTRUMENT/IMP_ACCANTONAMENTO"

lineage = await get_immediate_lineage(asset_id, direction="both")

INFO:edc_client:Fetching asset: DataPlatform://ORAP51/OSI/LDT_FULL_INSTRUMENT/IMP_ACCANTONAMENTO
INFO:edc_client:Response status: 200
INFO:edc_client:Asset found: IMP_ACCANTONAMENTO (Unknown) - 2 src, 4 dst



GET IMMEDIATE LINEAGE
Asset: DataPlatform://ORAP51/OSI/LDT_FULL_INSTRUMENT/IMP_ACCANTONAMENTO
Direction: both

UPSTREAM: 2 asset
----------------------------------------
  <- IMP_ACCANTONAMENTO
     Type: com.infa.ldm.relational.Column
     ID: DataPlatform://ORAP51/OSI/LDT_FULL_INSTRUMENT/IMP_ACCANTONAMENTO

  <- ACCMLTD_IMPRMNT_INSTRMNT
     Type: com.infa.ldm.relational.Column
     ID: DataPlatform://ORAP51/DWHEVO/OSIWRKTBP_LDT_INSTR_PRELAV_06/ACCMLTD_IMPRMNT_INSTRMNT


DOWNSTREAM: 4 asset
----------------------------------------
  -> ACCMLTD_IMPRMNT_INSTRMNT
     Type: com.infa.ldm.relational.ViewColumn
     ID: DataPlatform://ORAP51/OSI/LDT_SME_INSTRUMENT/ACCMLTD_IMPRMNT_INSTRMNT

  -> IMP_ACCANTONAMENTO
     Type: com.infa.ldm.relational.Column
     ID: DataPlatform://ORAP51/OSI/LDT_FULL_INSTRUMENT/IMP_ACCANTONAMENTO

  -> IMP_ACCANTONAMENTO
     Type: com.infa.ldm.relational.ViewColumn
     ID: DataPlatform://ORAP51/OSI/LDT_REP_INSTRUMENT/IMP_ACCANTONAMENTO

  -> ACCMLTD_IMPRMN

### 3.4 Build Lineage Tree

In [14]:
def print_tree(node: TreeNode, indent: int = 0):
    """Stampa albero in modo leggibile."""
    prefix = "  " * indent
    type_short = node.class_type.split('.')[-1] if '.' in node.class_type else node.class_type
    print(f"{prefix}[{node.code}] {node.name} ({type_short})")
    for child in node.children:
        print_tree(child, indent + 1)


async def build_lineage_tree(asset_id: str, max_depth: int = 3):
    """
    Costruisce albero di lineage upstream.
    """
    print(f"\n{'='*60}")
    print(f"BUILD LINEAGE TREE")
    print(f"{'='*60}")
    print(f"Asset: {asset_id}")
    print(f"Max Depth: {max_depth}")
    print(f"{'='*60}\n")
    
    import time
    start = time.time()
    
    # Pulisci cache per evitare cicli da run precedenti
    lineage_builder.clear_cache()
    
    root = await lineage_builder.build_tree(
        node_id=asset_id,
        code="001",
        depth=0,
        max_depth=max_depth
    )
    
    elapsed = time.time() - start
    
    if root:
        stats = root.get_statistics()
        print(f"Costruito in {elapsed:.2f}s")
        print(f"\nStatistiche:")
        print(f"  - Nodi totali: {stats['total_nodes']}")
        print(f"  - Profondita max: {stats['max_depth']}")
        print(f"  - Nodi terminali: {stats['terminal_nodes']}")
        print(f"\nStruttura albero:\n")
        print_tree(root)
    else:
        print("Nessun lineage trovato.")
    
    return root

In [15]:
# TEST: Costruisci albero lineage
asset_id = "DataPlatform://ORAP51/OSI/LDT_FULL_INSTRUMENT/IMP_ACCANTONAMENTO"

tree = await build_lineage_tree(asset_id, max_depth=5)

INFO:edc_client:Cache cleared
INFO:edc_client:Fetching asset: DataPlatform://ORAP51/OSI/LDT_FULL_INSTRUMENT/IMP_ACCANTONAMENTO
INFO:edc_client:Response status: 200
INFO:edc_client:Asset found: IMP_ACCANTONAMENTO (Unknown) - 2 src, 4 dst
INFO:edc_client:Fetching asset: DataPlatform://ORAP51/DWHEVO/OSIWRKTBP_LDT_INSTR_PRELAV_06/ACCMLTD_IMPRMNT_INSTRMNT



BUILD LINEAGE TREE
Asset: DataPlatform://ORAP51/OSI/LDT_FULL_INSTRUMENT/IMP_ACCANTONAMENTO
Max Depth: 5



INFO:edc_client:Response status: 200
INFO:edc_client:Asset found: ACCMLTD_IMPRMNT_INSTRMNT (Unknown) - 7 src, 1 dst
INFO:edc_client:Fetching asset: DataPlatform://ORAP51/S2A/GBK_DATI_TRIMESTRALI_CREDITI_ALL/IMP_ACCANTON_DUBBIO_ESITO_EUR
INFO:edc_client:Response status: 200
INFO:edc_client:Asset found: IMP_ACCANTON_DUBBIO_ESITO_EUR (Unknown) - 1 src, 4 dst
INFO:edc_client:Fetching asset: DataPlatform://ORAP51/S2A/GBK_DATI_TRIMESTRALI_CREDITI_T/IMP_ACCANTON_DUBBIO_ESITO_EUR
INFO:edc_client:Response status: 200
INFO:edc_client:Asset found: IMP_ACCANTON_DUBBIO_ESITO_EUR (Unknown) - 2 src, 1 dst
INFO:edc_client:Fetching asset: DataPlatform_SP_S2A_PKG$$AdvancedDatabaseObjectsScanner$$SRVPS2A/DWHTRANSITO://~proxy~/WRK_STOVW_CREODSTIMPEGNIMARGINITRIMESTRALE/IMP_ACCANTONAMENTO_DUBBIO
INFO:edc_client:Response status: 200
INFO:edc_client:Asset found: IMP_ACCANTONAMENTO_DUBBIO (Unknown) - 0 src, 2 dst
INFO:edc_client:Fetching asset: DataPlatform_SP_S2A_PKG$$AdvancedDatabaseObjectsScanner$$SRVPS2A/

Costruito in 2.68s

Statistiche:
  - Nodi totali: 22
  - Profondita max: 5
  - Nodi terminali: 9

Struttura albero:

[001] IMP_ACCANTONAMENTO (Unknown)
  [001002] ACCMLTD_IMPRMNT_INSTRMNT (Unknown)
    [001002001] IMP_ACCANTON_DUBBIO_ESITO_EUR (Unknown)
      [001002001001] IMP_ACCANTON_DUBBIO_ESITO_EUR (Unknown)
        [001002001001001] IMP_ACCANTONAMENTO_DUBBIO (Unknown)
        [001002001001002] IMP_ACCANTONAMENTO_DUBBIO (Unknown)
    [001002002] SVALUTAZIONE_SENZA_TEMPO (Unknown)
      [001002002001] RA5FSC (Unknown)
    [001002003] PERDITA_ATTUALIZZAZIONE (Unknown)
      [001002003001] RA5FPA (Unknown)
    [001002004] IMPORTO_GAIN_LOSS_POCI (Unknown)
      [001002004001] RA5LGP (Unknown)
    [001002005] MARGINE (Unknown)
      [001002005001] MARGINE (Unknown)
        [001002005001001] MARGINE (Unknown)
    [001002006] COD_FIDO_SEGNALAZIONE (Unknown)
      [001002006001] COD_FIDO_SEGNALAZIONE (Unknown)
        [001002006001001] COD_FIDO_SEGNALAZIONE (Unknown)
    [001002007] IMP_A

### 3.5 Analyze Change Impact

In [16]:
async def analyze_change_impact(
    asset_id: str,
    change_type: str,
    change_description: str
):
    """
    Analizza l'impatto di una modifica usando AI.
    
    Args:
        asset_id: Asset da modificare
        change_type: column_drop, data_type_change, deprecation, schema_change, etc.
        change_description: Descrizione della modifica
    """
    print(f"\n{'='*60}")
    print(f"ANALYZE CHANGE IMPACT")
    print(f"{'='*60}")
    print(f"Asset: {asset_id}")
    print(f"Change Type: {change_type}")
    print(f"Description: {change_description}")
    print(f"LLM: {CURRENT_LLM_PROVIDER.value}")
    print(f"{'='*60}\n")
    
    # Recupera lineage downstream
    lineage = await lineage_builder.get_immediate_lineage(asset_id, "downstream")
    downstream = [l for l in lineage if l['direction'] == 'downstream']
    
    print(f"Asset downstream impattati: {len(downstream)}")
    
    # Analisi AI
    impact = await llm_client.analyze_change_impact(
        source_asset=asset_id,
        change_type=change_type,
        change_details={'description': change_description},
        affected_lineage={'downstream': downstream}
    )
    
    print(f"\n{'='*40}")
    print(f"RISULTATO ANALISI")
    print(f"{'='*40}")
    print(f"\nLIVELLO RISCHIO: {impact['risk_level']}")
    
    print(f"\nImpatto Business:")
    print(f"  {impact['business_impact']}")
    
    print(f"\nImpatto Tecnico:")
    print(f"  {impact['technical_impact']}")
    
    print(f"\nRaccomandazioni:")
    for i, rec in enumerate(impact.get('recommendations', []), 1):
        print(f"  {i}. {rec}")
    
    print(f"\nStrategia Testing:")
    for i, test in enumerate(impact.get('testing_strategy', []), 1):
        print(f"  {i}. {test}")
    
    return impact

In [17]:
# TEST: Analisi impatto
impact = await analyze_change_impact(
    asset_id="DataPlatform://ORAP51/OSI/LDT_FULL_INSTRUMENT/IMP_ACCANTONAMENTO",
    change_type="column_drop",
    change_description="Eliminazione colonna FLAG_ATTIVO per deprecazione"
)

INFO:edc_client:Cache hit for asset: DataPlatform://ORAP51/OSI/LDT_FULL_INSTRUMENT/IMP_ACCANTONAMENTO



ANALYZE CHANGE IMPACT
Asset: DataPlatform://ORAP51/OSI/LDT_FULL_INSTRUMENT/IMP_ACCANTONAMENTO
Change Type: column_drop
Description: Eliminazione colonna FLAG_ATTIVO per deprecazione
LLM: gemma3

Asset downstream impattati: 4

RISULTATO ANALISI

LIVELLO RISCHIO: MEDIUM

Impatto Business:
  * L'eliminazione della colonna `FLAG_ATTIVO` potrebbe interrompere processi di reporting esistenti che utilizzano questa informazione per filtrare o aggregare i dati relativi agli asset in attività. * Potrebbe richiedere la modifica di report, dashboard e processi decisionali che si basano sull'utilizzo di questa colonna, con conseguenti costi di sviluppo e potenziali ritardi nell'implementazione di nuove analisi. * La mancata comunicazione chiara di questa modifica potrebbe generare confusione tra gli utenti aziendali che utilizzano i dati, portando a interpretazioni errate e a decisioni sbagliate.

Impatto Tecnico:
  * La rimozione della colonna `FLAG_ATTIVO` da una tabella di dimensioni significat

### 3.6 Generate Change Checklist

In [18]:
async def generate_change_checklist(
    asset_id: str,
    change_type: str,
    change_description: str
):
    """
    Genera checklist operativa per una modifica.
    """
    print(f"\n{'='*60}")
    print(f"GENERATE CHANGE CHECKLIST")
    print(f"{'='*60}")
    print(f"Asset: {asset_id}")
    print(f"Modifica: {change_type} - {change_description}")
    print(f"{'='*60}\n")
    
    # Prima genera impact analysis
    lineage = await lineage_builder.get_immediate_lineage(asset_id, "downstream")
    downstream = [l for l in lineage if l['direction'] == 'downstream']
    
    impact = await llm_client.analyze_change_impact(
        source_asset=asset_id,
        change_type=change_type,
        change_details={'description': change_description},
        affected_lineage={'downstream': downstream}
    )
    
    # Genera checklist
    checklist = await llm_client.generate_change_checklist(impact)
    
    sections = [
        ('governance_tasks', 'GOVERNANCE E APPROVAZIONI'),
        ('pre_change_tasks', 'PREPARAZIONE PRE-MODIFICA'),
        ('execution_tasks', 'ESECUZIONE'),
        ('validation_tasks', 'VALIDAZIONE E TEST'),
        ('rollback_procedures', 'PROCEDURE DI ROLLBACK'),
        ('stakeholder_notifications', 'NOTIFICHE STAKEHOLDER'),
        ('monitoring_tasks', 'MONITORING POST-IMPLEMENTAZIONE')
    ]
    
    print("CHECKLIST OPERATIVA")
    print("=" * 40)
    
    for key, title in sections:
        tasks = checklist.get(key, [])
        if tasks:
            print(f"\n{title}:")
            for i, task in enumerate(tasks, 1):
                print(f"  [ ] {i}. {task}")
    
    return checklist

In [None]:
# TEST: Genera checklist
checklist = await generate_change_checklist(
    asset_id="DataPlatform://ORAP51/OSI/LDT_FULL_INSTRUMENT/IMP_ACCANTONAMENTO",
    change_type="schema_change",
    change_description="Aggiunta nuova colonna DATA_SCADENZA"
)

INFO:edc_client:Cache hit for asset: DataPlatform://ORAP51/OSI/LDT_FULL_INSTRUMENT/IMP_ACCANTONAMENTO



GENERATE CHANGE CHECKLIST
Asset: DataPlatform://ORAP51/OSI/LDT_FULL_INSTRUMENT/IMP_ACCANTONAMENTO
Modifica: schema_change - Aggiunta nuova colonna DATA_SCADENZA



### 3.7 Enhance Asset Documentation

In [None]:
async def enhance_asset_documentation(asset_id: str, business_domain: str = ""):
    """
    Arricchisce la documentazione di un asset con AI.
    """
    print(f"\n{'='*60}")
    print(f"ENHANCE ASSET DOCUMENTATION")
    print(f"{'='*60}")
    print(f"Asset: {asset_id}")
    print(f"Domain: {business_domain or '(auto)'}")
    print(f"LLM: {CURRENT_LLM_PROVIDER.value}")
    print(f"{'='*60}\n")
    
    # Recupera metadati
    metadata = await lineage_builder.get_asset_metadata(asset_id)
    
    print(f"Asset: {metadata['name']}")
    print(f"Tipo: {metadata['classType']}")
    print(f"Descrizione originale: {metadata['description'] or '(nessuna)'}")
    
    # Recupera lineage per contesto
    lineage = await lineage_builder.get_immediate_lineage(asset_id, "upstream")
    upstream = [l for l in lineage if l['direction'] == 'upstream']
    
    # Enhancement AI
    enhanced = await llm_client.enhance_documentation(
        asset_info=metadata,
        lineage_context={'upstream': upstream},
        business_context={'domain': business_domain}
    )
    
    print(f"\n{'='*40}")
    print(f"DOCUMENTAZIONE ARRICCHITA")
    print(f"{'='*40}")
    
    print(f"\nDescrizione Arricchita:")
    print(f"  {enhanced['enhanced_description']}")
    
    print(f"\nScopo Business:")
    print(f"  {enhanced['business_purpose']}")
    
    print(f"\nTag Suggeriti:")
    print(f"  {', '.join(enhanced['suggested_tags'])}")
    
    print(f"\nRegole Qualita Suggerite:")
    for i, rule in enumerate(enhanced.get('suggested_quality_rules', []), 1):
        print(f"  {i}. {rule}")
    
    print(f"\nNote Compliance:")
    print(f"  {enhanced['compliance_notes']}")
    
    return enhanced

In [None]:
# TEST: Arricchisci documentazione
enhanced = await enhance_asset_documentation(
    asset_id="DataPlatform://ORAC51/DWHEVO/IFR_WK_GARANZIE_SOFFERENZE_DT_AP",
    business_domain="GARANZIE"
)

---

## 4. Utility

In [None]:
def show_statistics():
    """Mostra statistiche del sistema."""
    print(f"\n{'='*60}")
    print(f"STATISTICHE SISTEMA")
    print(f"{'='*60}")
    
    stats = lineage_builder.get_statistics()
    
    print(f"\nEDC Client:")
    print(f"  - Total API calls: {stats['total_requests']}")
    print(f"  - Cache hits: {stats['cache_hits']}")
    print(f"  - API errors: {stats['api_errors']}")
    print(f"  - Nodi creati: {stats['nodes_created']}")
    print(f"  - Cicli prevenuti: {stats['cycles_prevented']}")
    
    if stats['total_requests'] > 0:
        cache_rate = stats['cache_hits'] / stats['total_requests'] * 100
        print(f"  - Cache hit rate: {cache_rate:.1f}%")
    
    print(f"\nLLM Provider: {CURRENT_LLM_PROVIDER.value}")


show_statistics()

In [None]:
def switch_llm(provider: str):
    """
    Cambia provider LLM.
    
    Args:
        provider: "tinyllama", "claude", "gemma3"
    """
    provider_map = {
        'tinyllama': LLMProvider.TINYLLAMA,
        'claude': LLMProvider.CLAUDE,
        'gemma3': LLMProvider.GEMMA3
    }
    
    if provider.lower() not in provider_map:
        print(f"Provider non valido. Usa: {list(provider_map.keys())}")
        return
    
    init_llm_client(provider_map[provider.lower()])


# Esempio: switch_llm("claude")

In [None]:
def clear_cache():
    """Pulisce la cache."""
    lineage_builder.clear_cache()
    print("[OK] Cache pulita")


# clear_cache()

---

## 5. Cleanup

In [None]:
# Chiudi connessioni alla fine
async def cleanup():
    await lineage_builder.close()
    print("[OK] Connessioni chiuse")

# Decommentare per chiudere:
# await cleanup()

---

## Note

### Asset ID Format
```
DataPlatform://CONNECTION/SCHEMA/TABLE_NAME
```

### Provider LLM disponibili
- `tinyllama` - Via Ollama locale (veloce, meno preciso)
- `gemma3` - Via Ollama locale (bilanciato)
- `claude` - Anthropic API (preciso, richiede API key)

### Cambiare provider
```python
switch_llm("claude")  # o "tinyllama" o "gemma3"
```