In [2]:
# # Gera√ß√£o de Dataset com M√©tricas do cAdvisor
# Notebook para coletar todas as m√©tricas dispon√≠veis do cAdvisor via Prometheus

# ## 1. Instala√ß√£o de Depend√™ncias

# !pip install requests pandas

# ## 2. Imports

import requests
import pandas as pd
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)

# ## 3. Configura√ß√£o

# **CONFIGURE AQUI:**
PROMETHEUS_URL = 'http://localhost:9090'  # URL do seu Prometheus
POD_FILTER = 'memory-stress-.*'  # Filtro de pods (ou None para todos)
NAMESPACE = None  # Namespace espec√≠fico (ou None para todos)
DURATION_MINUTES = 30  # Dura√ß√£o da coleta em minutos
STEP = '30s'  # Intervalo entre pontos

# ## 4. Fun√ß√£o de Coleta

def collect_cadvisor_metrics(prometheus_url, duration_minutes=30, step='30s', 
                             pod_filter=None, namespace=None):
    """
    Coleta todas as m√©tricas do cAdvisor dispon√≠veis no Prometheus
    
    Args:
        prometheus_url: URL do Prometheus
        duration_minutes: Dura√ß√£o da coleta em minutos
        step: Intervalo entre pontos
        pod_filter: Regex para filtrar pods
        namespace: Namespace espec√≠fico
    
    Returns:
        DataFrame com todas as m√©tricas
    """
    
    # Calcula timestamps
    end_time = datetime.now()
    start_time = end_time - timedelta(minutes=duration_minutes)
    start_ts = int(start_time.timestamp())
    end_ts = int(end_time.timestamp())
    
    api_url = f"{prometheus_url.rstrip('/')}/api/v1"
    
    # Lista completa de m√©tricas do cAdvisor
    cadvisor_metrics = [
        # CPU
        'container_cpu_usage_seconds_total',
        'container_cpu_user_seconds_total',
        'container_cpu_system_seconds_total',
        'container_cpu_cfs_periods_total',
        'container_cpu_cfs_throttled_periods_total',
        'container_cpu_cfs_throttled_seconds_total',
        
        # Mem√≥ria
        'container_memory_usage_bytes',
        'container_memory_working_set_bytes',
        'container_memory_rss',
        'container_memory_cache',
        'container_memory_swap',
        'container_memory_mapped_file',
        'container_memory_max_usage_bytes',
        'container_memory_failcnt',
        'container_memory_failures_total',
        
        # Especifica√ß√µes e Limites
        'container_spec_memory_limit_bytes',
        'container_spec_memory_swap_limit_bytes',
        'container_spec_memory_reservation_limit_bytes',
        'container_spec_cpu_quota',
        'container_spec_cpu_period',
        'container_spec_cpu_shares',
        
        # Rede
        'container_network_receive_bytes_total',
        'container_network_receive_packets_total',
        'container_network_receive_packets_dropped_total',
        'container_network_receive_errors_total',
        'container_network_transmit_bytes_total',
        'container_network_transmit_packets_total',
        'container_network_transmit_packets_dropped_total',
        'container_network_transmit_errors_total',
        'container_network_tcp_usage_total',
        'container_network_udp_usage_total',
        
        # Disco/Filesystem
        'container_fs_usage_bytes',
        'container_fs_limit_bytes',
        'container_fs_reads_bytes_total',
        'container_fs_reads_total',
        'container_fs_read_seconds_total',
        'container_fs_writes_bytes_total',
        'container_fs_writes_total',
        'container_fs_write_seconds_total',
        'container_fs_io_time_seconds_total',
        'container_fs_io_time_weighted_seconds_total',
        'container_fs_reads_merged_total',
        'container_fs_writes_merged_total',
        'container_fs_sector_reads_total',
        'container_fs_sector_writes_total',
        'container_fs_inodes_free',
        'container_fs_inodes_total',
        
        # Processos e Tarefas
        'container_processes',
        'container_threads',
        'container_threads_max',
        'container_file_descriptors',
        'container_sockets',
        'container_tasks_state',
        
        # OOM
        'container_oom_events_total',
        
        # Outras
        'container_start_time_seconds',
        'container_last_seen',
    ]
    
    all_data = []
    total_metrics = len(cadvisor_metrics)
    
    print(f"üîç Coletando {total_metrics} m√©tricas do cAdvisor...")
    print(f"üìÖ Per√≠odo: {start_time.strftime('%Y-%m-%d %H:%M:%S')} at√© {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"‚è±Ô∏è  Step: {step}\n")
    
    for idx, metric in enumerate(cadvisor_metrics, 1):
        # Monta filtros
        filters = []
        if pod_filter:
            filters.append(f'pod=~"{pod_filter}"')
        if namespace:
            filters.append(f'namespace="{namespace}"')
        
        # Monta query
        if filters:
            query = f'{metric}{{{",".join(filters)}}}'
        else:
            query = metric
        
        print(f"[{idx}/{total_metrics}] {metric}...", end=' ')
        
        try:
            # Faz requisi√ß√£o
            response = requests.get(
                f"{api_url}/query_range",
                params={
                    'query': query,
                    'start': start_ts,
                    'end': end_ts,
                    'step': step
                },
                timeout=60
            )
            response.raise_for_status()
            result = response.json()
            
            # Processa resultados
            if result['status'] == 'success' and result['data']['result']:
                count = 0
                for item in result['data']['result']:
                    for timestamp, value in item['values']:
                        # Cria registro
                        record = {
                            'timestamp': datetime.fromtimestamp(timestamp),
                            'metric_name': metric,
                            'value': float(value) if value != 'NaN' else None,
                        }
                        
                        # Adiciona todas as labels
                        for label, label_value in item['metric'].items():
                            record[label] = label_value
                        
                        all_data.append(record)
                        count += 1
                
                print(f"‚úÖ {count} registros")
            else:
                print("‚ö†Ô∏è  Sem dados")
        
        except Exception as e:
            print(f"‚ùå Erro: {str(e)[:50]}")
    
    # Cria DataFrame
    if all_data:
        df = pd.DataFrame(all_data)
        print(f"\n‚ú® Dataset gerado com sucesso!")
        print(f"   Total de registros: {len(df):,}")
        print(f"   M√©tricas coletadas: {df['metric_name'].nunique()}")
        return df
    else:
        print("\n‚ö†Ô∏è  Nenhum dado foi coletado!")
        return pd.DataFrame()

# ## 5. Coleta dos Dados

print("="*70)
print("INICIANDO COLETA DE M√âTRICAS DO CADVISOR")
print("="*70 + "\n")

df = collect_cadvisor_metrics(
    prometheus_url=PROMETHEUS_URL,
    duration_minutes=DURATION_MINUTES,
    step=STEP,
    pod_filter=POD_FILTER,
    namespace=NAMESPACE
)

# ## 6. Informa√ß√µes do Dataset

if not df.empty:
    print("\n" + "="*70)
    print("INFORMA√á√ïES DO DATASET")
    print("="*70)
    
    print(f"\nüìä Shape: {df.shape}")
    print(f"üìã Colunas: {list(df.columns)}")
    print(f"\n‚è∞ Per√≠odo dos dados:")
    print(f"   In√≠cio: {df['timestamp'].min()}")
    print(f"   Fim: {df['timestamp'].max()}")
    
    print(f"\nüìà M√©tricas coletadas ({df['metric_name'].nunique()}):")
    metrics_count = df.groupby('metric_name').size().sort_values(ascending=False)
    for metric, count in metrics_count.items():
        print(f"   ‚Ä¢ {metric}: {count:,} registros")
    
    if 'pod' in df.columns:
        print(f"\nüéØ Pods monitorados ({df['pod'].nunique()}):")
        for pod in sorted(df['pod'].unique()):
            pod_metrics = df[df['pod'] == pod]['metric_name'].nunique()
            pod_records = len(df[df['pod'] == pod])
            print(f"   ‚Ä¢ {pod}: {pod_metrics} m√©tricas, {pod_records:,} registros")
    
    if 'namespace' in df.columns:
        print(f"\nüì¶ Namespaces: {df['namespace'].nunique()}")
        for ns in sorted(df['namespace'].unique()):
            print(f"   ‚Ä¢ {ns}")
    
    print("\nüìã Primeiras linhas do dataset:")
    display(df.head(10))
    
    print("\nüìä Tipos de dados:")
    display(df.dtypes)
    
    print("\nüìà Estat√≠sticas dos valores:")
    display(df['value'].describe())

# ## 7. Salvar Dataset

if not df.empty:
    # CSV
    csv_file = 'cadvisor_metrics_dataset.csv'
    df.to_csv(csv_file, index=False)
    print(f"\n‚úÖ Dataset salvo em CSV: {csv_file}")
    
    # Parquet (mais eficiente)
    parquet_file = 'cadvisor_metrics_dataset.parquet'
    df.to_parquet(parquet_file, index=False)
    print(f"‚úÖ Dataset salvo em Parquet: {parquet_file}")
    
    # Excel (opcional - pode ser lento para datasets grandes)
    if len(df) < 100000:  # Apenas se n√£o for muito grande
        excel_file = 'cadvisor_metrics_dataset.xlsx'
        df.to_excel(excel_file, index=False)
        print(f"‚úÖ Dataset salvo em Excel: {excel_file}")
    
    print(f"\nüì¶ Tamanho dos arquivos:")
    import os
    print(f"   ‚Ä¢ CSV: {os.path.getsize(csv_file) / 1024 / 1024:.2f} MB")
    print(f"   ‚Ä¢ Parquet: {os.path.getsize(parquet_file) / 1024 / 1024:.2f} MB")

print("\n" + "="*70)
print("‚ú® COLETA FINALIZADA COM SUCESSO!")
print("="*70)

INICIANDO COLETA DE M√âTRICAS DO CADVISOR

üîç Coletando 56 m√©tricas do cAdvisor...
üìÖ Per√≠odo: 2025-10-02 07:47:51 at√© 2025-10-02 08:17:51
‚è±Ô∏è  Step: 30s

[1/56] container_cpu_usage_seconds_total... ‚úÖ 212 registros
[2/56] container_cpu_user_seconds_total... ‚ö†Ô∏è  Sem dados
[3/56] container_cpu_system_seconds_total... ‚ö†Ô∏è  Sem dados
[4/56] container_cpu_cfs_periods_total... ‚úÖ 151 registros
[5/56] container_cpu_cfs_throttled_periods_total... ‚úÖ 151 registros
[6/56] container_cpu_cfs_throttled_seconds_total... ‚ö†Ô∏è  Sem dados
[7/56] container_memory_usage_bytes... ‚úÖ 212 registros
[8/56] container_memory_working_set_bytes... ‚úÖ 212 registros
[9/56] container_memory_rss... ‚úÖ 212 registros
[10/56] container_memory_cache... ‚úÖ 212 registros
[11/56] container_memory_swap... ‚ö†Ô∏è  Sem dados
[12/56] container_memory_mapped_file... ‚ö†Ô∏è  Sem dados
[13/56] container_memory_max_usage_bytes... ‚úÖ 212 registros
[14/56] container_memory_failcnt... ‚úÖ 212 registros
[15

Unnamed: 0,timestamp,metric_name,value,__name__,container,cpu,endpoint,id,image,instance,job,metrics_path,name,namespace,node,pod,service,failure_type,scope,interface,device
0,2025-10-02 07:47:51,container_cpu_usage_seconds_total,2.10248,container_cpu_usage_seconds_total,memory-stress,total,https-metrics,/kubepods/burstable/podc90feebe-478d-40e3-a037...,docker.io/library/memory-stress:latest,192.168.242.134:10250,kubelet,/metrics/cadvisor,0300a3ee943145f38e9ce433dc486332bc89218d2edc7b...,memory-stress-test,x86,memory-stress-app-664c7bc4ff-ptfl6,kube-prometheus-stack-kubelet,,,,
1,2025-10-02 07:48:21,container_cpu_usage_seconds_total,2.386242,container_cpu_usage_seconds_total,memory-stress,total,https-metrics,/kubepods/burstable/podc90feebe-478d-40e3-a037...,docker.io/library/memory-stress:latest,192.168.242.134:10250,kubelet,/metrics/cadvisor,0300a3ee943145f38e9ce433dc486332bc89218d2edc7b...,memory-stress-test,x86,memory-stress-app-664c7bc4ff-ptfl6,kube-prometheus-stack-kubelet,,,,
2,2025-10-02 07:48:51,container_cpu_usage_seconds_total,2.916546,container_cpu_usage_seconds_total,memory-stress,total,https-metrics,/kubepods/burstable/podc90feebe-478d-40e3-a037...,docker.io/library/memory-stress:latest,192.168.242.134:10250,kubelet,/metrics/cadvisor,0300a3ee943145f38e9ce433dc486332bc89218d2edc7b...,memory-stress-test,x86,memory-stress-app-664c7bc4ff-ptfl6,kube-prometheus-stack-kubelet,,,,
3,2025-10-02 07:49:21,container_cpu_usage_seconds_total,3.438816,container_cpu_usage_seconds_total,memory-stress,total,https-metrics,/kubepods/burstable/podc90feebe-478d-40e3-a037...,docker.io/library/memory-stress:latest,192.168.242.134:10250,kubelet,/metrics/cadvisor,0300a3ee943145f38e9ce433dc486332bc89218d2edc7b...,memory-stress-test,x86,memory-stress-app-664c7bc4ff-ptfl6,kube-prometheus-stack-kubelet,,,,
4,2025-10-02 07:49:51,container_cpu_usage_seconds_total,3.719793,container_cpu_usage_seconds_total,memory-stress,total,https-metrics,/kubepods/burstable/podc90feebe-478d-40e3-a037...,docker.io/library/memory-stress:latest,192.168.242.134:10250,kubelet,/metrics/cadvisor,0300a3ee943145f38e9ce433dc486332bc89218d2edc7b...,memory-stress-test,x86,memory-stress-app-664c7bc4ff-ptfl6,kube-prometheus-stack-kubelet,,,,
5,2025-10-02 07:50:21,container_cpu_usage_seconds_total,4.149523,container_cpu_usage_seconds_total,memory-stress,total,https-metrics,/kubepods/burstable/podc90feebe-478d-40e3-a037...,docker.io/library/memory-stress:latest,192.168.242.134:10250,kubelet,/metrics/cadvisor,0300a3ee943145f38e9ce433dc486332bc89218d2edc7b...,memory-stress-test,x86,memory-stress-app-664c7bc4ff-ptfl6,kube-prometheus-stack-kubelet,,,,
6,2025-10-02 07:50:51,container_cpu_usage_seconds_total,4.378999,container_cpu_usage_seconds_total,memory-stress,total,https-metrics,/kubepods/burstable/podc90feebe-478d-40e3-a037...,docker.io/library/memory-stress:latest,192.168.242.134:10250,kubelet,/metrics/cadvisor,0300a3ee943145f38e9ce433dc486332bc89218d2edc7b...,memory-stress-test,x86,memory-stress-app-664c7bc4ff-ptfl6,kube-prometheus-stack-kubelet,,,,
7,2025-10-02 07:51:21,container_cpu_usage_seconds_total,4.597126,container_cpu_usage_seconds_total,memory-stress,total,https-metrics,/kubepods/burstable/podc90feebe-478d-40e3-a037...,docker.io/library/memory-stress:latest,192.168.242.134:10250,kubelet,/metrics/cadvisor,0300a3ee943145f38e9ce433dc486332bc89218d2edc7b...,memory-stress-test,x86,memory-stress-app-664c7bc4ff-ptfl6,kube-prometheus-stack-kubelet,,,,
8,2025-10-02 07:51:51,container_cpu_usage_seconds_total,5.02798,container_cpu_usage_seconds_total,memory-stress,total,https-metrics,/kubepods/burstable/podc90feebe-478d-40e3-a037...,docker.io/library/memory-stress:latest,192.168.242.134:10250,kubelet,/metrics/cadvisor,0300a3ee943145f38e9ce433dc486332bc89218d2edc7b...,memory-stress-test,x86,memory-stress-app-664c7bc4ff-ptfl6,kube-prometheus-stack-kubelet,,,,
9,2025-10-02 07:52:21,container_cpu_usage_seconds_total,5.456284,container_cpu_usage_seconds_total,memory-stress,total,https-metrics,/kubepods/burstable/podc90feebe-478d-40e3-a037...,docker.io/library/memory-stress:latest,192.168.242.134:10250,kubelet,/metrics/cadvisor,0300a3ee943145f38e9ce433dc486332bc89218d2edc7b...,memory-stress-test,x86,memory-stress-app-664c7bc4ff-ptfl6,kube-prometheus-stack-kubelet,,,,



üìä Tipos de dados:


timestamp       datetime64[ns]
metric_name             object
value                  float64
__name__                object
container               object
cpu                     object
endpoint                object
id                      object
image                   object
instance                object
job                     object
metrics_path            object
name                    object
namespace               object
node                    object
pod                     object
service                 object
failure_type            object
scope                   object
interface               object
device                  object
dtype: object


üìà Estat√≠sticas dos valores:


count    5.141000e+03
mean     1.978193e+08
std      4.867007e+08
min      0.000000e+00
25%      0.000000e+00
50%      1.110000e+02
75%      1.015808e+06
max      1.759393e+09
Name: value, dtype: float64


‚úÖ Dataset salvo em CSV: cadvisor_metrics_dataset.csv


ImportError: Unable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:
 - Missing optional dependency 'pyarrow'. pyarrow is required for parquet support. Use pip or conda to install pyarrow.
 - Missing optional dependency 'fastparquet'. fastparquet is required for parquet support. Use pip or conda to install fastparquet.