In [None]:
# Import required modules
import sys

# Record the interpreter version in the notebook output.
print(f"Python version: {sys.version}")

# Import SMF structures (project-local modules: sample generator and report parser)
from smf30_structures import SMF30SampleGenerator
from smf30_parser import SMF30Parser

# The status glyph was mis-encoded (UTF-8 bytes decoded with the wrong codec);
# restored to the intended check mark.
print("✓ Modules imported successfully")

## Step 1: Generate Sample SMF-30 Records

In [None]:
# Generate sample records for all subtypes.
# The result is iterated below as a mapping of subtype number -> list of records.
all_records = SMF30SampleGenerator.generate_all_records()

print("Generated SMF-30 Sample Records:")
print("="*60)

# Display labels for the subtypes this notebook knows about.
# NOTE(review): these labels follow this project's sample data; IBM's published
# SMF type 30 subtype numbering appears to differ (e.g. subtype 1 = job start) —
# confirm before presenting them as authoritative.
subtype_names = {
    1: "Job Step Termination",
    2: "Job Termination",
    3: "Step Initiation",
    4: "Job Initiation",
    5: "NetStep Completion",
}

total = 0
for subtype, records in sorted(all_records.items()):
    count = len(records)
    total += count
    # Fall back to a placeholder label so a subtype missing from the table above
    # is still listed instead of aborting the cell with a KeyError.
    name = subtype_names.get(subtype, "Unknown Subtype")
    # ".<30" left-aligns the label and pads it with dots to 30 characters.
    print(f"Subtype {subtype} ({name:.<30}) {count:>3} records")

print(f"\nTOTAL: {total} records")

## Step 2: Generate CSV and JSON Reports

In [None]:
# Create parser and generate reports
parser = SMF30Parser(output_dir="./reports")
# Assign the generated sample records directly to the parser's
# records_by_subtype attribute, then ask it to produce its reports.
# NOTE(review): presumably generate_all_reports() writes the CSV/JSON files (and
# creates ./reports if needed) — confirm against smf30_parser.
parser.records_by_subtype = all_records
parser.generate_all_reports()

# The status glyph was mis-encoded; restored to the intended check mark.
print("\n✓ CSV and JSON reports generated in ./reports/")

## Step 3: Display Sample Records

In [None]:
import pandas as pd

# Display sample from Subtype 1 (Job Step Termination)
print("\nSubtype 1 - Job Step Termination Sample:")
print("="*80)
# One row per record, built from each record's to_dict() output.
df1 = pd.DataFrame([r.to_dict() for r in all_records[1]])
display(df1.head())

print("\nKey Statistics:")
# Use display() rather than print() so the summary renders as a rich table,
# consistent with the head() preview above.
display(df1[['cpu_time_ms', 'elapsed_time_ms', 'io_count', 'return_code']].describe())

In [None]:
# Display sample from Subtype 2 (Job Termination)
print("Subtype 2 - Job Termination Sample:")
print("="*80)
# One row per record, built from each record's to_dict() output.
df2 = pd.DataFrame([r.to_dict() for r in all_records[2]])
display(df2.head())

print("\nKey Statistics:")
# Use display() rather than print() so the summary renders as a rich table,
# consistent with the head() preview above.
display(df2[['cpu_time_ms', 'total_steps', 'failed_steps', 'memory_allocated_mb', 'memory_max_used_mb']].describe())

## Step 4: Install Visualization Dependencies

In [None]:
# Install required packages for visualizations
import subprocess
import sys

print("Installing visualization dependencies...")
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "matplotlib", "numpy"])
print("‚úì Dependencies installed")

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import numpy as np

# Configure matplotlib for notebook
%matplotlib inline
plt.style.use('default')
print("‚úì Matplotlib configured")

## Step 5: Subtype 1 - Job Step Termination Analysis

In [None]:
# Subtype 1 Visualization: a 2x3 grid of panels built from the subtype 1 records,
# saved as a PNG under ./reports and followed by a short text summary.
df1 = pd.DataFrame([rec.to_dict() for rec in all_records[1]])
# Five-colour palette; later cells index into this list as well.
colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#FFA07A', '#98D8C8']

fig = plt.figure(figsize=(15, 10))
fig.suptitle('SMF Type 30 Subtype 1: Job Step Termination Analysis', fontsize=16, fontweight='bold')
gs = gridspec.GridSpec(2, 3, figure=fig, hspace=0.3, wspace=0.3)

# Row positions shared by the per-step panels below.
step_index = range(len(df1))

# Panel (0, 0): CPU time histogram
ax_cpu = fig.add_subplot(gs[0, 0])
ax_cpu.hist(df1['cpu_time_ms'], bins=10, color=colors[0], edgecolor='black', alpha=0.7)
ax_cpu.set(xlabel='CPU Time (ms)', ylabel='Frequency', title='CPU Time Distribution')
ax_cpu.grid(True, alpha=0.3)

# Panel (0, 1): how often each return code occurs
ax_rc = fig.add_subplot(gs[0, 1])
rc_tally = df1['return_code'].value_counts()
# Codes are converted to strings so they are plotted as categories.
ax_rc.bar(rc_tally.index.astype(str), rc_tally.values, color=colors[1], alpha=0.7)
ax_rc.set(xlabel='Return Code', ylabel='Count', title='Return Code Distribution')
ax_rc.grid(True, alpha=0.3, axis='y')

# Panel (0, 2): IO count plotted against CPU time
ax_io = fig.add_subplot(gs[0, 2])
ax_io.scatter(df1['cpu_time_ms'], df1['io_count'], color=colors[2], s=100, alpha=0.6)
ax_io.set(xlabel='CPU Time (ms)', ylabel='IO Count', title='CPU Time vs IO Count')
ax_io.grid(True, alpha=0.3)

# Panel (1, 0): pages read and written as side-by-side bars per step
ax_pages = fig.add_subplot(gs[1, 0])
positions = np.arange(len(df1))
bar_width = 0.35
# Shift each series by half a bar width so the pair straddles the step position.
ax_pages.bar(positions - bar_width/2, df1['pages_read'], bar_width,
             label='Pages Read', color=colors[0], alpha=0.7)
ax_pages.bar(positions + bar_width/2, df1['pages_written'], bar_width,
             label='Pages Written', color=colors[1], alpha=0.7)
ax_pages.set(xlabel='Step Index', ylabel='Pages', title='Pages Read vs Written')
ax_pages.legend()
ax_pages.grid(True, alpha=0.3, axis='y')

# Panel (1, 1): elapsed time per step, horizontal bars
ax_elapsed = fig.add_subplot(gs[1, 1])
ax_elapsed.barh(step_index, df1['elapsed_time_ms'].values, color=colors[3], alpha=0.7)
ax_elapsed.set(xlabel='Elapsed Time (ms)', ylabel='Step Index', title='Elapsed Time per Step')
ax_elapsed.grid(True, alpha=0.3, axis='x')

# Panel (1, 2): service units per step
ax_su = fig.add_subplot(gs[1, 2])
ax_su.bar(step_index, df1['service_units'].values, color=colors[4], alpha=0.7)
ax_su.set(xlabel='Step Index', ylabel='Service Units', title='Service Units Consumed')
ax_su.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig('./reports/smf30_subtype1_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

# Text summary: CPU time range/mean and the share of steps with return code 0.
cpu_ms = df1['cpu_time_ms']
ok_steps = (df1['return_code'] == 0).sum()
n_steps = len(df1)

print(f"\nStatistics Summary:")
print(f"  CPU Time: min={cpu_ms.min()}, max={cpu_ms.max()}, avg={cpu_ms.mean():.0f}")
print(f"  Success Rate: {ok_steps}/{n_steps} ({ok_steps/n_steps*100:.1f}%)")

## Step 6: Subtype 2 - Job Termination Analysis

In [None]:
# Subtype 2 Visualization: a 2x3 grid of panels built from the subtype 2 records,
# saved as a PNG under ./reports and followed by a short text summary.
df2 = pd.DataFrame([rec.to_dict() for rec in all_records[2]])

fig = plt.figure(figsize=(15, 10))
fig.suptitle('SMF Type 30 Subtype 2: Job Termination Analysis', fontsize=16, fontweight='bold')
gs = gridspec.GridSpec(2, 3, figure=fig, hspace=0.3, wspace=0.3)

# Positions shared by every per-job panel; grouped bars are shifted half a bar
# width to either side of each job's position.
job_index = range(len(df2))
job_pos = np.arange(len(df2))
bar_width = 0.35
left_pos = job_pos - bar_width/2
right_pos = job_pos + bar_width/2

# Panel (0, 0): total CPU time per job
ax_cpu = fig.add_subplot(gs[0, 0])
ax_cpu.bar(job_index, df2['cpu_time_ms'].values, color=colors[0], alpha=0.7)
ax_cpu.set(xlabel='Job Index', ylabel='Total CPU Time (ms)', title='Total CPU Time per Job')
ax_cpu.grid(True, alpha=0.3, axis='y')

# Panel (0, 1): total steps next to failed steps
ax_steps = fig.add_subplot(gs[0, 1])
ax_steps.bar(left_pos, df2['total_steps'], bar_width, label='Total Steps', color=colors[1], alpha=0.7)
ax_steps.bar(right_pos, df2['failed_steps'], bar_width, label='Failed Steps', color=colors[0], alpha=0.7)
ax_steps.set(xlabel='Job Index', ylabel='Steps', title='Total vs Failed Steps')
ax_steps.legend()
ax_steps.grid(True, alpha=0.3, axis='y')

# Panel (0, 2): memory allocated next to maximum memory used
ax_mem = fig.add_subplot(gs[0, 2])
ax_mem.bar(left_pos, df2['memory_allocated_mb'], bar_width, label='Allocated', color=colors[2], alpha=0.7)
ax_mem.bar(right_pos, df2['memory_max_used_mb'], bar_width, label='Max Used', color=colors[3], alpha=0.7)
ax_mem.set(xlabel='Job Index', ylabel='Memory (MB)', title='Memory Allocation vs Usage')
ax_mem.legend()
ax_mem.grid(True, alpha=0.3, axis='y')

# Panel (1, 0): total EXCP count per job
ax_excp = fig.add_subplot(gs[1, 0])
ax_excp.bar(job_index, df2['total_excp_count'].values, color=colors[4], alpha=0.7)
ax_excp.set(xlabel='Job Index', ylabel='EXCP Count', title='Total EXCP Operations')
ax_excp.grid(True, alpha=0.3, axis='y')

# Panel (1, 1): total pages read next to total pages written
ax_pages = fig.add_subplot(gs[1, 1])
ax_pages.bar(left_pos, df2['total_pages_read'], bar_width, label='Pages Read', color=colors[0], alpha=0.7)
ax_pages.bar(right_pos, df2['total_pages_written'], bar_width, label='Pages Written', color=colors[1], alpha=0.7)
ax_pages.set(xlabel='Job Index', ylabel='Pages', title='Total Pages Read vs Written')
ax_pages.legend()
ax_pages.grid(True, alpha=0.3, axis='y')

# Panel (1, 2): share of jobs in each job class
ax_class = fig.add_subplot(gs[1, 2])
per_class = df2['job_class'].value_counts()
ax_class.pie(per_class.values, labels=per_class.index, autopct='%1.1f%%', colors=colors, startangle=90)
ax_class.set_title('Job Class Distribution')

plt.tight_layout()
plt.savefig('./reports/smf30_subtype2_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

# Text summary. "Memory Efficiency" is mean max-used over mean allocated;
# a job counts as successful when it reports zero failed steps.
mean_allocated = df2['memory_allocated_mb'].mean()
mean_used = df2['memory_max_used_mb'].mean()
clean_jobs = (df2['failed_steps'] == 0).sum()
n_jobs = len(df2)

print(f"\nStatistics Summary:")
print(f"  Avg Memory Allocated: {mean_allocated:.0f} MB")
print(f"  Avg Memory Used: {mean_used:.0f} MB")
print(f"  Memory Efficiency: {(mean_used/mean_allocated*100):.1f}%")
print(f"  Success Rate: {clean_jobs}/{n_jobs} ({clean_jobs/n_jobs*100:.1f}%)")

## Step 7: Subtype 5 - NetStep Completion Analysis

In [None]:
# Subtype 5 Visualization: a 2x3 grid of panels built from the subtype 5 records,
# saved as a PNG under ./reports and followed by a short text summary.
df5 = pd.DataFrame([r.to_dict() for r in all_records[5]])

# Single byte->KB divisor for both the chart and the printed totals. Previously
# the chart divided by 1000 while the summary divided by 1024, so the two "KB"
# figures disagreed.
BYTES_PER_KB = 1024

fig = plt.figure(figsize=(15, 8))
fig.suptitle('SMF Type 30 Subtype 5: NetStep Completion Analysis', fontsize=16, fontweight='bold')
gs = gridspec.GridSpec(2, 3, figure=fig, hspace=0.3, wspace=0.3)

# Bytes Transmitted vs Received
ax1 = fig.add_subplot(gs[0, 0])
x_pos = np.arange(len(df5))
width = 0.35
ax1.bar(x_pos - width/2, df5['bytes_transmitted']/BYTES_PER_KB, width, label='Transmitted', color=colors[0], alpha=0.7)
ax1.bar(x_pos + width/2, df5['bytes_received']/BYTES_PER_KB, width, label='Received', color=colors[1], alpha=0.7)
ax1.set_xlabel('NetStep Index')
ax1.set_ylabel('Bytes (KB)')
ax1.set_title('Network Data Transfer')
ax1.legend()
ax1.grid(True, alpha=0.3, axis='y')

# Network Response Time
ax2 = fig.add_subplot(gs[0, 1])
ax2.bar(range(len(df5)), df5['network_response_time_ms'], color=colors[2], alpha=0.7)
ax2.set_xlabel('NetStep Index')
ax2.set_ylabel('Response Time (ms)')
ax2.set_title('Network Response Time')
ax2.grid(True, alpha=0.3, axis='y')

# Return Codes
ax3 = fig.add_subplot(gs[0, 2])
rc_counts = df5['return_code'].value_counts()
# Return code 0 gets colors[1] and any other code gets colors[0] (the red tone),
# matching the Subtype 2 figure where colors[0] marks failed steps. The original
# mapping was the other way round and painted the success bar red.
colors_rc = [colors[1] if x == 0 else colors[0] for x in rc_counts.index]
ax3.bar(rc_counts.index.astype(str), rc_counts.values, color=colors_rc, alpha=0.7)
ax3.set_xlabel('Return Code')
ax3.set_ylabel('Count')
ax3.set_title('Return Code Distribution')
ax3.grid(True, alpha=0.3, axis='y')

# CPU Time
ax4 = fig.add_subplot(gs[1, 0])
ax4.bar(range(len(df5)), df5['cpu_time_ms'], color=colors[3], alpha=0.7)
ax4.set_xlabel('NetStep Index')
ax4.set_ylabel('CPU Time (ms)')
ax4.set_title('CPU Time per NetStep')
ax4.grid(True, alpha=0.3, axis='y')

# Network Destination Distribution
ax5 = fig.add_subplot(gs[1, 1])
dest_counts = df5['network_destination'].value_counts()
ax5.pie(dest_counts.values, labels=dest_counts.index, autopct='%1.1f%%', colors=colors, startangle=90)
ax5.set_title('Network Destination Distribution')

# Throughput
ax6 = fig.add_subplot(gs[1, 2])
# A zero elapsed time would produce an infinite throughput, which cannot be
# drawn on a bar chart; treat it as missing (NaN) so that bar is left out.
elapsed_ms = df5['elapsed_time_ms'].replace(0, np.nan)
throughput = (df5['bytes_transmitted'] + df5['bytes_received']) / elapsed_ms
ax6.bar(range(len(df5)), throughput, color=colors[4], alpha=0.7)
ax6.set_xlabel('NetStep Index')
ax6.set_ylabel('Bytes/ms')
ax6.set_title('Network Throughput')
ax6.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig('./reports/smf30_subtype5_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

# Text summary: mean response time, transfer totals in KB, and the share of
# records with return code 0.
print(f"\nStatistics Summary:")
print(f"  Avg Response Time: {df5['network_response_time_ms'].mean():.0f} ms")
print(f"  Total Data Transmitted: {df5['bytes_transmitted'].sum()/BYTES_PER_KB:.0f} KB")
print(f"  Total Data Received: {df5['bytes_received'].sum()/BYTES_PER_KB:.0f} KB")
print(f"  Success Rate: {(df5['return_code']==0).sum()}/{len(df5)} ({(df5['return_code']==0).sum()/len(df5)*100:.1f}%)")

## Step 8: Summary Dashboard

In [None]:
# Summary Dashboard: record counts, average CPU time and success rate per subtype,
# plus a summary table, saved as a PNG under ./reports.
fig = plt.figure(figsize=(16, 10))
fig.suptitle('SMF Type 30 - Complete Analysis Dashboard', fontsize=18, fontweight='bold')
gs = gridspec.GridSpec(2, 3, figure=fig, hspace=0.35, wspace=0.3)

# Record count by subtype. A subtype missing from all_records counts as zero
# records everywhere in this cell; previously only this panel tolerated a
# missing key while the table below raised KeyError.
subtype_ids = range(1, 6)
subtype_counts = [len(all_records.get(i, [])) for i in subtype_ids]

ax1 = fig.add_subplot(gs[0, 0])
ax1.bar(subtype_ids, subtype_counts, color=colors, alpha=0.7)
ax1.set_xlabel('Subtype')
ax1.set_ylabel('Record Count')
ax1.set_title('Records per Subtype')
ax1.set_xticks(subtype_ids)
ax1.grid(True, alpha=0.3, axis='y')

# The CPU-time and success-rate panels cover subtypes 1, 2 and 5. Build each
# DataFrame once and share it between both panels instead of rebuilding it.
compared_subtypes = [1, 2, 5]
compared_labels = ['ST1', 'ST2', 'ST5']
compared_colors = [colors[0], colors[1], colors[4]]
frames = {
    i: pd.DataFrame([r.to_dict() for r in all_records.get(i, [])])
    for i in compared_subtypes
}

# CPU Time by subtype (0 when a subtype has no records)
ax2 = fig.add_subplot(gs[0, 1])
cpu_data = [
    frames[i]['cpu_time_ms'].mean() if not frames[i].empty else 0.0
    for i in compared_subtypes
]
ax2.bar(compared_labels, cpu_data, color=compared_colors, alpha=0.7)
ax2.set_ylabel('Average CPU Time (ms)')
ax2.set_title('Average CPU Time by Subtype')
ax2.grid(True, alpha=0.3, axis='y')

# Success Rate: subtype 2 counts a record as successful when failed_steps is 0;
# the other subtypes use return_code == 0.
ax3 = fig.add_subplot(gs[0, 2])
success_rates = []
for i in compared_subtypes:
    frame = frames[i]
    if frame.empty:
        # No records: report 0% rather than dividing by zero.
        success_rates.append(0.0)
        continue
    status_column = 'failed_steps' if i == 2 else 'return_code'
    success_rates.append((frame[status_column] == 0).sum() / len(frame) * 100)
ax3.bar(compared_labels, success_rates, color=compared_colors, alpha=0.7)
ax3.set_ylabel('Success Rate (%)')
ax3.set_ylim(0, 105)
ax3.set_title('Job Success Rate')
ax3.grid(True, alpha=0.3, axis='y')

# Summary table spanning the whole bottom row
ax4 = fig.add_subplot(gs[1, :])
ax4.axis('tight')
ax4.axis('off')

table_rows = [
    ('Job Step Termination', 'Step-level CPU, IO, Pages'),
    ('Job Termination', 'Total job resources, Memory'),
    ('Step Initiation', 'Memory allocation, Startup'),
    ('Job Initiation', 'Job class, Priority, Scheduling'),
    ('NetStep Completion', 'Network performance, Data transfer'),
]
summary_data = [['Subtype', 'Name', 'Records', 'Key Focus']]
for subtype_id, count, (name, focus) in zip(subtype_ids, subtype_counts, table_rows):
    summary_data.append([str(subtype_id), name, f"{count}", focus])

table = ax4.table(cellText=summary_data, loc='center', cellLoc='left')
table.auto_set_font_size(False)
table.set_fontsize(11)
table.scale(1, 3)

# Header row: dark background with bold white text
for col in range(len(summary_data[0])):
    table[(0, col)].set_facecolor('#34495E')
    table[(0, col)].set_text_props(weight='bold', color='white')

# First column of each data row is tinted with the colour of the matching bar
for row in range(1, len(summary_data)):
    table[(row, 0)].set_facecolor(colors[row-1])
    table[(row, 0)].set_text_props(weight='bold', color='white')

plt.tight_layout()
plt.savefig('./reports/smf30_summary_dashboard.png', dpi=300, bbox_inches='tight')
plt.show()

# The status glyph was mis-encoded; restored to the intended check mark.
print("\n✓ Summary dashboard generated")

## Final Summary

In [None]:
from pathlib import Path

# Final summary: list which report and visualization files are present on disk.
# The status icons in this cell were mis-encoded (UTF-8 bytes decoded with the
# wrong codec) and have been restored to the intended symbols.
print("="*80)
print("SMF TYPE 30 ANALYSIS COMPLETE")
print("="*80)

reports_dir = Path("./reports")

print("\n📊 Generated Reports:")
for i in range(1, 6):
    csv_file = reports_dir / f"smf30_subtype{i}.csv"
    json_file = reports_dir / f"smf30_subtype{i}.json"
    # Name only the files that actually exist. Previously the JSON file was
    # listed whenever the CSV existed, without being checked itself.
    present = [report.name for report in (csv_file, json_file) if report.exists()]
    if present:
        print(f"  ✓ Subtype {i}: {' + '.join(present)}")

print("\n📈 Generated Visualizations:")
# (file name, description) for each figure saved by the plotting cells
viz_files = [
    ("smf30_subtype1_analysis.png", "Job Step Termination"),
    ("smf30_subtype2_analysis.png", "Job Termination"),
    ("smf30_subtype5_analysis.png", "NetStep Completion"),
    ("smf30_summary_dashboard.png", "Overall Dashboard"),
]

for filename, desc in viz_files:
    filepath = reports_dir / filename
    if filepath.exists():
        print(f"  ✓ {desc}: {filename}")

print(f"\n📁 Output Location: {reports_dir.absolute()}")
print("\n" + "="*80)
print("Analysis complete! Check the reports/ folder for all outputs.")
print("="*80)