## Step 1: Import Required Libraries

In [None]:
import sys
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Import SMF110 modules
from smf110_binary_parser import SMF110BinaryParser
from smf110_parser import SMF110Parser
from smf110_analysis import SMF110Analysis

# Configure visualization
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
%matplotlib inline

print("âœ“ Libraries imported successfully")

## Step 2: Configure Binary Dump File Path

**Update the path below to point to your SMF 110 binary dump file.**

### How to Obtain SMF 110 Dump:
```jcl
//SMFDUMP  JOB  ...
//STEP1    EXEC PGM=IFASMFDP
//INDD     DD   DSN=SYS1.MAN1,DISP=SHR
//OUTDD    DD   DSN=USER.SMF110.DUMP,
//              DISP=(NEW,CATLG,DELETE),
//              SPACE=(CYL,(10,5),RLSE),
//              DCB=(RECFM=VBS,LRECL=32760,BLKSIZE=32764)
//SYSIN    DD   *
  INDD(INDD,OPTIONS(DUMP))
  OUTDD(OUTDD,TYPE(110))
  DATE(2024335,2024336)
/*
```

Then FTP in **binary mode**:
```
ftp mainframe.example.com
> binary
> get 'USER.SMF110.DUMP' smf110.dump
```

In [None]:
# Update this path to your actual SMF 110 binary dump file
DUMP_FILE = r"C:\path\to\your\smf110.dump"

# Or use a sample/test file if you have one
# DUMP_FILE = r"sample_smf110.bin"

dump_path = Path(DUMP_FILE)

# is_file() rather than exists(): a directory at this location is not a usable
# dump, so it should take the "not found" branch as well.
if dump_path.is_file():
    print(f"✓ Found dump file: {dump_path}")
    print(f"  File size: {dump_path.stat().st_size:,} bytes")
else:
    print(f"⚠ Dump file not found: {dump_path}")
    print("  Please update DUMP_FILE path above")

## Step 3: Parse Binary Dump

This step:
- Reads the binary SMF 110 dump
- Converts EBCDIC to ASCII
- Unpacks binary fields (big-endian)
- Organizes records by subtype

In [None]:
# Initialize binary parser
print("Parsing SMF 110 binary dump...\n")
parser = SMF110BinaryParser(DUMP_FILE)

# Parse all records; the result is used below as a mapping of
# subtype number -> collection of records for that subtype.
records_by_subtype = parser.parse_dump()

# Display summary
print("\n" + "="*60)
print("PARSING SUMMARY")
print("="*60)

total_records = sum(len(recs) for recs in records_by_subtype.values())
print(f"Total records parsed: {total_records}\n")

# Display names for the subtypes this notebook knows about (also reused by
# the reporting and summary cells further down).
subtype_names = {
    1: "Transaction Statistics",
    2: "File Statistics",
    3: "Program Statistics",
    4: "Terminal Statistics",
    5: "Storage Statistics",
    6: "Dispatcher Statistics",
    7: "Loader Statistics",
    8: "Temporary Storage",
    9: "Transient Data",
    10: "Journal Statistics",
    11: "Database Statistics",
    12: "MQ Statistics",
    13: "Web Services",
    14: "ISC Statistics",
    15: "Coupling Facility"
}

# Walk the subtypes actually present in the parse result instead of a fixed
# 1..15 range, so the per-subtype lines always add up to the total printed
# above and unexpected subtypes show up under the "Unknown" label.
# NOTE(review): assumes the mapping keys are integers (mutually sortable) - confirm.
for subtype in sorted(records_by_subtype):
    count = len(records_by_subtype[subtype])
    if count > 0:
        name = subtype_names.get(subtype, f"Unknown {subtype}")
        print(f"  Subtype {subtype:2d} ({name:25s}): {count:5d} records")

## Step 4: Transaction Analysis (Subtype 1)

Analyze CICS transaction performance metrics

In [None]:
# Subtype 1 carries the transaction records; default to an empty list when absent
transaction_records = records_by_subtype.get(1, [])

if not transaction_records:
    print("No transaction records found in dump")
else:
    print(f"Analyzing {len(transaction_records)} transaction records...\n")

    # One DataFrame row per record, built from each record's to_dict() output
    trans_rows = [rec.to_dict() for rec in transaction_records]
    df_trans = pd.DataFrame(trans_rows)

    # Preview: first ten rows, restricted to the headline columns
    preview_columns = ['transaction_id', 'program_name', 'cpu_time', 'response_time',
                       'transaction_count', 'completed', 'abended']
    print("Sample Transaction Records:")
    print(df_trans[preview_columns].head(10))

    # Aggregate figures across every transaction record
    rule = "=" * 60
    print("\n" + rule)
    print("TRANSACTION STATISTICS")
    print(rule)

    total_count = df_trans['transaction_count'].sum()
    print(f"Total transactions: {total_count:,}")
    completed_count = df_trans['completed'].sum()
    print(f"Completed: {completed_count:,}")
    abended_count = df_trans['abended'].sum()
    print(f"Abended: {abended_count:,}")

    cpu_column = df_trans['cpu_time']
    print(f"Avg CPU time: {cpu_column.mean():.2f} ms")
    response_column = df_trans['response_time']
    print(f"Avg response time: {response_column.mean():.2f} ms")
    print(f"Max response time: {response_column.max():.2f} ms")

## Step 5: File Statistics Analysis (Subtype 2)

In [None]:
# Subtype 2 carries the file records; default to an empty list when absent
file_records = records_by_subtype.get(2, [])

if not file_records:
    print("No file records found in dump")
else:
    print(f"Analyzing {len(file_records)} file records...\n")

    # One DataFrame row per record, built from each record's to_dict() output
    file_rows = [rec.to_dict() for rec in file_records]
    df_files = pd.DataFrame(file_rows)

    # Preview: first ten rows, restricted to the headline columns
    preview_columns = ['file_name', 'file_type', 'reads', 'writes',
                       'buffer_hits', 'buffer_misses', 'io_errors']
    print("Sample File Records:")
    print(df_files[preview_columns].head(10))

    # Buffer hit ratio as a percentage of hits + misses; rows where the
    # division yields NaN (no buffer activity) are reported as 0.
    hits = df_files['buffer_hits']
    lookups = hits + df_files['buffer_misses']
    df_files['hit_ratio'] = (hits / lookups * 100).fillna(0)

    rule = "=" * 60
    print("\n" + rule)
    print("FILE STATISTICS")
    print(rule)

    file_count = len(df_files)
    print(f"Total files: {file_count}")
    read_total = df_files['reads'].sum()
    print(f"Total reads: {read_total:,}")
    write_total = df_files['writes'].sum()
    print(f"Total writes: {write_total:,}")
    mean_hit_ratio = df_files['hit_ratio'].mean()
    print(f"Avg buffer hit ratio: {mean_hit_ratio:.2f}%")
    error_total = df_files['io_errors'].sum()
    print(f"Total I/O errors: {error_total:,}")

## Step 6: Visualization - Transaction Performance

In [None]:
if not transaction_records:
    print("No transaction data available for visualization")
else:
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('CICS Transaction Performance Analysis', fontsize=16, fontweight='bold')

    # Name the four panels of the 2x2 grid, row by row
    volume_ax, cpu_ax, slowest_ax, abend_ax = axes.flat

    # Panel 1: the ten transactions with the highest counts, largest at the top
    busiest = df_trans.nlargest(10, 'transaction_count')
    volume_ax.barh(busiest['transaction_id'], busiest['transaction_count'])
    volume_ax.set_xlabel('Transaction Count')
    volume_ax.set_ylabel('Transaction ID')
    volume_ax.set_title('Top 10 Transactions by Volume')
    volume_ax.invert_yaxis()

    # Panel 2: histogram of CPU time over all transaction records
    cpu_ax.hist(df_trans['cpu_time'], bins=30, edgecolor='black')
    cpu_ax.set_xlabel('CPU Time (ms)')
    cpu_ax.set_ylabel('Frequency')
    cpu_ax.set_title('CPU Time Distribution')

    # Panel 3: the ten transactions with the largest response time
    slowest = df_trans.nlargest(10, 'response_time')
    slowest_ax.barh(slowest['transaction_id'], slowest['response_time'], color='coral')
    slowest_ax.set_xlabel('Response Time (ms)')
    slowest_ax.set_ylabel('Transaction ID')
    slowest_ax.set_title('Top 10 Slowest Transactions')
    slowest_ax.invert_yaxis()

    # Panel 4: abend counts, or a placeholder message when nothing abended
    with_abends = df_trans[df_trans['abended'] > 0]
    worst_abends = with_abends.nlargest(10, 'abended')
    if worst_abends.empty:
        abend_ax.text(0.5, 0.5, 'No Abends Detected',
                      ha='center', va='center', fontsize=14, color='green')
        abend_ax.set_title('Abend Analysis')
    else:
        abend_ax.bar(worst_abends['transaction_id'], worst_abends['abended'], color='red')
        abend_ax.set_xlabel('Transaction ID')
        abend_ax.set_ylabel('Abend Count')
        abend_ax.set_title('Transactions with Abends')
        plt.setp(abend_ax.xaxis.get_majorticklabels(), rotation=45)

    plt.tight_layout()
    plt.show()

## Step 7: Visualization - File I/O Performance

In [None]:
# Four-panel figure of file I/O activity. Relies on `df_files` (including its
# `hit_ratio` column) built by the file statistics cell, which only exists
# when `file_records` is non-empty.
if file_records:
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('CICS File I/O Performance Analysis', fontsize=16, fontweight='bold')

    # 1. Top files by I/O (reads + writes); the column assignment is
    #    idempotent, so re-running this cell is safe.
    df_files['total_io'] = df_files['reads'] + df_files['writes']
    top_io = df_files.nlargest(10, 'total_io')
    axes[0, 0].barh(top_io['file_name'], top_io['total_io'])
    axes[0, 0].set_xlabel('Total I/O Operations')
    axes[0, 0].set_ylabel('File Name')
    axes[0, 0].set_title('Top 10 Files by I/O Activity')
    axes[0, 0].invert_yaxis()

    # 2. Buffer hit ratio for the files with the most buffer requests.
    #    No cell in this notebook creates a 'buffer_requests' column, so use
    #    it when the records supply one and otherwise derive it as
    #    hits + misses instead of failing with a KeyError.
    #    NOTE(review): assumes buffer_requests == buffer_hits + buffer_misses - confirm.
    if 'buffer_requests' in df_files.columns:
        buffer_requests = df_files['buffer_requests']
    else:
        buffer_requests = df_files['buffer_hits'] + df_files['buffer_misses']
    # assign() works on a copy, leaving df_files itself untouched.
    top_buffer = df_files.assign(buffer_requests=buffer_requests).nlargest(10, 'buffer_requests')
    axes[0, 1].barh(top_buffer['file_name'], top_buffer['hit_ratio'], color='green')
    axes[0, 1].set_xlabel('Buffer Hit Ratio (%)')
    axes[0, 1].set_ylabel('File Name')
    axes[0, 1].set_title('Buffer Hit Ratio (Higher is Better)')
    axes[0, 1].invert_yaxis()
    axes[0, 1].set_xlim(0, 100)

    # 3. Read vs Write comparison for the same top-10 files as panel 1
    #    (reuses top_io rather than recomputing the identical selection).
    x = range(len(top_io))
    width = 0.35
    # Offset the two bar series by half a bar width so they sit side by side.
    axes[1, 0].bar([i - width/2 for i in x], top_io['reads'], width, label='Reads', color='skyblue')
    axes[1, 0].bar([i + width/2 for i in x], top_io['writes'], width, label='Writes', color='orange')
    axes[1, 0].set_xlabel('File')
    axes[1, 0].set_ylabel('Operations')
    axes[1, 0].set_title('Read vs Write Operations')
    axes[1, 0].set_xticks(x)
    axes[1, 0].set_xticklabels(top_io['file_name'], rotation=45, ha='right')
    axes[1, 0].legend()

    # 4. I/O errors: one bar per file with at least one error, or a
    #    placeholder message when there are none.
    error_files = df_files[df_files['io_errors'] > 0]
    if len(error_files) > 0:
        axes[1, 1].bar(error_files['file_name'], error_files['io_errors'], color='red')
        axes[1, 1].set_xlabel('File Name')
        axes[1, 1].set_ylabel('Error Count')
        axes[1, 1].set_title('Files with I/O Errors')
        plt.setp(axes[1, 1].xaxis.get_majorticklabels(), rotation=45, ha='right')
    else:
        axes[1, 1].text(0.5, 0.5, 'No I/O Errors Detected',
                        ha='center', va='center', fontsize=14, color='green')
        axes[1, 1].set_title('I/O Error Analysis')

    plt.tight_layout()
    plt.show()
else:
    print("No file data available for visualization")

## Step 8: Export Reports

Generate CSV and JSON reports for all subtypes

In [None]:
# Create reports directory (no error if it already exists)
reports_dir = Path('reports')
reports_dir.mkdir(exist_ok=True)

# Initialize the report writer used for both CSV and JSON output
report_parser = SMF110Parser()

print("Generating reports...\n")

generated_files = []

# Generate one CSV and one JSON report for every subtype that has records
for subtype, records in records_by_subtype.items():
    if records:
        subtype_name = subtype_names.get(subtype, f"Type{subtype}")

        # Shared file-name stem, e.g. "smf110_type1_transaction_statistics";
        # built once so the CSV and JSON names cannot drift apart.
        report_stem = f"smf110_type{subtype}_{subtype_name.replace(' ', '_').lower()}"

        # CSV report
        csv_file = reports_dir / f"{report_stem}.csv"
        report_parser.save_csv_report(records, csv_file)
        generated_files.append(csv_file)

        # JSON report
        json_file = reports_dir / f"{report_stem}.json"
        report_parser.save_json_report(records, json_file)
        generated_files.append(json_file)

        # The check marks were previously stored as mis-decoded UTF-8.
        print(f"✓ Subtype {subtype:2d} ({subtype_name:25s}): {len(records):4d} records")

print(f"\n✓ Generated {len(generated_files)} report files in {reports_dir}/")
print("\nReport files:")
for f in sorted(generated_files):
    print(f"  - {f.name}")

## Step 9: Advanced Analysis - All Subtypes Summary

In [None]:
# Create comprehensive summary: one row per subtype 1..15, whether or not
# any records of that subtype were parsed.
summary_data = []

for subtype in range(1, 16):
    records = records_by_subtype.get(subtype, [])
    summary_data.append({
        'Subtype': subtype,
        'Name': subtype_names.get(subtype, 'Unknown'),
        'Record Count': len(records),
        # Check mark for subtypes with data (previously mis-decoded UTF-8), dash otherwise
        'Status': '✓' if len(records) > 0 else '-'
    })

df_summary = pd.DataFrame(summary_data)

print("\n" + "="*70)
print("SMF 110 SUBTYPE SUMMARY")
print("="*70)
print(df_summary.to_string(index=False))
print("="*70)
print(f"\nTotal Records: {df_summary['Record Count'].sum():,}")
# The denominator comes from the table itself, so it always matches the
# number of rows listed above (15 with the current range).
print(f"Active Subtypes: {(df_summary['Record Count'] > 0).sum()} / {len(df_summary)}")

## Step 10: Export Summary Dashboard

Create a comprehensive visual dashboard

In [None]:
# Build a 3x3 dashboard figure from results of earlier cells (df_summary,
# df_trans, df_files, total_records, reports_dir), save it as a PNG in the
# reports directory and display it inline.
fig = plt.figure(figsize=(16, 10))
fig.suptitle('SMF 110 CICS Statistics - Complete Analysis Dashboard',
             fontsize=18, fontweight='bold', y=0.98)

# Create grid
gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)

# 1. Subtype distribution (top row, spanning all three columns)
ax1 = fig.add_subplot(gs[0, :])
active_subtypes = df_summary[df_summary['Record Count'] > 0]
ax1.barh(active_subtypes['Name'], active_subtypes['Record Count'], color='steelblue')
ax1.set_xlabel('Number of Records', fontsize=11)
ax1.set_title('Records by Subtype', fontsize=12, fontweight='bold')
ax1.invert_yaxis()

# Add remaining visualizations based on available data
if transaction_records:
    # 2. Transaction CPU distribution
    ax2 = fig.add_subplot(gs[1, 0])
    ax2.hist(df_trans['cpu_time'], bins=20, color='orange', edgecolor='black')
    ax2.set_xlabel('CPU Time (ms)')
    ax2.set_ylabel('Frequency')
    ax2.set_title('Transaction CPU Time', fontweight='bold')

if file_records:
    # 3. File buffer efficiency (hit_ratio is computed by the file statistics cell)
    ax3 = fig.add_subplot(gs[1, 1])
    ax3.hist(df_files['hit_ratio'], bins=20, color='green', edgecolor='black')
    ax3.set_xlabel('Buffer Hit Ratio (%)')
    ax3.set_ylabel('Frequency')
    ax3.set_title('File Buffer Efficiency', fontweight='bold')

# 4. Overall statistics text box
ax4 = fig.add_subplot(gs[1, 2])
ax4.axis('off')
stats_text = f"""
OVERALL STATISTICS

Total Records: {total_records:,}
Active Subtypes: {(df_summary['Record Count'] > 0).sum()}/15

Analysis Date:
{datetime.now().strftime('%Y-%m-%d %H:%M')}

Dump File:
{Path(DUMP_FILE).name}
"""
ax4.text(0.1, 0.5, stats_text, fontsize=10, family='monospace',
         verticalalignment='center', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))

# 5. Top transactions pie chart
if transaction_records:
    ax5 = fig.add_subplot(gs[2, 0])
    top5_trans = df_trans.nlargest(5, 'transaction_count')
    # A pie needs a positive total to normalise against; show a note instead
    # when every count is zero.
    if top5_trans['transaction_count'].sum() > 0:
        ax5.pie(top5_trans['transaction_count'], labels=top5_trans['transaction_id'],
                autopct='%1.1f%%', startangle=90)
    else:
        ax5.axis('off')
        ax5.text(0.5, 0.5, 'No transaction volume', ha='center', va='center',
                 transform=ax5.transAxes)
    ax5.set_title('Top 5 Transactions', fontweight='bold')

# 6. File I/O type distribution
if file_records:
    ax6 = fig.add_subplot(gs[2, 1])
    io_columns = {'Reads': 'reads', 'Writes': 'writes',
                  'Updates': 'updates', 'Deletes': 'deletes'}
    # No earlier cell references 'updates' or 'deletes', so only chart the
    # operation columns the file records actually provide.
    # NOTE(review): confirm which of these columns to_dict() supplies.
    available_io = {label: column for label, column in io_columns.items()
                    if column in df_files.columns}
    io_types = list(available_io)
    io_counts = [df_files[column].sum() for column in available_io.values()]
    # Same positive-total guard as the transaction pie above.
    if sum(io_counts) > 0:
        ax6.pie(io_counts, labels=io_types, autopct='%1.1f%%', startangle=90)
    else:
        ax6.axis('off')
        ax6.text(0.5, 0.5, 'No file I/O recorded', ha='center', va='center',
                 transform=ax6.transAxes)
    ax6.set_title('File I/O Operations', fontweight='bold')

# 7. Success rate indicator
ax7 = fig.add_subplot(gs[2, 2])
ax7.axis('off')
if transaction_records:
    # Completed transactions as a percentage of all transactions; 0 when
    # there were no transactions at all (avoids dividing by zero).
    total_transactions = df_trans['transaction_count'].sum()
    success_rate = (df_trans['completed'].sum() / total_transactions * 100) if total_transactions > 0 else 0
    status_text = f"""
HEALTH METRICS

Transaction Success Rate:
{success_rate:.2f}%

Total Abends:
{df_trans['abended'].sum():,}
    """
    # Traffic-light background: above 95% green, above 90% yellow, otherwise red.
    color = 'lightgreen' if success_rate > 95 else 'lightyellow' if success_rate > 90 else 'lightcoral'
else:
    status_text = "\n\nNo transaction data\navailable"
    color = 'lightgray'

ax7.text(0.1, 0.5, status_text, fontsize=11, family='monospace',
         verticalalignment='center', bbox=dict(boxstyle='round', facecolor=color, alpha=0.5))

# Save dashboard. Saving through the figure object (not plt.savefig) does not
# depend on which figure happens to be "current".
dashboard_file = reports_dir / 'smf110_dashboard.png'
fig.savefig(dashboard_file, dpi=150, bbox_inches='tight')
print(f"\n✓ Dashboard saved: {dashboard_file}")

plt.show()

## Summary

This notebook provides complete analysis of SMF 110 binary dumps:

### ✓ Completed
- Parsed binary dump with EBCDIC conversion
- Analyzed all 15 CICS statistics subtypes
- Generated inline visualizations (static matplotlib charts)
- Exported CSV/JSON reports
- Created comprehensive dashboard

### 📊 Outputs
- **CSV Reports**: One per subtype in `reports/`
- **JSON Reports**: One per subtype in `reports/`
- **Dashboard**: `reports/smf110_dashboard.png`
- **Charts**: Displayed inline in the notebook

### 📖 Documentation
See `BINARY_DUMP_GUIDE.md` for:
- SMF 110 binary format details
- JCL examples for dump extraction
- FTP transfer instructions
- Troubleshooting guide