# Environment Verification
## 🔍 Validating Workshop Setup

---

**Purpose:** Verify that all components are properly installed and configured for the OpenShift AI workshop.

**Prerequisites:**
- Completed dataset download (`download_datasets.ipynb`)
- Completed requirements installation (`install_requirements.ipynb`)

---

## 🔧 System Resources Check

In [None]:
import sys
import os
import platform
import psutil
from datetime import datetime

# Report interpreter and host details, one "label: value" line per fact.
print("üñ•Ô∏è  SYSTEM INFORMATION")
print("=" * 50)

system_facts = (
    ("Python Version", sys.version),
    ("Platform", platform.platform()),
    ("Architecture", platform.architecture()),
    ("Processor", platform.processor()),
    ("Timestamp", datetime.now().strftime('%Y-%m-%d %H:%M:%S')),
)
for fact_label, fact_value in system_facts:
    print(f"{fact_label}: {fact_value}")
print()

In [None]:
# Check available system resources
#
# Prints memory, CPU and disk figures reported by psutil, then compares them
# with the workshop minimums (2 GB available RAM, 2 logical cores, 5 GB free
# disk on '/'). The flags ram_ok / cpu_ok / disk_ok stay defined at cell scope;
# the summary cell at the end of the notebook reads them.
print("üíæ RESOURCE AVAILABILITY")
print("=" * 50)

GIB = 1024**3  # psutil reports sizes in bytes

# Memory information
memory = psutil.virtual_memory()
print(f"Total RAM: {memory.total / GIB:.2f} GB")
print(f"Available RAM: {memory.available / GIB:.2f} GB")
print(f"Used RAM: {memory.used / GIB:.2f} GB ({memory.percent:.1f}%)")
print()

# CPU information
# psutil.cpu_count() returns None when the count cannot be determined, so the
# values are kept in variables and guarded before being compared below.
physical_cores = psutil.cpu_count(logical=False)
logical_cores = psutil.cpu_count(logical=True)
print(f"CPU Cores (physical): {physical_cores}")
print(f"CPU Cores (logical): {logical_cores}")
print(f"CPU Usage: {psutil.cpu_percent(interval=1):.1f}%")
print()

# Disk space
disk = psutil.disk_usage('/')
print(f"Disk Total: {disk.total / GIB:.2f} GB")
print(f"Disk Free: {disk.free / GIB:.2f} GB")
print(f"Disk Used: {disk.used / GIB:.2f} GB ({(disk.used/disk.total)*100:.1f}%)")
print()

# Resource adequacy check
print("‚úÖ ADEQUACY CHECK")
ram_ok = memory.available / GIB >= 2.0  # At least 2GB available
# An undetermined core count is treated as "not enough" instead of raising
# TypeError on `None >= 2`.
cpu_ok = logical_cores is not None and logical_cores >= 2  # At least 2 cores
disk_ok = disk.free / GIB >= 5.0  # At least 5GB free

print(f"RAM (‚â•2GB available): {'‚úÖ' if ram_ok else '‚ùå'}")
print(f"CPU (‚â•2 cores): {'‚úÖ' if cpu_ok else '‚ùå'}")
print(f"Disk (‚â•5GB free): {'‚úÖ' if disk_ok else '‚ùå'}")

if all([ram_ok, cpu_ok, disk_ok]):
    print("\nüéâ System resources are adequate for the workshop!")
else:
    print("\n‚ö†Ô∏è  System resources may be insufficient. Consider requesting larger instance.")

## 📚 Library Import Verification

In [None]:
# Core ML and Data Libraries
#
# Imports each library, binds it at cell scope under its usual alias (so the
# names pd / np / plt / sns / sklearn exist afterwards, as a plain `import`
# statement would leave them) and prints the detected version.
import importlib

print("üî¨ CORE ML & DATA LIBRARIES")
print("=" * 50)

libraries = [
    ('pandas', 'pd'),
    ('numpy', 'np'),
    ('sklearn', None),
    ('matplotlib.pyplot', 'plt'),
    ('seaborn', 'sns')
]

for lib_name, alias in libraries:
    top_level = lib_name.split('.')[0]
    try:
        module = importlib.import_module(lib_name)
        package = importlib.import_module(top_level)
    except Exception as e:  # ImportError, or a broken install failing at import time
        print(f"‚ùå {lib_name:<20} - Import failed: {e}")
        continue

    # Mirror `import x as alias` / `import x` without building code strings.
    if alias:
        globals()[alias] = module
    else:
        globals()[top_level] = package

    # A submodule (e.g. matplotlib.pyplot) may not define __version__ itself,
    # so fall back to the top-level package before giving up.
    version = getattr(module, '__version__', None)
    if version is None:
        version = getattr(package, '__version__', None)

    if version is not None:
        print(f"‚úÖ {lib_name:<20} v{version}")
    else:
        print(f"‚úÖ {lib_name:<20} (version not available)")

In [None]:
# ONNX and Model Export Libraries
#
# Imports each library by name, binds it at cell scope under that name and
# prints its __version__ when the module exposes one.
import importlib

print("\nüîÑ ONNX & MODEL EXPORT")
print("=" * 50)

onnx_libraries = [
    'onnx',
    'skl2onnx',
    'onnxruntime'
]

for lib in onnx_libraries:
    try:
        module = importlib.import_module(lib)
    except Exception as e:  # ImportError, or a broken install failing at import time
        print(f"‚ùå {lib:<20} - Import failed: {e}")
        continue

    globals()[lib] = module  # same binding a plain `import <lib>` would create

    version = getattr(module, '__version__', None)
    if version is not None:
        print(f"‚úÖ {lib:<20} v{version}")
    else:
        print(f"‚úÖ {lib:<20} (version not available)")

In [None]:
# LangChain and LLM Libraries
#
# Imports each library by name, binds it at cell scope under that name and
# prints its __version__ when the module exposes one.
import importlib

print("\nü¶ú LANGCHAIN & LLM FRAMEWORK")
print("=" * 50)

langchain_libs = [
    'langchain',
    'langchain_core',
    'langchain_community'
]

for lib in langchain_libs:
    try:
        module = importlib.import_module(lib)
    except Exception as e:  # ImportError, or a broken install failing at import time
        print(f"‚ùå {lib:<20} - Import failed: {e}")
        continue

    globals()[lib] = module  # same binding a plain `import <lib>` would create

    version = getattr(module, '__version__', None)
    if version is not None:
        print(f"‚úÖ {lib:<20} v{version}")
    else:
        print(f"‚úÖ {lib:<20} (imported successfully)")

In [None]:
# Web and Dashboard Libraries
#
# Imports each library by name, binds it at cell scope under that name and
# prints its __version__ when the module exposes one.
import importlib

print("\nüåê WEB & DASHBOARD LIBRARIES")
print("=" * 50)

web_libs = [
    'gradio',
    'requests',
    'plotly'
]

for lib in web_libs:
    try:
        module = importlib.import_module(lib)
    except Exception as e:  # ImportError, or a broken install failing at import time
        print(f"‚ùå {lib:<20} - Import failed: {e}")
        continue

    globals()[lib] = module  # same binding a plain `import <lib>` would create

    version = getattr(module, '__version__', None)
    if version is not None:
        print(f"‚úÖ {lib:<20} v{version}")
    else:
        print(f"‚úÖ {lib:<20} (imported successfully)")

In [None]:
# Monitoring and Utilities
#
# Availability check only: each library is imported by name and bound at cell
# scope under that name; no version is reported.
import importlib

print("\nüìä MONITORING & UTILITIES")
print("=" * 50)

utility_libs = [
    'prometheus_client',
    'yaml',
    'json',
    'pickle',
    'joblib'
]

for lib in utility_libs:
    try:
        globals()[lib] = importlib.import_module(lib)
    except Exception as e:  # ImportError, or a broken install failing at import time
        print(f"‚ùå {lib:<20} - Import failed: {e}")
    else:
        print(f"‚úÖ {lib:<20} (available)")

## 📊 Dataset Verification

In [None]:
import pandas as pd
import os

print("üìÇ DATASET AVAILABILITY CHECK")
print("=" * 50)

# Expected datasets: file name -> description, expected column names and the
# minimum number of rows the file should contain.
datasets = {
    'sales_historical_data.csv': {
        'description': 'Sales transactions with temporal patterns',
        'expected_columns': ['date', 'product_id', 'quantity', 'revenue', 'customer_id'],
        'min_rows': 1000
    },
    'product_catalog.csv': {
        'description': 'Product metadata and categories',
        'expected_columns': ['product_id', 'product_name', 'category', 'price'],
        'min_rows': 100
    },
    'customer_behavior.csv': {
        'description': 'User interaction and behavioral data',
        'expected_columns': ['customer_id', 'session_id', 'action', 'timestamp'],
        'min_rows': 500
    }
}

datasets_path = 'datasets'


def verify_dataset(filepath, info):
    """Print a verification report for one dataset file and return its verdict.

    Args:
        filepath: Path of the CSV file to inspect.
        info: Spec dict with 'expected_columns' (list of column names) and
            'min_rows' (int), shaped like the entries of ``datasets``.

    Returns:
        True when the file exists, loads with pandas, has at least
        ``min_rows`` rows and contains at least 60% of the expected columns
        (compared case-insensitively); False otherwise.
    """
    if not os.path.exists(filepath):
        print(f"   ‚ùå File not found at {filepath}")
        return False

    dataset_ok = True
    # The whole inspection stays inside one try: any failure while reading or
    # examining the file is reported as a load error rather than aborting the
    # notebook cell.
    try:
        df = pd.read_csv(filepath)
        file_size = os.path.getsize(filepath) / 1024 / 1024  # MB

        print(f"   ‚úÖ File exists ({file_size:.2f} MB)")
        print(f"   ‚úÖ Loaded successfully ({len(df):,} rows, {len(df.columns)} columns)")

        # Check minimum rows
        if len(df) >= info['min_rows']:
            print(f"   ‚úÖ Row count adequate (‚â•{info['min_rows']:,})")
        else:
            print(f"   ‚ö†Ô∏è  Row count below expected (‚â•{info['min_rows']:,})")
            dataset_ok = False

        # Check for expected columns (at least some should be present)
        available_cols = set(df.columns.str.lower())
        expected_cols = {col.lower() for col in info['expected_columns']}
        matching_cols = available_cols & expected_cols

        if len(matching_cols) >= len(expected_cols) * 0.6:  # At least 60% match
            print(f"   ‚úÖ Column structure looks good ({len(matching_cols)}/{len(expected_cols)} expected columns found)")
        else:
            print(f"   ‚ö†Ô∏è  Column structure may need review ({len(matching_cols)}/{len(expected_cols)} expected columns found)")
            # A structure mismatch fails the dataset, so the closing
            # "properly formatted" message is only shown when this check passed.
            dataset_ok = False

        # Show basic info
        print(f"   üìä Columns: {list(df.columns[:5])}{'...' if len(df.columns) > 5 else ''}")

    except Exception as e:
        print(f"   ‚ùå Error loading file: {e}")
        return False

    return dataset_ok


all_datasets_ok = True

for filename, info in datasets.items():
    filepath = os.path.join(datasets_path, filename)
    print(f"\nüìÑ {filename}")
    print(f"   {info['description']}")

    if not verify_dataset(filepath, info):
        all_datasets_ok = False

print(f"\n{'='*50}")
if all_datasets_ok:
    print("üéâ All datasets are available and properly formatted!")
else:
    print("‚ö†Ô∏è  Some datasets may need attention. Check the download_datasets.ipynb notebook.")

## 🌐 Environment Variables Check

In [None]:
print("üîß ENVIRONMENT VARIABLES")
print("=" * 50)

# Variables worth inspecting, each mapped to a one-line explanation
env_vars = {
    'PYTHONPATH': 'Python module search path',
    'MODEL_ENDPOINT_URL': 'Model serving endpoint (will be set later)',
    'WORKSHOP_PATH': 'Workshop materials path',
    'HOME': 'User home directory',
    'USER': 'Current user',
    'PWD': 'Current working directory'
}

for var_name in env_vars:
    current = os.environ.get(var_name, 'Not set')
    # Warning marker for anything that reads as unset, check mark otherwise
    marker = '‚ö†Ô∏è ' if current == 'Not set' else '‚úÖ'
    print(f"{marker} {var_name:<20} = {current}")
    print(f"   {env_vars[var_name]}")
    print()

# Where the notebook is running and how many entries that directory holds
working_dir = os.getcwd()
print(f"üìÅ Current working directory: {working_dir}")
entry_count = len(os.listdir('.'))
print(f"üìÇ Files in current directory: {entry_count} items")

## 🧪 Functional Testing

In [None]:
# Test basic ML functionality
#
# Smoke test: fit a small random forest on synthetic data and score it.
# `model` and `X_test` stay defined at cell scope; the ONNX export test that
# follows reuses them.
print("ü§ñ MACHINE LEARNING FUNCTIONALITY TEST")
print("=" * 50)

try:
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import mean_squared_error
    import numpy as np

    # Synthetic regression problem: the target is the row sum plus Gaussian noise
    n_samples, n_features = 1000, 10
    X = np.random.rand(n_samples, n_features)
    noise = np.random.normal(0, 0.1, n_samples)
    y = np.sum(X, axis=1) + noise

    # Hold out 20% of the rows for evaluation
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = RandomForestRegressor(n_estimators=10, random_state=42)
    model.fit(X_train, y_train)

    # Score the held-out rows
    mse = mean_squared_error(y_test, model.predict(X_test))

    print("‚úÖ RandomForest training successful")
    print(f"‚úÖ Model prediction working (MSE: {mse:.4f})")
    print("‚úÖ Basic ML pipeline functional")

except Exception as e:
    print(f"‚ùå ML functionality test failed: {e}")

In [None]:
# Test ONNX export capability
#
# Converts the RandomForest `model` fitted by the ML functionality test cell to
# ONNX, validates the graph and runs five rows of `X_test` through ONNX
# Runtime. Relies on `model`, `X_test` and `np` from that earlier cell; if they
# are missing, the resulting NameError is reported by the handler below.
print("\nüîÑ ONNX EXPORT FUNCTIONALITY TEST")
print("=" * 50)

try:
    from skl2onnx import convert_sklearn
    from skl2onnx.common.data_types import FloatTensorType
    import onnx
    import onnxruntime as ort

    # Use the model from previous test; None leaves the batch dimension dynamic
    initial_type = [('float_input', FloatTensorType([None, 10]))]
    onnx_model = convert_sklearn(model, initial_types=initial_type)

    # Verify ONNX model
    onnx.checker.check_model(onnx_model)

    # Test ONNX Runtime
    # Name the provider explicitly: ONNX Runtime builds that ship additional
    # execution providers (e.g. GPU packages) reject a session created without
    # a `providers` list, and the CPU provider is sufficient for this check.
    sess = ort.InferenceSession(
        onnx_model.SerializeToString(),
        providers=['CPUExecutionProvider'],
    )
    test_input = X_test[:5].astype(np.float32)  # input was declared as a float tensor
    onnx_pred = sess.run(None, {'float_input': test_input})[0]

    print("‚úÖ ONNX model conversion successful")
    print("‚úÖ ONNX model validation passed")
    print("‚úÖ ONNX Runtime inference working")
    print(f"‚úÖ ONNX predictions shape: {onnx_pred.shape}")

except Exception as e:
    print(f"‚ùå ONNX functionality test failed: {e}")

In [None]:
# Test data visualization
#
# Builds one figure with each plotting library. Uses `pd` and `np` as bound by
# earlier cells; nothing is displayed.
print("\nüìä DATA VISUALIZATION TEST")
print("=" * 50)

try:
    import matplotlib.pyplot as plt
    import seaborn as sns
    import plotly.graph_objects as go

    # matplotlib: draw a small line chart, then close it so it is not displayed
    line_fig, line_ax = plt.subplots(figsize=(6, 4))
    line_ax.plot([1, 2, 3, 4], [1, 4, 2, 3])
    line_ax.set_title('Test Plot')
    plt.close(line_fig)
    print("‚úÖ Matplotlib plotting functional")

    # seaborn: scatter 100 random points on a fresh axes, then close it
    scatter_points = pd.DataFrame({
        'x': np.random.randn(100),
        'y': np.random.randn(100)
    })
    scatter_fig, scatter_ax = plt.subplots(figsize=(6, 4))
    sns.scatterplot(data=scatter_points, x='x', y='y', ax=scatter_ax)
    plt.close(scatter_fig)
    print("‚úÖ Seaborn plotting functional")

    # plotly: constructing the figure object is the whole check
    bar_trace = go.Bar(x=['A', 'B', 'C'], y=[1, 3, 2])
    bar_fig = go.Figure(data=bar_trace)
    print("‚úÖ Plotly plotting functional")

except Exception as e:
    print(f"‚ùå Visualization test failed: {e}")

## 🔗 Network Connectivity Test

In [None]:
print("üåê NETWORK CONNECTIVITY TEST")
print("=" * 50)

import requests
import socket
from urllib.parse import urlparse

# Test external connectivity
test_urls = [
    'https://httpbin.org/status/200',  # Simple HTTP test
    'https://api.github.com',          # GitHub API
    'https://huggingface.co'           # Hugging Face (for model downloads)
]

for url in test_urls:
    try:
        status_code = requests.get(url, timeout=10).status_code
    except requests.exceptions.RequestException as e:
        # Report only the exception class, not the full message
        print(f"‚ùå {url} - Connection failed: {type(e).__name__}")
        continue

    if status_code != 200:
        print(f"‚ö†Ô∏è  {url} - HTTP {status_code}")
    else:
        print(f"‚úÖ {url} - Accessible")

# Test DNS resolution
print("\nüîç DNS RESOLUTION TEST")
test_domains = ['google.com', 'github.com', 'redhat.com']

for domain in test_domains:
    try:
        resolved_ip = socket.gethostbyname(domain)
    except socket.gaierror:
        print(f"‚ùå {domain} - DNS resolution failed")
    else:
        print(f"‚úÖ {domain} -> {resolved_ip}")

## 📋 Environment Summary Report

In [None]:
# Final report. Reads state left by earlier cells: `datasets` and
# `datasets_path` (dataset verification cell) and ram_ok / cpu_ok / disk_ok
# (resource check cell). A library counts as available when it is present in
# sys.modules, i.e. an earlier cell imported it successfully.
print("üìã ENVIRONMENT VERIFICATION SUMMARY")
print("=" * 60)
print()

# Collect verification results
results = {
    'System Resources': {
        'RAM Available': f"{psutil.virtual_memory().available / 1024**3:.2f} GB",
        'CPU Cores': f"{psutil.cpu_count(logical=True)}",
        'Disk Free': f"{psutil.disk_usage('/').free / 1024**3:.2f} GB"
    },
    'Core Libraries': {
        'pandas': '‚úÖ' if 'pandas' in sys.modules else '‚ùå',
        'sklearn': '‚úÖ' if 'sklearn' in sys.modules else '‚ùå',
        'numpy': '‚úÖ' if 'numpy' in sys.modules else '‚ùå'
    },
    'ONNX Stack': {
        'onnx': '‚úÖ' if 'onnx' in sys.modules else '‚ùå',
        'skl2onnx': '‚úÖ' if 'skl2onnx' in sys.modules else '‚ùå',
        'onnxruntime': '‚úÖ' if 'onnxruntime' in sys.modules else '‚ùå'
    },
    'LangChain': {
        'langchain': '‚úÖ' if 'langchain' in sys.modules else '‚ùå'
    },
    'Dashboard Tools': {
        'gradio': '‚úÖ' if 'gradio' in sys.modules else '‚ùå',
        'plotly': '‚úÖ' if 'plotly' in sys.modules else '‚ùå'
    }
}

for category, items in results.items():
    print(f"üìÇ {category}")
    for item, status in items.items():
        print(f"   {item:<20} {status}")
    print()

# Dataset status
# Look in the same directory the dataset verification cell used, and stat each
# file once so the listing and the count below cannot disagree.
dataset_present = {
    filename: os.path.exists(os.path.join(datasets_path, filename))
    for filename in datasets
}

print("üìä Dataset Status")
for filename, present in dataset_present.items():
    status = '‚úÖ' if present else '‚ùå'
    print(f"   {filename:<30} {status}")
print()

# Overall readiness assessment
critical_modules = ['pandas', 'sklearn', 'numpy', 'onnx', 'langchain']
modules_ok = sum(1 for mod in critical_modules if mod in sys.modules)
datasets_ok = sum(dataset_present.values())

print("üéØ READINESS ASSESSMENT")
print(f"   Critical libraries: {modules_ok}/{len(critical_modules)} available")
print(f"   Required datasets: {datasets_ok}/{len(datasets)} available")
print(f"   System resources: {'Adequate' if all([ram_ok, cpu_ok, disk_ok]) else 'May need attention'}")
print()

# Readiness depends on libraries and datasets only; the resource flags are
# reported above but do not gate the verdict.
if modules_ok == len(critical_modules) and datasets_ok == len(datasets):
    print("üéâ ENVIRONMENT READY FOR WORKSHOP!")
    print("\n‚úÖ You can proceed to Module 2: Predictive Model Development")
    print("   Next notebook: 2-predictive-model/notebooks/01_data_exploration.ipynb")
else:
    print("‚ö†Ô∏è  ENVIRONMENT NEEDS ATTENTION")
    print("\nüîß Recommended actions:")
    if modules_ok < len(critical_modules):
        print("   - Re-run install_requirements.ipynb")
    if datasets_ok < len(datasets):
        print("   - Re-run download_datasets.ipynb")
    print("   - Check troubleshooting section in Module 1 documentation")

print(f"\nüìÖ Verification completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("="*60)

## 🆘 Troubleshooting Guide

If you encounter issues, here are common solutions:

### ❌ Library Import Failures
```python
# Reinstall specific package
!pip install --force-reinstall package_name

# Clear pip cache
!pip cache purge

# Install from conda-forge
!conda install -c conda-forge package_name
```

### 📂 Dataset Issues
- Re-run `download_datasets.ipynb`
- Check internet connectivity
- Verify storage space availability

### 💾 Resource Constraints
- Request larger workbench instance
- Close other notebooks
- Clear notebook outputs to save memory

### 🌐 Network Issues
- Check OpenShift cluster network policies
- Verify external connectivity permissions
- Contact cluster administrator if needed

---

**✅ Environment verification complete!**

**Next Step:** Module 2 - Predictive Model Development  
**File:** `2-predictive-model/notebooks/01_data_exploration.ipynb`