In [1]:
import pm4py 
# Read the BPMN process definition from the file next to this notebook.
bpmn_model = pm4py.read_bpmn('BPMN.bpmn')

# Convert the BPMN model to a Petri net; the call yields three values that are
# passed unchanged to SimulationEngine below (net, im, fm).
# NOTE(review): im / fm are presumably the initial and final markings — confirm against pm4py docs.
net, im , fm = pm4py.convert_to_petri_net(bpmn_model)


In [2]:
from core_simulator import SimulationEngine
# Build the simulator from the Petri net and the two markings produced above.
# NOTE(review): no random seed is set anywhere in this notebook — confirm whether
# SimulationEngine seeds itself; otherwise the figures below may differ per run.
engine = SimulationEngine(net, im, fm)
# The sample printed in the next cell shows each entry of the result as a dict
# with an 'attributes' dict and an 'events' list.
business_log = engine.run_simulation()

Loaded organizational model:
Simple permissions: 26 activities
Context permissions: 401 contexts
Generated 29570 arrivals based on dynamic rates.


In [3]:
# Inspect the first simulated trace (case attributes plus its list of events).
# NOTE(review): an earlier note on this line recorded
# datetime.datetime(1970, 4, 27, 0, 31, 12, tzinfo=datetime.timezone.utc); the output
# below shows naive 2016 timestamps, so that note looks stale — confirm and drop.
print(business_log[0])

{'attributes': {'concept:name': 'Application_1', 'LoanGoal': 'Home improvement', 'ApplicationType': 'New credit', 'RequestedAmount': 37483.8}, 'events': [{'concept:name': 'A_Create Application', 'org:resource': 'User_1', 'time:timestamp': datetime.datetime(2016, 1, 2, 6, 7, 11, 333502), 'lifecycle:transition': 'complete'}, {'concept:name': 'W_Complete application', 'org:resource': 'User_103', 'time:timestamp': datetime.datetime(2016, 1, 2, 6, 9, 40, 700099), 'lifecycle:transition': 'complete'}, {'concept:name': 'W_Complete application', 'org:resource': 'User_1', 'time:timestamp': datetime.datetime(2016, 1, 2, 6, 11, 29, 927567), 'lifecycle:transition': 'complete'}, {'concept:name': 'W_Call after offers', 'org:resource': 'User_103', 'time:timestamp': datetime.datetime(2016, 1, 2, 6, 18, 12, 197655), 'lifecycle:transition': 'complete'}, {'concept:name': 'W_Validate application', 'org:resource': 'User_1', 'time:timestamp': datetime.datetime(2016, 1, 2, 6, 22, 8, 419693), 'lifecycle:transi

In [4]:
# Number of entries (traces) in the simulated log.
len(business_log)

29570

In [5]:
import pandas as pd
# Wrap the list of trace dicts in a DataFrame.
# NOTE(review): presumably one row per trace with nested 'attributes' / 'events'
# columns (based on the sample trace printed above) — confirm before analysing.
df = pd.DataFrame(business_log)

In [None]:
# Display the trace-level DataFrame built in the previous cell.
# NOTE(review): this cell has no execution count; consider df.head() to keep the output small.
df

### Analyze resource allocation 

- Resources were allocated based on resource availability probabilities
- A check against these probabilities returned a compliance of 92.17% (note: the run recorded in the outputs below reports 96.32%).

In [6]:
from collections import Counter

# Flatten the log: one resource name per event, across every trace in business_log.
all_resources = [
    entry['org:resource']
    for case in business_log
    for entry in case['events']
]

# Tally how many events each resource executed and show the full tally.
total_counts = Counter(all_resources)
print("Total Resource Distribution: {}".format(total_counts))

Total Resource Distribution: Counter({'User_1': 26490, 'User_90': 10267, 'User_68': 7831, 'User_30': 7609, 'User_98': 7361, 'User_103': 7220, 'User_107': 7171, 'User_87': 6400, 'User_144': 6126, 'User_32': 5285, 'User_61': 5161, 'User_65': 5091, 'User_96': 4985, 'User_7': 4945, 'User_86': 4912, 'User_36': 4877, 'User_138': 4836, 'User_80': 4807, 'User_71': 4802, 'User_63': 4799, 'User_104': 4750, 'User_18': 4711, 'User_119': 4680, 'User_38': 4655, 'User_28': 4630, 'User_4': 4586, 'User_15': 4556, 'User_10': 4551, 'User_47': 4541, 'User_46': 4513, 'User_109': 4513, 'User_105': 4478, 'User_13': 4442, 'User_75': 4409, 'User_49': 4386, 'User_122': 4381, 'User_27': 4320, 'User_78': 4306, 'User_56': 4268, 'User_45': 4251, 'User_23': 4222, 'User_97': 4217, 'User_41': 4211, 'User_81': 4196, 'User_117': 4186, 'User_91': 4185, 'User_53': 4181, 'User_34': 4149, 'User_93': 4146, 'User_11': 4078, 'User_110': 4076, 'User_42': 4057, 'User_51': 4052, 'User_94': 4043, 'User_106': 4038, 'User_6': 4011, 

In [7]:
def verify_stochastic_compliance(simulated_log, availability_probs):
    """
    Print how many human-executed events fall into a (resource, weekday, hour)
    slot that has no positive availability probability.

    Args:
        simulated_log: iterable of trace dicts; each trace has an 'events' list
            whose items carry 'org:resource' and 'time:timestamp' (an object
            exposing .weekday() and .hour).
        availability_probs: dict keyed by (resource, weekday, hour) tuples.
            A missing key is treated like a probability of 0.

    Returns:
        None. The report is written to stdout only.
    """
    human_total = 0
    never_available = 0

    every_event = (entry for case in simulated_log for entry in case['events'])
    for entry in every_event:
        who = entry['org:resource']
        when = entry['time:timestamp']

        # "System" is the fallback executor and is excluded from the check.
        if who == "System":
            continue

        human_total += 1
        slot = (who, when.weekday(), when.hour)
        # Probability 0 (or no entry at all) counts as a violation.
        if availability_probs.get(slot, 0) == 0:
            never_available += 1

    print("--- Advanced Stochastic Report ---")
    print(f"Total Human Events: {human_total}")
    print(f"Historical Probability Violations: {never_available}")
    if human_total > 0:
        compliance = ((human_total - never_available) / human_total) * 100
        print(f"Advanced Compliance: {compliance:.2f}%")

In [8]:
import json

def load_availability_from_json(filename="resource_availability.json"):
    """
    Load the availability table from JSON and rebuild its tuple keys.

    The file stores a flat object whose keys look like "<resource>|<weekday>|<hour>".
    Each key is turned back into a (resource, int(weekday), int(hour)) tuple.

    Args:
        filename: path of the JSON file to read.

    Returns:
        dict mapping (resource, weekday, hour) tuples to the stored values.

    Raises:
        ValueError: if a key does not end in two '|'-separated integer fields.
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(filename, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Reconstruct the original tuple keys
    reconstructed = {}
    for key, value in data.items():
        # Split from the right so a resource name that itself contains '|'
        # stays intact; only the last two fields are weekday and hour.
        resource, weekday, hour = key.rsplit('|', 2)
        reconstructed[(resource, int(weekday), int(hour))] = value
    return reconstructed

In [9]:
# Load the availability table from the default file, resource_availability.json.
availability = load_availability_from_json()


In [10]:
# Print the availability-compliance report for the simulated log.
verify_stochastic_compliance(business_log, availability)

--- Advanced Stochastic Report ---
Total Human Events: 591164
Historical Probability Violations: 21768
Advanced Compliance: 96.32%


In [11]:
def verify_ordinor_permission_compliance(simulated_log, org_model, activity_clusters):
    """
    Verify that all resource assignments respect OrdinoR permissions.

    For every event whose resource is not "System", the context key
    "<activity>|<case type>|<time type>" is looked up in
    org_model['context_permission_map']:

    1. If that context lists resources, the event's resource must be one of them.
    2. Otherwise the check falls back to org_model['permission_map'][activity].
    3. If neither map lists any resource, the event is reported as a violation.

    A report is printed to stdout. An empty log produces a report with 0.00%
    figures instead of raising ZeroDivisionError.

    Args:
        simulated_log: iterable of trace dicts with an 'attributes' dict
            (including 'concept:name') and an 'events' list; each event has
            'concept:name', 'org:resource' and 'time:timestamp'.
        org_model: dict that may contain 'context_permission_map' and
            'permission_map'; a missing map is treated as empty.
        activity_clusters: accepted for interface compatibility; this check
            does not read it.

    Returns:
        list of violation dicts with the keys 'case', 'activity', 'resource',
        'timestamp', 'context', 'reason' and 'permitted_resources' (at most
        the first five permitted resources).
    """
    violations = []
    total_events = 0
    context_aware_assignments = 0
    fallback_assignments = 0

    # Load permission maps
    context_permission_map = org_model.get('context_permission_map', {})
    simple_permission_map = org_model.get('permission_map', {})

    for trace in simulated_log:
        case_attrs = trace['attributes']

        for event in trace['events']:
            total_events += 1

            activity = event['concept:name']
            resource = event['org:resource']
            timestamp = event['time:timestamp']

            # System assignments count towards the total but are never checked.
            if resource == "System":
                continue

            # Determine context and build the lookup key
            case_type = _determine_case_type_from_attrs(case_attrs)
            time_type = _get_time_type(timestamp)
            context_key = f"{activity}|{case_type}|{time_type}"

            # Context-aware permission takes precedence over the simple map.
            context_permitted = context_permission_map.get(context_key, [])
            if context_permitted:
                if resource in context_permitted:
                    context_aware_assignments += 1
                    continue
                reason = 'Not permitted in context'
                shown_permitted = context_permitted[:5]  # Show first 5
            else:
                simple_permitted = simple_permission_map.get(activity, [])
                if simple_permitted:
                    if resource in simple_permitted:
                        fallback_assignments += 1
                        continue
                    reason = 'Not permitted (simple map)'
                    shown_permitted = simple_permitted[:5]
                else:
                    # No permissions defined at all
                    reason = 'No permissions defined'
                    shown_permitted = []

            # Every path that reaches this point is a violation.
            violations.append({
                'case': trace['attributes']['concept:name'],
                'activity': activity,
                'resource': resource,
                'timestamp': timestamp,
                'context': f"({case_type}, {time_type})",
                'reason': reason,
                'permitted_resources': shown_permitted
            })

    def share_of_total(count):
        # Percentage of all events; 0 when the log is empty (avoids dividing by zero).
        return (count / total_events * 100) if total_events > 0 else 0

    # Report
    print("=" * 80)
    print("ORDINOR PERMISSION COMPLIANCE REPORT")
    print("=" * 80)
    print(f"Total Events: {total_events}")
    print(f"Context-Aware Assignments: {context_aware_assignments} ({share_of_total(context_aware_assignments):.2f}%)")
    print(f"Fallback Assignments: {fallback_assignments} ({share_of_total(fallback_assignments):.2f}%)")
    print(f"Permission Violations: {len(violations)} ({share_of_total(len(violations)):.2f}%)")

    if len(violations) > 0:
        print(f"\nüö® VIOLATIONS DETECTED:")
        for i, v in enumerate(violations[:10], 1):  # Show first 10
            print(f"\n  Violation {i}:")
            print(f"    Case: {v['case']}")
            print(f"    Activity: {v['activity']}")
            print(f"    Resource: {v['resource']}")
            print(f"    Context: {v['context']}")
            print(f"    Reason: {v['reason']}")
            print(f"    Should be: {v['permitted_resources']}")

        if len(violations) > 10:
            print(f"\n  ... and {len(violations) - 10} more violations")
    else:
        print("\n‚úÖ NO VIOLATIONS - All assignments respect OrdinoR permissions!")

    compliance_rate = share_of_total(total_events - len(violations))
    print(f"\nüìä Overall Permission Compliance: {compliance_rate:.2f}%")
    print("=" * 80)

    return violations

# Helper functions
def _determine_case_type_from_attrs(case_attrs):
    """
    Bucket a case by its 'RequestedAmount' attribute (0 when absent).

    Returns 'Simple' below 10000, 'Standard' below 25000, 'Complex' below
    40000 and 'Very Complex' otherwise.
    """
    requested_amount = case_attrs.get('RequestedAmount', 0)

    if requested_amount < 10000:
        return 'Simple'
    elif requested_amount < 25000:
        return 'Standard'
    elif requested_amount < 40000:
        return 'Complex'
    else:
        return 'Very Complex'

def _get_time_type(timestamp):
    """
    Map a timestamp to a time-of-week label.

    Saturday and Sunday are always 'Weekend'. On other days the hour selects
    'Morning' (9-11), 'Lunch' (12-13), 'Afternoon' (14-16), 'Evening' (17-19)
    or 'Off-Hours' (every remaining hour).
    """
    hour = timestamp.hour
    day_of_week = timestamp.weekday()

    if day_of_week >= 5:
        return 'Weekend'
    elif 9 <= hour < 12:
        return 'Morning'
    elif 12 <= hour < 14:
        return 'Lunch'
    elif 14 <= hour < 17:
        return 'Afternoon'
    elif 17 <= hour < 20:
        return 'Evening'
    else:
        return 'Off-Hours'

In [12]:
def analyze_context_distribution(simulated_log, org_model):
    """
    Analyze how the simulated events are spread over (activity, case type,
    time type) contexts and compare the contexts used with those defined in
    org_model['context_permission_map'].

    Events whose resource is "System" are ignored. A report is printed to
    stdout. When the model defines no contexts the coverage line reports
    "N/A" instead of dividing by zero.

    Args:
        simulated_log: iterable of trace dicts with an 'attributes' dict and
            an 'events' list; each event has 'concept:name', 'org:resource'
            and 'time:timestamp'.
        org_model: dict that may contain 'context_permission_map', keyed by
            "<activity>|<case type>|<time type>" strings.

    Returns:
        defaultdict mapping (activity, case_type, time_type) tuples to
        {'count': int, 'resources': list of resource names, one per event}.
    """
    from collections import defaultdict, Counter

    context_usage = defaultdict(lambda: {'count': 0, 'resources': []})
    activity_usage = Counter()
    time_type_usage = Counter()
    case_type_usage = Counter()

    for trace in simulated_log:
        case_attrs = trace['attributes']

        for event in trace['events']:
            activity = event['concept:name']
            resource = event['org:resource']
            timestamp = event['time:timestamp']

            if resource == "System":
                continue

            # Determine context
            case_type = _determine_case_type_from_attrs(case_attrs)
            time_type = _get_time_type(timestamp)

            # Track usage
            context_key = (activity, case_type, time_type)
            context_usage[context_key]['count'] += 1
            context_usage[context_key]['resources'].append(resource)

            activity_usage[activity] += 1
            time_type_usage[time_type] += 1
            case_type_usage[case_type] += 1

    print("=" * 80)
    print("CONTEXT DISTRIBUTION ANALYSIS")
    print("=" * 80)

    # Overall distribution. The totals are computed once, outside the loops;
    # when they are 0 the corresponding loop has nothing to iterate over.
    case_type_total = sum(case_type_usage.values())
    print("\nüìä Case Type Distribution:")
    for ct, count in case_type_usage.most_common():
        print(f"   {ct}: {count} events ({count/case_type_total*100:.1f}%)")

    time_type_total = sum(time_type_usage.values())
    print("\n‚è∞ Time Type Distribution:")
    for tt, count in time_type_usage.most_common():
        print(f"   {tt}: {count} events ({count/time_type_total*100:.1f}%)")

    print("\nüéØ Top 10 Activities:")
    for activity, count in activity_usage.most_common(10):
        print(f"   {activity}: {count} events")

    print("\nüîç Top 10 Context Combinations:")
    sorted_contexts = sorted(context_usage.items(), key=lambda x: x[1]['count'], reverse=True)
    for (activity, ct, tt), data in sorted_contexts[:10]:
        unique_resources = len(set(data['resources']))
        print(f"   ({activity[:30]:30}, {ct:15}, {tt:10}): {data['count']:4} events, {unique_resources:3} resources")

    # Compare with OrdinoR capabilities
    print("\nüìã OrdinoR Capability Coverage:")
    context_permission_map = org_model.get('context_permission_map', {})

    # Same string format as the keys of context_permission_map.
    used_contexts = {f"{activity}|{ct}|{tt}" for (activity, ct, tt) in context_usage}
    defined_contexts = set(context_permission_map.keys())

    print(f"   Defined contexts in OrdinoR: {len(defined_contexts)}")
    print(f"   Used contexts in simulation: {len(used_contexts)}")
    if defined_contexts:
        coverage = len(used_contexts & defined_contexts) / len(defined_contexts) * 100
        print(f"   Coverage: {coverage:.1f}%")
    else:
        # No denominator: a percentage would be meaningless here.
        print("   Coverage: N/A (no contexts defined in OrdinoR)")

    # Find contexts used but not defined
    undefined_used = used_contexts - defined_contexts
    if undefined_used:
        print(f"\n‚ö†Ô∏è  Contexts used but not defined in OrdinoR: {len(undefined_used)}")
        # Sorted so the sample shown is the same on every run.
        for ctx in sorted(undefined_used)[:5]:
            print(f"      {ctx}")
        if len(undefined_used) > 5:
            print(f"      ... and {len(undefined_used) - 5} more")

    print("=" * 80)

    return context_usage

In [13]:
def analyze_role_specialization(simulated_log, org_model):
    """
    Analyze if resources are being used according to their roles
    (specialists vs generalists).

    Each non-"System" event whose activity appears in
    org_model['activity_clusters'] is counted for its resource under that
    activity's cluster id. A resource's specialization score is the Herfindahl
    index of its per-cluster event shares; a score above 0.6 classifies it as
    a specialist, anything else as a generalist. A report is printed to stdout.

    Args:
        simulated_log: iterable of trace dicts with an 'events' list; each
            event has 'org:resource' and 'concept:name'.
        org_model: dict with 'activity_clusters' (activity -> cluster id,
            converted with int()) and 'organizational_model' -> 'mem'
            (one sized collection per entry, used only for the comparison
            count).

    Returns:
        (specialists, generalists): two lists of dicts with the keys
        'resource', 'total_events', 'num_clusters', 'dominant_cluster' and
        'specialization'.

    Raises:
        KeyError: if 'activity_clusters' or 'organizational_model'/'mem' is
            missing from org_model.
    """
    from collections import defaultdict, Counter

    resource_workload = defaultdict(lambda: {'total': 0, 'by_cluster': Counter()})

    activity_clusters = {k: int(v) for k, v in org_model['activity_clusters'].items()}

    for trace in simulated_log:
        for event in trace['events']:
            resource = event['org:resource']
            activity = event['concept:name']

            if resource == "System":
                continue

            cluster_id = activity_clusters.get(activity)

            # Compare against None explicitly: cluster id 0 is valid but falsy,
            # so a plain truthiness test would drop every cluster-0 activity.
            if cluster_id is not None:
                resource_workload[resource]['total'] += 1
                resource_workload[resource]['by_cluster'][cluster_id] += 1

    print("=" * 80)
    print("ROLE SPECIALIZATION ANALYSIS")
    print("=" * 80)

    # Calculate specialization scores (Herfindahl index)
    specialists = []
    generalists = []

    # Entries are only created when an event is counted, so total >= 1 here.
    for resource, workload in resource_workload.items():
        total = workload['total']
        cluster_counts = workload['by_cluster']

        # Herfindahl index: sum of squared per-cluster shares (1.0 = single cluster)
        proportions = [count / total for count in cluster_counts.values()]
        herfindahl = sum(p ** 2 for p in proportions)

        dominant_cluster = max(cluster_counts, key=cluster_counts.get)
        num_clusters = len(cluster_counts)

        resource_info = {
            'resource': resource,
            'total_events': total,
            'num_clusters': num_clusters,
            'dominant_cluster': dominant_cluster,
            'specialization': herfindahl
        }

        # The index never exceeds the largest share, so a score above 0.6
        # implies more than 60% of the events sit in the dominant cluster.
        if herfindahl > 0.6:
            specialists.append(resource_info)
        else:
            generalists.append(resource_info)

    print(f"\nüë• Resource Classification:")
    print(f"   Specialists (>60% in one cluster): {len(specialists)}")
    print(f"   Generalists (distributed): {len(generalists)}")

    if specialists:
        print(f"\nüéØ Top 10 Specialists:")
        specialists_sorted = sorted(specialists, key=lambda x: x['specialization'], reverse=True)
        for res in specialists_sorted[:10]:
            print(f"   {res['resource']:15} | Cluster {res['dominant_cluster']} | "
                  f"Score: {res['specialization']:.3f} | Events: {res['total_events']}")

    if generalists:
        print(f"\nüåç Top 10 Generalists:")
        generalists_sorted = sorted(generalists, key=lambda x: x['num_clusters'], reverse=True)
        for res in generalists_sorted[:10]:
            print(f"   {res['resource']:15} | {res['num_clusters']} clusters | "
                  f"Score: {res['specialization']:.3f} | Events: {res['total_events']}")

    # Compare with OrdinoR model
    org_mem = org_model['organizational_model']['mem']

    print(f"\nüìä Comparison with OrdinoR Model:")
    multi_cluster_resources = sum(1 for groups in org_mem.values() if len(groups) > 1)
    print(f"   Resources in multiple clusters (OrdinoR): {multi_cluster_resources}")
    print(f"   Generalist resources (simulation): {len(generalists)}")

    print("=" * 80)

    return specialists, generalists

In [14]:
def run_full_ordinor_validation(simulated_log, org_model, availability_probs):
    """
    Run the four OrdinoR validation steps in order and collect their results.

    Steps: permission compliance, availability compliance, context
    distribution and role specialization. Each step prints its own report;
    a short summary is printed at the end.

    Args:
        simulated_log: the simulated traces, passed unchanged to every step.
        org_model: organizational model dict; its 'activity_clusters' entry is
            forwarded to the permission check.
        availability_probs: availability table forwarded to the availability check.

    Returns:
        dict with the keys 'violations', 'context_usage', 'specialists' and
        'generalists'. The availability check contributes printed output only.
    """
    rule = "=" * 80

    print("\n" + rule)
    print("COMPREHENSIVE ORDINOR VALIDATION SUITE")
    print(rule)

    # Step 1 of 4: permission compliance
    print("\n[1/4] Testing Permission Compliance...")
    permission_violations = verify_ordinor_permission_compliance(
        simulated_log, org_model, org_model['activity_clusters']
    )

    # Step 2 of 4: availability compliance (report is printed, nothing is returned)
    print("\n[2/4] Testing Availability Compliance...")
    verify_stochastic_compliance(simulated_log, availability_probs)

    # Step 3 of 4: context distribution
    print("\n[3/4] Analyzing Context Distribution...")
    usage_by_context = analyze_context_distribution(simulated_log, org_model)

    # Step 4 of 4: role specialization
    print("\n[4/4] Analyzing Role Specialization...")
    specialist_rows, generalist_rows = analyze_role_specialization(simulated_log, org_model)

    # Summary
    print("\n" + rule)
    print("VALIDATION SUMMARY")
    print(rule)
    print(f"‚úÖ Tests completed for {len(simulated_log)} cases")
    print(f"   Permission violations: {len(permission_violations)}")
    print(f"   Specialists identified: {len(specialist_rows)}")
    print(f"   Generalists identified: {len(generalist_rows)}")
    print(rule)

    return dict(
        violations=permission_violations,
        context_usage=usage_by_context,
        specialists=specialist_rows,
        generalists=generalist_rows,
    )

In [15]:
# Load your organizational model
import pickle

# SECURITY NOTE(review): pickle.load can execute arbitrary code embedded in the
# file. Only load organizational_model.pkl when it comes from a trusted source.
with open('organizational_model.pkl', 'rb') as f:
    org_model = pickle.load(f)

# Load availability
availability = load_availability_from_json()

# Run full validation
results = run_full_ordinor_validation(
    simulated_log=business_log,
    org_model=org_model,
    availability_probs=availability
)

# Report the violations and export them for analysis. This is one check; the
# original repeated the same condition twice in a row.
if results['violations']:
    print(f"\n‚ö†Ô∏è Found {len(results['violations'])} permission violations to fix")

    import pandas as pd
    violations_df = pd.DataFrame(results['violations'])
    violations_df.to_csv('ordinor_violations.csv', index=False)
    print("Violations exported to ordinor_violations.csv")


COMPREHENSIVE ORDINOR VALIDATION SUITE

[1/4] Testing Permission Compliance...
ORDINOR PERMISSION COMPLIANCE REPORT
Total Events: 591164
Context-Aware Assignments: 502928 (85.07%)
Fallback Assignments: 88236 (14.93%)
Permission Violations: 0 (0.00%)

‚úÖ NO VIOLATIONS - All assignments respect OrdinoR permissions!

üìä Overall Permission Compliance: 100.00%

[2/4] Testing Availability Compliance...
--- Advanced Stochastic Report ---
Total Human Events: 591164
Historical Probability Violations: 21768
Advanced Compliance: 96.32%

[3/4] Analyzing Context Distribution...
CONTEXT DISTRIBUTION ANALYSIS

üìä Case Type Distribution:
   Standard: 196990 events (33.3%)
   Complex: 195412 events (33.1%)
   Very Complex: 132416 events (22.4%)
   Simple: 66346 events (11.2%)

‚è∞ Time Type Distribution:
   Morning: 144071 events (24.4%)
   Off-Hours: 127586 events (21.6%)
   Afternoon: 108152 events (18.3%)
   Lunch: 91433 events (15.5%)
   Evening: 62975 events (10.7%)
   Weekend: 56947 events 