In [10]:
import pandas as pd
import numpy as np
import sqlite3
from datetime import datetime, timedelta
import json
import random

In [11]:
CHARLSON_CONDITIONS = {
    'mi': {
        '9': ["410", "412"],
        '10': ["I21", "I22", "I25.2"]
    },
    'chf': {
        '9': ["398.91", "402.01","402.11","402.91","404.01","404.03","404.11","404.13","404.91","404.93","425.4","425.5","425.6","425.7","425.8","425.9","428"],
        '10': ["I09.9", "I11.0", "I13.0", "I13.2", "I25.5", "I42.0", "I42.5", "I42.6", "I42.7", "I42.8", "I42.9", "I43", "I50", "P29.0"]
    },
    'pvd': {
        '9': ["093.0", "440","441","443.1","443.2","443.3","443.4","443.5","443.6","443.7","443.8","443.9","557.1","557.9","V43.4"],
        '10': ["I70", "I71", "I73.1","I73.8","I73.9","I77.1","I79.0","I79.2","K55.1","K55.8","K55.9","Z95.8","Z95.9"]
    },
    'cevd': {
        '9': ["362.34", "430","431","432","433","434","435","436","437","438"],
        '10': ["G45", "G46", "H34.0","I60", "I61", "I62","I63","I64","I65","I66","I67","I68","I69"]
    },
    'dementia': {
        '9': ["290","294.1","331.2"],
        '10': ["F00","F01","F02","F03","F05.1","G30","G31.1"]
    },
    'copd': {
        '9': ["416.8","416.9","490", "491","492","493","494","495","496","497","498","499","500","501","502","503","504","505","506.4","508.1", "508.8"],
        '10': ["I27.8","I27.9","J40", "J41", "J42","J43","J44","J45","J46","J47","J60","J61","J62","J63","J64","J65","J66","J67","J68.4","J70.1", "J70.3"]
    },
    'rheumd': {
        '9': ["446.5", "710.0", "710.1", "710.2", "710.3", "710.4", "714.0", "714.1", "714.2", "714.8", "725"],
        '10': ["M05", "M06", "M31.5", "M32", "M33", "M34", "M35.1", "M35.3", "M36.0"]
    },
    'pud': {
        '9': ["531", "532", "533", "534"],
        '10': ["K25", "K26", "K27", "K28"]
    },
    'mld': {
        '9': ["070.22", "070.23", "070.32", "070.33", "070.44", "070.54", "070.6", "070.9", "570", "571", "573.3", "573.4", "573.8", "573.9", "V42.7"],
        '10': ["B18", "K70.0", "K70.1", "K70.2", "K70.3", "K70.9", "K71.3", "K71.4", "K71.5", "K71.7", "K73", "K74", "K76.0", "K76.2", "K76.3", "K76.4", "K76.8", "K76.9", "Z94.4"]
    },
    'msld': {
        '9': ["456.0", "456.1", "456.2", "572.2", "572.3", "572.4","572.5","572.6","572.7", "572.8"],
        '10': ["I85.0","I85.9", "I86.4","I98.2", "K70.4", "K71.1", "K72.1", "K72.9", "K76.5", "K76.6", "K76.7"]
    },
    'diab': {
        '9': ["250.0", "250.1", "250.2", "250.3", "250.8", "250.9"],
        '10': ["E10.0", "E10.1", "E10.6", "E10.8","E10.9","E11.0","E11.1","E11.6","E11.8","E11.9","E13.0","E13.1","E13.6","E13.8","E13.9"]
    },
    'dia_w_c': {
        '9': ["250.4", "250.5", "250.6", "250.7"],
        '10': ["E10.2", "E10.3", "E10.4", "E10.5", "E10.7", "E11.2", "E11.3", "E11.4", "E11.5", "E11.7", "E13.2", "E13.3", "E13.4", "E13.5", "E13.7"]
    },
    'hp': {
        '9': ["334.1", "342", "343", "344.0","344.1","344.2","344.3","344.4","344.5","344.6","344.9"],
        '10': ["G04.1", "G11.4", "G80.1", "G80.2", "G81", "G82", "G83.0","G83.1","G83.2","G83.3","G83.4","G83.9"]
    },
    'mrend': {
        '9': ["403.00","403.10", "403.90", "404.00", "404.01", "404.10", "404.11", "404.90", "404.91", "584", "585.6", "589"],
        '10': ["I12.9","I13.0","I13.10","N03", "N05","N18.1", "N18.2","N18.3","N18.4","N18.9", "Z49.0"]
    },
    'srend': {
        '9': ["403.01", "403.11", "403.91", "404.02", "404.03", "404.12", "404.13", "404.92", "404.93","582","583.0","583.1","583.2","583.3","583.4","583.5","583.6","583.7","585.5", "585.6","586","588.0","V42.0","V45.1","V56"],
        '10': ["I12.0", "I13.11","I13.2","N18.5","N18.6","N19", "N25.0","Z49","Z94.0", "Z99.2"]
    },
    'aids': {
        '9': ["112", "180", "114", "117.5", "007.4", "078.5", "348.3", "054", "115", "007.2", "176", "200", "201", "202", "203", "204", "205", "206", "207", "208", "209", "031", "010", "011", "012", "013", "014", "015", "016", "017", "018", "136.3", "V12.61", "046.3", "003.1", "130", "799.4"],
        '10': ["B37", "C53", "B38", "B45", "A07.2", "B25", "G93.4", "B00", "B39", "A07.3", "C46", "C81", "C82", "C83", "C84", "C85", "C86", "C87", "C88", "C89", "C90", "C91", "C92", "C93", "C94", "C95", "C96", "A31", "A15", "A16", "A17", "A18", "A19", "B59", "Z87.01", "A81.2", "A02.1", "B58", "R64"]
    },
    'hiv': {
        '9': ["042"],
        '10': ["B20"]
    },
    'mst': {
        '9': ["196", "197", "198", "199.0"],
        '10': ["C77", "C78", "C79", "C80.0", "C80.2"]
    },
    'mal': {
        '9': ["14", "15", "16", "170", "171", "172", "174", "175", "176", "179", "18", "190", "191", "192", "193", "194", "195", "199.1", "200", "201", "202", "203", "204", "205", "206", "207", "208", "238.6"],
        '10': ["C0", "C1", "C2", "C30", "C31", "C32", "C33", "C34", "C37", "C38", "C39", "C40", "C41", "C43", "C45", "C46", "C47", "C48", "C49", "C50", "C51","C52","C53","C54","C55","C56","C57", "C58", "C60","C61","C62", "C63", "C76", "C80.1", "C81", "C82", "C83", "C84", "C85", "C88", "C9"]
    },
    'Obesity': {
        '9': ["278.0"],
        '10': ["E66"]
    },
    'WL': {
        '9': ['260','261','262','263','783.2','799.4'],
        '10': ['E40','E41','E42','E43','E44','E45','E46','R63.4','R64']
    },
    'Alcohol': {
        '9': ['265.2','291.1','291.2','291.3','291.5','291.8','291.9','303.0','303.9','305.0','357.5','425.5','535.3','571.0','571.1','5712','5713','980','V113'],
        '10': ['F10','E52','G62.1','I42.6','K29.2','K70.0','K70.3','K70.9','T51','Z50.2','Z71.4','Z72.1']
    },
    'Drug': {
        '9': ['292','304','305.2','305.3','305.4','305.5','305.6','305.7','305.8','305.9','V65.42'],
        '10': ['F11','F12','F13','F14','F15','F16','F18','F19','Z71.5','Z72.2']
    },
    'Psycho': {
        '9': ['293.8','295','296.04','296.14','296.44','296.54','297','298'],
        '10': ['F20','F22','F23','F24','F25','F28','F29','F30.2','F31.2','F31.5']
    },
    'Dep': {
        '9': ['296.2','296.3','296.5','300.4','309','311'],
        '10': ['F20.4','F31.3','F31.4','F31.5','F32','F33','F34.1','F41.2','F43.2']
    }
}

In [12]:
CONDITION_WEIGHTS = {
    'mi': 0.04,       
    'chf': 0.06,      
    'pvd': 0.05,      
    'cevd': 0.05,      
    'dementia': 0.04,  
    'copd': 0.08,      
    'rheumd': 0.04,    
    'pud': 0.04,       
    'mld': 0.05,       
    'msld': 0.03,     
    'diab': 0.08,      
    'dia_w_c': 0.06,   
    'hp': 0.04,        
    'mrend': 0.05,     
    'srend': 0.04,     
    'aids': 0.03,      
    'hiv': 0.02,       
    'mst': 0.03,       
    'mal': 0.05,       
    'Obesity': 0.04,   
    'WL': 0.03,        
    'Alcohol': 0.03,   
    'Drug': 0.02,      
    'Psycho': 0.02,    
    'Dep': 0.03        
}

total_weight = sum(CONDITION_WEIGHTS.values())
print(f"Total weight: {total_weight}")

Total weight: 1.0500000000000003


In [13]:
def create_database():
    """Create SQLite database and tables"""
    conn = sqlite3.connect('healthcare.db')
    cursor = conn.cursor()
    
    # Create tables
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS provider (
        provider_id TEXT PRIMARY KEY,
        latitude REAL,
        longitude REAL
    )
    ''')
    
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS geolocation (
        patient_id TEXT PRIMARY KEY,
        census_block TEXT,
        latitude REAL,
        longitude REAL
    )
    ''')
    
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS travel_time (
        census_block TEXT,
        provider_id TEXT,
        travel_time_type TEXT,
        travel_time_minutes REAL,
        PRIMARY KEY (census_block, provider_id, travel_time_type)
    )
    ''')
    
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS demographics (
        patient_id TEXT PRIMARY KEY,
        birth_date DATE,
        sex TEXT,
        race TEXT,
        ethnicity TEXT,
        education TEXT,
        income REAL
    )
    ''')
    
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS rucc (
        census_block TEXT PRIMARY KEY,
        rucc_code INTEGER
    )
    ''')
    
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS encounter (
        patient_id TEXT,
        encounter_id TEXT PRIMARY KEY,
        start_date DATE,
        end_date DATE
    )
    ''')
    
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS procedure_table (
        patient_id TEXT,
        encounter_id TEXT,
        provider_id TEXT,
        procedure_code TEXT,
        start_datetime DATETIME,
        end_datetime DATETIME,
        FOREIGN KEY (encounter_id) REFERENCES encounter(encounter_id)
    )
    ''')

    cursor.execute('''
    CREATE TABLE IF NOT EXISTS diagnosis (
        patient_id TEXT,
        encounter_id TEXT,
        diagnosis_code TEXT,
        vocabulary_id TEXT,
        diagnosis_date DATE,
        FOREIGN KEY (patient_id) REFERENCES demographics(patient_id),
        FOREIGN KEY (encounter_id) REFERENCES encounter(encounter_id)
    )
    ''')
    
    conn.commit()
    return conn

def load_existing_data(conn, patient_locations, clinic_locations, travel_times):
    """Load existing location and travel time data, and RUCC data"""
    cursor = conn.cursor()
    
    # clear all
    cursor.execute('DELETE FROM provider')
    cursor.execute('DELETE FROM geolocation')
    cursor.execute('DELETE FROM travel_time')
    cursor.execute('DELETE FROM demographics')
    cursor.execute('DELETE FROM rucc')
    cursor.execute('DELETE FROM encounter')
    cursor.execute('DELETE FROM procedure_table')
    cursor.execute('DELETE FROM diagnosis')
    
    # Load provider (clinic) data
    provider_data = []
    for i, (lat, lon) in enumerate(clinic_locations):
        provider_id = f'PR{str(i+1).zfill(3)}'
        provider_data.append((provider_id, lat, lon))
    
    cursor.executemany('INSERT INTO provider (provider_id, latitude, longitude) VALUES (?, ?, ?)',
                      provider_data)
    
    # Load patient location data
    patient_data = []
    for i, location in enumerate(patient_locations):  # patient_locations
        patient_id = f'P{str(i+1).zfill(3)}'       # P-ID: P001-P100
        census_block = f'CB{str(i).zfill(3)}'      # census block: CB000-CB099
        patient_data.append((
            patient_id,
            census_block,
            location['lat'],
            location['lon']
        ))
            
    cursor.executemany('INSERT INTO geolocation (patient_id, census_block, latitude, longitude) VALUES (?, ?, ?, ?)',
                      patient_data)
    
    # Load travel time data
    travel_time_data = []
    seen_combinations = set()
    
    for i, times in enumerate(travel_times):
        census_block = f'CB{str(i).zfill(3)}'
        for j, time in enumerate(times.values()):
            provider_id = f'PR{str(j+1).zfill(3)}'
            combination = (census_block, provider_id, 'DRIVING')
            
            if combination not in seen_combinations:
                seen_combinations.add(combination)
                travel_time_data.append((census_block, provider_id, 'DRIVING', float(time) * 60))
    
    cursor.executemany('INSERT INTO travel_time (census_block, provider_id, travel_time_type, travel_time_minutes) VALUES (?, ?, ?, ?)',
                      travel_time_data)

    # Load RUCC data from CSV
    rucc_df = pd.read_csv('rucc_codes.csv')
    rucc_data = [(row['census_block'], row['rucc_code']) 
                 for _, row in rucc_df.iterrows()]
    
    cursor.executemany('INSERT INTO rucc (census_block, rucc_code) VALUES (?, ?)',
                      rucc_data)
    
    conn.commit()

def generate_mock_data(conn, n_patients=100):
    """Generate mock demographic and medical data"""
    cursor = conn.cursor()
    
    # Generate demographics with correlations
    current_date = datetime.now()
    demographics_data = []
    for i in range(n_patients):
        patient_id = f'P{str(i+1).zfill(3)}'
        
        # Age between 45-80
        age = np.random.normal(62, 10)
        age = max(45, min(80, age))
        birth_date = current_date - timedelta(days=int(age*365.25))
        
        # Correlated demographics
        education_level = np.random.choice(
            ['High School', 'Some College', 'Bachelor', 'Graduate'],
            p=[0.3, 0.3, 0.25, 0.15]
        )
        
        # Income correlated with education
        base_income = {
            'High School': 40000,
            'Some College': 55000,
            'Bachelor': 70000,
            'Graduate': 85000
        }
        income = np.random.normal(base_income[education_level], 10000)
        
        demographics_data.append((
            patient_id,
            birth_date.strftime('%Y-%m-%d'),
            np.random.choice(['M', 'F']),
            np.random.choice(['White', 'Black', 'Asian', 'Other'], p=[0.7, 0.15, 0.1, 0.05]),
            np.random.choice(['Hispanic', 'Non-Hispanic'], p=[0.15, 0.85]),
            education_level,
            income
        ))
    
    cursor.executemany('''
    INSERT INTO demographics 
    (patient_id, birth_date, sex, race, ethnicity, education, income)
    VALUES (?, ?, ?, ?, ?, ?, ?)
    ''', demographics_data)
    
    # Generate encounters and procedures
    encounter_data = []
    procedure_data = []
    
    for i in range(n_patients):
        patient_id = f'P{str(i+1).zfill(3)}'
        
        # 1-3 encounters per patient
        n_encounters = np.random.randint(1, 4)
        
        for j in range(n_encounters):
            encounter_id = f'E{patient_id}_{j}'
            
            # Generate dates within last 2 years
            start_date = current_date - timedelta(days=np.random.randint(1, 730))
            end_date = start_date + timedelta(days=np.random.randint(1, 5))
            
            encounter_data.append((
                patient_id,
                encounter_id,
                start_date.strftime('%Y-%m-%d'),
                end_date.strftime('%Y-%m-%d')
            ))
            
            # 30% chance of CRC screening procedure
            if np.random.random() < 0.3:
                procedure_code = np.random.choice(['45378', '45380', '45384', '45385'])
                provider_id = f'PR{str(np.random.randint(1, 17)).zfill(3)}'
                
                procedure_start = datetime.combine(start_date, 
                                                datetime.strptime(f"{np.random.randint(9,17)}:00", "%H:%M").time())
                procedure_end = procedure_start + timedelta(hours=np.random.randint(1, 4))
                
                procedure_data.append((
                    patient_id,
                    encounter_id,
                    provider_id,
                    procedure_code,
                    procedure_start.strftime('%Y-%m-%d %H:%M:%S'),
                    procedure_end.strftime('%Y-%m-%d %H:%M:%S')
                ))
    
    cursor.executemany('''
    INSERT INTO encounter 
    (patient_id, encounter_id, start_date, end_date)
    VALUES (?, ?, ?, ?)
    ''', encounter_data)
    
    cursor.executemany('''
    INSERT INTO procedure_table 
    (patient_id, encounter_id, provider_id, procedure_code, start_datetime, end_datetime)
    VALUES (?, ?, ?, ?, ?, ?)
    ''', procedure_data)

    # Generate diagnosis data with Charlson Comorbidities
    diagnosis_data = []
    for encounter in encounter_data:
        patient_id = encounter[0]
        encounter_id = encounter[1]
        start_date = datetime.strptime(encounter[2], '%Y-%m-%d')
        end_date = datetime.strptime(encounter[3], '%Y-%m-%d')
        
        # 30% pat with 1-3 diseases:
        if np.random.random() < 0.3:
            
            n_conditions = np.random.randint(1, 4)
            
            selected_conditions = random.choices(
                list(CONDITION_WEIGHTS.keys()),
                weights=list(CONDITION_WEIGHTS.values()),
                k=n_conditions
            )
            
            for condition in selected_conditions:
                # ICO version (80% ICD-10, 20% ICD-9)
                version = '10' if random.random() < 0.8 else '9'
                
                # get codes from CHARLSON_CONDITIONS
                codes = CHARLSON_CONDITIONS[condition][version]
                selected_code = random.choice(codes)
                
                # generate diagnosis time
                diag_datetime = start_date + timedelta(
                    hours=random.randint(0, 24)
                )
                
                diagnosis_data.append((
                    patient_id,
                    encounter_id,
                    selected_code,
                    f'ICD{version}CM',
                    diag_datetime.strftime('%Y-%m-%d')
                ))

    cursor.executemany('''
    INSERT INTO diagnosis 
    (patient_id, encounter_id, diagnosis_code, vocabulary_id, diagnosis_date)
    VALUES (?, ?, ?, ?, ?)
    ''', diagnosis_data)
    
    conn.commit()

In [14]:
def verify_data(conn):
    """Verify the generated data and show key statistics"""
    cursor = conn.cursor()
    
    print("Table Record Counts:")
    print("-" * 50)
    for table in ['provider', 'geolocation', 'travel_time', 'demographics', 'rucc', 'encounter', 'procedure_table']:
        count = cursor.execute(f'SELECT COUNT(*) FROM {table}').fetchone()[0]
        print(f"{table}: {count} records")
    
    print("\nSample Patient Data:")
    print("-" * 50)
    query = """
    SELECT 
        g.patient_id,
        g.census_block,
        d.sex,
        d.birth_date,
        d.education,
        d.race,
        d.ethnicity,
        r.rucc_code,
        COUNT(DISTINCT e.encounter_id) as num_encounters,
        COUNT(DISTINCT p.procedure_code) as num_procedures,
        MIN(t.travel_time_minutes) as min_travel_time
    FROM geolocation g
    JOIN demographics d ON g.patient_id = d.patient_id
    JOIN rucc r ON g.census_block = r.census_block
    LEFT JOIN encounter e ON g.patient_id = e.patient_id
    LEFT JOIN procedure_table p ON e.encounter_id = p.encounter_id
    LEFT JOIN travel_time t ON g.census_block = t.census_block
    GROUP BY g.patient_id
    LIMIT 5
    """
    df = pd.read_sql_query(query, conn)
    print(df)
    
    print("\nKey Statistics:")
    print("-" * 50)

    # Charlson Comorbidities
    print("\nCharlson Comorbidities Distribution:")
    for condition, codes in CHARLSON_CONDITIONS.items():
        query = f"""
        SELECT COUNT(DISTINCT patient_id) as count
        FROM diagnosis
        WHERE (vocabulary_id = 'ICD9CM' AND diagnosis_code IN ('{"','".join(codes['9'])}'))
        OR (vocabulary_id = 'ICD10CM' AND diagnosis_code IN ('{"','".join(codes['10'])}'))
        """
        count = cursor.execute(query).fetchone()[0]
        print(f"{condition}: {count} patients")
    
    screening_query = """
    SELECT 
        COUNT(DISTINCT CASE WHEN procedure_code IN ('45378', '45380', '45384', '45385') 
              THEN patient_id END) * 100.0 / COUNT(DISTINCT patient_id) as screening_rate
    FROM procedure_table
    """
    screening_rate = pd.read_sql_query(screening_query, conn).iloc[0,0]
    print(f"CRC Screening Rate: {screening_rate:.1f}%")
    
    encounters_query = """
    SELECT AVG(encounter_count) as avg_encounters
    FROM (
        SELECT patient_id, COUNT(*) as encounter_count
        FROM encounter
        GROUP BY patient_id
    )
    """
    avg_encounters = pd.read_sql_query(encounters_query, conn).iloc[0,0]
    print(f"Average encounters per patient: {avg_encounters:.1f}")
    
    # Travel time 
    travel_time_query = """
    SELECT 
        MIN(travel_time_minutes) as min_time,
        AVG(travel_time_minutes) as avg_time,
        MAX(travel_time_minutes) as max_time
    FROM travel_time
    """
    travel_times = pd.read_sql_query(travel_time_query, conn)
    print("\nTravel Time Distribution (minutes):")
    print(f"Min: {travel_times.iloc[0,0]:.1f}")
    print(f"Avg: {travel_times.iloc[0,1]:.1f}")
    print(f"Max: {travel_times.iloc[0,2]:.1f}")



In [15]:
def export_tables_to_csv(conn):
    """Export all tables from database to CSV files"""
    tables = [
        'geolocation', 'demographics', 'encounter', 
        'procedure_table', 'provider', 'rucc', 'travel_time','diagnosis'
    ]
    
    for table in tables:
        query = f"SELECT * FROM {table}"
        df = pd.read_sql_query(query, conn)
        df.to_csv(f'{table}.csv', index=False)
        print(f"Exported {table}.csv with {len(df)} rows")

        print(f"\nFirst few rows of {table}:")
        print(df.head())
        print("\n" + "="*50 + "\n")
        
def close_all_connections():
    import sqlite3
    sqlite3.connect('healthcare.db').close()

def ensure_fresh_database():
    import os
    if os.path.exists('healthcare.db'):
        try:
            os.remove('healthcare.db')
        except PermissionError:
            print("Could not remove existing database. Please close any programs that might be using it.")
            return False
    return True

def main():

    close_all_connections()
    
    if not ensure_fresh_database():
        return
    
    conn = create_database()

    try:
        # Load your existing data
        with open('sampled_block_group_centers_100_30.json', 'r') as f:
            patient_locations = json.load(f)
            
        clinic_locations = [
            [40.40655, -86.8321528],
            [40.7344392, -86.77769099999999],
            [40.2765035, -86.4970488],
            [39.9164485, -86.1557417],
            [39.7805894, -86.3405844],
            [39.7775523, -86.1837364],
            [39.79052859999999, -86.16338739999999],
            [39.7756075, -86.1761174],
            [39.9868449, -85.929307],
            [39.6379321, -86.1593584],
            [40.2247576, -85.4507319],
            [39.2893255, -86.7867983],
            [39.9075207, -85.3861367],
            [39.1606644, -86.55537140000001],
            [38.8599541, -86.51307659999999],
            [38.56829949999999, -86.47532799999999]
        ]
        
        with open('ExactTravelTimeDatafromAllMatrix.json', 'r') as f:
            travel_times = json.load(f)
        
        # Load existing data into database and generate mock data
        load_existing_data(conn, patient_locations, clinic_locations, travel_times)
        generate_mock_data(conn)

        # verify data
        verify_data(conn)
        
        # Export all tables to CSV
        export_tables_to_csv(conn)
        
    finally:
        conn.close()

In [16]:
if __name__ == "__main__":
    main()

Table Record Counts:
--------------------------------------------------
provider: 16 records
geolocation: 100 records
travel_time: 1600 records
demographics: 100 records
rucc: 100 records
encounter: 198 records
procedure_table: 59 records

Sample Patient Data:
--------------------------------------------------
  patient_id census_block sex  birth_date     education   race     ethnicity  \
0       P001        CB000   M  1961-04-05  Some College  White  Non-Hispanic   
1       P002        CB001   F  1971-02-02   High School  White  Non-Hispanic   
2       P003        CB002   F  1955-12-05      Bachelor  Black  Non-Hispanic   
3       P004        CB003   M  1961-07-27   High School  White  Non-Hispanic   
4       P005        CB004   F  1980-02-20      Bachelor  Other  Non-Hispanic   

   rucc_code  num_encounters  num_procedures  min_travel_time  
0          2               2               1        69.600000  
1          1               3               1        16.983333  
2          6   