In [3]:
import networkx as nx
import pandas as pd
import numpy as np
import random
import pickle
import math
import time
import os
from utils import create_network_df, generate_od_matrices, prepare_network_data, generate_capacity_matrices
from leblanc import leblanc_algorithm
from tqdm import tqdm
import openmatrix as omx

In [4]:
filepath_sioux = 'data/sioux'
filepath_ema = 'data/ema'

In [5]:
sioux = create_network_df(network_name="SiouxFalls")
T_0_sioux, C_sioux = prepare_network_data(sioux)
eps_ema = 0.005
ema = create_network_df(network_name="EMA")
T_0_ema, C_ema = prepare_network_data(ema)
eps_ema = 0.005

In [None]:
OD_sioux = np.array([
    [0, 100, 100, 500, 200, 300, 500, 800, 500, 1300, 500, 200, 500, 300, 500, 500, 400, 100, 300, 300, 100, 400, 300, 100],
    [100, 0, 100, 200, 100, 400, 200, 400, 200, 600, 200, 100, 300, 100, 100, 400, 200, 0, 100, 100, 0, 100, 0, 0],
    [100, 100, 0, 200, 100, 300, 100, 200, 100, 300, 300, 200, 100, 100, 100, 200, 100, 0, 0, 0, 0, 100, 100, 0],
    [500, 200, 200, 0, 500, 400, 400, 700, 700, 1200, 1400, 600, 600, 500, 500, 800, 500, 100, 200, 300, 200, 400, 500, 200],
    [200, 100, 100, 500, 0, 200, 200, 500, 800, 1000, 500, 200, 200, 100, 200, 500, 200, 0, 100, 100, 100, 200, 100, 0],
    [300, 400, 300, 400, 200, 0, 400, 800, 400, 800, 400, 200, 200, 100, 200, 900, 500, 100, 200, 300, 100, 200, 100, 100],
    [500, 200, 100, 400, 200, 400, 0, 1000, 600, 1900, 500, 700, 400, 200, 500, 1400, 1000, 200, 400, 500, 200, 500, 200, 100],
    [800, 400, 200, 700, 500, 800, 1000, 0, 800, 1600, 800, 600, 600, 400, 600, 2200, 1400, 300, 700, 900, 400, 500, 300, 200],
    [500, 200, 100, 700, 800, 400, 600, 800, 0, 2800, 1400, 600, 600, 600, 900, 1400, 900, 200, 400, 600, 300, 700, 500, 200],
    [1300, 600, 300, 1200, 1000, 800, 1900, 1600, 2800, 0, 4000, 2000, 1900, 2100, 4000, 4400, 3900, 700, 1800, 2500, 1200, 2600, 1800, 800],
    [500, 200, 300, 1500, 500, 400, 500, 800, 1400, 3900, 0, 1400, 1000, 1600, 1400, 1400, 1000, 100, 400, 600, 400, 1100, 1300, 600],
    [200, 100, 200, 600, 200, 200, 700, 600, 600, 2000, 1400, 0, 1300, 700, 700, 700, 600, 200, 300, 400, 300, 700, 700, 500],
    [500, 300, 100, 600, 200, 200, 400, 600, 600, 1900, 1000, 1300, 0, 600, 700, 600, 500, 100, 300, 600, 600, 1300, 800, 800],
    [300, 100, 100, 500, 100, 100, 200, 400, 600, 2100, 1600, 700, 600, 0, 1300, 700, 700, 100, 300, 500, 400, 1200, 1100, 400],
    [500, 100, 100, 500, 200, 200, 500, 600, 1000, 4000, 1400, 700, 700, 1300, 0, 1200, 1500, 200, 800, 1100, 800, 2600, 1000, 400],
    [500, 400, 200, 800, 500, 900, 1400, 2200, 1400, 4400, 1400, 700, 600, 700, 1200, 0, 2800, 500, 1300, 1600, 600, 1200, 500, 300],
    [400, 200, 100, 500, 200, 500, 1000, 1400, 900, 3900, 1000, 600, 500, 700, 1500, 2800, 0, 600, 1700, 1700, 600, 1700, 600, 300],
    [100, 0, 0, 100, 0, 100, 200, 300, 200, 700, 200, 200, 100, 100, 200, 500, 600, 0, 300, 400, 100, 300, 100, 0],
    [300, 100, 0, 200, 100, 200, 400, 700, 400, 1800, 400, 300, 300, 300, 800, 1300, 1700, 300, 0, 1200, 400, 1200, 300, 100],
    [300, 100, 0, 300, 100, 300, 500, 900, 600, 2500, 600, 500, 600, 500, 1100, 1600, 1700, 400, 1200, 0, 1200, 2400, 700, 400],
    [100, 0, 0, 200, 100, 100, 200, 400, 300, 1200, 400, 300, 600, 400, 800, 600, 600, 100, 400, 1200, 0, 1800, 700, 500],
    [400, 100, 100, 400, 200, 200, 500, 500, 700, 2600, 1100, 700, 1300, 1200, 2600, 1200, 1700, 300, 1200, 2400, 1800, 0, 2100, 1100],
    [300, 0, 100, 500, 100, 100, 200, 300, 500, 1800, 1300, 700, 800, 1100, 1000, 500, 600, 100, 300, 700, 700, 2100, 0, 700],
    [100, 0, 0, 200, 0, 100, 100, 200, 200, 800, 600, 500, 700, 400, 400, 300, 300, 0, 100, 400, 500, 1100, 700, 0]
])
def read_omx_demand(file_path):
    try:
        with omx.open_file(file_path, 'r') as omx_file:
            matrix_names = omx_file.list_matrices()
            
            if not matrix_names:
                raise ValueError("No matrices found in OMX file")
            
            matrix_name = matrix_names[0]
            demand_matrix = omx_file[matrix_name]
            
            mapping_title = 'NO_TITLE'
            if hasattr(omx_file, 'mappings'):
                mapping_dict = omx_file.mappings()
                if mapping_dict:
                    mapping_title = next(iter(mapping_dict.keys()))
            
            try:
                lookup = omx_file.mapping(title=mapping_title)
                zones = list(lookup.values())
            except:
                zones = list(range(1, demand_matrix.shape[0] + 1))
                lookup = {zone: idx for idx, zone in enumerate(zones, 1)}
            
            return {
                'matrix': np.array(demand_matrix),
                'zones': zones,
                'lookup': lookup
            }
            
    except Exception as e:
        print(f"Error reading OMX file: {e}")
        raise

OD_ema = 
# demand_ema = pd.DataFrame(
#     demand_data_ema['matrix'],
#     index=demand_data_ema['zones'],
#     columns=demand_data_ema['zones']
# )

# OD_eda = np.array(demand_ema)

Error reading OMX file: ``/home/polina/kans/traffic_assignment/TransportationNetworks/_scripts/demand.omx`` does not exist


FileNotFoundError: ``/home/polina/kans/traffic_assignment/TransportationNetworks/_scripts/demand.omx`` does not exist

In [5]:
os.makedirs(filepath + '/uncongested', exist_ok=True)
os.makedirs(filepath + '/moderate', exist_ok=True)
os.makedirs(filepath + '/congested', exist_ok=True)

In [6]:
num_matrices=5000
uncongested_matrices = generate_od_matrices(D, num_matrices, 'uncongested')
capacities = generate_capacity_matrices(C, num_matrices, disruption_level='L')
# moderate_matrices = generate_od_matrices(D, num_matrices, 'moderate')
# congested_matrices = generate_od_matrices(D, num_matrices, 'congested')

In [7]:
assert False

AssertionError: 

In [None]:
def create_index_file(condition, iters, times, eps, filepath='data/EMA'):
    files = os.listdir(filepath + f"/{condition}")
    indices = [int(f.split('_')[1].split('.')[0]) for f in files if f.startswith('sample_')]
    
    df = pd.DataFrame({
        'condition': condition,
        'eps' : eps,
        'sample_id': indices,
        'iterations' : iters,
        'execution_time' : times,
        'filename': [filepath + f"/{condition}/sample_{i:04d}.pkl" for i in indices]
    })
    df.to_csv(filepath + f"/{condition}_index.csv", index=False)

def save_dataset(condition='uncongested', num_samples=5000):
    start_time = time.time()
    od_matrices = generate_od_matrices(D, num_samples, condition)
    capacities = generate_capacity_matrices(C, num_samples, disruption_level='L')
    times = []
    iters = []
    for i in tqdm(range(num_samples), desc=f"Обработка {condition}"):
        result_matrix, iter_num = leblanc_algorithm(T_0, od_matrices[i], capacities[i], eps)
        end_time = time.time()
        total_time = end_time - start_time
        times.append(total_time)
        iters.append(iter_num)
        metadata = {
            'iterations': iter_num,
            'execution_time': total_time,
        }
        data_pair = {
            'input': od_matrices[i],
            'output': result_matrix,
            'metadata' : metadata
        }
        
        filename = filepath + f"/{condition}/sample_{i:04d}.pkl"
        with open(filename, 'wb') as f:
            pickle.dump(data_pair, f)

    create_index_file(condition, iters, times, eps)

In [None]:
save_dataset(condition='moderate', num_samples=1000)

  T = T_0 * (1 + 0.15 * (X / C)**4)
  C_inv_4 = 1 / C**4
Обработка moderate: 100%|██████████| 1000/1000 [29:53<00:00,  1.79s/it]


In [None]:
save_dataset(condition='congested', num_samples=1000)

Обработка congested:  49%|████▊     | 487/1000 [30:08<35:01,  4.10s/it] 

In [None]:
import asyncio
import aiofiles
import os
import pandas as pd
from tqdm.asyncio import tqdm_asyncio
import numpy as np
import pickle
import time

import nest_asyncio
nest_asyncio.apply()

async def save_sample(condition, i, od_matrix, capacity, eps, filepath):
    """Async function to process and save a single sample"""
    start_time = time.time()
    result_matrix, iter_num = leblanc_algorithm(T_0, od_matrix, capacity, eps)
    exec_time = time.time() - start_time
    
    metadata = {
        'iterations': iter_num,
        'execution_time': exec_time,
    }
    data_pair = {
        'input': od_matrix,
        'output': result_matrix,
        'metadata': metadata
    }
    
    filename = f"{filepath}/{condition}/sample_{i:04d}.pkl"
    async with aiofiles.open(filename, 'wb') as f:
        await f.write(pickle.dumps(data_pair))
    
    return i, iter_num, exec_time

async def process_batch(condition, batch_od, batch_c, eps, filepath, start_idx):
    """Process a batch of samples asynchronously"""
    tasks = [
        save_sample(condition, start_idx+i, od, cap, eps, filepath)
        for i, (od, cap) in enumerate(zip(batch_od, batch_c))
    ]
    return await tqdm_asyncio.gather(*tasks, desc=f"Processing {condition} batch")

async def save_dataset_async(condition='uncongested', num_samples=5000, batch_size=100, eps=0.005, filepath='data/EMA'):
    """Main async function to generate and save dataset"""
    start_total = time.time()
    
    # Create directory if not exists
    os.makedirs(f"{filepath}/{condition}", exist_ok=True)
    
    # Generate all matrices upfront
    od_matrices = generate_od_matrices(D, num_samples, condition)
    capacities = generate_capacity_matrices(C, num_samples, disruption_level='L')
    
    # Process in batches
    all_results = []
    for batch_start in range(0, num_samples, batch_size):
        batch_end = min(batch_start + batch_size, num_samples)
        batch_results = await process_batch(
            condition,
            od_matrices[batch_start:batch_end],
            capacities[batch_start:batch_end],
            eps,
            filepath,
            batch_start
        )
        all_results.extend(batch_results)
    
    # Prepare and save index file
    indices, iters, times = zip(*all_results)
    create_index_file(condition, iters, times, eps, filepath)
    
    print(f"Total processing time: {time.time() - start_total:.2f} seconds")

def create_index_file(condition, iters, times, eps, filepath='data/EMA'):
    """Sync function to create index file (unchanged)"""
    df = pd.DataFrame({
        'condition': condition,
        'eps': eps,
        'sample_id': range(len(iters)),
        'iterations': iters,
        'execution_time': times,
        'filename': [f"{filepath}/{condition}/sample_{i:04d}.pkl" for i in range(len(iters))]
    })
    df.to_csv(f"{filepath}/{condition}_index.csv", index=False)

In [None]:
import multiprocessing
print(f"Available cores: {multiprocessing.cpu_count()}")

Available cores: 12


In [18]:
import os
import pickle
import time
import numpy as np
import pandas as pd
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm
import multiprocessing

def process_sample(args):
    i, od, cap, condition, eps, filepath, T_0 = args
    start_time = time.time()
    result_matrix, iter_num = leblanc_algorithm(T_0, od, cap, eps)
    exec_time = time.time() - start_time
    
    data = {
        'input': od,
        'output': result_matrix,
        'capacities': cap,
        'free_flow_time': T_0,
        'metadata': {
            'iterations': iter_num,
            'execution_time': exec_time
        }
    }
    
    with open(f"{filepath}/{condition}/sample_{i:04d}.pkl", 'wb') as f:
        pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
    
    return i, iter_num, exec_time

def save_dataset_fast(D, C, T_0, condition='uncongested', num_samples=5000, filepath='data/EMA', eps=0.05):
    start_total = time.time()
    os.makedirs(f"{filepath}/{condition}", exist_ok=True)
    
    od_matrices = generate_od_matrices(D, num_samples, condition)
    capacities = generate_capacity_matrices(C, num_samples, disruption_level='L')
    
    args = [(i, od_matrices[i], capacities[i], condition, eps, filepath, T_0) for i in range(num_samples)]
    
    completed = 0
    time_per_sample = []
    
    with ProcessPoolExecutor(max_workers=10) as executor:
        futures = {executor.submit(process_sample, arg): i for i, arg in enumerate(args)}
        
        with tqdm(total=num_samples, desc="Processing") as pbar:
            for future in as_completed(futures):
                i, iter_num, exec_time = future.result()
                time_per_sample.append(exec_time)
                completed += 1
                
                avg_time = np.mean(time_per_sample[-100:]) if len(time_per_sample) > 0 else 0
                remaining = (num_samples - completed) * avg_time / 12
                
                pbar.set_postfix({
                    'completed': f"{completed}/{num_samples}",
                    'avg_time': f"{avg_time:.2f}s",
                    'remaining': f"{remaining/60:.1f}min",
                    'total_est': f"{(time.time() - start_total + remaining)/60:.1f}min"
                })
                pbar.update(1)
    
    results = sorted([future.result() for future in futures], key=lambda x: x[0])
    indices, iters, times = zip(*results)
    
    pd.DataFrame({
        'condition': condition,
        'eps': eps,
        'sample_id': indices,
        'iterations': iters,
        'execution_time': times,
        'filename': [f"{filepath}/{condition}/sample_{i:04d}.pkl" for i in indices]
    }).to_csv(f"{filepath}/{condition}_index.csv", index=False)
    
    total_time = time.time() - start_total
    print(f"\nCompleted {num_samples} samples in {total_time/60:.1f} minutes")
    print(f"Average time per sample: {total_time/num_samples:.2f}s")
    print(f"Throughput: {num_samples/total_time*60:.1f} samples/minute")

In [None]:
save_dataset_fast(num_samples=5000, filepath='data/sioux_capacities', eps=0.005, )

NameError: name 'generate_od_matrices' is not defined

In [15]:
x, y = generate_od_matrices(D, 1)[0], capacities[0]

In [20]:
leblanc_algorithm(T_0, x, y, epsilon=0.05)

(array([[   0,    0,  707, ...,    0,    0,    0],
        [   0,    0, 1323, ...,    0,    0,    0],
        [ 297, 1091,    0, ...,    0,    0,    0],
        ...,
        [   0,    0,    0, ...,    0,    0,    0],
        [   0,    0,    0, ...,    0,    0,    0],
        [   0,    0,    0, ...,    0,    0,    0]], shape=(74, 74)),
 28)

In [18]:
leblanc_algorithm(T_0, x, y, epsilon=0.005)

(array([[   0,    0,  707, ...,    0,    0,    0],
        [   0,    0, 1323, ...,    0,    0,    0],
        [ 297, 1091,    0, ...,    0,    0,    0],
        ...,
        [   0,    0,    0, ...,    0,    0,    0],
        [   0,    0,    0, ...,    0,    0,    0],
        [   0,    0,    0, ...,    0,    0,    0]], shape=(74, 74)),
 189)

In [12]:
path_ema = "/home/podozerovapo/traffic_assignment/data/EMA/uncongested/sample_0000.pkl"
with open(path_ema, 'rb') as f:
    data_ema = pickle.load(f)

In [17]:
data_ema.keys()

dict_keys(['input', 'output', 'metadata'])

In [14]:
data_ema['input']

array([[  0,  40, 562, ...,   0,   0,   0],
       [ 58,   0, 104, ...,   0,   0,   0],
       [ 66,  85,   0, ...,   0,   0,   0],
       ...,
       [  0,   0,   0, ...,   0,   0,   0],
       [  0,   0,   0, ...,   0,   0,   0],
       [  0,   0,   0, ...,   0,   0,   0]], shape=(74, 74))