In [176]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import tensorflow as tf
import tensorflow.distributions
from tensorflow.distributions import Dirichlet, Multinomial
from scipy.stats import entropy
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d, Axes3D
np.set_printoptions(suppress=True)

import data_handler as dh
import model as md
import time
import pickle

In [84]:
# Input files (TNTP format, judging by the extension): the link table and the
# origin-destination trip table of the Anaheim network.
net_name = 'Anaheim_net.tntp'
trips_name = 'Anaheim_trips.tntp'

# DataHandler comes from the project-local data_handler module.
handler = dh.DataHandler()
# columns_order picks columns 0, 1, 2 and 4 of the net file; judging by the table
# displayed below these are Init node, Term node, Capacity and Free Flow Time.
graph_data = handler.GetGraphData(net_name, columns_order = np.array([0, 1, 2, 4]))
# presumably a nested mapping {origin: {destination: flow}} plus the total flow
# — TODO confirm against data_handler.
graph_correspondences, total_od_flow = handler.GetGraphCorrespondences(trips_name)

# Preview the first rows of the link table.
graph_data['graph_table'].head()

Unnamed: 0,Init node,Term node,Capacity,Free Flow Time
0,1,117,9000.0,1.090459
1,2,87,9000.0,1.090459
2,3,74,9000.0,1.090459
3,4,233,9000.0,1.090459
4,5,165,9000.0,1.090459


In [182]:
def costs_func(df, i, j, column):
    """Look up an integer attribute of the link going from node ``i`` to node ``j``.

    Parameters
    ----------
    df : pandas.DataFrame
        Link table with at least the columns 'Init node', 'Term node' and
        ``column``.
    i, j : int
        Identifiers of the start and end node of the link.
    column : str
        Name of the column whose value is returned.

    Returns
    -------
    int
        Value of ``column`` for the link ``i -> j``, truncated by ``int()``.
        0 is returned when the table contains no such link. If several rows
        describe the same link, the first one is used.
    """
    is_link = (df['Init node'] == i) & (df['Term node'] == j)
    matches = df.loc[is_link, column]

    if matches.empty:
        # zone i is not directly connected to zone j
        return 0
    return int(matches.iloc[0])

def create_T(df, size=None):
    """Build the dense matrix of free-flow travel times between nodes.

    Parameters
    ----------
    df : pandas.DataFrame
        Link table with the columns 'Init node', 'Term node' and
        'Free Flow Time'. Node ids are used as 1-based matrix positions.
    size : int, optional
        Side length of the returned square matrix. Defaults to the
        notebook-level global ``n``.

    Returns
    -------
    numpy.ndarray
        ``(size, size)`` float array where entry ``[i - 1, j - 1]`` is the free
        flow time of the link ``i -> j`` and 0 where no such link exists.
    """
    if size is None:
        size = n  # notebook-level node count

    T = np.zeros((size, size))

    # If a link is listed more than once, keep its first row (the value the
    # former row-by-row lookup returned).
    links = df.drop_duplicates(subset=['Init node', 'Term node'], keep='first')

    # `.values` is used because `as_matrix()` was removed in pandas 1.0.
    rows = links['Init node'].values.astype(int) - 1
    cols = links['Term node'].values.astype(int) - 1

    # One vectorised write instead of filtering the frame for every node pair.
    T[rows, cols] = links['Free Flow Time'].values
    return T

def create_d(dictnr, size=None):
    """Build the dense origin-destination (correspondence) matrix.

    Parameters
    ----------
    dictnr : dict
        Nested mapping ``{origin: {destination: flow}}``. Ids are assumed to be
        1-based, like the node ids of the link table, and are shifted by one to
        obtain matrix positions.
    size : int, optional
        Side length of the returned square matrix. Defaults to the
        notebook-level global ``n``.

    Returns
    -------
    numpy.ndarray
        ``(size, size)`` float array where entry ``[o - 1, t - 1]`` is the flow
        from origin ``o`` to destination ``t``; pairs missing from ``dictnr``
        stay 0.
    """
    if size is None:
        size = n  # notebook-level node count

    d = np.zeros((size, size))

    # Walk every origin that is actually present instead of reusing the
    # destination list of origin 1 as the list of origins.
    for origin, flows in dictnr.items():
        for target, flow in flows.items():
            d[origin - 1, target - 1] = flow

    return d

In [170]:
# Short name for the link table. No copy is made here, so the column rename
# below presumably also affects graph_data['graph_table'] — confirm if that matters.
df = graph_data['graph_table']
# Column names that costs_func and create_T look up.
df.columns = ['Init node', 'Term node', 'Capacity', 'Free Flow Time']

In [171]:
# Preview the link table after the column rename.
df.head()

Unnamed: 0,Init node,Term node,Capacity,Free Flow Time
0,1,117,9000.0,1.090459
1,2,87,9000.0,1.090459
2,3,74,9000.0,1.090459
3,4,233,9000.0,1.090459
4,5,165,9000.0,1.090459


In [172]:
# Matrix dimension used by every later cell: the number of distinct link start nodes.
# NOTE(review): create_T indexes T by (node id - 1) for both 'Init node' and
# 'Term node', so this presumably relies on every node id being <= n — confirm.
n = len(df['Init node'].unique())
# Dense n x n matrix of free-flow travel times.
T = create_T(df)

  from ipykernel import kernelapp as app
  app.launch_new_instance()


In [180]:
# Write the travel-time matrix to T.csv (comma-separated) in the working directory.
np.savetxt('T.csv', T, delimiter=',')

In [183]:
# Dense correspondence matrix built from the data returned by GetGraphCorrespondences.
d = create_d(graph_correspondences)

In [188]:
# Total number of trips in the observed matrix. It has to be taken BEFORE the
# marginals are normalised: the sum of the normalised L is always 1, which
# would leave the recovered matrix unscaled when it is compared with d.
people_num = np.nansum(d)

# Row sums (trips leaving each origin) and column sums (trips arriving at
# each destination), ignoring NaNs.
L = np.nansum(d, axis=1)
W = np.nansum(d, axis=0)

# Turn both marginals into distributions that sum to 1.
L = L/np.sum(L)
W = W/np.sum(W)

In [189]:
def Sinkhorn(k, beta, alpha, lambda_W_prev, lambda_L_prev):
    """Perform one half-step of the alternating update of the dual variables.

    On even ``k`` the vector ``lambda_L`` is recomputed from ``lambda_W_prev``;
    on odd ``k`` the vector ``lambda_W`` is recomputed from ``lambda_L_prev``.
    The vector that is not updated is returned unchanged (the same object).

    Reads the notebook-level globals ``n``, ``T``, ``L`` and ``W``.

    Parameters
    ----------
    k : int
        Iteration counter; only its parity is used.
    beta, alpha : float
        Scalars that enter only through the product ``beta * alpha * T``.
    lambda_W_prev, lambda_L_prev : numpy.ndarray
        Current dual vectors of length ``n``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(lambda_W, lambda_L)`` — note that W comes first.
    """
    # Scaled cost matrix shared by both branches.
    cost = beta * alpha * np.asarray(T)[:n, :n]

    if k % 2 == 0:
        # lambda_L[i] = log( sum_j exp(-lambda_W[j] - 1 - cost[i, j]) / L[i] )
        prev = np.asarray(lambda_W_prev)[:n]
        row_sums = np.exp(-prev[np.newaxis, :] - 1 - cost).sum(axis=1)
        lambda_L = np.log(row_sums / np.asarray(L)[:n])
        lambda_W = lambda_W_prev
    else:
        # lambda_W[j] = log( sum_i exp(-lambda_L[i] - 1 - cost[i, j]) / W[j] )
        prev = np.asarray(lambda_L_prev)[:n]
        col_sums = np.exp(-prev[:, np.newaxis] - 1 - cost).sum(axis=0)
        lambda_W = np.log(col_sums / np.asarray(W)[:n])
        lambda_L = lambda_L_prev

    return lambda_W, lambda_L

def iterate(beta, alpha, L, W, num_iter, eps):
    """Run the alternating dual updates and return the reconstructed matrix.

    Starting from zero vectors, ``Sinkhorn`` is called with k = 0, 1, 2, ...
    until the Euclidean norm of the change of both dual vectors drops below
    ``eps`` or ``num_iter`` steps have been made.

    Parameters
    ----------
    beta, alpha : float
        Passed through to ``Sinkhorn`` and ``error_d_i_j``.
    L, W : array-like
        Accepted for interface compatibility but not read here; ``Sinkhorn``
        uses the notebook-level ``L`` and ``W``.
    num_iter : int
        Maximum number of half-steps. With 0 the result is computed from the
        initial zero vectors.
    eps : float
        Convergence threshold for the norm of the update.

    Returns
    -------
    Whatever ``error_d_i_j`` returns for the final dual vectors.
    """
    lambda_L = np.zeros((n, ), dtype=float)
    lambda_W = np.zeros((n, ), dtype=float)

    for k in range(num_iter):
        lambda_Wn, lambda_Ln = Sinkhorn(k, beta, alpha, lambda_W, lambda_L)

        delta = np.linalg.norm(np.concatenate((lambda_Ln - lambda_L,
                                               lambda_Wn - lambda_W)))

        lambda_L, lambda_W = lambda_Ln, lambda_Wn

        if delta < eps:
            break

    # Use the loop-carried vectors: they equal the latest update after any
    # executed step and are still defined when the loop body never ran.
    return error_d_i_j(lambda_L, lambda_W, beta, alpha)

def error_d_i_j(lambda_L, lambda_W, beta, alpha):
    """Reconstruct the correspondence matrix from the dual variables.

    Despite its name the function returns the reconstructed matrix itself, not
    an error value:
    ``exp(-1 - beta*alpha*T[i, j] - lambda_L[i] - lambda_W[j]) * people_num``.

    Reads the notebook-level globals ``T`` and ``people_num``.

    Parameters
    ----------
    lambda_L, lambda_W : numpy.ndarray
        Dual vectors for rows and columns of ``T`` respectively.
    beta, alpha : float
        Scalars multiplying ``T``.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``T``.
    """
    # Column vector of row multipliers; -1 lets NumPy infer the length, so the
    # function works for any matrix size rather than only 22 x 22.
    lambda_L_col = np.reshape(lambda_L, (-1, 1))

    er = np.exp(-1 - T * beta * alpha - (lambda_L_col + lambda_W))

    return er * people_num

In [192]:
# Sweep settings: iteration cap per run, fixed alpha, and the beta values to try
# (np.arange(0.0, 0.5, 0.25) yields just 0.0 and 0.25).
num_iter = 1500
alpha = 1.0
beta_range = np.arange(0.0, 0.5, 0.25)
# One residual per beta value, in the order of beta_range.
res_list = []

for beta in beta_range:
    # Reconstruct the correspondence matrix for this beta.
    rec = iterate(beta, alpha, L, W, num_iter, eps=10**(-4))
    # NOTE(review): this prints the whole reconstructed matrix on every pass.
    print(beta, rec)
    # Norm of the difference between the observed and the reconstructed matrix.
    res_list.append(np.linalg.norm(d - rec)) # a "/ 22**2" scaling was tried here and disabled

  # This is added back by InteractiveShellApp.init_path()


KeyboardInterrupt: 

In [None]:
# Smallest residual, its position in res_list, and the beta_range value at that position.
np.min(res_list), res_list.index(np.min(res_list)), beta_range[res_list.index(np.min(res_list))]

In [None]:
# NOTE(review): despite the name, this is the entry of beta_range (the swept
# beta values) with the smallest residual, not a value of alpha.
best_alpha = beta_range[res_list.index(np.min(res_list))]

In [None]:
# How much the best residual improves on the one for the first swept value.
res_list[0] - res_list[res_list.index(np.min(res_list))]

In [None]:
# Sanity check before plotting: the swept values and the residuals should have equal length.
print(np.shape(beta_range), np.shape(res_list))

In [None]:
# Plain tick labels (no shared offset) so the residual values are readable.
plt.ticklabel_format(useOffset=False)

# Residual for every swept value; the y label is Russian for "residual".
plt.scatter(beta_range, res_list)
plt.ylabel('Невязка')
# NOTE(review): the x values come from beta_range although the label says 'alpha'.
plt.xlabel('alpha')
# Save BEFORE show(): once show() has displayed the figure it is released, and
# a later savefig() would write an empty canvas.
# NOTE(review): absolute, machine-specific path — consider a configurable output directory.
plt.savefig('/Users/kate/Documents/MIPT/ТРАНСПОРТНЫЕ ПОТОКИ/Jupyter:Data/img/1.png')
plt.show()

##### Best recovered correspondence matrix

In [72]:
# Re-run the solver at the best swept value. `best_alpha` holds the best entry
# of beta_range, so it goes into the beta slot next to the fixed `alpha` used
# during the sweep. Passing the leftover loop variable `beta` instead would
# make the effective beta * alpha depend on the last swept value.
rec = iterate(best_alpha, alpha, L, W, num_iter, eps=10**(-4))

##### Multi-stage