In [None]:
import os

import numpy as np
import pandas as pd
import geopandas as gpd
import torch

from shapely import wkt
from shapely.geometry import Polygon

import matplotlib.pyplot as plt
import seaborn as sns

from src.est import *
from src.utils import *

# 1 - Prepare data

In [None]:
# Raw inputs, read from paths relative to the notebook's working directory:
# a trip-record table (file name suggests yellow-taxi trips for 2024-01) and
# the taxi-zone lookup table whose `the_geom` column is parsed as WKT below.
df_trips = pd.read_parquet("data/yellow_tripdata_2024-01.parquet")
df_zones = pd.read_csv("data/taxi_zones.csv")

In [None]:
def get_longitude(polygon):
    """Return the x-coordinate of the centroid of a WKT geometry string.

    Parameters
    ----------
    polygon : str
        Geometry in WKT form (the notebook passes values of ``the_geom``).

    Returns
    -------
    float
        ``centroid.x`` of the parsed geometry. This is a longitude only if the
        WKT coordinates are (lon, lat) -- TODO confirm the zone file's CRS.
    """
    return wkt.loads(polygon).centroid.x

def get_latitude(polygon):
    """Return the y-coordinate of the centroid of a WKT geometry string.

    Parameters
    ----------
    polygon : str
        Geometry in WKT form (the notebook passes values of ``the_geom``).

    Returns
    -------
    float
        ``centroid.y`` of the parsed geometry. This is a latitude only if the
        WKT coordinates are (lon, lat) -- TODO confirm the zone file's CRS.
    """
    return wkt.loads(polygon).centroid.y

# Centroid coordinates of every zone, parsed from the WKT geometry column.
for coord_col, centroid_fn in (('lon', get_longitude), ('lat', get_latitude)):
    df_zones[coord_col] = df_zones['the_geom'].apply(centroid_fn)

# Ensure both trip timestamps are datetimes, then bucket each one into a
# 4-hour block of the day (hour // 4 yields a bin index in 0..5).
for ts_col, bin_col in (
    ('tpep_pickup_datetime', 'PUhour'),
    ('tpep_dropoff_datetime', 'DOhour'),
):
    df_trips[ts_col] = pd.to_datetime(df_trips[ts_col])
    df_trips[bin_col] = df_trips[ts_col].dt.hour // 4

def categorize_time(hour):
    """Map an hour value to a coarse time-of-day code.

    Returns 0 when ``6 <= hour <= 11``, 1 when ``12 <= hour <= 17`` and 2 for
    every other value (both ranges are inclusive at each end).
    """
    if 12 <= hour <= 17:
        return 1
    if 6 <= hour <= 11:
        return 0
    return 2

# Coarse time-of-day period of each trip endpoint, as coded by categorize_time
# (0: hours 6-11, 1: hours 12-17, 2: everything else).
# categorize_time compares against clock hours, so it must be fed the raw hour
# of the timestamp. PUhour/DOhour hold 4-hour bin indices (hour // 4, i.e. 0-5);
# those never reach 6, which would put every single trip in period 2.
df_trips['PUtime_period'] = df_trips['tpep_pickup_datetime'].dt.hour.apply(categorize_time)
df_trips['DOtime_period'] = df_trips['tpep_dropoff_datetime'].dt.hour.apply(categorize_time)

# Attach zone attributes (borough, zone name, centroid) to both trip endpoints.
# The lookup is first reduced to one row per LocationID: if the zone table
# listed an ID more than once, the join would silently duplicate every trip
# touching that zone and inflate its counts.
zone_lookup = (
    df_zones[['LocationID', 'borough', 'zone', 'lon', 'lat']]
    .drop_duplicates(subset='LocationID')
)

# Inner joins (the pandas default): trips whose pickup/dropoff ID is absent
# from the zone table are dropped. The helper key column is removed right after
# each join and the zone attributes get the endpoint prefix (PU... / DO...).
for endpoint in ('PU', 'DO'):
    df_trips = (
        df_trips
        .merge(zone_lookup, left_on=f'{endpoint}LocationID', right_on='LocationID')
        .drop(columns='LocationID')
        .rename(columns={
            'lon': f'{endpoint}lon',
            'lat': f'{endpoint}lat',
            'borough': f'{endpoint}borough',
            'zone': f'{endpoint}zone',
        })
    )

In [None]:
# Candidate selections of zone names; entries are compared against the
# PUzone / DOzone columns when filtering trips.

# Widest of the three selections. Compared with the next list it additionally
# contains 'Washington Heights South'.
neighborhoods_south_of_harlem = [
    'Alphabet City', 'Battery Park', 'Battery Park City', 'Central Park',
    'Chinatown', 'Clinton East', 'Clinton West', 'East Chelsea', 
    'East Village', 'Financial District North', 'Financial District South', 
    'Flatiron', 'Hudson Sq', 'Garment District', 
    "Governor's Island/Ellis Island/Liberty Island", 'Gramercy', 
    'Greenwich Village North', 'Greenwich Village South', 'Kips Bay', 
    'Lenox Hill East', 'Lenox Hill West', 'Lincoln Square East', 
    'Lincoln Square West', 'Little Italy/NoLiTa', 'Lower East Side', 
    'Meatpacking/West Village West', 'Midtown Center', 'Midtown East', 
    'Midtown North', 'Midtown South', 'Murray Hill', 
    'Penn Station/Madison Sq West', 'Seaport', 'SoHo', 
    'Stuy Town/Peter Cooper Village', 'Sutton Place/Turtle Bay North', 
    'Times Sq/Theatre District', 'TriBeCa/Civic Center', 
    'Two Bridges/Seward Park', 'UN/Turtle Bay South', 'Union Sq', 
    'Upper East Side North', 'Upper East Side South', 'Upper West Side South', 
    'Washington Heights South', 'West Chelsea/Hudson Yards', 'West Village', 
    'World Trade Center', 'Yorkville East', 'Yorkville West'
]

# Same entries as the list above, minus 'Washington Heights South'.
neighborhoods_from_central_park_to_south = [
    'Alphabet City', 'Battery Park', 'Battery Park City', 'Central Park',
    'Chinatown', 'Clinton East', 'Clinton West', 'East Chelsea', 
    'East Village', 'Financial District North', 'Financial District South', 
    'Flatiron', 'Hudson Sq', 'Garment District', 
    "Governor's Island/Ellis Island/Liberty Island", 'Gramercy', 
    'Greenwich Village North', 'Greenwich Village South', 'Kips Bay', 
    'Lenox Hill East', 'Lenox Hill West', 'Lincoln Square East', 
    'Lincoln Square West', 'Little Italy/NoLiTa', 'Lower East Side', 
    'Meatpacking/West Village West', 'Midtown Center', 'Midtown East', 
    'Midtown North', 'Midtown South', 'Murray Hill', 
    'Penn Station/Madison Sq West', 'Seaport', 'SoHo', 
    'Stuy Town/Peter Cooper Village', 'Sutton Place/Turtle Bay North', 
    'Times Sq/Theatre District', 'TriBeCa/Civic Center', 
    'Two Bridges/Seward Park', 'UN/Turtle Bay South', 'Union Sq', 
    'Upper East Side North', 'Upper East Side South', 'Upper West Side South', 
    'West Chelsea/Hudson Yards', 'West Village', 'World Trade Center', 
    'Yorkville East', 'Yorkville West'
]

# The previous list without these eight entries: 'Battery Park',
# 'Battery Park City', 'Financial District North', 'Financial District South',
# "Governor's Island/Ellis Island/Liberty Island", 'Seaport',
# 'Two Bridges/Seward Park' and 'World Trade Center'.
neighborhoods_central_park_to_north_of_battery_park = [
    'Alphabet City', 'Central Park', 'Chinatown', 'Clinton East', 'Clinton West', 
    'East Chelsea', 'East Village', 'Flatiron', 'Hudson Sq', 'Garment District', 
    'Gramercy', 'Greenwich Village North', 'Greenwich Village South', 'Kips Bay', 
    'Lenox Hill East', 'Lenox Hill West', 'Lincoln Square East', 'Lincoln Square West', 
    'Little Italy/NoLiTa', 'Lower East Side', 'Meatpacking/West Village West', 
    'Midtown Center', 'Midtown East', 'Midtown North', 'Midtown South', 'Murray Hill', 
    'Penn Station/Madison Sq West', 'SoHo', 'Stuy Town/Peter Cooper Village', 
    'Sutton Place/Turtle Bay North', 'Times Sq/Theatre District', 'TriBeCa/Civic Center', 
    'UN/Turtle Bay South', 'Union Sq', 'Upper East Side North', 'Upper East Side South', 
    'Upper West Side South', 'West Chelsea/Hudson Yards', 'West Village', 'Yorkville East', 
    'Yorkville West'
]

# Keep only trips that both start and end in Manhattan.
# This borough-only filter is the selection that was actually in effect: a
# zone-restricted selection used to be computed first and was then overwritten
# unused, costing a full extra filter + copy of the trip table.
# To restrict further, AND this mask with `PUzone` / `DOzone` `.isin(...)`
# on one of the zone-name lists defined above.
within_manhattan = (
    (df_trips['PUborough'] == 'Manhattan') &
    (df_trips['DOborough'] == 'Manhattan')
)
df_trips_manhattan = df_trips[within_manhattan].copy()

# Restrict to zones that occur both as a pickup and as a dropoff, so that the
# pickup and dropoff sides share one state space.
unique_pu_ids = set(df_trips_manhattan['PULocationID'].unique())
unique_do_ids = set(df_trips_manhattan['DOLocationID'].unique())

common_location_ids = unique_pu_ids.intersection(unique_do_ids)

df_trips_manhattan = df_trips_manhattan[
    (df_trips_manhattan['PULocationID'].isin(common_location_ids)) &
    (df_trips_manhattan['DOLocationID'].isin(common_location_ids))
].copy()

# Re-code the raw zone IDs as consecutive integers using ONE shared lookup.
# Factorizing the two columns independently numbers zones by order of first
# appearance in each column, so the same zone would get different codes as a
# pickup and as a dropoff and the location axes of the transition tensor would
# not refer to the same zones.
location_index = pd.Index(sorted(common_location_ids))
pu_id_mapping = location_index  # code k-1 -> original LocationID (pickup side)
do_id_mapping = location_index  # identical lookup for the dropoff side

# Every remaining ID is in `location_index`, so get_indexer never returns -1.
# Codes are stored 1-based; downstream code subtracts 1 when indexing.
df_trips_manhattan['PULocationID'] = location_index.get_indexer(df_trips_manhattan['PULocationID']) + 1
df_trips_manhattan['DOLocationID'] = location_index.get_indexer(df_trips_manhattan['DOLocationID']) + 1

### Period - ID

In [None]:
# PUhour / DOhour hold 4-hour bin indices (hour // 4), hence 24 / 4 = 6 periods.
n_periods = 6
# Location codes are 1-based. Size the location axis by the largest code found
# on either side so that every pickup AND dropoff index fits in the arrays.
_loc_codes = df_trips_manhattan[['PULocationID', 'DOLocationID']].to_numpy()
n_locations = int(_loc_codes.max()) if _loc_codes.size else 0

M_ten = np.zeros((n_periods, n_locations))  # Marginal distribution of pickup states
Q_ten = np.zeros((n_periods, n_locations, n_periods, n_locations))  # Joint distribution of transitions
P = np.zeros((n_periods, n_locations, n_periods, n_locations))  # Allocated but never filled in this cell

# Zero-based (period, location) indices of every trip's pickup and dropoff state.
pu_period_idx = df_trips_manhattan['PUhour'].to_numpy(dtype=int)
pu_loc_idx = df_trips_manhattan['PULocationID'].to_numpy(dtype=int) - 1
do_period_idx = df_trips_manhattan['DOhour'].to_numpy(dtype=int)
do_loc_idx = df_trips_manhattan['DOLocationID'].to_numpy(dtype=int) - 1

# Count trips per state / per transition in one vectorized pass. np.add.at
# accumulates repeated indices, so this matches a row-by-row `+= 1` loop
# without the cost of iterating over the whole trip table in Python.
np.add.at(M_ten, (pu_period_idx, pu_loc_idx), 1)
np.add.at(Q_ten, (pu_period_idx, pu_loc_idx, do_period_idx, do_loc_idx), 1)

# Normalize counts to probabilities (left as zeros when there are no trips).
M_ten_sum = M_ten.sum()
if M_ten_sum > 0:
    M_ten = M_ten / M_ten_sum

M_vec = M_ten.flatten()

Q_ten_sum = Q_ten.sum()
if Q_ten_sum > 0:
    Q_ten = Q_ten / Q_ten_sum

# Conditional distribution: divide each pickup state's slice by its total mass.
P_sum = Q_ten.sum(axis=(2, 3), keepdims=True)  # Mass of each pickup state
P_sum[P_sum == 0] = 1  # Unobserved pickup states keep an all-zero row instead of NaN
P_ten = Q_ten / P_sum

# Matrix views with (period, location) flattened into a single state index.
Q_mat = Q_ten.reshape(n_periods * n_locations, n_periods * n_locations)
P_mat = P_ten.reshape(n_periods * n_locations, n_periods * n_locations)

# 2 - Visualizations

In [None]:
# Side-by-side heatmaps of the full-data conditional and joint matrices.
fig, axes = plt.subplots(1, 2, figsize=(12, 6))

panels = (
    (P_mat, 'Conditional Transition Matrix (P_mat)'),
    (Q_mat, 'Joint Probability Matrix (Q_mat)'),
)
for axis, (matrix, heading) in zip(axes, panels):
    axis.imshow(matrix, cmap='viridis')
    axis.set_title(heading)
    axis.axis('off')

fig.tight_layout()
plt.show()

# 3 - Estimation (tests)

In [None]:
def get_chain(df, n, random_state=None, num_periods=None, num_locations=None):
    """Estimate the joint and conditional transition distributions from a random subsample.

    Parameters
    ----------
    df : pandas.DataFrame
        Trips with columns ``PUhour`` and ``DOhour`` (zero-based period
        indices) and ``PULocationID`` and ``DOLocationID`` (one-based location
        codes).
    n : int
        Number of trips to draw, without replacement.
    random_state : optional
        Forwarded to ``DataFrame.sample`` to make the draw reproducible.
        ``None`` (default) keeps the previous behaviour of using the global
        NumPy random state.
    num_periods, num_locations : int, optional
        Sizes of the period and location axes. Default to the notebook-level
        ``n_periods`` and ``n_locations``.

    Returns
    -------
    P_ten, P_mat, Q_ten, Q_mat : numpy.ndarray
        ``Q_ten`` has shape (periods, locations, periods, locations) and holds
        relative transition frequencies summing to 1. ``P_ten`` is ``Q_ten``
        with each pickup state's slice normalized to sum to 1 (states with no
        sampled pickups stay all-zero). ``*_mat`` are the same arrays reshaped
        to (periods * locations, periods * locations).
    """
    periods = n_periods if num_periods is None else num_periods
    locations = n_locations if num_locations is None else num_locations

    sample = df.sample(n=n, random_state=random_state)

    # Zero-based (period, location) indices of each sampled trip.
    pu_period_idx = sample['PUhour'].to_numpy(dtype=int)
    pu_loc_idx = sample['PULocationID'].to_numpy(dtype=int) - 1
    do_period_idx = sample['DOhour'].to_numpy(dtype=int)
    do_loc_idx = sample['DOLocationID'].to_numpy(dtype=int) - 1

    # Vectorized transition counts; np.add.at accumulates repeated indices,
    # matching a per-row `+= 1` without a Python-level loop over the rows.
    Q_ten = np.zeros((periods, locations, periods, locations))
    np.add.at(Q_ten, (pu_period_idx, pu_loc_idx, do_period_idx, do_loc_idx), 1)

    total = Q_ten.sum()
    if total > 0:
        Q_ten = Q_ten / total
    n_states = periods * locations
    Q_mat = Q_ten.reshape(n_states, n_states)

    state_mass = Q_ten.sum(axis=(2, 3), keepdims=True)
    state_mass[state_mass == 0] = 1  # avoid 0/0 for unobserved pickup states
    P_ten = Q_ten / state_mass
    P_mat = P_ten.reshape(n_states, n_states)
    return P_ten, P_mat, Q_ten, Q_mat

In [None]:
# Convert the full-data quantities to float32 torch tensors (same names).
P_mat, Q_mat, M_vec = (torch.tensor(arr).float() for arr in (P_mat, Q_mat, M_vec))
P_ten, Q_ten, M_ten = (torch.tensor(arr).float() for arr in (P_ten, Q_ten, M_ten))

num_trials = 5
N = torch.tensor(P_ten.shape[:2]) # No. states per dimension

mcs = []

# Reference ("true") tensors and matrices: one independent clone per trial.
P_tru = [P_ten.reshape(tuple(N.repeat(2))).clone() for _ in range(num_trials)]
Q_tru = [Q_ten.reshape(tuple(N.repeat(2))).clone() for _ in range(num_trials)]
P_1D_tru = [P_mat.clone() for _ in range(num_trials)]
Q_1D_tru = [Q_mat.clone() for _ in range(num_trials)]

In [None]:
# Empirical estimates from random subsamples of the Manhattan trips:
# `num_trials` repetitions at each of 5 log-spaced sample sizes (1e3 .. 1e5).
# NOTE(review): the random seed is only set in a later cell, so these draws are
# not reproducible on Restart & Run All -- consider seeding before this cell.
sampling_dims = np.logspace(3, 5, 5).astype(int)
P_1D_obs, Q_1D_obs = [[] for _ in range(num_trials)], [[] for _ in range(num_trials)]
P_obs, Q_obs = [[] for _ in range(num_trials)], [[] for _ in range(num_trials)]
for t in range(num_trials):
    for n in sampling_dims:
        # Deliberately NOT unpacked into P_ten / P_mat / Q_ten / Q_mat: those
        # names hold the full-data tensors, and overwriting them with a
        # subsample estimate corrupts any later (or re-run) cell that reads them.
        P_ten_n, P_mat_n, Q_ten_n, Q_mat_n = get_chain(df_trips_manhattan, n)

        P_1D_obs[t].append(torch.tensor(P_mat_n).float())
        Q_1D_obs[t].append(torch.tensor(Q_mat_n).float())
        P_obs[t].append(torch.tensor(P_ten_n).float())
        Q_obs[t].append(torch.tensor(Q_ten_n).float())

In [None]:
SEED = 1000
# Use half of the available cores, but never fewer than one: os.cpu_count()
# may return None, and on a single-core machine `1 // 2` would give 0, which
# joblib rejects as an n_jobs value.
num_cpus = max(1, (os.cpu_count() or 1) // 2)
# Seed both RNG libraries used in this notebook.
np.random.seed(SEED)
torch.manual_seed(SEED)
os.environ['OMP_NUM_THREADS'] = str(num_cpus)
verbose = False

In [None]:
# Low-rank tensor estimation
# One LowRankTensorEstimator is fitted per (trial, sample size) pair on the
# sampled joint tensors Q_obs, in parallel, with K = 10.
T_range = len(sampling_dims)  # number of sample sizes

# Thin wrapper so a single estimator call can be dispatched through joblib.
lrte_est_parallel = lambda lrte, Qh, lrt_args: lrte.estimate(Qh, lrt_args)

# Tolerances passed to the estimator (semantics defined by LowRankTensorEstimator).
eps_abs = 1e-6
eps_rel = 1e-6
eps_diff = 1e-6

# Argument template; the None entries are filled in just below.
lrt_args = {
    'K':None,
    'beta':None,
    'eps_abs':eps_abs,
    'eps_rel':eps_rel,
    'eps_diff':eps_diff,
    'max_itr':None,
    'verbose':verbose,
    'MARG_CONST':True,
    'ACCEL':True
}

lrte = [[LowRankTensorEstimator() for _ in range(T_range)] for _ in range(num_trials)]
# lrt_args['K'] = 50
lrt_args['K'] = 10
lrt_args['beta'] = .01 # .01
lrt_args['max_itr'] = 10_000 # 5_000
# Flat list of results, ordered trial-major (t outer, i inner).
results = Parallel(n_jobs=num_cpus)(delayed(lrte_est_parallel)( lrte[t][i],Q_obs[t][i],lrt_args ) for t in range(num_trials) for i in range(T_range))

# Unpack the flat list back into [trial][sample size] grids. Each result is
# indexed as a pair: element 0 exposes .P_1D / .Q_1D, element 1 is the
# diagnostics record that the plotting code below indexes by key.
c = 0
P_1D_lrt = [[None for _ in range(T_range)] for _ in range(num_trials)]
Q_1D_lrt = [[None for _ in range(T_range)] for _ in range(num_trials)]
res_lrt = [[None for _ in range(T_range)] for _ in range(num_trials)]
for t in range(num_trials):
    for i in range(T_range):
        P_1D_lrt[t][i] = results[c][0].P_1D
        Q_1D_lrt[t][i] = results[c][0].Q_1D
        res_lrt[t][i] = results[c][1]
        c+=1

# Inspect one LRT estimate: first trial, largest sample size.
# The index is -1 throughout; the diagnostics used to hard-code `i = 4`, which
# is only the last entry while there are exactly five sample sizes.
t = 0; i = -1

fig = plt.figure(figsize=(2*4,4)); ax = fig.subplots(1,2)
for a in ax:
    a.grid(1); a.set_axisbelow(1); a.axis('off')
ax[0].imshow(P_1D_lrt[t][i],'plasma'); ax[1].imshow(Q_1D_lrt[t][i],'plasma')
ax[0].set_title('LRT cond. PMF'); ax[1].set_title('LRT joint PMF')

# Squared frob_err of every estimate against the full-data matrices, [trial, sample size].
err_lrt_P = torch.tensor([[frob_err(P_1D_lrt[t][i],P_1D_tru[t])**2 for i in range(T_range)] for t in range(num_trials)])
err_lrt_Q = torch.tensor([[frob_err(Q_1D_lrt[t][i],Q_1D_tru[t])**2 for i in range(T_range)] for t in range(num_trials)])

# Solver diagnostics for the same (t, i): objective and variable difference per iteration.
fig = plt.figure(figsize=(2*5,4)); ax = fig.subplots(1,2)
for a in ax:
    a.grid(1); a.set_axisbelow(1); a.set_xlabel('Iterations')
ax[0].plot(res_lrt[t][i]['admm_obj'],'-',c=vib_qual['red'])
ax[1].plot(res_lrt[t][i]['admm_var'],'-',c=vib_qual['blue'])
ax[0].set_ylabel('Objective'); ax[1].set_ylabel('Variable difference')
fig.tight_layout()

# Residual traces: entries 0/1 are drawn solid on the primal/dual panels and
# entries 2/3 faint and dotted on top (presumably the matching thresholds --
# confirm against the estimator).
fig = plt.figure(figsize=(2*5,4)); ax = fig.subplots(1,2)
for a in ax:
    a.grid(1); a.set_axisbelow(1); a.set_xlabel('Iterations')
ax[0].plot(res_lrt[t][i]['admm_res'][0],'-',c=vib_qual['red'])
ax[0].plot(res_lrt[t][i]['admm_res'][2],':',c=vib_qual['red'],alpha=.3)
ax[1].plot(res_lrt[t][i]['admm_res'][1],'-',c=vib_qual['blue'])
ax[1].plot(res_lrt[t][i]['admm_res'][3],':',c=vib_qual['blue'],alpha=.3)
ax[0].set_ylabel('Primal residual'); ax[1].set_ylabel('Dual residual')
fig.tight_layout()

In [None]:
# norml1_err of the current LRT conditional estimates (the K = 10 run above)
# against the full-data matrix, averaged over trials: one value per sample size.
# Stored under its own name because P_1D_lrt is overwritten by the next run.
err_lrt_r10_P = torch.tensor([[norml1_err(P_1D_lrt[t][i], P_1D_tru[t]) for i in range(T_range)] for t in range(num_trials)]).mean(0)
err_lrt_r10_P

In [None]:
# Low-rank tensor estimation
# Second LRT run: same procedure as the previous LRT cell but with K = 20,
# tighter tolerances (1e-7) and a larger iteration cap. It overwrites
# P_1D_lrt / Q_1D_lrt / res_lrt, and the eps_* values set here are also the
# ones picked up by the matrix estimators in the following cells.
T_range = len(sampling_dims)  # number of sample sizes

# Thin wrapper so a single estimator call can be dispatched through joblib.
lrte_est_parallel = lambda lrte, Qh, lrt_args: lrte.estimate(Qh, lrt_args)

eps_abs = 1e-7
eps_rel = 1e-7
eps_diff = 1e-7

# Argument template; the None entries are filled in just below.
lrt_args = {
    'K':None,
    'beta':None,
    'eps_abs':eps_abs,
    'eps_rel':eps_rel,
    'eps_diff':eps_diff,
    'max_itr':None,
    'verbose':verbose,
    'MARG_CONST':True,
    'ACCEL':True
}

lrte = [[LowRankTensorEstimator() for _ in range(T_range)] for _ in range(num_trials)]
# lrt_args['K'] = 50
lrt_args['K'] = 20
lrt_args['beta'] = .01 # .01
lrt_args['max_itr'] = 50_000 # 5_000
# Flat list of results, ordered trial-major (t outer, i inner).
results = Parallel(n_jobs=num_cpus)(delayed(lrte_est_parallel)( lrte[t][i],Q_obs[t][i],lrt_args ) for t in range(num_trials) for i in range(T_range))

# Unpack the flat list back into [trial][sample size] grids. Each result is
# indexed as a pair: element 0 exposes .P_1D / .Q_1D, element 1 is the
# diagnostics record that the plotting code below indexes by key.
c = 0
P_1D_lrt = [[None for _ in range(T_range)] for _ in range(num_trials)]
Q_1D_lrt = [[None for _ in range(T_range)] for _ in range(num_trials)]
res_lrt = [[None for _ in range(T_range)] for _ in range(num_trials)]
for t in range(num_trials):
    for i in range(T_range):
        P_1D_lrt[t][i] = results[c][0].P_1D
        Q_1D_lrt[t][i] = results[c][0].Q_1D
        res_lrt[t][i] = results[c][1]
        c+=1

# Inspect one LRT estimate: first trial, largest sample size.
# The index is -1 throughout; the diagnostics used to hard-code `i = 4`, which
# is only the last entry while there are exactly five sample sizes.
t = 0; i = -1

# Colour limits come from the full-data reference of the SAME quantity.
# Previously both panels used the range of `P_mat`: that name is overwritten by
# the sampling loop, and a conditional-PMF range (row entries up to 1) flattens
# the joint PMF, whose entries sum to 1 over the whole matrix.
vmin = P_1D_tru[t].min().item()
vmax = P_1D_tru[t].max().item()
q_vmin = Q_1D_tru[t].min().item()
q_vmax = Q_1D_tru[t].max().item()

fig = plt.figure(figsize=(2*4,4)); ax = fig.subplots(1,2)
for a in ax:
    a.grid(1); a.set_axisbelow(1); a.axis('off')
ax[0].imshow(P_1D_lrt[t][i],'plasma', vmin=vmin, vmax=vmax)
ax[1].imshow(Q_1D_lrt[t][i],'plasma', vmin=q_vmin, vmax=q_vmax)
ax[0].set_title('LRT cond. PMF'); ax[1].set_title('LRT joint PMF')

# Squared frob_err of every estimate against the full-data matrices, [trial, sample size].
err_lrt_P = torch.tensor([[frob_err(P_1D_lrt[t][i],P_1D_tru[t])**2 for i in range(T_range)] for t in range(num_trials)])
err_lrt_Q = torch.tensor([[frob_err(Q_1D_lrt[t][i],Q_1D_tru[t])**2 for i in range(T_range)] for t in range(num_trials)])

# Solver diagnostics for the same (t, i): objective and variable difference per iteration.
fig = plt.figure(figsize=(2*5,4)); ax = fig.subplots(1,2)
for a in ax:
    a.grid(1); a.set_axisbelow(1); a.set_xlabel('Iterations')
ax[0].plot(res_lrt[t][i]['admm_obj'],'-',c=vib_qual['red'])
ax[1].plot(res_lrt[t][i]['admm_var'],'-',c=vib_qual['blue'])
ax[0].set_ylabel('Objective'); ax[1].set_ylabel('Variable difference')
fig.tight_layout()

# Residual traces: entries 0/1 are drawn solid on the primal/dual panels and
# entries 2/3 faint and dotted on top (presumably the matching thresholds --
# confirm against the estimator).
fig = plt.figure(figsize=(2*5,4)); ax = fig.subplots(1,2)
for a in ax:
    a.grid(1); a.set_axisbelow(1); a.set_xlabel('Iterations')
ax[0].plot(res_lrt[t][i]['admm_res'][0],'-',c=vib_qual['red'])
ax[0].plot(res_lrt[t][i]['admm_res'][2],':',c=vib_qual['red'],alpha=.3)
ax[1].plot(res_lrt[t][i]['admm_res'][1],'-',c=vib_qual['blue'])
ax[1].plot(res_lrt[t][i]['admm_res'][3],':',c=vib_qual['blue'],alpha=.3)
ax[0].set_ylabel('Primal residual'); ax[1].set_ylabel('Dual residual')
fig.tight_layout()

In [None]:
# norml1_err of the current LRT conditional estimates (the K = 20 run above)
# against the full-data matrix, averaged over trials: one value per sample size.
err_lrt_r20_P_alt = torch.tensor([[norml1_err(P_1D_lrt[t][i], P_1D_tru[t]) for i in range(T_range)] for t in range(num_trials)]).mean(0)
err_lrt_r20_P_alt

In [None]:
# Nuclear norm matrix estimation
# One NucNormMatrixEstimator is fitted per (trial, sample size) pair on the
# sampled conditional matrices P_1D_obs, in parallel.

# Thin wrapper so a single estimator call can be dispatched through joblib.
nnlrme_est_parallel = lambda nnlrm,Ph,args: nnlrm.estimate(Ph,args)

# NOTE: eps_abs / eps_rel / eps_diff are not set in this cell; they carry over
# from whichever LRT cell ran last (1e-7 when the notebook is run top to bottom).
nnlrm_args = {
    'beta':None,
    'gamma':None,
    'eps_abs':eps_abs,
    'eps_rel':eps_rel,
    'eps_diff':eps_diff,
    'max_itr':None,
    'verbose':False
}

nnlrme = [[NucNormMatrixEstimator() for _ in range(T_range)] for _ in range(num_trials)]
nnlrm_args['beta'] = 10
nnlrm_args['gamma'] = 10
nnlrm_args['max_itr'] = 5000
# Flat list of results, ordered trial-major (t outer, i inner).
results = Parallel(n_jobs=num_cpus)(delayed(nnlrme_est_parallel)( nnlrme[t][i],P_1D_obs[t][i],nnlrm_args ) for t in range(num_trials) for i in range(T_range))

# Unpack into [trial][sample size] grids: element 0 of each result exposes
# .P / .Q, element 1 is the diagnostics record.
c = 0
P_1D_nnlrm = [[None for _ in range(T_range)] for _ in range(num_trials)]
Q_1D_nnlrm = [[None for _ in range(T_range)] for _ in range(num_trials)]
res_nnlrm = [[None for _ in range(T_range)] for _ in range(num_trials)]
for t in range(num_trials):
    for i in range(T_range):
        P_1D_nnlrm[t][i] = results[c][0].P
        Q_1D_nnlrm[t][i] = results[c][0].Q
        res_nnlrm[t][i] = results[c][1]
        c+=1

# Inspect one NNLRM estimate: first trial, last sample size.
t = 0; i = -1

fig = plt.figure(figsize=(2*4,4))
ax = fig.subplots(1,2)
for a in ax:
    a.grid(1)
    a.set_axisbelow(1)
    a.axis('off')
ax[0].imshow(P_1D_nnlrm[t][i],'plasma')
ax[0].set_title('NNLRM cond. PMF')
ax[1].imshow(Q_1D_nnlrm[t][i],'plasma')
ax[1].set_title('NNLRM joint PMF')

# Squared frob_err against the full-data matrices, laid out [trial, sample size].
err_nnlrm_P = torch.tensor([[frob_err(P_1D_nnlrm[t][i],P_1D_tru[t])**2 for i in range(T_range)] for t in range(num_trials)])
err_nnlrm_Q = torch.tensor([[frob_err(Q_1D_nnlrm[t][i],Q_1D_tru[t])**2 for i in range(T_range)] for t in range(num_trials)])

t = 0; i = -1
diagnostics = res_nnlrm[t][i]

# Objective and variable difference per iteration.
fig = plt.figure(figsize=(2*5,4))
ax = fig.subplots(1,2)
for a in ax:
    a.grid(1)
    a.set_axisbelow(1)
ax[0].plot(diagnostics['admm_obj'],'-',c=vib_qual['red'])
ax[1].plot(diagnostics['admm_var'],'-',c=vib_qual['blue'])
for a in ax:
    a.set_xlabel('Iterations')
ax[0].set_ylabel('Objective')
ax[1].set_ylabel('Variable difference')
fig.tight_layout()

# Residual traces (entries 0/1 solid, entries 2/3 faint dotted).
fig = plt.figure(figsize=(2*5,4))
ax = fig.subplots(1,2)
for a in ax:
    a.grid(1)
    a.set_axisbelow(1)
ax[0].plot(diagnostics['admm_res'][0],'-',c=vib_qual['red'])
ax[0].plot(diagnostics['admm_res'][2],':',c=vib_qual['red'],alpha=.3)
ax[1].plot(diagnostics['admm_res'][1],'-',c=vib_qual['blue'])
ax[1].plot(diagnostics['admm_res'][3],':',c=vib_qual['blue'],alpha=.3)
for a in ax:
    a.set_xlabel('Iterations')
ax[0].set_ylabel('Primal residual')
ax[1].set_ylabel('Dual residual')
fig.tight_layout()


In [None]:
# Low-rank matrix estimation via DC algorithm
# One DCLowRankMatrixEstimator is fitted per (trial, sample size) pair on the
# sampled conditional matrices P_1D_obs, in parallel.

# Thin wrapper so a single estimator call can be dispatched through joblib.
dclrme_est_parallel = lambda dclrm,Ph,args: dclrm.estimate(Ph,args)

# NOTE: eps_abs / eps_rel / eps_diff are not set in this cell; they carry over
# from whichever LRT cell ran last (1e-7 when the notebook is run top to bottom).
dclrm_args = {
    'K':None,
    'c':None,
    'alpha':None,
    'beta':None,
    'eps_abs':eps_abs,
    'eps_rel':eps_rel,
    'eps_diff':eps_diff,
    'max_itr':None,
    'admm_itr':1,
    'verbose':False
}

dclrme = [[DCLowRankMatrixEstimator() for _ in range(T_range)] for _ in range(num_trials)]
dclrm_args['K'] = 10
dclrm_args['c'] = 10
dclrm_args['alpha'] = .5
dclrm_args['beta'] = 1
dclrm_args['max_itr'] = 500
dclrm_args['admm_itr'] = 1
# Flat list of results, ordered trial-major (t outer, i inner).
results = Parallel(n_jobs=num_cpus)(delayed(dclrme_est_parallel)( dclrme[t][i],P_1D_obs[t][i],dclrm_args ) for t in range(num_trials) for i in range(T_range))

# Unpack into [trial][sample size] grids: element 0 of each result exposes
# .P / .Q, element 1 is the diagnostics record.
# (`c` here is the running position in `results`, unrelated to dclrm_args['c'].)
c = 0
P_1D_dclrm = [[None for _ in range(T_range)] for _ in range(num_trials)]
Q_1D_dclrm = [[None for _ in range(T_range)] for _ in range(num_trials)]
res_dclrm = [[None for _ in range(T_range)] for _ in range(num_trials)]
for t in range(num_trials):
    for i in range(T_range):
        P_1D_dclrm[t][i] = results[c][0].P
        Q_1D_dclrm[t][i] = results[c][0].Q
        res_dclrm[t][i] = results[c][1]
        c+=1

# Inspect one DCLRM estimate: first trial, last sample size.
t = 0; i = -1

fig = plt.figure(figsize=(2*4,4))
ax = fig.subplots(1,2)
for a in ax:
    a.grid(1)
    a.set_axisbelow(1)
    a.axis('off')
ax[0].imshow(P_1D_dclrm[t][i],'plasma')
ax[0].set_title('DCLRM cond. PMF')
ax[1].imshow(Q_1D_dclrm[t][i],'plasma')
ax[1].set_title('DCLRM joint PMF')

# Squared frob_err against the full-data matrices, laid out [trial, sample size].
err_dclrm_P = torch.tensor([[frob_err(P_1D_dclrm[t][i],P_1D_tru[t])**2 for i in range(T_range)] for t in range(num_trials)])
err_dclrm_Q = torch.tensor([[frob_err(Q_1D_dclrm[t][i],Q_1D_tru[t])**2 for i in range(T_range)] for t in range(num_trials)])

t = 0; i = -1
diagnostics = res_dclrm[t][i]

# Objective and variable difference per iteration.
fig = plt.figure(figsize=(2*5,4))
ax = fig.subplots(1,2)
for a in ax:
    a.grid(1)
    a.set_axisbelow(1)
ax[0].plot(diagnostics['admm_obj'],'-',c=vib_qual['red'])
ax[1].plot(diagnostics['admm_var'],'-',c=vib_qual['blue'])
for a in ax:
    a.set_xlabel('Iterations')
ax[0].set_ylabel('Objective')
ax[1].set_ylabel('Variable difference')
fig.tight_layout()


In [None]:
# Spectral low-rank matrix estimation
# One SpecLowRankMatrixEstimator is fitted per (trial, sample size) pair on the
# sampled joint matrices Q_1D_obs, in parallel, with K_slrme passed as K.

# Thin wrapper so a single estimator call can be dispatched through joblib.
slrme_est_parallel = lambda slrm,Qh,K: slrm.estimate(Qh,K)

slrme = [[SpecLowRankMatrixEstimator() for _ in range(T_range)] for _ in range(num_trials)]
K_slrme = 20
# Serial equivalent of the parallel call below, kept for reference:
# results = [[slrme[t][i].estimate(Q_1D_obs[t][i],K_slrme) for i in range(T_range)] for t in range(num_trials)]
# Flat list of results, ordered trial-major (t outer, i inner).
results = Parallel(n_jobs=num_cpus)(delayed(slrme_est_parallel)( slrme[t][i],Q_1D_obs[t][i],K_slrme ) for t in range(num_trials) for i in range(T_range))

# Unpack into [trial][sample size] grids: element 0 of each result exposes
# .P / .Q, element 1 is stored as the per-run record.
# NOTE: `t` and `i` keep their final loop values after this block.
c = 0
P_1D_slrm = [[None for _ in range(T_range)] for _ in range(num_trials)]
Q_1D_slrm = [[None for _ in range(T_range)] for _ in range(num_trials)]
res_slrm = [[None for _ in range(T_range)] for _ in range(num_trials)]
for t in range(num_trials):
    for i in range(T_range):
        P_1D_slrm[t][i] = results[c][0].P
        Q_1D_slrm[t][i] = results[c][0].Q
        res_slrm[t][i] = results[c][1]
        c+=1

# Inspect one spectral estimate. The trial is pinned explicitly (as in the
# other estimator cells) instead of relying on whatever value `t` was left with
# by the unpacking loop above; `i = 0` selects the first (smallest) sample size.
# A preceding `i = -1` was overwritten before use and has been dropped.
t = 0; i = 0
fig = plt.figure(figsize=(2*4,4)); ax = fig.subplots(1,2)
for a in ax:
    a.grid(1); a.set_axisbelow(1); a.axis('off')
ax[0].imshow(P_1D_slrm[t][i],'plasma'); ax[1].imshow(Q_1D_slrm[t][i],'plasma')
ax[0].set_title('slrm cond. PMF'); ax[1].set_title('slrm joint PMF')

# Squared frob_err against the full-data matrices, laid out [trial, sample size].
err_slrm_P = torch.tensor([[frob_err(P_1D_slrm[t][i],P_1D_tru[t])**2 for i in range(T_range)] for t in range(num_trials)])
err_slrm_Q = torch.tensor([[frob_err(Q_1D_slrm[t][i],Q_1D_tru[t])**2 for i in range(T_range)] for t in range(num_trials)])


In [None]:
# Computing the rank of conditional PMFs
# Estimates in a fixed method order: empirical, LRT, NNLRM, DCLRM, spectral.
P_estimates = (P_1D_obs, P_1D_lrt, P_1D_nnlrm, P_1D_dclrm, P_1D_slrm)

# Numerical rank of every estimate, laid out [trial, sample size].
rank_obs, rank_lrt, rank_nnlrm, rank_dclrm, rank_slrm = (
    torch.tensor([[torch.linalg.matrix_rank(est[t][i]) for i in range(T_range)] for t in range(num_trials)]).to(torch.float)
    for est in P_estimates
)

# Same layout, using the `erank` helper.
erank_obs, erank_lrt, erank_nnlrm, erank_dclrm, erank_slrm = (
    torch.tensor([[erank(est[t][i]) for i in range(T_range)] for t in range(num_trials)]).to(torch.float)
    for est in P_estimates
)

# Reference values from the full-data conditional matrix, one per trial.
erank_tru = torch.tensor([erank(P_1D_tru[t]) for t in range(num_trials)]).to(torch.float)
rank_tru = torch.tensor([torch.linalg.matrix_rank(P_1D_tru[t]) for t in range(num_trials)]).to(torch.float)

# Plot rank
clr_list = [muted_qual[shade] for shade in ('indigo', 'rose', 'sand', 'purple', 'teal', 'wine')]
methods = ['Emp.','LRT','NNLRM','DCLRM','SpecLRM','True']
mkr_list = ['o','X','d','^','P','s']
zorders = (4, 5, 4, 4, 4)  # LRT markers are drawn on top of the others

rank_panels = (
    ('Rank', rank_tru, (rank_obs, rank_lrt, rank_nnlrm, rank_dclrm, rank_slrm)),
    ('Effective erank', erank_tru, (erank_obs, erank_lrt, erank_nnlrm, erank_dclrm, erank_slrm)),
)
for y_label, reference, curves in rank_panels:
    fig = plt.figure()
    ax = fig.subplots()
    ax.grid(1)
    ax.set_axisbelow(1)
    # Trial-averaged reference value as a horizontal line.
    ax.semilogx(sampling_dims, [reference.mean(0)]*T_range, '-', c=clr_list[-1], label=methods[-1], markersize=10, zorder=3)
    for k, curve in enumerate(curves):
        ax.semilogx(sampling_dims, curve.mean(0), mkr_list[k], c=clr_list[k], label=methods[k], markersize=10, zorder=zorders[k])
    ax.set_xlabel('No. samples')
    ax.set_ylabel(y_label)
    ax.legend()

In [None]:
# Compute error of conditional PMFs
# Metric: squared normfrob_err against the full-data matrices, [trial, sample size].
# Estimates are listed in a fixed method order: empirical, LRT, NNLRM, DCLRM, spectral.
P_estimates = (P_1D_obs, P_1D_lrt, P_1D_nnlrm, P_1D_dclrm, P_1D_slrm)
Q_estimates = (Q_1D_obs, Q_1D_lrt, Q_1D_nnlrm, Q_1D_dclrm, Q_1D_slrm)

err_obs_P, err_lrt_P, err_nnlrm_P, err_dclrm_P, err_slrm_P = (
    torch.tensor([[normfrob_err(est[t][i],P_1D_tru[t])**2 for i in range(T_range)] for t in range(num_trials)])
    for est in P_estimates
)
err_obs_Q, err_lrt_Q, err_nnlrm_Q, err_dclrm_Q, err_slrm_Q = (
    torch.tensor([[normfrob_err(est[t][i],Q_1D_tru[t])**2 for i in range(T_range)] for t in range(num_trials)])
    for est in Q_estimates
)

# Plot error
clr_list = [muted_qual[shade] for shade in ('indigo', 'rose', 'sand', 'purple', 'teal', 'wine')]
methods = ['Emp.','LRT','NNLRM','DCLRM','SpecLRM','True']
mkr_list = ['o','X','d','^','P','s']
zorders = (4, 5, 4, 4, 4)  # LRT markers are drawn on top of the others

err_curves = (err_obs_P, err_lrt_P, err_nnlrm_P, err_dclrm_P, err_slrm_P)
# Two figures: all methods, then the same without the empirical curve (index 0).
for first_method in (0, 1):
    fig = plt.figure()
    ax = fig.subplots()
    ax.grid(1)
    ax.set_axisbelow(1)
    for k in range(first_method, len(err_curves)):
        ax.semilogx(sampling_dims, err_curves[k].mean(0), mkr_list[k], c=clr_list[k], label=methods[k], markersize=10, zorder=zorders[k])
    ax.set_xlabel('No. samples')
    ax.set_ylabel('Error')
    ax.legend()

In [None]:
# Compute error of conditional PMFs
# Metric: l1_err against the full-data matrices, [trial, sample size].
# Estimates are listed in a fixed method order: empirical, LRT, NNLRM, DCLRM, spectral.
P_estimates = (P_1D_obs, P_1D_lrt, P_1D_nnlrm, P_1D_dclrm, P_1D_slrm)
Q_estimates = (Q_1D_obs, Q_1D_lrt, Q_1D_nnlrm, Q_1D_dclrm, Q_1D_slrm)

err_obs_P, err_lrt_P, err_nnlrm_P, err_dclrm_P, err_slrm_P = (
    torch.tensor([[l1_err(est[t][i],P_1D_tru[t]) for i in range(T_range)] for t in range(num_trials)])
    for est in P_estimates
)
err_obs_Q, err_lrt_Q, err_nnlrm_Q, err_dclrm_Q, err_slrm_Q = (
    torch.tensor([[l1_err(est[t][i],Q_1D_tru[t]) for i in range(T_range)] for t in range(num_trials)])
    for est in Q_estimates
)

# Plot error
clr_list = [muted_qual[shade] for shade in ('indigo', 'rose', 'sand', 'purple', 'teal', 'wine')]
methods = ['Emp.','LRT','NNLRM','DCLRM','SpecLRM','True']
mkr_list = ['o','X','d','^','P','s']
zorders = (4, 5, 4, 4, 4)  # LRT markers are drawn on top of the others

err_curves = (err_obs_P, err_lrt_P, err_nnlrm_P, err_dclrm_P, err_slrm_P)
# Two figures: all methods, then the same without the empirical curve (index 0).
for first_method in (0, 1):
    fig = plt.figure()
    ax = fig.subplots()
    ax.grid(1)
    ax.set_axisbelow(1)
    for k in range(first_method, len(err_curves)):
        ax.semilogx(sampling_dims, err_curves[k].mean(0), mkr_list[k], c=clr_list[k], label=methods[k], markersize=10, zorder=zorders[k])
    ax.set_xlabel('No. samples')
    ax.set_ylabel('Error')
    ax.legend()

In [None]:
# Compute error of conditional PMFs
# Metric: sin_err against the full-data matrices, [trial, sample size].
# Estimates are listed in a fixed method order: empirical, LRT, NNLRM, DCLRM, spectral.
P_estimates = (P_1D_obs, P_1D_lrt, P_1D_nnlrm, P_1D_dclrm, P_1D_slrm)
Q_estimates = (Q_1D_obs, Q_1D_lrt, Q_1D_nnlrm, Q_1D_dclrm, Q_1D_slrm)

err_obs_P, err_lrt_P, err_nnlrm_P, err_dclrm_P, err_slrm_P = (
    torch.tensor([[sin_err(est[t][i],P_1D_tru[t]) for i in range(T_range)] for t in range(num_trials)])
    for est in P_estimates
)
err_obs_Q, err_lrt_Q, err_nnlrm_Q, err_dclrm_Q, err_slrm_Q = (
    torch.tensor([[sin_err(est[t][i],Q_1D_tru[t]) for i in range(T_range)] for t in range(num_trials)])
    for est in Q_estimates
)

# Plot error
clr_list = [muted_qual[shade] for shade in ('indigo', 'rose', 'sand', 'purple', 'teal', 'wine')]
methods = ['Emp.','LRT','NNLRM','DCLRM','SpecLRM','True']
mkr_list = ['o','X','d','^','P','s']
zorders = (4, 5, 4, 4, 4)  # LRT markers are drawn on top of the others

err_curves = (err_obs_P, err_lrt_P, err_nnlrm_P, err_dclrm_P, err_slrm_P)
# Two figures: all methods, then the same without the empirical curve (index 0).
for first_method in (0, 1):
    fig = plt.figure()
    ax = fig.subplots()
    ax.grid(1)
    ax.set_axisbelow(1)
    for k in range(first_method, len(err_curves)):
        ax.semilogx(sampling_dims, err_curves[k].mean(0), mkr_list[k], c=clr_list[k], label=methods[k], markersize=10, zorder=zorders[k])
    ax.set_xlabel('No. samples')
    ax.set_ylabel('Error')
    ax.legend()

In [None]:
def compute_filtered_error(observed, true):
    """Squared `normfrob_err` between an estimate and the truth on the truth's support.

    Entries of `observed` at positions where `true` is exactly zero are
    zeroed out before the comparison, so they contribute nothing to the error.

    Args:
        observed: Estimated array/tensor, same shape as `true`.
        true: Reference array/tensor.

    Returns:
        `normfrob_err(masked_observed, true) ** 2`. `normfrob_err` is defined
        elsewhere (presumably a normalised Frobenius-norm error — confirm).
    """
    support = true != 0
    masked_observed = observed * support
    frob = normfrob_err(masked_observed, true)
    return frob ** 2

# Calculate errors for P and Q matrices, considering only non-zero true values.
# Every error tensor has one row per trial and one column per sample size.
def _masked_error_grid(estimates, truths):
    """Apply `compute_filtered_error` to every (trial, sample size) estimate."""
    return torch.tensor([
        [compute_filtered_error(estimates[t][i], truths[t]) for i in range(T_range)]
        for t in range(num_trials)
    ])

err_obs_P = _masked_error_grid(P_1D_obs, P_1D_tru)
err_obs_Q = _masked_error_grid(Q_1D_obs, Q_1D_tru)
err_lrt_P = _masked_error_grid(P_1D_lrt, P_1D_tru)
err_lrt_Q = _masked_error_grid(Q_1D_lrt, Q_1D_tru)
err_nnlrm_P = _masked_error_grid(P_1D_nnlrm, P_1D_tru)
err_nnlrm_Q = _masked_error_grid(Q_1D_nnlrm, Q_1D_tru)
err_dclrm_P = _masked_error_grid(P_1D_dclrm, P_1D_tru)
err_dclrm_Q = _masked_error_grid(Q_1D_dclrm, Q_1D_tru)
err_slrm_P = _masked_error_grid(P_1D_slrm, P_1D_tru)
err_slrm_Q = _masked_error_grid(Q_1D_slrm, Q_1D_tru)


# Plot error
# Colour / label / marker lists share one index per method (index 5, 'True', is not drawn).
clr_list = [muted_qual[name] for name in ('indigo', 'rose', 'sand', 'purple', 'teal', 'wine')]
methods = ['Emp.', 'LRT', 'NNLRM', 'DCLRM', 'SpecLRM', 'True']
mkr_list = ['o', 'X', 'd', '^', 'P', 's']

# Error tensors for P in the same order as `methods`.
curves_P = (err_obs_P, err_lrt_P, err_nnlrm_P, err_dclrm_P, err_slrm_P)

# Figure 1: all five estimators; the LRT curve (index 1) is drawn on top.
fig = plt.figure(); ax = fig.subplots(); ax.grid(1); ax.set_axisbelow(1)
for k, err_k in enumerate(curves_P):
    ax.semilogx(sampling_dims, err_k.mean(0), mkr_list[k], c=clr_list[k],
                label=methods[k], markersize=10, zorder=5 if k == 1 else 4)
ax.set_xlabel('No. samples'); ax.set_ylabel('Error'); ax.legend()

# Figure 2: same curves without the empirical estimator.
fig = plt.figure(); ax = fig.subplots(); ax.grid(1); ax.set_axisbelow(1)
for k, err_k in enumerate(curves_P[1:], start=1):
    ax.semilogx(sampling_dims, err_k.mean(0), mkr_list[k], c=clr_list[k],
                label=methods[k], markersize=10, zorder=5 if k == 1 else 4)
ax.set_xlabel('No. samples'); ax.set_ylabel('Error'); ax.legend()

In [None]:
def compute_filtered_error(observed, true):
    """`l1_err` between an estimate and the truth, restricted to the truth's support.

    Positions where `true` equals zero are zeroed in `observed` before the
    error is evaluated, so they do not contribute.

    Args:
        observed: Estimated array/tensor, same shape as `true`.
        true: Reference array/tensor.

    Returns:
        The value of `l1_err(masked_observed, true)`. `l1_err` is defined
        elsewhere (presumably an entry-wise L1 error — confirm).
    """
    nonzero_truth = true != 0
    masked_observed = observed * nonzero_truth
    return l1_err(masked_observed, true)

# Calculate errors for P and Q matrices, considering only non-zero true values.
# Result layout: rows index trials, columns index the entries of `sampling_dims`.
def _masked_error_grid(estimates, truths):
    """Evaluate `compute_filtered_error` for each trial and each sample size."""
    rows = []
    for t in range(num_trials):
        rows.append([compute_filtered_error(estimates[t][i], truths[t]) for i in range(T_range)])
    return torch.tensor(rows)

err_obs_P, err_obs_Q = _masked_error_grid(P_1D_obs, P_1D_tru), _masked_error_grid(Q_1D_obs, Q_1D_tru)
err_lrt_P, err_lrt_Q = _masked_error_grid(P_1D_lrt, P_1D_tru), _masked_error_grid(Q_1D_lrt, Q_1D_tru)
err_nnlrm_P, err_nnlrm_Q = _masked_error_grid(P_1D_nnlrm, P_1D_tru), _masked_error_grid(Q_1D_nnlrm, Q_1D_tru)
err_dclrm_P, err_dclrm_Q = _masked_error_grid(P_1D_dclrm, P_1D_tru), _masked_error_grid(Q_1D_dclrm, Q_1D_tru)
err_slrm_P, err_slrm_Q = _masked_error_grid(P_1D_slrm, P_1D_tru), _masked_error_grid(Q_1D_slrm, Q_1D_tru)


# Plot error
# One colour, label and marker per method; the sixth entry ('True') is unused below.
clr_list = [muted_qual[name] for name in ('indigo', 'rose', 'sand', 'purple', 'teal', 'wine')]
methods = ['Emp.', 'LRT', 'NNLRM', 'DCLRM', 'SpecLRM', 'True']
mkr_list = ['o', 'X', 'd', '^', 'P', 's']

# (method index, error tensor) pairs for the P estimates.
curves_P = list(zip(range(5), (err_obs_P, err_lrt_P, err_nnlrm_P, err_dclrm_P, err_slrm_P)))

# Figure 1: every estimator, averaged over trials, against the number of samples.
fig = plt.figure(); ax = fig.subplots(); ax.grid(1); ax.set_axisbelow(1)
for k, err_k in curves_P:
    ax.semilogx(sampling_dims, err_k.mean(0), mkr_list[k], c=clr_list[k],
                label=methods[k], markersize=10, zorder=5 if k == 1 else 4)
ax.set_xlabel('No. samples'); ax.set_ylabel('Error'); ax.legend()

# Figure 2: the empirical estimator is left out.
fig = plt.figure(); ax = fig.subplots(); ax.grid(1); ax.set_axisbelow(1)
for k, err_k in curves_P[1:]:
    ax.semilogx(sampling_dims, err_k.mean(0), mkr_list[k], c=clr_list[k],
                label=methods[k], markersize=10, zorder=5 if k == 1 else 4)
ax.set_xlabel('No. samples'); ax.set_ylabel('Error'); ax.legend()

# 4 - Estimation (Experiments)

In [None]:
def get_chain(df, n, random_state=None):
    """Estimate an empirical (period, location) Markov chain from `n` sampled trips.

    A state is a pair (time-period index, location index). Each sampled trip
    contributes one transition from its pickup state to its dropoff state.

    Relies on the notebook-level globals `n_periods` and `n_locations` for the
    tensor dimensions.

    Args:
        df: Trips table with columns 'PUhour', 'PULocationID', 'DOhour' and
            'DOLocationID'. 'PUhour'/'DOhour' are used directly as period
            indices (the notebook builds them as `hour // 4`, i.e. 0-5, not
            0-23). Location IDs are assumed to be 1-indexed.
        n: Number of rows to draw without replacement via `df.sample`.
        random_state: Forwarded to `df.sample`; the default `None` keeps the
            original behaviour of drawing from NumPy's global RNG.

    Returns:
        Tuple `(P_ten, P_mat, Q_ten, Q_mat)` of NumPy arrays:
        - `Q_ten`: joint frequency of (pickup state, dropoff state), shape
          (n_periods, n_locations, n_periods, n_locations), summing to 1 when
          at least one trip was counted.
        - `Q_mat`: `Q_ten` reshaped to a square matrix over flattened states.
        - `P_ten`: `Q_ten` normalised over the dropoff axes, i.e. conditional
          transition frequencies; pickup states that were never observed keep
          an all-zero row.
        - `P_mat`: `P_ten` reshaped to a square matrix over flattened states.
    """
    trips = df.sample(n=n, random_state=random_state)

    # Integer state indices for every sampled trip (location IDs shifted to 0-based).
    pu_period_idx = trips['PUhour'].astype(int).to_numpy()
    pu_loc_idx = trips['PULocationID'].astype(int).to_numpy() - 1
    do_period_idx = trips['DOhour'].astype(int).to_numpy()
    do_loc_idx = trips['DOLocationID'].astype(int).to_numpy() - 1

    # Joint transition counts. np.add.at accumulates repeated index tuples,
    # which plain fancy-index assignment (Q[idx] += 1) would not.
    Q_ten = np.zeros((n_periods, n_locations, n_periods, n_locations))
    np.add.at(Q_ten, (pu_period_idx, pu_loc_idx, do_period_idx, do_loc_idx), 1)

    Q_ten_sum = Q_ten.sum()
    if Q_ten_sum > 0:
        Q_ten = Q_ten / Q_ten_sum
    Q_mat = Q_ten.reshape(n_periods * n_locations, n_periods * n_locations)

    # Condition on the pickup state; unseen pickup states divide by 1 so their
    # rows stay zero instead of producing NaNs.
    P_sum = Q_ten.sum(axis=(2, 3), keepdims=True)
    P_sum[P_sum == 0] = 1
    P_ten = Q_ten / P_sum
    P_mat = P_ten.reshape(n_periods * n_locations, n_periods * n_locations)
    return P_ten, P_mat, Q_ten, Q_mat

In [None]:
# Convert the ground-truth chain (matrix and tensor forms) to float32 torch tensors.
P_mat, Q_mat, M_vec = (torch.tensor(arr).float() for arr in (P_mat, Q_mat, M_vec))
P_ten, Q_ten, M_ten = (torch.tensor(arr).float() for arr in (P_ten, Q_ten, M_ten))

num_trials = 5
N = torch.tensor(P_ten.shape[:2]) # No. states per dimension

mcs = []

# Every trial is compared against its own copy of the same ground truth.
P_tru = [P_ten.reshape(tuple(N.repeat(2))).clone() for _ in range(num_trials)]
Q_tru = [Q_ten.reshape(tuple(N.repeat(2))).clone() for _ in range(num_trials)]
P_1D_tru = [P_mat.clone() for _ in range(num_trials)]
Q_1D_tru = [Q_mat.clone() for _ in range(num_trials)]

In [None]:
# Sample sizes: 5 log-spaced values between 1e3 and 1e5 trips.
sampling_dims = np.logspace(3, 5, 5).astype(int)

# Empirical estimates indexed as [trial][sample-size index]; the `_1D` lists hold
# the matrix form, the others the 4-way tensor form.
P_1D_obs, Q_1D_obs = [[] for _ in range(num_trials)], [[] for _ in range(num_trials)]
P_obs, Q_obs = [[] for _ in range(num_trials)], [[] for _ in range(num_trials)]

# NOTE(review): get_chain samples with the global NumPy RNG, and the only visible
# np.random.seed call sits in a later cell — confirm the draw is seeded as intended.
for t in range(num_trials):
    for n in sampling_dims:
        # Use dedicated names: unpacking into P_ten / P_mat / Q_ten / Q_mat would
        # overwrite the ground-truth globals of the same name with the last sample.
        P_ten_obs, P_mat_obs, Q_ten_obs, Q_mat_obs = get_chain(df_trips_manhattan, n)

        P_1D_obs[t].append(torch.tensor(P_mat_obs).float())
        Q_1D_obs[t].append(torch.tensor(Q_mat_obs).float())
        P_obs[t].append(torch.tensor(P_ten_obs).float())
        Q_obs[t].append(torch.tensor(Q_ten_obs).float())

In [None]:
# Experiment configuration: RNG seed, worker count, and solver tolerances.
SEED = 1000
# Use half the logical cores, but never fewer than one worker: os.cpu_count()
# may return None, and `// 2` is 0 on a single-core machine, which joblib's
# Parallel rejects as n_jobs.
num_cpus = max(1, (os.cpu_count() or 1) // 2)
np.random.seed(SEED)
# NOTE(review): OpenMP runtimes usually read OMP_NUM_THREADS when they are loaded,
# so this may only affect libraries/processes started after this line — confirm.
os.environ['OMP_NUM_THREADS'] = str(num_cpus)
verbose = False

# Number of sample sizes evaluated per trial.
T_range = len(sampling_dims)

# Default stopping tolerances passed to the estimators.
eps_abs = 1e-7
eps_rel = 1e-7
eps_diff = 1e-7

In [None]:
# Low-rank tensor estimation: sweep the rank K and record the trial-averaged
# `norml1_err` of the estimated P against the ground truth for each sample size.

eps_abs = 1e-7
eps_rel = 1e-7
eps_diff = 1e-7

# Thin wrapper so joblib can dispatch one estimator fit per task.
lrte_est_parallel = lambda lrte, Qh, lrt_args: lrte.estimate(Qh, lrt_args)

# Start from an empty accumulator on every run. Without this the cell raises
# NameError on a fresh kernel, or appends to whatever `res` an earlier cell left.
res = []

# Rank 50 is part of the sweep because the post-processing cells read a fourth
# row (`lrt[3]`, used as `lrt_50`) and list ranks [2, 10, 20, 50].
for r in [2, 10, 20, 50]:
    lrt_args = {
        'K':r,
        'beta':.01,
        'eps_abs':eps_abs,
        'eps_rel':eps_rel,
        'eps_diff':eps_diff,
        'max_itr':40_000,
        'verbose':verbose,
        'MARG_CONST':True,
        'ACCEL':True
    }

    # One fresh estimator per (trial, sample size); fits run in parallel and come
    # back in submission order, i.e. trial-major.
    lrte = [[LowRankTensorEstimator() for _ in range(T_range)] for _ in range(num_trials)]
    results = Parallel(n_jobs=num_cpus)(delayed(lrte_est_parallel)( lrte[t][i],Q_obs[t][i],lrt_args ) for t in range(num_trials) for i in range(T_range))

    # Unflatten the results back into [trial][sample-size] grids.
    c = 0
    P_1D_lrt = [[None for _ in range(T_range)] for _ in range(num_trials)]
    Q_1D_lrt = [[None for _ in range(T_range)] for _ in range(num_trials)]
    res_lrt = [[None for _ in range(T_range)] for _ in range(num_trials)]
    for t in range(num_trials):
        for i in range(T_range):
            P_1D_lrt[t][i] = results[c][0].P_1D
            Q_1D_lrt[t][i] = results[c][0].Q_1D
            res_lrt[t][i] = results[c][1]
            c+=1

    # Mean over trials -> one error value per sample size for this rank.
    err = torch.tensor([[norml1_err(P_1D_lrt[t][i], P_1D_tru[t]) for i in range(T_range)] for t in range(num_trials)]).mean(0)
    res.append(err.detach().numpy())

# Rows follow the rank sweep above; columns follow `sampling_dims`.
res = np.array(res)
np.save('results/lrt.npy', res)

In [None]:
# Single rank-50 low-rank tensor fit on the first trial, at sample-size index `idx`.
idx = 2

eps_abs = eps_rel = eps_diff = 1e-7

args = dict(
    K=50,
    beta=0.01,
    eps_abs=eps_abs,
    eps_rel=eps_rel,
    eps_diff=eps_diff,
    max_itr=40_000,
    verbose=verbose,
    MARG_CONST=True,
    ACCEL=True,
)

model = LowRankTensorEstimator()
model.estimate(Q_obs[0][idx], args)

# Error of the fitted P against the ground truth of the same trial.
e_r50 = norml1_err(model.P_1D, P_1D_tru[0])
e_r50

In [None]:
# Nuclear norm matrix estimation: sweep `gamma` and store the trial-averaged
# `norml1_err` of P per sample size.

eps_abs = eps_rel = eps_diff = 1e-6

res = []
for g in [0.1, 1.0]:
    nnlrme_est_parallel = lambda nnlrm,Ph,args: nnlrm.estimate(Ph,args)

    nnlrm_args = {
        'beta': 10,
        'gamma': g,
        'eps_abs': eps_abs,
        'eps_rel': eps_rel,
        'eps_diff': eps_diff,
        'max_itr': 5000,
        'verbose': False,
    }

    # One estimator per (trial, sample size), fitted in parallel on the empirical P.
    nnlrme = [[NucNormMatrixEstimator() for _ in range(T_range)] for _ in range(num_trials)]
    jobs = (
        delayed(nnlrme_est_parallel)(nnlrme[t][i], P_1D_obs[t][i], nnlrm_args)
        for t in range(num_trials)
        for i in range(T_range)
    )
    results = Parallel(n_jobs=num_cpus)(jobs)

    # Results come back flat in trial-major order; regroup them per trial.
    per_trial = [[results[t * T_range + i] for i in range(T_range)] for t in range(num_trials)]
    P_1D_nnlrm = [[fit[0].P for fit in row] for row in per_trial]
    Q_1D_nnlrm = [[fit[0].Q for fit in row] for row in per_trial]
    res_nnlrm = [[fit[1] for fit in row] for row in per_trial]

    err_grid = torch.tensor([
        [norml1_err(P_1D_nnlrm[t][i], P_1D_tru[t]) for i in range(T_range)]
        for t in range(num_trials)
    ])
    err = err_grid.mean(0)
    res.append(err.detach().numpy())

# Row k corresponds to the k-th gamma value above.
res = np.array(res)
np.save('results/nnlrm.npy', res)

In [None]:
# Low-rank matrix estimation via DC algorithm: sweep the rank K and store the
# trial-averaged `norml1_err` of P per sample size.
eps_abs = eps_rel = eps_diff = 1e-6

res = []
for r in [5, 10]:
    dclrme_est_parallel = lambda dclrm,Ph,args: dclrm.estimate(Ph,args)

    dclrm_args = {
        'K': r,
        'c': 5,
        'alpha': .5,
        'beta': 1,
        'eps_abs': eps_abs,
        'eps_rel': eps_rel,
        'eps_diff': eps_diff,
        'max_itr': 500,
        'admm_itr': 1,
        'verbose': False,
    }

    # One estimator per (trial, sample size), fitted in parallel on the empirical P.
    dclrme = [[DCLowRankMatrixEstimator() for _ in range(T_range)] for _ in range(num_trials)]
    results = Parallel(n_jobs=num_cpus)(
        delayed(dclrme_est_parallel)(dclrme[t][i], P_1D_obs[t][i], dclrm_args)
        for t in range(num_trials)
        for i in range(T_range)
    )

    P_1D_dclrm = [[None] * T_range for _ in range(num_trials)]
    Q_1D_dclrm = [[None] * T_range for _ in range(num_trials)]
    res_dclrm = [[None] * T_range for _ in range(num_trials)]
    # Flat position c maps back to (trial, sample-size index) in trial-major order.
    for c, fit in enumerate(results):
        t, i = divmod(c, T_range)
        P_1D_dclrm[t][i] = fit[0].P
        Q_1D_dclrm[t][i] = fit[0].Q
        res_dclrm[t][i] = fit[1]

    err_grid = torch.tensor([
        [norml1_err(P_1D_dclrm[t][i], P_1D_tru[t]) for i in range(T_range)]
        for t in range(num_trials)
    ])
    err = err_grid.mean(0)
    res.append(err.detach().numpy())

# Row k corresponds to the k-th rank above.
res = np.array(res)
np.save('results/dclrm.npy', res)

In [None]:
# Spectral low-rank matrix estimation: sweep the rank and store the
# trial-averaged `norml1_err` of P per sample size.
res = []
for r in [1, 5, 10, 20]:
    slrme_est_parallel = lambda slrm,Qh,K: slrm.estimate(Qh,K)

    slrme = [[SpecLowRankMatrixEstimator() for _ in range(T_range)] for _ in range(num_trials)]
    K_slrme = r
    # One fit per (trial, sample size) on the empirical joint matrix Q.
    jobs = (
        delayed(slrme_est_parallel)(slrme[t][i], Q_1D_obs[t][i], K_slrme)
        for t in range(num_trials)
        for i in range(T_range)
    )
    results = Parallel(n_jobs=num_cpus)(jobs)

    # Consume the flat, trial-major result list one trial (T_range fits) at a time.
    P_1D_slrm, Q_1D_slrm, res_slrm = [], [], []
    fits = iter(results)
    for t in range(num_trials):
        row = [next(fits) for _ in range(T_range)]
        P_1D_slrm.append([fit[0].P for fit in row])
        Q_1D_slrm.append([fit[0].Q for fit in row])
        res_slrm.append([fit[1] for fit in row])

    err_grid = torch.tensor([
        [norml1_err(P_1D_slrm[t][i], P_1D_tru[t]) for i in range(T_range)]
        for t in range(num_trials)
    ])
    err = err_grid.mean(0)
    res.append(err.detach().numpy())

# Row k corresponds to the k-th rank above.
res = np.array(res)
np.save('results/slrm.npy', res)

# 5 - Create data

In [None]:
# Reload the per-method error arrays written by the estimation cells.
lrt, dclrm, nnlrm, slrm = (
    np.load(path)
    for path in (
        'results/lrt.npy',
        'results/dclrm.npy',
        'results/nnlrm.npy',
        'results/slrm.npy',
    )
)

In [None]:
# Split each result array into one Python list per hyper-parameter setting
# (row order follows the sweep in the corresponding estimation cell).

# Low-rank tensor, ranks 2 / 10 / 20 / 50 -> 288 / 1440 / 2880 / 7200 params
lrt_2, lrt_10, lrt_20, lrt_50 = (lrt[k].tolist() for k in range(4))

# DC low-rank matrix, ranks 5 / 10
dclrm_5, dclrm_10 = (dclrm[k].tolist() for k in range(2))

# Nuclear-norm matrix, gamma 0.1 / 1.0
nnlrm_01, nnlrm_1 = (nnlrm[k].tolist() for k in range(2))

# Spectral low-rank matrix, ranks 1 / 5 / 10 / 20 -> 792 / 3960 / 7920 / 15840 params
slrm_1, slrm_5, slrm_10, slrm_20 = (slrm[k].tolist() for k in range(4))

In [None]:
# Error versus number of samples: one row per sample size, one column per
# method / hyper-parameter setting. All columns are cast to float.
df_ny_sampling = pd.DataFrame(dict(
    sampling=sampling_dims,
    err_lrt_r10=lrt_10,
    err_lrt_r20=lrt_20,
    err_nnlrm_01_P=nnlrm_01,
    err_nnlrm_1_P=nnlrm_1,
    err_dclrm_5_P=dclrm_5,
    err_dclrm_10_P=dclrm_10,
    err_slrm_r5_P=slrm_5,
    err_slrm_r10_P=slrm_10,
)).astype(float)


In [None]:
# Display the error-vs-sample-size table for a visual check.
df_ny_sampling

In [None]:
# Write the error-vs-sample-size table to CSV without the index column.
df_ny_sampling.to_csv('results/5_ny_sampling.csv', index=False)

In [None]:
# Ranks swept for the spectral matrix and low-rank tensor estimators.
ranks_slrm = [1, 5, 10, 20]
ranks_lrt = [2, 10, 20, 50]

# Parameter counts per rank.
# NOTE(review): 66 and 6 are presumably the numbers of locations and time periods — confirm.
params_slrm = [2 * 66 * 6 * rank for rank in ranks_slrm]
params_lrt = [(66 + 6 + 66 + 6) * rank for rank in ranks_lrt]

# Sample-size index at which the methods are compared.
idx = 2

# Error at that sample size for each rank, in the same order as the rank lists.
err_rank_lrt = [curve[idx] for curve in (lrt_2, lrt_10, lrt_20, lrt_50)]
err_rank_slrm = [curve[idx] for curve in (slrm_1, slrm_5, slrm_10, slrm_20)]

In [None]:
# Error versus parameter count: one row per rank, for the tensor (lrt) and
# spectral matrix (slrm) estimators.
df_ny_params = pd.DataFrame(dict(
    params_lrt=params_lrt,
    params_slrm=params_slrm,
    err_rank_lrt=err_rank_lrt,
    err_rank_slrm=err_rank_slrm,
))

In [None]:
# Display a float-cast view of the table. The cast is not assigned back, so
# `df_ny_params` itself keeps its original dtypes.
df_ny_params.astype(float)

In [None]:
# Write the error-vs-parameter-count table to CSV without the index column.
df_ny_params.to_csv('results/6_ny_params.csv', index=False)