In [3]:
import gurobipy as gp
from gurobipy import GRB

import numpy as np
import pandas as pd
import graspy

import matplotlib.pyplot as plt
import seaborn as sns

from tqdm import tqdm_notebook as tqdm

import os
sns.set()

In [4]:
def optimal_distance(dist_matrix, S_indices, model_name=None, return_new_dists=True):
    """
    A function to find the weights of optimal linear combination of distances.

    A mixed-integer program is solved with Gurobi: the weights are constrained to
    lie in [0, 1] and sum to one, and the objective counts the vertices outside S
    (and other than vertex 0) whose combined distance is smaller than that of at
    least one vertex in S. That count is minimized.

    Input
    dist_matrix - np.array (shape=(n, J))
        Array containing the distances between the vertex of interest and the other n - 1
        vertices. It is assumed that the vertex of interest is indexed by 0.
    S_indices - array-like
        Array-like containing the indices of the vertices that should be at the top of the
        nomination list for the vertex of interest.
    model_name - str or None
        Optional name for the Gurobi model. When given (and non-empty) the model is
        also written to '<model_name>.lp' in the current working directory.
    return_new_dists - bool
        When True (the default) the combined distances are returned as well.

    Return
    weights - np.array (length=J)
        Array containing the coefficients for the optimal distance function.
    new_dists - np.array (length=n)
        Only returned when return_new_dists is True. Weighted average of the J
        columns of dist_matrix, using weights.

    Raises
    RuntimeError
        If the solver finishes without any solution to read the weights from.
    """

    n, J = dist_matrix.shape

    # Big-M constant: switching an indicator on must always satisfy its constraint.
    # Weights lie in [0, 1], so no combined distance can exceed this total.
    M = np.sum(np.abs(dist_matrix))

    # Set membership keeps the complement computation linear in n.
    S_lookup = {int(s) for s in S_indices}
    Q_indices = [q for q in range(1, n) if q not in S_lookup]
    Q = len(Q_indices)

    if model_name is not None:
        m = gp.Model(name='%s' % (model_name))
    else:
        m = gp.Model()

    m.setParam('OutputFlag', 0)

    ind = m.addVars(Q, vtype=GRB.BINARY, name='ind')
    # Sum the indicator variables themselves. Iterating the tupledict (as
    # gp.quicksum(ind) would) walks its integer keys, not the Vars.
    m.setObjective(ind.sum(), GRB.MINIMIZE)

    w = m.addVars(J, lb=0, ub=1, vtype=GRB.CONTINUOUS, name='w')
    m.addConstr(w.sum() == 1)

    def _row_coefficients(vertex):
        # Coefficients of the weighted distance for one vertex, keyed like w.
        return gp.tupledict([(j, dist_matrix[vertex, j]) for j in range(J)])

    # The coefficients of each q do not depend on s, so build them once.
    q_coefficients = [_row_coefficients(q) for q in Q_indices]

    for s in S_indices:
        s_coefficients = _row_coefficients(s)
        for k, coefficients in enumerate(q_coefficients):
            # Either s is at least as close as q, or q's indicator is switched on.
            m.addConstr(w.prod(s_coefficients) <= w.prod(coefficients) + ind[k] * M)

    m.optimize()

    if m.SolCount == 0:
        raise RuntimeError('Gurobi returned no solution (status %s)' % (m.Status))

    alpha_hat = np.array([w[j].X for j in range(J)])

    if model_name:
        # Gurobi picks the file format from the suffix; '.lp' is the LP model format.
        m.write('%s.lp' % (model_name))

    if return_new_dists:
        return alpha_hat, np.average(dist_matrix, axis=1, weights=alpha_hat)

    return alpha_hat

In [5]:
# Load the similarity table and the node list (idx -> product name).
all_S_csv = pd.read_csv('ProductSimilarities.csv')
nodes = pd.read_csv('jhu_package_100/nodes.csv', header=None)
nodes.columns = ['idx', 'product']

product_array = np.array(list(nodes['product']))

# Each file in the deltas directory is named '<node index>.<ext>'. Entries whose
# stem is not an integer (e.g. '.ipynb_checkpoints') are skipped, and the rest are
# loaded in node-index order so the result does not depend on os.listdir ordering.
deltas_dir = 'jhu_package_100/deltas/'
delta_files = []
for f in os.listdir(deltas_dir):
    try:
        delta_files.append((int(str(f).split('.')[0]), f))
    except ValueError:
        continue
delta_files.sort()

all_deltas = []
vstars = []
vstars_idx = []
for node_idx, f in delta_files:
    all_deltas.append(np.load(os.path.join(deltas_dir, str(f))))
    vstars.append(product_array[node_idx])
    vstars_idx.append(node_idx)

if not all_deltas:
    raise FileNotFoundError('no delta files found in %s' % (deltas_dir))

# Shape is taken from the first matrix only.
n, J = all_deltas[0].shape
vstars = np.array(vstars)
vstars_idx = np.array(vstars_idx)

nodes_with_similar_products = np.unique(np.array(list(all_S_csv['SourceProduct'])))

# Map each source product to the node indices of its similar products, in CSV
# row order. A similar product missing from product_array raises IndexError.
S_dict = {}
for source_product, similar_product in zip(all_S_csv['SourceProduct'],
                                           all_S_csv['SimilarProduct']):
    similar_idx = np.where(product_array == similar_product)[0][0]
    S_dict.setdefault(source_product, []).append(similar_idx)

print(all_S_csv.head())
print(S_dict)

         SourceProduct       SimilarProduct  Tier
0      Apple iPhone XR  Apple iPhone Xs Max     1
1      Apple iPhone XR       Apple iPhone X     1
2      Apple iPhone XR      Apple iPhone XS     1
3  Apple iPhone Xs Max       Apple iPhone X     1
4  Apple iPhone Xs Max      Apple iPhone XS     1
{'Apple iPhone XR': [75, 171, 135], 'Apple iPhone Xs Max': [171, 135, 100], 'Apple iPhone 11': [432, 341], 'Samsung Galaxy S10': [234, 62, 107, 187], 'Nikon D7200': [566, 271], 'Microsoft Surface Laptop': [1439, 1231]}


In [6]:
# Solve one optimal-distance problem per source product. Products that cannot be
# processed are collected in bad_keys instead of stopping the loop.
bad_keys = []
nomination_lists = {}
for key in S_dict:
    try:
        # Not used below, but raises IndexError when key is not a known product,
        # which sends the key to bad_keys.
        # NOTE(review): optimal_distance assumes the vertex of interest is row 0 of
        # dist_matrix; confirm the delta matrices are laid out that way, otherwise
        # prod_idx probably needs to be passed through.
        prod_idx = np.asarray(product_array == key).nonzero()[0][0]
        deltas_idx = np.where(vstars == key)[0][0]

        S_indices = S_dict[key]
        dist_matrix = all_deltas[deltas_idx]

        alpha_hat, new_dists = optimal_distance(dist_matrix, S_indices)

        # Row indices ordered from smallest to largest combined distance.
        temp_nom_list = np.argsort(new_dists)
        nomination_lists[key] = temp_nom_list
    except Exception as err:
        # Keep going, but say why the key failed; KeyboardInterrupt still propagates.
        print('Skipping %r: %r' % (key, err))
        bad_keys.append(key)
print(bad_keys)

Using license file C:\Users\hhelm\gurobi.lic
Academic license - for non-commercial use only
[]


In [None]:
# One column per successfully processed source product, holding the product names
# in nomination order. Keys without a nomination list are skipped rather than
# raising KeyError.
solved_keys = [key for key in S_dict
               if key not in bad_keys and key in nomination_lists]
to_csv = [product_array[nomination_lists[key]] for key in solved_keys]

# NOTE(review): labelling the first row 'V Star' presumes the vertex of interest
# sorts first in every nomination list; confirm.
row_labels = np.concatenate((['V Star'], np.arange(1, n)))
if to_csv:
    df = pd.DataFrame(data=np.array(to_csv).T, index=row_labels)
else:
    # Nothing was solved: keep the row labels, with no columns.
    df = pd.DataFrame(index=row_labels)
# df.to_csv('initial-nomination-lists-100.csv')

In [None]:
# Dump the raw mapping: source product -> argsort of its combined distances.
print(nomination_lists)