In [6]:
import numpy as np
import itertools

In [99]:
class User():
    """A user moving between a fixed set of locations according to a Markov chain.

    The following attributes are built at construction time:

    * ``locs``         -- the locations exactly as passed in.
    * ``dists``        -- square matrix of pairwise Euclidean distances.
    * ``P``            -- transition matrix, either supplied or drawn at random.
    * ``reward_scale`` -- matrix in [0, 1] that decays linearly with distance.
    """

    def __init__(self, locs, max_dist=7, threshold_dist=6, self_weight=0.5, P=None):
        """Build the distance, transition and reward-scale matrices.

        Args:
            locs: Sequence of coordinate tuples, one per location.
            max_dist: Distance at which the reward scale reaches 0.
            threshold_dist: Only locations strictly closer than this (and at
                non-zero distance) are reachable in one transition.
            self_weight: Weight used when drawing the self-transition; it is
                used as a divisor together with ``1 - self_weight``, so it
                must lie strictly between 0 and 1.
            P: Optional ready-made transition matrix. When given it is stored
                as-is (no validation) and nothing is drawn at random.
        """
        self.locs = locs
        self.dists = self.get_dists()

        if P is None:
            self.P = self.make_P(threshold_dist, self_weight)
        else:
            self.P = P

        self.reward_scale = self.get_scales(max_dist)

    def make_P(self, threshold_dist, self_weight):
        """Draw a random Markov transition matrix over the locations.

        For every location ``i`` the reachable neighbours are those at a
        distance strictly between 0 and ``threshold_dist``. Unnormalised
        weights are drawn from exponential distributions (NumPy's global RNG)
        and each row is normalised to sum to 1. A location with no reachable
        neighbour becomes absorbing (``P[i, i] == 1``).

        NOTE(review): the self draw uses ``scale=1/self_weight``, so a larger
        ``self_weight`` gives a *smaller* expected self-transition share.
        Confirm this is the intended direction.

        Args:
            threshold_dist: Exclusive upper bound on one-step travel distance.
            self_weight: Value strictly between 0 and 1 (see note above).

        Returns:
            ``numpy.ndarray`` with the same shape as ``self.dists``.
        """
        P = np.zeros(self.dists.shape)

        for i in range(len(self.locs)):
            cut_list = self.dists[i, :]

            # flatnonzero always returns a 1-D index array, so rows with a
            # single neighbour (or none) are handled like any other row.
            # np.squeeze(np.argwhere(...)) collapsed the single-neighbour
            # case to a 0-d array and crashed on ``.shape[0]``.
            others = np.flatnonzero((cut_list > 0) & (cut_list < threshold_dist))
            num_others = others.size

            if num_others == 0:
                # Nowhere to go: stay put with probability 1. This also avoids
                # dividing by ``(1 - self_weight) * 0`` below.
                P[i, i] = 1.0
                continue

            # Draw values to make up row of MC (self first, then neighbours,
            # so seeded runs consume the RNG in the same order as before).
            self_transition = np.random.exponential(scale=1/self_weight)
            others_transition = np.random.exponential(
                scale=1/((1-self_weight)*num_others), size=num_others)
            total = self_transition + np.sum(others_transition)

            P[i, i] = self_transition/total
            P[i, others] = others_transition/total

        return P

    def get_dists(self):
        """Return the symmetric matrix of Euclidean distances between locations.

        Returns:
            ``numpy.ndarray`` of shape ``(len(locs), len(locs))`` with zeros
            on the diagonal.
        """
        points = [np.array(loc) for loc in self.locs]
        num_locs = len(points)
        dists = np.zeros([num_locs, num_locs])

        # Each unordered pair is computed once and mirrored.
        for i, j in itertools.combinations(range(num_locs), 2):
            dists[i, j] = np.linalg.norm(points[i] - points[j])
            dists[j, i] = dists[i, j]

        return dists

    def get_scales(self, max_dist):
        """Map distances to a reward scale in [0, 1].

        The scale is 1 at distance 0, falls linearly to 0 at ``max_dist`` and
        stays at 0 beyond it.

        Args:
            max_dist: Distance at which the scale reaches 0.

        Returns:
            ``numpy.ndarray`` with the same shape as ``self.dists``.
        """
        reward_scale = np.ones(self.dists.shape) - self.dists/max_dist
        # Anything farther than max_dist would go negative; floor it at 0.
        reward_scale[reward_scale < 0] = 0

        return reward_scale

In [100]:
# User draws its transition matrix from NumPy's global RNG; seed it so the
# matrix shown below is the same after Restart Kernel -> Run All.
np.random.seed(0)
locs = [(0,0),(2,2),(4,4),(5,5)]
u = User(locs)

In [104]:
u.P[0]

array([0.88576017, 0.02343493, 0.09080489, 0.        ])

In [41]:
u.reward_scale

array([[1.        , 0.59593898, 0.19187796, 0.        ],
       [0.59593898, 1.        , 0.59593898, 0.39390847],
       [0.19187796, 0.59593898, 1.        , 0.79796949],
       [0.        , 0.39390847, 0.79796949, 1.        ]])

In [42]:
# 7 is User's default max_dist, so this is the fraction that get_scales
# subtracts from 1 before flooring negative values at 0.
u.dists/7

array([[0.        , 0.40406102, 0.80812204, 1.01015254],
       [0.40406102, 0.        , 0.40406102, 0.60609153],
       [0.80812204, 0.40406102, 0.        , 0.20203051],
       [1.01015254, 0.60609153, 0.20203051, 0.        ]])

In [50]:
# Boolean mask over row 0 of the distance matrix: True where the distance
# from location 0 is non-zero.
b = u.dists[0] > 0

In [52]:
# NOTE(review): `a` is not defined in any cell visible here (execution counts
# jump from 50 to 52), so this presumably relies on a deleted cell and will
# fail on a fresh kernel -- restore the definition of `a` before this cell.
# Element-wise product of `a` (presumably another boolean mask) with `b`.
c = a * b

In [53]:
c

array([False,  True, False, False])

In [71]:
# Reproduce by hand the neighbour lookup for location 0 with a cutoff of 6.
threshold_dist = 6
cut_list = u.dists[0]
# Indices whose distance lies strictly between 0 and the cutoff;
# argwhere returns one row per matching index.
others = np.argwhere((cut_list > 0) & (cut_list < threshold_dist))


In [72]:
others

array([[1],
       [2]])

In [83]:
# Draw the unnormalised row weights by hand: one exponential draw for staying
# put, then one per neighbour found above.
self_weight = 0.5
num_others = len(others)
self_transition = np.random.exponential(scale=1 / self_weight)
others_transition = np.random.exponential(
    scale=1 / (num_others * (1 - self_weight)),
    size=num_others,
)

In [84]:
others_transition

array([0.44364903, 1.49039435])

In [85]:
self_transition

2.1168835477130776

In [87]:
# Normalising constant for the row: neighbour draws plus the self draw.
total = others_transition.sum() + self_transition

In [88]:
total

4.0509269307994815

In [89]:
others

array([[1],
       [2]])

In [98]:
# Neighbour count after dropping the length-1 axis that argwhere adds.
# NOTE(review): this only works when `others` does not have exactly one row;
# with a single row np.squeeze returns a 0-d array and `.shape[0]` raises
# IndexError.
np.squeeze(others).shape[0]

2

In [91]:
others.shape

(2, 1)