In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
type_weights = {
    "visited": 0.6,
    "typed": 0.2,
    "bookmarked": 0.2,
}

In [3]:
recency_weights = {
    "4-days": 0.15,
    "14-days": 0.15,
    "31-days": 0.15,
    "90-days": 0.2,
    "other_recency": 0.35
}

In [4]:
def combine_dicts_multiplicatively(dict1, dict2):
   
    weights = {}

    for key1, weight1 in dict1.items():
        for key2, weight2 in dict2.items():
            key = (key1, key2)
            weight = weight1 * weight2
            weights[key] = weight
            
    return weights

In [6]:
weights = combine_dicts_multiplicatively(type_weights, recency_weights)
#print(weights)

{('visited', '4-days'): 0.09, ('visited', '14-days'): 0.09, ('visited', '31-days'): 0.09, ('visited', '90-days'): 0.12, ('visited', 'other_recency'): 0.21, ('typed', '4-days'): 0.03, ('typed', '14-days'): 0.03, ('typed', '31-days'): 0.03, ('typed', '90-days'): 0.04000000000000001, ('typed', 'other_recency'): 0.06999999999999999, ('bookmarked', '4-days'): 0.03, ('bookmarked', '14-days'): 0.03, ('bookmarked', '31-days'): 0.03, ('bookmarked', '90-days'): 0.04000000000000001, ('bookmarked', 'other_recency'): 0.06999999999999999}


In [7]:
def con_vect(num_choices, vector):
    return np.eye(num_choices)[vector]

In [8]:
def sample_weighted(num_samples, weight_dict):
    num_choices = len(weight_dict)
    choice_weights = list(weight_dict.values())
    samples = np.random.choice(num_choices, num_samples, p=choice_weights)
    return con_vect(num_choices, samples)


In [9]:
def sample_url_features(num_samples):
    return sample_weighted(num_samples, weights)

In [10]:
type_points = {
    "visited": 1.2,
    "typed": 2,
    "bookmarked": 1.4,
    #"other_type": 0
}
recency_points = {
    "4-days": 100,
    "14-days": 70,
    "31-days": 50,
    "90-days": 30,
    "other_recency": 10
}
frecency_points_dict = combine_dicts_multiplicatively(type_points, recency_points)
#print(frecency_points_dict)

{('visited', '4-days'): 120.0, ('visited', '14-days'): 84.0, ('visited', '31-days'): 60.0, ('visited', '90-days'): 36.0, ('visited', 'other_recency'): 12.0, ('typed', '4-days'): 200, ('typed', '14-days'): 140, ('typed', '31-days'): 100, ('typed', '90-days'): 60, ('typed', 'other_recency'): 20, ('bookmarked', '4-days'): 140.0, ('bookmarked', '14-days'): 98.0, ('bookmarked', '31-days'): 70.0, ('bookmarked', '90-days'): 42.0, ('bookmarked', 'other_recency'): 14.0}


In [11]:
key_order = weights.keys()
#print(key_order)

frecency_points = np.array([frecency_points_dict[key] for key in key_order])
#print(frecency_points)

dict_keys([('visited', '4-days'), ('visited', '14-days'), ('visited', '31-days'), ('visited', '90-days'), ('visited', 'other_recency'), ('typed', '4-days'), ('typed', '14-days'), ('typed', '31-days'), ('typed', '90-days'), ('typed', 'other_recency'), ('bookmarked', '4-days'), ('bookmarked', '14-days'), ('bookmarked', '31-days'), ('bookmarked', '90-days'), ('bookmarked', 'other_recency')])
[120.  84.  60.  36.  12. 200. 140. 100.  60.  20. 140.  98.  70.  42.
  14.]


In [12]:
zip(key_order, frecency_points)

<zip at 0x22c7b86e600>

In [13]:
def frecency(url_features):
    return url_features.dot(frecency_points)

In [15]:
def sample(num_samples):
    X = sample_url_features(num_samples)
    y = frecency(X)
    return X, y

In [16]:
X, y = sample(len(frecency_points))
#print('len',len(frecency_points))
#print("\nX values: ",X)
#print("\ny values: ",y)

len 15

X values:  [[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]

y values:  [ 84.  12.  20.  12.  42.  84.  36.  60.  36. 100.  14.  12.  84.  20.
  20.]


In [17]:
n = 100

In [18]:
num_options = np.random.normal(loc=10, scale=4, size=(n))
#print("random : ",num_options)
num_options = np.maximum(num_options, 1)
#print("\nmax : ",num_options)
num_options = np.int32(num_options)
#print("\nint : ",num_options)

random :  [ 8.9673457  15.86894058 15.49655596 11.45536083  8.50311933 10.35802024
 10.39965519 10.19629141 10.96999223  4.52998714 11.05479096 10.66682773
  8.1216147   9.94614728 13.76138845 13.50924987 12.24090843 12.90399001
  7.18584612 10.03126303 10.82693848  1.91575637  9.90535765  4.88634864
 13.91713294  8.43069383  8.31384317 13.66244662  8.49807667  8.41354141
 15.27133412  9.30055155 10.81582811  3.23568331  8.79825578  0.97236698
 12.25450912  9.51079033 13.57909795  9.69691681  9.99289639 12.98184399
 13.38512188 11.66705226  4.47442852 10.09967966 10.58723699  9.05055438
  4.44859889  3.07054983  2.91520412 10.6769628   8.22025189  7.87970617
 15.66201457  5.58652063 15.40128192 13.27465184 12.97365485  5.6480924
 15.20506032  3.25667415  6.8284418   6.36510244  6.18643367 10.79980737
  7.50858286 12.0567863   8.79523882  8.50463147 19.84929319  8.23616587
 11.32236749 16.46865257 -0.47921862 13.50375886 11.58075844  2.3017558
  5.32160203 10.50158338 15.18171337  7.919

In [20]:
def sample_num_options(n):
    num_options = np.random.normal(loc=10, scale=4, size=(n))
    num_options = np.maximum(num_options, 1)
    return np.int32(num_options)

In [27]:
def sample_suggestions_normal(n):
    num_options = sample_num_options(n)
    #print("SSN",num_options)
    data = map(sample, num_options)
    #print("SSN",data)
    X, y = zip(*data)
    #print("\nX",X)
    #print("\ny",y)
    return X, y

In [26]:
#sample_suggestions_normal(n)

SSN [16  8 18 14  6  6  9  8 18 10  8 14  8 13 12  8  9 13 16 11  4  5  8 14
  6  7 10 14  5  9 14 14 12 10 11  6 15 14  9 11  5  7 12  1  4 10  6  7
  9  6 11  8  2 13 13 15  9  8 10 10 10  7  6 14  4 10 13  6  5 12 15 13
  7 14  9  9  1  8 14 11  8  8 11  4 15  2 10  5  6  9 14 12  7  8  6 11
 12 13  9 16]


((array([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
      