In [3]:
# First, create a whole backlog of plans. Each index corresponds to an S-1 feature. Generate it randomly
# so that the features occur with a variety of frequencies.
# Second, extract the frequencies of all features present in a plan. Store everything in a dictionary for
# faster lookups.
# Third, measure how much time it takes to select plans in each round. Do that by applying a simple
# formula to each plan -> for each feature f, add its frequency count.
# Fourth, once a plan is selected, apply the formula again over all the plans.


In [72]:
import numpy as np
from itertools import combinations, product
import time

In [76]:
# Parameters for creating the backlog plans (the backlog itself is generated in a later cell)
class Param:
    """Holds every parameter of this script plus the feature-count table.

    All settings are keyword arguments whose defaults reproduce the original
    hard-coded values, so ``Param()`` behaves exactly as before while smaller
    configurations can be built for quick experiments.

    Parameters
    ----------
    number_of_s1_features : int
        Number of distinct S-1 features; they are labelled ``0 .. n-1``.
    min_on_s1_features, max_on_s1_features : int
        Stored bounds on how many S-1 features a plan has.
    backlog_size : int
        Stored number of plans in the backlog.
    seed : int
        Stored random seed.
    max_feature_level : int
        Highest feature level ``k`` for which keys are created.
    number_of_rounds, number_of_plans_per_round : int
        Stored sizes of the selection simulation.

    Attributes
    ----------
    feature_to_count : dict
        Maps every feature string (S-1 labels joined with ``"-"``, e.g.
        ``"3-17-4"``) of length 1 .. ``max_feature_level`` to a counter that
        starts at 0.
    """

    def __init__(
        self,
        number_of_s1_features=30,
        min_on_s1_features=3,
        max_on_s1_features=10,
        backlog_size=10000,
        seed=0,
        max_feature_level=5,
        number_of_rounds=6,
        number_of_plans_per_round=10,
    ):
        self.number_of_s1_features = number_of_s1_features
        self.min_on_s1_features = min_on_s1_features
        self.max_on_s1_features = max_on_s1_features
        self.backlog_size = backlog_size
        self.seed = seed
        self.max_feature_level = max_feature_level
        # Counter per feature string; every value starts at 0.
        self.feature_to_count = {}
        self.number_of_rounds = number_of_rounds
        self.number_of_plans_per_round = number_of_plans_per_round

        # Convert the labels to text once instead of once per generated tuple.
        s1_labels = [str(feature) for feature in range(self.number_of_s1_features)]
        # The table holds sum(n**k for k = 1..max_feature_level) keys, which
        # grows very quickly with either setting.
        for feature_level in range(1, self.max_feature_level + 1):
            for label_tuple in product(s1_labels, repeat=feature_level):
                self.feature_to_count["-".join(label_tuple)] = 0
        print("Feature to idx dict created. Total number of features: %d" % len(self.feature_to_count))

# Build the shared parameter object, then seed NumPy's global random state
# from it.
param = Param()
np.random.seed(seed=param.seed)


Feature to idx dict created. Total number of features: 25137930


In [86]:
# Generate the random backlog and tally how often every s-k feature occurs.
#
# The draws come from a generator seeded from param.seed inside this cell, so
# executing the cell again rebuilds exactly the same backlog instead of
# continuing wherever the global NumPy random state happens to be.
backlog_rng = np.random.RandomState(param.seed)

# Zero the tallies in place before counting: the `+=` below would otherwise
# accumulate on top of the counts left behind by an earlier execution of this
# cell. Only values are reassigned (no keys are added or removed), so this is
# safe while iterating the dictionary; it is one pass over the whole table.
for feature_string in param.feature_to_count:
    param.feature_to_count[feature_string] = 0

backlog = []
backlog_to_features_array = []
plan_to_features = {}
s1_feature_ids = np.arange(param.number_of_s1_features)
for plan_number in range(param.backlog_size):
    # Number of s-1 features in this plan, upper bound inclusive.
    r1 = backlog_rng.randint(param.min_on_s1_features, param.max_on_s1_features + 1)
    # replace is true because a plan can have a repeated feature
    plan_features_present = backlog_rng.choice(s1_feature_ids, r1, replace=True)
    backlog.append(plan_features_present)  # each plan is being represented by the s-1 features present in it
    # Collect the string form of every s-k feature of the plan: each
    # position-ordered selection of k entries, for k = 1 .. max_feature_level.
    # NOTE(review): because entries may repeat, the same feature string can be
    # produced (and counted) more than once for a single plan - confirm that
    # this is intended.
    all_s_k_features = []
    for single_feature_level in range(1, param.max_feature_level + 1):
        for single_combination in combinations(plan_features_present, single_feature_level):
            feature_string = "-".join([str(t1) for t1 in single_combination])
            param.feature_to_count[feature_string] += 1
            all_s_k_features.append(feature_string)
    plan_to_features[plan_number] = all_s_k_features
print("Backlog generated, features captured and feature frequency computed")

total backlog plans generated 0
total backlog plans generated 1000
total backlog plans generated 2000
total backlog plans generated 3000
total backlog plans generated 4000
total backlog plans generated 5000
total backlog plans generated 6000
total backlog plans generated 7000
total backlog plans generated 8000
total backlog plans generated 9000
Backlog generated, features captured and feature frequency computed


In [105]:
# Greedy selection benchmark. In every round a fixed number of plans is picked
# one at a time; before each pick all backlog plans are re-scored and the
# highest-scoring one wins. CPU time is reported per round.
features_shown = set()
plans_to_show = []
for round_number in range(param.number_of_rounds):
    start = time.process_time()
    for plan_number in range(param.number_of_plans_per_round):
        # A plan's score is the summed backlog frequency of those of its
        # features that have not been shown by an earlier pick.
        scores_list = [
            sum(
                param.feature_to_count[feature_name]
                for feature_name in plan_to_features[backlog_position]
                if feature_name not in features_shown
            )
            for backlog_position in range(len(backlog))
        ]
        selected_plan_idx = np.argmax(scores_list)
        selected_plan = backlog[selected_plan_idx]
        plans_to_show.append(selected_plan)
        # Everything the chosen plan contains now counts as shown.
        features_shown.update(plan_to_features[selected_plan_idx])
    print("Round %d time taken:" % (round_number + 1))
    print(time.process_time() - start)
print("Total features shown: %d / %d" % (len(features_shown), len(param.feature_to_count)))

Round 1 time taken:
7.986218043000008
Round 2 time taken:
7.8222373260000495
Round 3 time taken:
7.784362937999958
Round 4 time taken:
7.758068710000316
Round 5 time taken:
7.870916541000042
Round 6 time taken:
7.613118697000118
Total features shown: 24020 / 25137930


In [107]:
# Snapshot of the count table as a list of (feature string, count) pairs.
ftc = [(feature, count) for feature, count in param.feature_to_count.items()]




('0', 6289)