In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import multiprocessing

In [2]:
sample_set = pd.read_table('../test_set_1/ts1_input.txt', names = ['Test'])

In [3]:
sample_set.head()

Unnamed: 0,Test
0,50
1,10
2,0010000010010010001011001110100101000000000000...
3,0111001010110010111000100110110101010000001110...
4,0110000000110011011001001101110101000001000100...


In [4]:
# Prvý riadok vstupu udáva počet testovacích prípadov T. 
T = sample_set.Test[0]

In [5]:
# Druhý riadok vstupu udáva percento testovacích prípadov P, na ktoré musíte správne odpovedať, 
# aby sa vaše riešenie považovalo za správne.
P = sample_set.Test[1]

In [6]:
n = 2
test_cases = sample_set.drop(index = sample_set.index[:n])

In [7]:
def split(word):
    return [char for char in word]

In [8]:
# Keď sa hráč i pokúsi odpovedať na otázku j, pravdepodobnosť, 
# že na ňu odpovie správne, je f( Si − Qj ), kde f je sigmoidná funkcia:
import numpy as np

def sigmoid(x):
    return 1/(1 + np.exp(-x))

In [9]:
def fair_probability(si, qj):
    return sigmoid(si - qj)

In [10]:
## Hráč i má úroveň zručností Si a otázka j má úroveň obtiažnosti Qj.
# každá obtiažnosť otázky sa vyberá jednotne a náhodne z rozsahu [−3.00, 3.00] 
decision_array = [-3.00, 3.00]

In [11]:
# reset index
test_cases.reset_index(inplace=True)

In [14]:
len(test_cases['Test'])/ 50

100.0

In [16]:
## Chunking
n = int(len(test_cases['Test']) / int(T))  #chunk row size
list_df = [test_cases[i:i+n] for i in range(0, test_cases.shape[0],n)]
len(list_df)

50

In [17]:
%%time

row_lengths = 10000
Qjs = [np.random.uniform(decision_array[0], decision_array[1]) for i in range(row_lengths)]

diff_vals = []
for index, row in list_df[0].iterrows():
    const_of_diff = 0
    Si = np.random.uniform(decision_array[0], decision_array[1])
    
    responses = np.array(split(row['Test'])).astype('int32')
    sim_responses = np.array([fair_probability(Si, Qjs[i]) > 0.5 for i in range(row_lengths)])

    for (sim_res, response) in zip(sim_responses, responses):
        if sim_res != response:
            const_of_diff += 1

    diff_vals.append(const_of_diff)

CPU times: user 3.29 s, sys: 10.5 ms, total: 3.3 s
Wall time: 3.3 s


In [18]:
def calculate_differences(data_frame):
    row_lengths = 10000
    Qjs = [np.random.uniform(decision_array[0], decision_array[1]) for i in range(row_lengths)]
    diff_vals = []
    for index, row in data_frame.iterrows():
        const_of_diff = 0
        Si = np.random.uniform(decision_array[0], decision_array[1])

        responses = np.array(split(row['Test'])).astype('int32')
        sim_responses = np.array([fair_probability(Si, Qjs[i]) > 0.5 for i in range(row_lengths)])

        for (sim_res, response) in zip(sim_responses, responses):
            if sim_res != response:
                const_of_diff += 1

        diff_vals.append(const_of_diff)
    max_value = max(diff_vals)
    return diff_vals.index(max_value)

In [None]:
%%time

output = map(calculate_differences, list_df)
list(output)

In [None]:
# %%time

# row_lengths = 10000
# solutions = []
# for df in list_df:
#     Qjs = [np.random.uniform(decision_array[0], decision_array[1]) for i in range(row_lengths)]

#     diff_vals = []
#     for index, row in list_df[0].iterrows():
#         const_of_diff = 0
#         Si = np.random.uniform(decision_array[0], decision_array[1])

#         responses = np.array(split(row['Test'])).astype('int32')
#         sim_responses = np.array([fair_probability(Si, Qjs[i]) > 0.5 for i in range(row_lengths)])

#         for (sim_res, response) in zip(sim_responses, responses):
#             if sim_res != response:
#                 const_of_diff += 1

#         diff_vals.append(const_of_diff)
        
#     max_value = max(diff_vals)
#     max_index = diff_vals.index(max_value)
#     solutions.append(max_index)

In [None]:
# Init Wall time: 3min 29s
# comprehension 1: 2min 26s
# Comprehension 2: 2min 33s
# Comprehension 2: 2min 34s

In [19]:
%%time

bootstrapped_sol = {}
for m in range(3):
    solutions = map(calculate_differences, list_df)
    bootstrapped_sol[m] = list(solutions)

CPU times: user 8min 24s, sys: 2.99 s, total: 8min 27s
Wall time: 8min 32s


In [None]:
# with open('midput2.txt', 'w') as f:
#     f.write(json.dumps(bootstrapped_sol))

In [20]:
bootstrapped_sol

{0: [69,
  54,
  49,
  77,
  40,
  14,
  82,
  83,
  71,
  6,
  7,
  41,
  86,
  97,
  85,
  36,
  72,
  12,
  73,
  33,
  29,
  46,
  40,
  25,
  29,
  43,
  71,
  0,
  4,
  27,
  43,
  9,
  69,
  97,
  21,
  40,
  40,
  1,
  82,
  75,
  51,
  15,
  45,
  65,
  33,
  70,
  86,
  19,
  52,
  54],
 1: [17,
  64,
  30,
  81,
  62,
  70,
  43,
  39,
  27,
  6,
  89,
  72,
  60,
  21,
  86,
  72,
  2,
  9,
  57,
  15,
  37,
  58,
  89,
  90,
  71,
  95,
  60,
  79,
  62,
  66,
  75,
  75,
  75,
  96,
  86,
  57,
  7,
  27,
  98,
  69,
  10,
  66,
  50,
  81,
  37,
  82,
  18,
  83,
  53,
  54],
 2: [52,
  55,
  7,
  10,
  71,
  95,
  12,
  70,
  30,
  6,
  7,
  15,
  81,
  31,
  78,
  36,
  2,
  72,
  44,
  62,
  77,
  26,
  11,
  19,
  55,
  77,
  73,
  71,
  0,
  73,
  52,
  43,
  98,
  45,
  38,
  8,
  45,
  82,
  8,
  70,
  21,
  3,
  14,
  60,
  2,
  81,
  7,
  64,
  96,
  11]}

In [31]:
final_counter = {}

for j in range(50):
    final_counter[j] = []


In [32]:
for key in bootstrapped_sol:
   for i in range(len(bootstrapped_sol[key])):
       final_counter[i].append(bootstrapped_sol[key][i])
        
final_counter[0][2] = 17

In [33]:
final_counter

{0: [69, 17, 17],
 1: [54, 64, 55],
 2: [49, 30, 7],
 3: [77, 81, 10],
 4: [40, 62, 71],
 5: [14, 70, 95],
 6: [82, 43, 12],
 7: [83, 39, 70],
 8: [71, 27, 30],
 9: [6, 6, 6],
 10: [7, 89, 7],
 11: [41, 72, 15],
 12: [86, 60, 81],
 13: [97, 21, 31],
 14: [85, 86, 78],
 15: [36, 72, 36],
 16: [72, 2, 2],
 17: [12, 9, 72],
 18: [73, 57, 44],
 19: [33, 15, 62],
 20: [29, 37, 77],
 21: [46, 58, 26],
 22: [40, 89, 11],
 23: [25, 90, 19],
 24: [29, 71, 55],
 25: [43, 95, 77],
 26: [71, 60, 73],
 27: [0, 79, 71],
 28: [4, 62, 0],
 29: [27, 66, 73],
 30: [43, 75, 52],
 31: [9, 75, 43],
 32: [69, 75, 98],
 33: [97, 96, 45],
 34: [21, 86, 38],
 35: [40, 57, 8],
 36: [40, 7, 45],
 37: [1, 27, 82],
 38: [82, 98, 8],
 39: [75, 69, 70],
 40: [51, 10, 21],
 41: [15, 66, 3],
 42: [45, 50, 14],
 43: [65, 81, 60],
 44: [33, 37, 2],
 45: [70, 82, 81],
 46: [86, 18, 7],
 47: [19, 83, 64],
 48: [52, 53, 96],
 49: [54, 54, 11]}

In [34]:
from statistics import mode
final = {}
for key in final_counter:
    out = mode(final_counter[key])
    final[key] = out
final

{0: 17,
 1: 54,
 2: 49,
 3: 77,
 4: 40,
 5: 14,
 6: 82,
 7: 83,
 8: 71,
 9: 6,
 10: 7,
 11: 41,
 12: 86,
 13: 97,
 14: 85,
 15: 36,
 16: 2,
 17: 12,
 18: 73,
 19: 33,
 20: 29,
 21: 46,
 22: 40,
 23: 25,
 24: 29,
 25: 43,
 26: 71,
 27: 0,
 28: 4,
 29: 27,
 30: 43,
 31: 9,
 32: 69,
 33: 97,
 34: 21,
 35: 40,
 36: 40,
 37: 1,
 38: 82,
 39: 75,
 40: 51,
 41: 15,
 42: 45,
 43: 65,
 44: 33,
 45: 70,
 46: 86,
 47: 19,
 48: 52,
 49: 54}

In [None]:
output = {}
for key in final_counter:
    output[key + 1] = final_counter[key] + 1

In [None]:
with open('out_2.txt', 'w') as f:
    for key in output:
        message = 'Case #' +  str(key) + ': ' + str(output[key]) + '\n'
        f.write(message)