In [175]:
import pandas as pd
import numpy as np
from scipy import sparse
import cpp.python_modules.ML as ML

In [225]:
# Load the three CSV inputs from the working directory, in the same order as
# the names on the left-hand side. Later cells read the `id` and `visits`
# columns of test_data_x and the `id` and `nextvisit` columns of test_data_y.
train_data_x, test_data_x, test_data_y = (
    pd.read_csv(csv_path)
    for csv_path in ('train_data_x.csv', 'test_data_x.csv', 'test_data_y.csv')
)

In [251]:
# Length of the observed history in days. Day numbers are treated as 1-based
# by the matrix-building code below (it maps day d to cell ((d-1)//7, (d-1)%7)).
amount_of_days = 1099
days_in_week = 7
# Ceiling division: a trailing partial week still needs its own matrix row.
# The previous `(amount_of_days + 1) // 7` only matched this when
# amount_of_days % 7 was 0 or 6 (it does for 1099, which gives 157).
amount_of_weeks = -(-amount_of_days // days_in_week)
# Day numbers of the final `days_in_week` days of the history (numpy integers).
days_in_last_week = list(np.arange(amount_of_days - days_in_week + 1, amount_of_days + 1))




In [258]:
def get_client_visits(client_id):
    """Return the visit days stored for one client as a list of ints.

    The raw value is read from ``test_data_x.visits`` under the label
    ``client_id - 1``, i.e. the lookup relies on the frame keeping its
    default 0-based index with client ids running 1, 2, 3, ... in row order.

    The cell is expected to hold integers separated by whitespace and/or
    commas, optionally wrapped in square brackets (``"[1, 2, 3]"``,
    ``" 1 2 3"`` and ``"1,2,3"`` all parse to ``[1, 2, 3]``).

    Parameters
    ----------
    client_id : int
        1-based client identifier.

    Returns
    -------
    list of int
        Parsed visit days; empty when the cell is missing (NaN).

    Raises
    ------
    ValueError
        If a token left after removing the delimiters is not an integer.
    """
    raw_visits = test_data_x.visits[client_id - 1]
    if not isinstance(raw_visits, str) and pd.isna(raw_visits):
        # An empty CSV field comes back from read_csv as NaN: no visits.
        return []
    # Replace every delimiter with a space rather than deleting it, so that
    # values written without whitespace ("7,8") stay separate numbers instead
    # of being fused into one ("78").
    for delimiter in '[],':
        raw_visits = raw_visits.replace(delimiter, ' ')
    return [int(visit) for visit in raw_visits.split()]

In [259]:
def create_sparse_matrix_from_data(data,shape):
    """Build a week-by-weekday 0/1 indicator matrix from 1-based day numbers.

    Day ``d`` sets the cell at row ``(d - 1) // days_in_week`` and column
    ``(d - 1) % days_in_week`` to 1.

    Parameters
    ----------
    data : iterable of int
        1-based day numbers. Repeated days are counted once.
    shape : tuple of int
        ``(rows, columns)`` of the resulting matrix.

    Returns
    -------
    scipy.sparse.csr_matrix
        Integer matrix of the requested shape holding 1 at every visited cell.

    Raises
    ------
    ValueError
        Raised by scipy when a day maps outside ``shape`` (including days
        below 1, which map to a negative row).
    """
    # csr_matrix sums entries that share a coordinate, so a day listed twice
    # would produce a 2 instead of a 0/1 flag; deduplicate first.
    days = np.array(sorted(set(data)), dtype=np.int64)
    row_indices, column_indices = np.divmod(days - 1, days_in_week)
    elements = np.ones(days.size, dtype=int)
    return sparse.csr_matrix((elements,(row_indices,column_indices)),shape=shape,dtype=int)

def create_client_visits_matrix(client_id):
    """Return the sparse visit matrix of one client.

    The client's visit days (from ``get_client_visits``) are handed to
    ``create_sparse_matrix_from_data`` with the shape
    ``(amount_of_weeks, days_in_week)``.
    """
    matrix_shape = (amount_of_weeks, days_in_week)
    return create_sparse_matrix_from_data(get_client_visits(client_id), matrix_shape)


In [260]:
def create_classic_weights(size,delta=1):
    """Return ``size`` weights that sum to 1.

    The unnormalised weight at index ``i`` (0-based) is
    ``((size - i + 1) / size) ** delta``, so for a positive ``delta`` the
    weights shrink as the index grows. Each value is then divided by the
    total of all of them.

    Parameters
    ----------
    size : int
        Number of weights to produce.
    delta : number, optional
        Exponent applied to every unnormalised weight (default 1).

    Returns
    -------
    numpy.ndarray
        1-D array of length ``size``.
    """
    raw_weights = []
    for position in range(size):
        raw_weights.append(((size - position + 1) / size) ** delta)
    total = sum(raw_weights)
    return np.array(raw_weights) / total

In [261]:
def get_weekly_probabilities(client_id):
    """Return one weighted visit score per matrix column for a client.

    The client's visit matrix is combined with one weight per row (from
    ``create_classic_weights``); entry ``j`` of the result is the sum over
    rows of ``weight[row] * matrix[row, j]``.

    NOTE(review): ``create_classic_weights`` puts its largest weight at
    index 0, and row 0 of the visit matrix holds the earliest days, so the
    oldest week currently counts the most - confirm this orientation is
    intended.
    """
    visits_matrix = create_client_visits_matrix(client_id)
    row_count = visits_matrix.get_shape()[0]
    row_weights = create_classic_weights(row_count)
    # Same vector-matrix product as `row_weights * visits_matrix`, which scipy
    # evaluates as the transposed matrix times the weight vector.
    return visits_matrix.transpose().dot(row_weights)

def get_first_weekly_probabilities(client_id):
    """Return, per position, the chance that it is the first one visited.

    With ``p`` the sequence from ``get_weekly_probabilities``, element ``j``
    of the result is ``p[j] * (1 - p[0]) * ... * (1 - p[j-1])``.

    Returns
    -------
    list
        One value per element of ``p``, in the same order.
    """
    first_visit_probabilities = []
    # Running product of (1 - p[k]) over all earlier positions k.
    no_earlier_visit = 1.0
    for probability in get_weekly_probabilities(client_id):
        first_visit_probabilities.append(probability * no_earlier_visit)
        no_earlier_visit = no_earlier_visit * (1 - probability)
    return first_visit_probabilities

def get_first_day(client_id):
    """Return the 1-based position with the highest first-visit probability.

    ``np.argmax`` picks the earliest position when several share the maximum,
    so a client whose probabilities are all equal gets 1.
    """
    first_visit_probabilities = get_first_weekly_probabilities(client_id)
    best_position = np.argmax(first_visit_probabilities)
    return best_position + 1

In [277]:
def check_accuracy():
    """Return the share of test clients whose next visit day is predicted exactly.

    For every id in ``test_data_x.id`` the prediction of ``get_first_day`` is
    compared with the ``nextvisit`` value that ``test_data_y`` stores for the
    same id. The number of matches is divided by the number of rows in
    ``test_data_y``.

    Returns
    -------
    float
        Fraction of exact matches.

    Raises
    ------
    KeyError
        If an id from ``test_data_x`` has no row in ``test_data_y``.
    ZeroDivisionError
        If ``test_data_y`` is empty.
    """
    # Build the id -> nextvisit lookup once instead of filtering the whole
    # answer frame with a boolean mask for every client (quadratic in the
    # number of clients). For a repeated id the last row wins, which is what
    # the former `list(...).pop()` selected as well.
    next_visit_by_id = dict(zip(test_data_y.id, test_data_y.nextvisit))
    correct = 0
    for client_id in test_data_x.id:
        if get_first_day(client_id) == next_visit_by_id[client_id]:
            correct += 1
    return correct / len(test_data_y.id)

In [278]:
# Score the predictor on every client in test_data_x; the cell output is the
# fraction of exact matches with test_data_y.nextvisit.
check_accuracy()

0.11715666666666667