In [1]:
import csv

from sklearn.neighbors import KNeighborsRegressor
import numpy as np
from sklearn.metrics import mean_squared_error

import warnings
warnings.filterwarnings('ignore')

TEST_DATA_FILE_1 = "test_data_1.csv"
TEST_DATA_FILE_2 = "test_data_2.csv"
TRAIN_DATA_FILE_1 = "train_data_1.csv"
TRAIN_DATA_FILE_2 = "train_data_2.csv"
PROD_INDEX = 6
DIR_INDEX = 5
ERROR_THRESHOLD_1 = 0
ERROR_THRESHOLD_2 = 0.2
ERROR_THRESHOLD_3 = 0.4

In [2]:
def read_data_file(filename):    
    data_file = file(filename, 'r')
    data = csv.reader(data_file, delimiter=',')
    labels = []
    values = []
    for row in data:
        value = []
        prod_value = 0
        writer_value = 0
        for index in range(1,PROD_INDEX):
            value.append(float(row[index]))
        for index in range(PROD_INDEX + 1,PROD_INDEX + int(row[PROD_INDEX]) + 1):
            prod_value = max(float(row[index]),prod_value)
        value.append(prod_value)
        WRITER_INDEX = PROD_INDEX + int(row[PROD_INDEX]) + 1
        for index in range(WRITER_INDEX+1,len(row)):
            writer_value = max(float(row[index]),writer_value)
        value.append(writer_value)
        labels.append(float(row[0]))
        values.append(value)
    print 'Done Reading File',filename
    return labels,values

In [33]:
train_data_1_labels, train_data_1_values = read_data_file(TRAIN_DATA_FILE_1)
train_data_2_labels, train_data_2_values = read_data_file(TRAIN_DATA_FILE_2)

test_data_1_labels, test_data_1_values = read_data_file(TEST_DATA_FILE_1)
test_data_2_labels, test_data_2_values = read_data_file(TEST_DATA_FILE_2)

Done Reading File train_data_1.csv
Done Reading File train_data_2.csv
Done Reading File test_data_1.csv
Done Reading File test_data_2.csv


In [34]:
def get_error_metric(threshold,actual_labels,predicted_labels):
    error_count = 0
    errors = np.zeros(len(actual_labels))
    for index in range(0,len(actual_labels)):
        errors[index] = (abs(float(actual_labels[index])-float(predicted_labels[index])))
        if abs(float(actual_labels[index])-float(predicted_labels[index])) > threshold:
            error_count +=1
    return error_count*100/len(actual_labels),np.mean(errors),np.std(errors)

In [35]:
def get_predictions(classifier,val,data):
    if val==1:
        X = np.asarray(train_data_1_values)
        Y = np.asarray(train_data_1_labels,dtype='S6')
    else:
        X = np.asarray(train_data_2_values)
        Y = np.asarray(train_data_2_labels,dtype="S6")
    classifier.fit(X,Y)
    return classifier.predict(data)


In [36]:
classifier = KNeighborsClassifier()
obs_labels = get_predictions(classifier,1,test_data_1_values)

In [37]:
error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_1_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_1_labels,obs_labels)
error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_1_labels,obs_labels)
print 'Errors for Mechanism-1: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors for Mechanism-1: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors for Mechanism-1: 0.2 Tolerance:',error_3,mean_3,std_3



Errors for Mechanism-1: Zero Tolerance: 79 0.31583729987 0.370785333084
Errors for Mechanism-1: 0.1 Tolerance: 50 0.31583729987 0.370785333084
Errors for Mechanism-1: 0.2 Tolerance: 27 0.31583729987 0.370785333084


In [19]:
classifier = KNeighborsClassifier()
obs_labels = get_predictions(classifier,2,test_data_2_values)

In [20]:
error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_2_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_2_labels,obs_labels)
error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_2_labels,obs_labels)
print 'Errors for Mechanism-2: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors for Mechanism-2: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors for Mechanism-2: 0.2 Tolerance:',error_3,mean_3,std_3

Errors for Mechanism-2: Zero Tolerance: 78 0.349632193855 0.428938687718
Errors for Mechanism-2: 0.1 Tolerance: 50 0.349632193855 0.428938687718
Errors for Mechanism-2: 0.2 Tolerance: 31 0.349632193855 0.428938687718


In [21]:
print 'n_neighbors:10'
classifier = KNeighborsClassifier(n_neighbors=10)
obs_labels = get_predictions(classifier,1,test_data_1_values)

error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_1_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_1_labels,obs_labels)
error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_1_labels,obs_labels)
print 'Errors for Mechanism-1: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors for Mechanism-1: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors for Mechanism-1: 0.2 Tolerance:',error_3,mean_3,std_3  

print 'n_neighbors:15'
classifier = KNeighborsClassifier(n_neighbors=15)
obs_labels = get_predictions(classifier,1,test_data_1_values)

error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_1_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_1_labels,obs_labels)
error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_1_labels,obs_labels)
print 'Errors for Mechanism-1: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors for Mechanism-1: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors for Mechanism-1: 0.2 Tolerance:',error_3,mean_3,std_3

print 'n_neighbors:20'
classifier = KNeighborsClassifier(n_neighbors=20)
obs_labels = get_predictions(classifier,1,test_data_1_values)

error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_1_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_1_labels,obs_labels)
error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_1_labels,obs_labels)
print 'Errors for Mechanism-1: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors for Mechanism-1: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors for Mechanism-1: 0.2 Tolerance:',error_3,mean_3,std_3

n_neighbors:10
Errors for Mechanism-1: Zero Tolerance: 80 0.282518390307 0.325953478009
Errors for Mechanism-1: 0.1 Tolerance: 48 0.282518390307 0.325953478009
Errors for Mechanism-1: 0.2 Tolerance: 25 0.282518390307 0.325953478009
n_neighbors:15
Errors for Mechanism-1: Zero Tolerance: 81 0.276287321506 0.311540987299
Errors for Mechanism-1: 0.1 Tolerance: 49 0.276287321506 0.311540987299
Errors for Mechanism-1: 0.2 Tolerance: 24 0.276287321506 0.311540987299
n_neighbors:20
Errors for Mechanism-1: Zero Tolerance: 80 0.275638251839 0.312233223959
Errors for Mechanism-1: 0.1 Tolerance: 49 0.275638251839 0.312233223959
Errors for Mechanism-1: 0.2 Tolerance: 24 0.275638251839 0.312233223959


In [22]:
print 'n_neighbors:10'
classifier = KNeighborsClassifier(n_neighbors=10)
obs_labels = get_predictions(classifier,2,test_data_2_values)

error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_2_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_2_labels,obs_labels)
error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_2_labels,obs_labels)
print 'Errors for Mechanism-2: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors for Mechanism-2: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors for Mechanism-2: 0.2 Tolerance:',error_3,mean_3,std_3  

print 'n_neighbors:15'
classifier = KNeighborsClassifier(n_neighbors=15)
obs_labels = get_predictions(classifier,2,test_data_2_values)

error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_2_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_2_labels,obs_labels)
error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_2_labels,obs_labels)
print 'Errors for Mechanism-2: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors for Mechanism-2: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors for Mechanism-2: 0.2 Tolerance:',error_3,mean_3,std_3

print 'n_neighbors:20'
classifier = KNeighborsClassifier(n_neighbors=20)
obs_labels = get_predictions(classifier,2,test_data_2_values)

error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_2_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_2_labels,obs_labels)
error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_2_labels,obs_labels)
print 'Errors for Mechanism-2: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors for Mechanism-2: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors for Mechanism-2: 0.2 Tolerance:',error_3,mean_3,std_3

n_neighbors:10
Errors for Mechanism-2: Zero Tolerance: 78 0.288533102553 0.347735536181
Errors for Mechanism-2: 0.1 Tolerance: 48 0.288533102553 0.347735536181
Errors for Mechanism-2: 0.2 Tolerance: 25 0.288533102553 0.347735536181
n_neighbors:15
Errors for Mechanism-2: Zero Tolerance: 79 0.283729987019 0.327921906773
Errors for Mechanism-2: 0.1 Tolerance: 49 0.283729987019 0.327921906773
Errors for Mechanism-2: 0.2 Tolerance: 25 0.283729987019 0.327921906773
n_neighbors:20
Errors for Mechanism-2: Zero Tolerance: 79 0.27797490264 0.314298599946
Errors for Mechanism-2: 0.1 Tolerance: 49 0.27797490264 0.314298599946
Errors for Mechanism-2: 0.2 Tolerance: 24 0.27797490264 0.314298599946


In [23]:
avg_rating = 6.031

def read_data_file_2(filename):    
    data_file = file(filename, 'r')
    data = csv.reader(data_file, delimiter=',')
    labels = []
    values = []
    for row in data:
        value = []
        prod_value = 0
        writer_value = 0
        for index in range(1,PROD_INDEX):
            value.append(float(row[index]))
        for index in range(PROD_INDEX + 1,PROD_INDEX + int(row[PROD_INDEX]) + 1):
            prod_value = max(float(row[index]),prod_value)
        value.append(prod_value)
        WRITER_INDEX = PROD_INDEX + int(row[PROD_INDEX]) + 1
        for index in range(WRITER_INDEX+1,len(row)):
            writer_value = max(float(row[index]),writer_value)
        value.append(writer_value)
        labels.append(float(row[0])-avg_rating)
        values.append(value)
    print 'Done Reading File',filename
    return labels,values

train_data_1_labels, train_data_1_values = read_data_file_2(TRAIN_DATA_FILE_1)
train_data_2_labels, train_data_2_values = read_data_file_2(TRAIN_DATA_FILE_2)

test_data_1_labels, test_data_1_values = read_data_file_2(TEST_DATA_FILE_1)
test_data_2_labels, test_data_2_values = read_data_file_2(TEST_DATA_FILE_2)

Done Reading File train_data_1.csv
Done Reading File train_data_2.csv
Done Reading File test_data_1.csv
Done Reading File test_data_2.csv


In [25]:
print 'n_neighbors:5'
classifier = KNeighborsClassifier()
obs_labels = get_predictions(classifier,1,test_data_1_values)

error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_1_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_1_labels,obs_labels)
error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_1_labels,obs_labels)
print 'Errors(2) for Mechanism-1: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors(2) for Mechanism-1: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors(2) for Mechanism-1: 0.2 Tolerance:',error_3,mean_3,std_3

n_neighbors:5
Errors(2) for Mechanism-1: Zero Tolerance: 97 0.307659022068 0.350714443782
Errors(2) for Mechanism-1: 0.1 Tolerance: 46 0.307659022068 0.350714443782
Errors(2) for Mechanism-1: 0.2 Tolerance: 27 0.307659022068 0.350714443782


In [26]:
print 'n_neighbors:5'
classifier = KNeighborsClassifier()
obs_labels = get_predictions(classifier,2,test_data_2_values)

n_neighbors:5


In [27]:
error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_2_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_2_labels,obs_labels)
error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_2_labels,obs_labels)
print 'Errors(2) for Mechanism-2: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors(2) for Mechanism-2: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors(2) for Mechanism-2: 0.2 Tolerance:',error_3,mean_3,std_3

Errors(2) for Mechanism-2: Zero Tolerance: 97 0.315404586759 0.379238114764
Errors(2) for Mechanism-2: 0.1 Tolerance: 45 0.315404586759 0.379238114764
Errors(2) for Mechanism-2: 0.2 Tolerance: 27 0.315404586759 0.379238114764


In [28]:
print 'n_neighbors:10'
classifier = KNeighborsClassifier(n_neighbors=10)
obs_labels = get_predictions(classifier,1,test_data_1_values)

error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_1_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_1_labels,obs_labels)
error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_1_labels,obs_labels)
print 'Errors(2) for Mechanism-1: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors(2) for Mechanism-1: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors(2) for Mechanism-1: 0.2 Tolerance:',error_3,mean_3,std_3  

print 'n_neighbors:15'
classifier = KNeighborsClassifier(n_neighbors=15)
obs_labels = get_predictions(classifier,1,test_data_1_values)

error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_1_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_1_labels,obs_labels)
error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_1_labels,obs_labels)
print 'Errors(2) for Mechanism-1: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors(2) for Mechanism-1: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors(2) for Mechanism-1: 0.2 Tolerance:',error_3,mean_3,std_3

print 'n_neighbors:20'
classifier = KNeighborsClassifier(n_neighbors=20)
obs_labels = get_predictions(classifier,1,test_data_1_values)

error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_1_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_1_labels,obs_labels)
error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_1_labels,obs_labels)
print 'Errors(2) for Mechanism-1: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors(2) for Mechanism-1: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors(2) for Mechanism-1: 0.2 Tolerance:',error_3,mean_3,std_3

n_neighbors:10
Errors(2) for Mechanism-1: Zero Tolerance: 98 0.291778450887 0.328591259373
Errors(2) for Mechanism-1: 0.1 Tolerance: 45 0.291778450887 0.328591259373
Errors(2) for Mechanism-1: 0.2 Tolerance: 25 0.291778450887 0.328591259373
n_neighbors:15
Errors(2) for Mechanism-1: Zero Tolerance: 98 0.28649935093 0.317987219568
Errors(2) for Mechanism-1: 0.1 Tolerance: 45 0.28649935093 0.317987219568
Errors(2) for Mechanism-1: 0.2 Tolerance: 24 0.28649935093 0.317987219568
n_neighbors:20
Errors(2) for Mechanism-1: Zero Tolerance: 98 0.286585893553 0.315902040135
Errors(2) for Mechanism-1: 0.1 Tolerance: 46 0.286585893553 0.315902040135
Errors(2) for Mechanism-1: 0.2 Tolerance: 24 0.286585893553 0.315902040135


In [29]:
print 'n_neighbors:10'
classifier = KNeighborsClassifier(n_neighbors=10)
obs_labels = get_predictions(classifier,2,test_data_2_values)

error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_2_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_2_labels,obs_labels)
error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_2_labels,obs_labels)
print 'Errors(2) for Mechanism-2: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors(2) for Mechanism-2: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors(2) for Mechanism-2: 0.2 Tolerance:',error_3,mean_3,std_3  

print 'n_neighbors:15'
classifier = KNeighborsClassifier(n_neighbors=15)
obs_labels = get_predictions(classifier,2,test_data_2_values)

error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_2_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_2_labels,obs_labels)
error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_2_labels,obs_labels)
print 'Errors(2) for Mechanism-2: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors(2) for Mechanism-2: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors(2) for Mechanism-2: 0.2 Tolerance:',error_3,mean_3,std_3

print 'n_neighbors:20'
classifier = KNeighborsClassifier(n_neighbors=20)
obs_labels = get_predictions(classifier,2,test_data_2_values)

error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_2_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_2_labels,obs_labels)
error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_2_labels,obs_labels)
print 'Errors(2) for Mechanism-2: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors(2) for Mechanism-2: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors(2) for Mechanism-2: 0.2 Tolerance:',error_3,mean_3,std_3

n_neighbors:10
Errors(2) for Mechanism-2: Zero Tolerance: 97 0.287840761575 0.339985189053
Errors(2) for Mechanism-2: 0.1 Tolerance: 44 0.287840761575 0.339985189053
Errors(2) for Mechanism-2: 0.2 Tolerance: 25 0.287840761575 0.339985189053
n_neighbors:15
Errors(2) for Mechanism-2: Zero Tolerance: 97 0.287494591086 0.332156971073
Errors(2) for Mechanism-2: 0.1 Tolerance: 44 0.287494591086 0.332156971073
Errors(2) for Mechanism-2: 0.2 Tolerance: 24 0.287494591086 0.332156971073
n_neighbors:20
Errors(2) for Mechanism-2: Zero Tolerance: 97 0.283556901774 0.323930295152
Errors(2) for Mechanism-2: 0.1 Tolerance: 45 0.283556901774 0.323930295152
Errors(2) for Mechanism-2: 0.2 Tolerance: 23 0.283556901774 0.323930295152
