In [1]:
import csv

from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
import numpy as np

import warnings
warnings.filterwarnings('ignore')

TEST_DATA_FILE_1 = "test_data_1.csv"
TEST_DATA_FILE_2 = "test_data_2.csv"
TRAIN_DATA_FILE_1 = "train_data_1.csv"
TRAIN_DATA_FILE_2 = "train_data_2.csv"
PROD_INDEX = 6
DIR_INDEX = 5
ERROR_THRESHOLD_1 = 0
ERROR_THRESHOLD_2 = 0.2
ERROR_THRESHOLD_3 = 0.4

In [2]:
def read_data_file(filename):    
    data_file = file(filename, 'r')
    data = csv.reader(data_file, delimiter=',')
    labels = []
    values = []
    for row in data:
        value = []
        prod_value = 0
        writer_value = 0
        for index in range(1,PROD_INDEX):
            value.append(round(float(row[index]),1))
        for index in range(PROD_INDEX + 1,PROD_INDEX + int(row[PROD_INDEX]) + 1):
            prod_value = max(round(float(row[index]),1),prod_value)
        value.append(prod_value)
        WRITER_INDEX = PROD_INDEX + int(row[PROD_INDEX]) + 1
        for index in range(WRITER_INDEX+1,len(row)):
            writer_value = max(round(float(row[index]),1),writer_value)
        value.append(writer_value)
        labels.append(round(float(row[0]),1))
        values.append(value)
    print 'Done Reading File',filename
    return labels,values

In [8]:
train_data_1_labels, train_data_1_values = read_data_file(TRAIN_DATA_FILE_1)
train_data_2_labels, train_data_2_values = read_data_file(TRAIN_DATA_FILE_2)

test_data_1_labels, test_data_1_values = read_data_file(TEST_DATA_FILE_1)
test_data_2_labels, test_data_2_values = read_data_file(TEST_DATA_FILE_2)

Done Reading File train_data_1.csv
Done Reading File train_data_2.csv
Done Reading File test_data_1.csv
Done Reading File test_data_2.csv


In [9]:
def get_error_metric(threshold,actual_labels,predicted_labels):
    error_count = 0
    errors = np.zeros(len(actual_labels))
    for index in range(0,len(actual_labels)):
        errors[index] = (abs(float(actual_labels[index])-float(predicted_labels[index])))
        if abs(round(float(actual_labels[index]),1)-round(float(predicted_labels[index]),1)) > threshold:
            error_count +=1
    return error_count*100/len(actual_labels),np.mean(errors),np.std(errors)

In [10]:
def get_predictions(classifier,val,data):
    if val==1:
        X = np.asarray(train_data_1_values)
        Y = np.asarray(train_data_1_labels,dtype='S6')
    else:
        X = np.asarray(train_data_2_values)
        Y = np.asarray(train_data_2_labels,dtype="S6")
    classifier.fit(X,Y)
    return classifier.predict(data)


In [11]:
classifier_1 = SVC()
obs_labels_1 = get_predictions(classifier_1,1,test_data_1_values)
classifier_2 = GaussianNB()
obs_labels_2 = get_predictions(classifier_2,1,test_data_1_values)


In [13]:
for k in np.arange(0.1,1,0.1):
    obs_labels = [ k*float(x) + (1-k)*float(y) for x,y in zip(obs_labels_1,obs_labels_2) ]    
    print 'k :',k
    error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_1_labels,obs_labels)
    error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_1_labels,obs_labels)
    error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_1_labels,obs_labels)
    print 'Errors for Mechanism-1: Zero Tolerance:',error_1,mean_1,std_1
    print 'Errors for Mechanism-1: 0.1 Tolerance:',error_2,mean_2,std_2
    print 'Errors for Mechanism-1: 0.2 Tolerance:',error_3,mean_3,std_3

k : 0.1
Errors for Mechanism-1: Zero Tolerance: 87 0.289294677629 0.274334843524
Errors for Mechanism-1: 0.1 Tolerance: 59 0.289294677629 0.274334843524
Errors for Mechanism-1: 0.2 Tolerance: 26 0.289294677629 0.274334843524
k : 0.2
Errors for Mechanism-1: Zero Tolerance: 87 0.28338381653 0.269275626982
Errors for Mechanism-1: 0.1 Tolerance: 57 0.28338381653 0.269275626982
Errors for Mechanism-1: 0.2 Tolerance: 25 0.28338381653 0.269275626982
k : 0.3
Errors for Mechanism-1: Zero Tolerance: 86 0.277758546084 0.265807872095
Errors for Mechanism-1: 0.1 Tolerance: 56 0.277758546084 0.265807872095
Errors for Mechanism-1: 0.2 Tolerance: 24 0.277758546084 0.265807872095
k : 0.4
Errors for Mechanism-1: Zero Tolerance: 86 0.272756382518 0.263663848053
Errors for Mechanism-1: 0.1 Tolerance: 55 0.272756382518 0.263663848053
Errors for Mechanism-1: 0.2 Tolerance: 24 0.272756382518 0.263663848053
k : 0.5
Errors for Mechanism-1: Zero Tolerance: 85 0.268282128949 0.263008654985
Errors for Mechanism-1

In [14]:

classifier_1 = SVC()
obs_labels_1 = get_predictions(classifier_1,2,test_data_2_values)
classifier_2 = GaussianNB()
obs_labels_2 = get_predictions(classifier_2,2,test_data_2_values)


In [15]:
for k in np.arange(0.1,1,0.1):
    obs_labels = [ k*float(x) + (1-k)*float(y) for x,y in zip(obs_labels_1,obs_labels_2) ]    
    print 'k :',k
    error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_2_labels,obs_labels)
    error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_2_labels,obs_labels)
    error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_2_labels,obs_labels)
    print 'Errors for Mechanism-2: Zero Tolerance:',error_1,mean_1,std_1
    print 'Errors for Mechanism-2: 0.1 Tolerance:',error_2,mean_2,std_2
    print 'Errors for Mechanism-2: 0.2 Tolerance:',error_3,mean_3,std_3

k : 0.1
Errors for Mechanism-2: Zero Tolerance: 88 0.601280830809 0.844167047833
Errors for Mechanism-2: 0.1 Tolerance: 62 0.601280830809 0.844167047833
Errors for Mechanism-2: 0.2 Tolerance: 36 0.601280830809 0.844167047833
k : 0.2
Errors for Mechanism-2: Zero Tolerance: 88 0.554089138901 0.749165656681
Errors for Mechanism-2: 0.1 Tolerance: 62 0.554089138901 0.749165656681
Errors for Mechanism-2: 0.2 Tolerance: 35 0.554089138901 0.749165656681
k : 0.3
Errors for Mechanism-2: Zero Tolerance: 88 0.507719601904 0.656099525141
Errors for Mechanism-2: 0.1 Tolerance: 60 0.507719601904 0.656099525141
Errors for Mechanism-2: 0.2 Tolerance: 35 0.507719601904 0.656099525141
k : 0.4
Errors for Mechanism-2: Zero Tolerance: 88 0.46218087408 0.566119209998
Errors for Mechanism-2: 0.1 Tolerance: 60 0.46218087408 0.566119209998
Errors for Mechanism-2: 0.2 Tolerance: 34 0.46218087408 0.566119209998
k : 0.5
Errors for Mechanism-2: Zero Tolerance: 87 0.417438338382 0.481225733298
Errors for Mechanism-2

In [16]:
avg_rating = 6.031

def read_data_file_2(filename):    
    data_file = file(filename, 'r')
    data = csv.reader(data_file, delimiter=',')
    labels = []
    values = []
    for row in data:
        value = []
        prod_value = 0
        writer_value = 0
        for index in range(1,PROD_INDEX):
            value.append(float(row[index]))
        for index in range(PROD_INDEX + 1,PROD_INDEX + int(row[PROD_INDEX]) + 1):
            prod_value = max(float(row[index]),prod_value)
        value.append(prod_value)
        WRITER_INDEX = PROD_INDEX + int(row[PROD_INDEX]) + 1
        for index in range(WRITER_INDEX+1,len(row)):
            writer_value = max(float(row[index]),writer_value)
        value.append(writer_value)
        labels.append(float(row[0])-avg_rating)
        values.append(value)
    print 'Done Reading File',filename
    return labels,values

In [17]:
train_data_1_labels, train_data_1_values = read_data_file_2(TRAIN_DATA_FILE_1)
train_data_2_labels, train_data_2_values = read_data_file_2(TRAIN_DATA_FILE_2)

test_data_1_labels, test_data_1_values = read_data_file_2(TEST_DATA_FILE_1)
test_data_2_labels, test_data_2_values = read_data_file_2(TEST_DATA_FILE_2)

Done Reading File train_data_1.csv
Done Reading File train_data_2.csv
Done Reading File test_data_1.csv
Done Reading File test_data_2.csv


In [18]:
classifier_1 = SVC()
obs_labels_1 = get_predictions(classifier_1,1,test_data_1_values)
classifier_2 = GaussianNB()
obs_labels_2 = get_predictions(classifier_2,1,test_data_1_values)

for k in np.arange(0.1,1,0.1):
    obs_labels = [ k*float(x) + (1-k)*float(y) for x,y in zip(obs_labels_1,obs_labels_2) ]    
    print 'k :',k
    error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_1_labels,obs_labels)
    error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_1_labels,obs_labels)
    error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_1_labels,obs_labels)
    print 'Errors(2) for Mechanism-1: Zero Tolerance:',error_1,mean_1,std_1
    print 'Errors(2) for Mechanism-1: 0.1 Tolerance:',error_2,mean_2,std_2
    print 'Errors(2) for Mechanism-1: 0.2 Tolerance:',error_3,mean_3,std_3

k : 0.1
Errors(2) for Mechanism-1: Zero Tolerance: 87 0.28832540026 0.273788918384
Errors(2) for Mechanism-1: 0.1 Tolerance: 46 0.28832540026 0.273788918384
Errors(2) for Mechanism-1: 0.2 Tolerance: 21 0.28832540026 0.273788918384
k : 0.2
Errors(2) for Mechanism-1: Zero Tolerance: 86 0.282327996538 0.268743792561
Errors(2) for Mechanism-1: 0.1 Tolerance: 46 0.282327996538 0.268743792561
Errors(2) for Mechanism-1: 0.2 Tolerance: 21 0.282327996538 0.268743792561
k : 0.3
Errors(2) for Mechanism-1: Zero Tolerance: 86 0.276711380355 0.265297206434
Errors(2) for Mechanism-1: 0.1 Tolerance: 44 0.276711380355 0.265297206434
Errors(2) for Mechanism-1: 0.2 Tolerance: 20 0.276711380355 0.265297206434
k : 0.4
Errors(2) for Mechanism-1: Zero Tolerance: 86 0.271648636954 0.263357822318
Errors(2) for Mechanism-1: 0.1 Tolerance: 44 0.271648636954 0.263357822318
Errors(2) for Mechanism-1: 0.2 Tolerance: 20 0.271648636954 0.263357822318
k : 0.5
Errors(2) for Mechanism-1: Zero Tolerance: 85 0.26715707485

In [19]:
classifier_1 = SVC()
obs_labels_1 = get_predictions(classifier_1,2,test_data_2_values)
classifier_2 = GaussianNB()
obs_labels_2 = get_predictions(classifier_2,2,test_data_2_values)

for k in np.arange(0.1,1,0.1):
    obs_labels = [ k*float(x) + (1-k)*float(y) for x,y in zip(obs_labels_1,obs_labels_2) ]    
    print 'k :',k
    error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_2_labels,obs_labels)
    error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_2_labels,obs_labels)
    error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_2_labels,obs_labels)
    print 'Errors(2) for Mechanism-2: Zero Tolerance:',error_1,mean_1,std_1
    print 'Errors(2) for Mechanism-2: 0.1 Tolerance:',error_2,mean_2,std_2
    print 'Errors(2) for Mechanism-2: 0.2 Tolerance:',error_3,mean_3,std_3

k : 0.1
Errors(2) for Mechanism-2: Zero Tolerance: 88 0.640553872782 0.890678321002
Errors(2) for Mechanism-2: 0.1 Tolerance: 54 0.640553872782 0.890678321002
Errors(2) for Mechanism-2: 0.2 Tolerance: 34 0.640553872782 0.890678321002
k : 0.2
Errors(2) for Mechanism-2: Zero Tolerance: 88 0.588109043704 0.788633563623
Errors(2) for Mechanism-2: 0.1 Tolerance: 54 0.588109043704 0.788633563623
Errors(2) for Mechanism-2: 0.2 Tolerance: 34 0.588109043704 0.788633563623
k : 0.3
Errors(2) for Mechanism-2: Zero Tolerance: 88 0.536330592817 0.688542721215
Errors(2) for Mechanism-2: 0.1 Tolerance: 52 0.536330592817 0.688542721215
Errors(2) for Mechanism-2: 0.2 Tolerance: 32 0.536330592817 0.688542721215
k : 0.4
Errors(2) for Mechanism-2: Zero Tolerance: 88 0.48533102553 0.591481400731
Errors(2) for Mechanism-2: 0.1 Tolerance: 52 0.48533102553 0.591481400731
Errors(2) for Mechanism-2: 0.2 Tolerance: 32 0.48533102553 0.591481400731
k : 0.5
Errors(2) for Mechanism-2: Zero Tolerance: 87 0.43539593249