In [2]:
import csv

from sklearn.linear_model import perceptron,bayes
import numpy as np
import time

import warnings
warnings.filterwarnings('ignore')

TEST_DATA_FILE_1 = "test_data_1.csv"
TEST_DATA_FILE_2 = "test_data_2.csv"
TRAIN_DATA_FILE_1 = "train_data_1.csv"
TRAIN_DATA_FILE_2 = "train_data_2.csv"
PROD_INDEX = 6
DIR_INDEX = 5
ERROR_THRESHOLD_1 = 0
ERROR_THRESHOLD_2 = 0.2
ERROR_THRESHOLD_3 = 0.4

In [3]:
def read_data_file(filename):    
    data_file = file(filename, 'r')
    data = csv.reader(data_file, delimiter=',')
    labels = []
    values = []
    for row in data:
        value = []
        prod_value = 0
        writer_value = 0
        for index in range(1,PROD_INDEX):
            value.append(float(row[index]))
        for index in range(PROD_INDEX + 1,PROD_INDEX + int(row[PROD_INDEX]) + 1):
            prod_value = max(float(row[index]),prod_value)
        value.append(prod_value)
        WRITER_INDEX = PROD_INDEX + int(row[PROD_INDEX]) + 1
        for index in range(WRITER_INDEX+1,len(row)):
            writer_value = max(float(row[index]),writer_value)
        value.append(writer_value)
        labels.append(str(row[0]))
        values.append(value)
    print 'Done Reading File',filename
    return labels,values

In [4]:
train_data_1_labels, train_data_1_values = read_data_file(TRAIN_DATA_FILE_1)
train_data_2_labels, train_data_2_values = read_data_file(TRAIN_DATA_FILE_2)

test_data_1_labels, test_data_1_values = read_data_file(TEST_DATA_FILE_1)
test_data_2_labels, test_data_2_values = read_data_file(TEST_DATA_FILE_2)

Done Reading File train_data_1.csv
Done Reading File train_data_2.csv
Done Reading File test_data_1.csv
Done Reading File test_data_2.csv


In [5]:
def get_error_metric(threshold,actual_labels,predicted_labels):
    error_count = 0
    errors = np.zeros(len(actual_labels))
    for index in range(0,len(actual_labels)):
        errors[index] = (abs(float(actual_labels[index])-float(predicted_labels[index][0])))
        if abs(float(actual_labels[index])-float(predicted_labels[index][0])) > threshold:
            error_count +=1
    return error_count*100/len(actual_labels),np.mean(errors),np.std(errors)

In [6]:
def get_predictions(classifier,val,data):
    if val==1:
        X = np.asarray(train_data_1_values)
        Y = np.asarray(train_data_1_labels, dtype="|S6")
    else:
        X = np.asarray(train_data_2_values)
        Y = np.asarray(train_data_2_labels, dtype="|S6")
    classifier.fit(X,Y)
    return classifier.predict(data)

In [7]:
classifier = perceptron.Perceptron(n_iter=1000, verbose=0, random_state=None, fit_intercept=True, eta0=0.002)
t = time.time()
obs_labels = get_predictions(classifier,1,test_data_1_values)
print time.time() -t

100.490999937


In [72]:
error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_1_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_1_labels,obs_labels)
error_3,mean_3,std_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_1_labels,obs_labels)
print 'Errors for Mechanism-1: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors for Mechanism-1: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors for Mechanism-1: 0.2 Tolerance:',error_3,mean_3,std_3

 Errors for Mechanism-1: Zero Tolerance: 99 4.20696668109 1.71374137261
Errors for Mechanism-1: 0.1 Tolerance: 99 4.20696668109 1.71374137261
Errors for Mechanism-1: 0.2 Tolerance: 97 4.20696668109 1.71374137261


In [None]:
classifier = perceptron.Perceptron(n_iter=1000, verbose=0, random_state=None, fit_intercept=True, eta0=0.002)
obs_labels = get_predictions(classifier,2,test_data_2_values)

In [48]:
error_1,mean_1,std_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_2_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_2_labels,obs_labels)
error_3,mean_3,std_3= get_error_metric(ERROR_THRESHOLD_3,test_data_2_labels,obs_labels)
print 'Errors for Mechanism-2: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors for Mechanism-2: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors for Mechanism-2: 0.2 Tolerance:',error_3,mean_3,std_3

Errors for Mechanism-1: Zero Tolerance: 96 0.742405884898 0.656520146896
Errors for Mechanism-1: 0.1 Tolerance: 86 0.742405884898 0.656520146896
Errors for Mechanism-1: 0.2 Tolerance: 68 0.742405884898 0.656520146896


In [49]:
classifier = perceptron.Perceptron(n_iter=1000, verbose=0, random_state=None, fit_intercept=True, eta0=0.002)

In [50]:
from sklearn import metrics
classifier.fit(X,Y)
l = classifier.predict(X)
training_error = 1 - metrics.accuracy_score(l, Y)

In [52]:
print training_error
avg_rating = 6.031

0.967546516659


In [55]:
def read_data_file_2(filename):    
    data_file = file(filename, 'r')
    data = csv.reader(data_file, delimiter=',')
    labels = []
    values = []
    for row in data:
        value = []
        prod_value = 0
        writer_value = 0
        for index in range(1,PROD_INDEX):
            value.append(float(row[index]))
        for index in range(PROD_INDEX + 1,PROD_INDEX + int(row[PROD_INDEX]) + 1):
            prod_value = max(float(row[index]),prod_value)
        value.append(prod_value)
        WRITER_INDEX = PROD_INDEX + int(row[PROD_INDEX]) + 1
        for index in range(WRITER_INDEX+1,len(row)):
            writer_value = max(float(row[index]),writer_value)
        value.append(writer_value)
        labels.append(str(float(row[0])-avg_rating))
        values.append(value)
    print 'Done Reading File',filename
    return labels,values

In [56]:
train_data_1_labels, train_data_1_values = read_data_file_2(TRAIN_DATA_FILE_1)
train_data_2_labels, train_data_2_values = read_data_file_2(TRAIN_DATA_FILE_2)

test_data_1_labels, test_data_1_values = read_data_file_2(TEST_DATA_FILE_1)
test_data_2_labels, test_data_2_values = read_data_file_2(TEST_DATA_FILE_2)

Done Reading File train_data_1.csv
Done Reading File train_data_2.csv
Done Reading File test_data_1.csv
Done Reading File test_data_2.csv


In [57]:
classifier = perceptron.Perceptron(n_iter=1000, verbose=0, random_state=None, fit_intercept=True, eta0=0.002)
obs_labels = get_predictions(classifier,1,test_data_1_values)

In [63]:
def get_error_metric_2(threshold,actual_labels,predicted_labels):
    error_count = 0
    errors = np.zeros(len(actual_labels))
    for index in range(0,len(actual_labels)):
        errors[index] = (abs(float(actual_labels[index])-float(predicted_labels[index])))
        if abs(float(actual_labels[index])-float(predicted_labels[index])) > threshold:
            error_count +=1
    return error_count*100/len(actual_labels),np.mean(errors),np.std(errors)

error_1,mean_1,std_1 = get_error_metric_2(ERROR_THRESHOLD_1,test_data_1_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric_2(ERROR_THRESHOLD_2,test_data_1_labels,obs_labels)
error_3,mean_3,std_3 = get_error_metric_2(ERROR_THRESHOLD_3,test_data_1_labels,obs_labels)
print 'Errors(2) for Mechanism-1: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors(2) for Mechanism-1: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors(2) for Mechanism-1: 0.2 Tolerance:',error_3,mean_3,std_3


classifier = perceptron.Perceptron(n_iter=1000, verbose=0, random_state=None, fit_intercept=True, eta0=0.002)
obs_labels = get_predictions(classifier,2,test_data_2_values)

error_1,mean_1,std_1 = get_error_metric_2(ERROR_THRESHOLD_1,test_data_2_labels,obs_labels)
error_2,mean_2,std_2 = get_error_metric_2(ERROR_THRESHOLD_2,test_data_2_labels,obs_labels)
error_3,mean_3,std_3= get_error_metric_2(ERROR_THRESHOLD_3,test_data_2_labels,obs_labels)
print 'Errors(2) for Mechanism-1: Zero Tolerance:',error_1,mean_1,std_1
print 'Errors(2) for Mechanism-1: 0.1 Tolerance:',error_2,mean_2,std_2
print 'Errors(2) for Mechanism-1: 0.2 Tolerance:',error_3,mean_3,std_3

Errors(2) for Mechanism-1: Zero Tolerance: 96 0.933232366941 0.71997756895
Errors(2) for Mechanism-1: 0.1 Tolerance: 87 0.933232366941 0.71997756895
Errors(2) for Mechanism-1: 0.2 Tolerance: 72 0.933232366941 0.71997756895
Errors(2) for Mechanism-1: Zero Tolerance: 93 0.711856339247 0.738475611967
Errors(2) for Mechanism-1: 0.1 Tolerance: 72 0.711856339247 0.738475611967
Errors(2) for Mechanism-1: 0.2 Tolerance: 50 0.711856339247 0.738475611967


In [64]:
print 'HERE'

HERE
