In [1]:
import csv

import sklearn.neural_network
from sklearn.linear_model import perceptron,bayes
from sklearn.linear_model import BayesianRidge, LinearRegression
import numpy as np


import warnings
warnings.filterwarnings('ignore')

TEST_DATA_FILE_1 = "test_data_1.csv"
TEST_DATA_FILE_2 = "test_data_2.csv"
TRAIN_DATA_FILE_1 = "train_data_1.csv"
TRAIN_DATA_FILE_2 = "train_data_2.csv"
PROD_INDEX = 6
DIR_INDEX = 5
ERROR_THRESHOLD_1 = 0
ERROR_THRESHOLD_2 = 0.2
ERROR_THRESHOLD_3 = 0.4

In [2]:
def read_data_file(filename):    
    data_file = file(filename, 'r')
    data = csv.reader(data_file, delimiter=',')
    labels = []
    values = []
    for row in data:
        value = []
        prod_value = 0
        writer_value = 0
        for index in range(1,PROD_INDEX):
            value.append(float(row[index]))
        for index in range(PROD_INDEX + 1,PROD_INDEX + int(row[PROD_INDEX]) + 1):
            prod_value = max(float(row[index]),prod_value)
        value.append(prod_value)
        WRITER_INDEX = PROD_INDEX + int(row[PROD_INDEX]) + 1
        for index in range(WRITER_INDEX+1,len(row)):
            writer_value = max(float(row[index]),writer_value)
        value.append(writer_value)
        labels.append(str(row[0]))
        values.append(value)
    print 'Done Reading File',filename
    return labels,values

In [3]:
train_data_1_labels, train_data_1_values = read_data_file(TRAIN_DATA_FILE_1)
train_data_2_labels, train_data_2_values = read_data_file(TRAIN_DATA_FILE_2)

test_data_1_labels, test_data_1_values = read_data_file(TEST_DATA_FILE_1)
test_data_2_labels, test_data_2_values = read_data_file(TEST_DATA_FILE_2)

Done Reading File train_data_1.csv
Done Reading File train_data_2.csv
Done Reading File test_data_1.csv
Done Reading File test_data_2.csv


In [4]:
def get_error_metric(threshold,actual_labels,predicted_labels):
    error = 0
    errors = np.array()
    for index in range(0,len(actual_labels)):
        if abs(float(actual_labels[index])-float(predicted_labels[index][0])) > threshold:
            error +=1
    return error*100/len(actual_labels)

In [16]:
def get_predictions(classifier,val,data):
    if val==1:
        X = np.asarray(train_data_1_values)
        Y = np.asarray(train_data_1_labels, dtype="|S6")
    else:
        X = np.asarray(train_data_2_values)
        Y = np.asarray(train_data_2_labels, dtype="|S6")
    classifier.fit(X,Y)
    return classifier.predict(data)

In [17]:
classifier = perceptron.Perceptron(n_iter=1000, verbose=0, random_state=None, fit_intercept=True, eta0=0.002)
obs_labels = get_predictions(classifier,1,test_data_1_values)

In [19]:
error_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_1_labels,obs_labels)
error_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_1_labels,obs_labels)
error_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_1_labels,obs_labels)
print 'Errors for Mechanism-1:',error_1,error_2,error_3

Errors for Mechanism-1: 97 89 76


In [20]:
classifier = perceptron.Perceptron(n_iter=1000, verbose=0, random_state=None, fit_intercept=True, eta0=0.002)
obs_labels = get_predictions(classifier,2,test_data_2_values)

In [21]:
error_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_1_labels,obs_labels)
error_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_1_labels,obs_labels)
error_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_1_labels,obs_labels)
print 'Errors for Mechanism-2',error_1,error_2,error_3

Errors for Mechanism-2 99 99 99


In [11]:
classifier = perceptron.Perceptron(n_iter=1000, verbose=0, random_state=None, fit_intercept=True, eta0=0.002)

In [13]:
from sklearn import metrics
classifier.fit(X,Y)
l = classifier.predict(X)
training_error = 1 - metrics.accuracy_score(l, Y)

In [23]:
print training_error
avg_rating = 6.031

0.933362180874


In [111]:
def read_data_file_2(filename):    
    data_file = file(filename, 'r')
    data = csv.reader(data_file, delimiter=',')
    labels = []
    values = []
    for row in data:
        value = []
        prod_value = 0
        writer_value = 0
        for index in range(1,PROD_INDEX):
            value.append(float(row[index]))
        for index in range(PROD_INDEX + 1,PROD_INDEX + int(row[PROD_INDEX]) + 1):
            prod_value = max(float(row[index]),prod_value)
        value.append(prod_value)
        WRITER_INDEX = PROD_INDEX + int(row[PROD_INDEX]) + 1
        for index in range(WRITER_INDEX+1,len(row)):
            writer_value = max(float(row[index]),writer_value)
        value.append(writer_value)
        labels.append(str(float(row[0]-avg_rating)))
        values.append(value)
    print 'Done Reading File',filename
    return labels,values

In [24]:
train_data_1_labels, train_data_1_values = read_data_file(TRAIN_DATA_FILE_1)
train_data_2_labels, train_data_2_values = read_data_file(TRAIN_DATA_FILE_2)

test_data_1_labels, test_data_1_values = read_data_file(TEST_DATA_FILE_1)
test_data_2_labels, test_data_2_values = read_data_file(TEST_DATA_FILE_2)

Done Reading File train_data_1.csv
Done Reading File train_data_2.csv
Done Reading File test_data_1.csv
Done Reading File test_data_2.csv


In [25]:
classifier = perceptron.Perceptron(n_iter=1000, verbose=0, random_state=None, fit_intercept=True, eta0=0.002)
obs_labels = get_predictions(classifier,1,test_data_1_values)

In [None]:
error_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_1_labels,obs_labels)
error_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_1_labels,obs_labels)
error_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_1_labels,obs_labels)
print 'Errors for Mechanism-1:',error_1,error_2,error_3

classifier = perceptron.Perceptron(n_iter=1000, verbose=0, random_state=None, fit_intercept=True, eta0=0.002)
obs_labels = get_predictions(classifier,2,test_data_2_values)

error_1 = get_error_metric(ERROR_THRESHOLD_1,test_data_2_labels,obs_labels)
error_2 = get_error_metric(ERROR_THRESHOLD_2,test_data_2_labels,obs_labels)
error_3 = get_error_metric(ERROR_THRESHOLD_3,test_data_2_labels,obs_labels)
print 'Errors for Mechanism-2:',error_1,error_2,error_3
