In [1]:
import numpy as np
# This is the only scipy method you are allowed to use
# Use of scipy is not allowed otherwise
from scipy.linalg import khatri_rao
import random as rnd
import time as tm

In [12]:
def get_renamed_labels( y ):
################################
#  Non Editable Region Ending  #
################################

	# Since the dataset contain 0/1 labels and SVMs prefer -1/+1 labels,
	# Decide here how you want to rename the labels
	# For example, you may map 1 -> 1 and 0 -> -1 or else you may want to go with 1 -> -1 and 0 -> 1
	# Use whatever convention you seem fit but use the same mapping throughout your code
	# If you use one mapping for train and another for test, you will get poor accuracy
	
	y_new=2*y-1
	return y_new.reshape( ( y_new.size, ) )					# Reshape y_new as a vector

In [3]:
################################
def get_features( X ):
################################
#  Non Editable Region Ending  #
################################

	# Use this function to transform your input features (that are 0/1 valued)
	# into new features that can be fed into a linear model to solve the problem
	# Your new features may have a different dimensionality than the input features
	# For example, in this application, X will be 8 dimensional but your new
	# features can be 2 dimensional, 10 dimensional, 1000 dimensional, 123456 dimensional etc
	# Keep in mind that the more dimensions you use, the slower will be your solver too
	# so use only as many dimensions as are absolutely required to solve the problem
	
	return X_new



In [4]:
def solver( X, y, timeout, spacing ):
	(n, d) = X.shape
	t = 0
	totTime = 0
	
	# W is the model vector and will get returned once timeout happens
	# B is the bias term that will get returned once timeout happens
	# The bias term is optional. If you feel you do not need a bias term at all, just keep it set to 0
	# However, if you do end up using a bias term, you are allowed to internally use a model vector
	# that hides the bias inside the model vector e.g. by defining a new variable such as
	# W_extended = np.concatenate( ( W, [B] ) )
	# However, you must maintain W and B variables separately as well so that they can get
	# returned when timeout happens. Take care to update W, B whenever you update your W_extended
	# variable otherwise you will get wrong results.
	# Also note that the dimensionality of W may be larger or smaller than 9
	
	W = []
	B = 0
	tic = tm.perf_counter()
################################
#  Non Editable Region Ending  #
################################

	# You may reinitialize W, B to your liking here e.g. set W to its correct dimensionality
	# You may also define new variables here e.g. step_length, mini-batch size etc

################################
# Non Editable Region Starting #
################################
	while True:
		t = t + 1
		if t % spacing == 0:
			toc = tm.perf_counter()
			totTime = totTime + (toc - tic)
			if totTime > timeout:
				return ( W.reshape( ( W.size, ) ), B, totTime )			# Reshape W as a vector
			else:
				tic = tm.perf_counter()
################################
#  Non Editable Region Ending  #
################################

		# Write all code to perform your method updates here within the infinite while loop
		# The infinite loop will terminate once timeout is reached
		# Do not try to bypass the timer check e.g. by using continue
		# It is very easy for us to detect such bypasses which will be strictly penalized
		
		# Note that most likely, you should be using get_features( X ) and get_renamed_labels( y )
		# in this part of the code instead of X and y -- please take care
		
		# Please note that once timeout is reached, the code will simply return W, B
		# Thus, if you wish to return the average model (as is sometimes done for GD),
		# you need to make sure that W, B store the averages at all times
		# One way to do so is to define a "running" variable w_run, b_run
		# Make all GD updates to W_run e.g. W_run = W_run - step * delW (similarly for B_run)
		# Then use a running average formula to update W (similarly for B)
		# W = (W * (t-1) + W_run)/t
		# This way, W, B will always store the averages and can be returned at any time
		# In this scheme, W, B play the role of the "cumulative" variables in the course module optLib (see the cs771 library)
		# W_run, B_run on the other hand, play the role of the "theta" variable in the course module optLib (see the cs771 library)
		
	return ( W.reshape( ( W.size, ) ), B, totTime )			# This return statement will never be reached

EVAL.PY

In [5]:
import numpy as np
from submit import get_renamed_labels
from submit import solver
from submit import get_features
import time as tm


In [6]:
def get_hinge_loss( X, y, w, b ):
	XX = get_features( X )
	scores = XX.dot( w ) + b
	hinge_loss = 1 - np.multiply( scores, get_renamed_labels( y ) )
	hinge_loss[ hinge_loss < 0 ] = 0
	return np.average( hinge_loss )


In [7]:
def get_misclassification_rate( X, y, w, b ):
	XX = get_features( X )
	scores = XX.dot( w ) + b
	predictions = np.ones_like( scores )
	predictions[ scores < 0 ] = -1
	return 1 - np.average( get_renamed_labels( y ) == predictions )


In [8]:
Z_trn = np.loadtxt( "secret_train.dat" )
Z_tst = np.loadtxt( "secret_test.dat" )


In [9]:
num_trials = 5

# Try various timeouts - the timeouts are in seconds
timeouts = np.array( [ 0.2, 0.5, 1, 2, 5 ] )

# Try checking for timeout every 10 iterations
spacing = 10

result = np.zeros( ( len( timeouts ), 4 ) )

In [10]:
for i in range( len( timeouts ) ):
	to = timeouts[i]
	print( "\ttimeout =", to )
	avg_hinge = 0
	avg_error = 0
	avg_time_reported = 0
	avg_time_wrapper = 0
	for t in range( num_trials ):
		print( "\t\tTrial %d of %d" % ( t + 1, num_trials ) )
		tic = tm.perf_counter()
		( w, b, totTime ) = solver( Z_trn[:,:-1], Z_trn[:,-1], to, spacing )
		toc = tm.perf_counter()
		avg_hinge += get_hinge_loss( Z_tst[:,:-1], Z_tst[:,-1], w, b )
		avg_error += get_misclassification_rate( Z_tst[:,:-1], Z_tst[:,-1], w, b )
		avg_time_reported += totTime
		avg_time_wrapper += toc - tic
	result[i, 0] = avg_hinge/num_trials
	result[i, 1] = avg_error/num_trials
	result[i, 2] = avg_time_reported/num_trials
	result[i, 3] = avg_time_wrapper/num_trials

np.savetxt( "result", result, fmt = "%.6f" )

	timeout = 0.2
		Trial 1 of 5


AttributeError: 'list' object has no attribute 'reshape'