In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.metrics import precision_recall_curve, average_precision_score, roc_auc_score, roc_curve
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

In [9]:
# Load the training and test sets from text files given by relative path.
# Later cells slice every row as: columns 0-63 (challenge bits), columns 64-67
# and 68-71 (two 4-bit numbers); for training rows the last column is used as
# the label by my_fit.
Z_trn = np.loadtxt( "train.dat" )
Z_tst = np.loadtxt( "test.dat" )

In [3]:
def binary_to_decimal(data, row):
    """Decode the two 4-bit numbers stored in columns 64-71 of one row.

    Columns 64-67 and 68-71 of ``data[row]`` are each read as a binary
    number, most significant bit first (column 64 / column 68 is the MSB).

    Args:
        data: 2-D indexable of numeric 0/1 values with at least 72 columns.
        row: Index of the row to decode.

    Returns:
        A two-element list ``[p, q]`` of Python ints, where ``p`` comes from
        columns 64-67 and ``q`` from columns 68-71.
    """
    bits = data[row]
    # Each value is truncated to int before being rendered as a binary digit.
    first_word = ''.join(str(int(bit)) for bit in bits[64:68])
    second_word = ''.join(str(int(bit)) for bit in bits[68:72])
    return [int(first_word, 2), int(second_word, 2)]

In [4]:
def create_feature(data):
    """Return the feature matrix with a constant 1.0 column appended.

    The previous implementation called ``np.append(challenge, 1.0)`` inside a
    loop and threw the result away; ``np.append`` never modifies its input, so
    the function returned ``data`` unchanged. This version actually builds the
    augmented array.

    Args:
        data: Array-like of challenges, one per row (a single 1-D challenge is
            also accepted). The input is not modified.

    Returns:
        A new float ndarray with the same leading shape as ``data`` and one
        extra trailing entry per challenge, equal to 1.0.
    """
    features = np.asarray(data, dtype=float)
    # One bias entry per challenge; shape[:-1] keeps this valid for 1-D input
    # and for an empty batch of shape (0, k).
    bias = np.ones(features.shape[:-1] + (1,), dtype=float)
    return np.concatenate((features, bias), axis=-1)

In [5]:
def transform_train_data(data):
    """Re-encode training rows as 64 challenge bits, p, q and the last column.

    For every row, columns 0-63 are kept, the pair returned by
    ``binary_to_decimal`` replaces the columns in between, and the row's last
    column is carried over as the final entry.

    Args:
        data: 2-D array of training rows.

    Returns:
        ndarray with one 67-entry row per input row
        (64 bits + p + q + last column).
    """
    num_rows = np.shape(data)[0]
    encoded_rows = [
        np.concatenate((
            data[idx][0:64],
            binary_to_decimal(data, idx),
            [data[idx][-1]],
        ))
        for idx in range(num_rows)
    ]
    return np.array(encoded_rows)

In [6]:
def transform_test_data(data):
    """Re-encode test rows as 64 challenge bits followed by p and q.

    For every row, columns 0-63 are kept and the pair returned by
    ``binary_to_decimal`` is appended. No other column is read.

    Args:
        data: 2-D array of test rows.

    Returns:
        ndarray with one 66-entry row per input row (64 bits + p + q).
    """
    num_rows = np.shape(data)[0]
    encoded_rows = [
        np.concatenate((data[idx][0:64], binary_to_decimal(data, idx)))
        for idx in range(num_rows)
    ]
    return np.array(encoded_rows)

In [7]:
def my_fit( Z_train ):
    """Train one logistic-regression classifier per unordered (p, q) pair.

    Rows are first re-encoded with ``transform_train_data`` so that columns 64
    and 65 hold the decoded integers p and q. Rows are then grouped under the
    key ``'<min>$<max>'`` of that pair. When ``p < q`` the row is stored as is;
    otherwise its label (last entry) is replaced by ``1.0 - label`` so that
    every group is expressed in the same orientation.

    Args:
        Z_train: 2-D array of raw training rows.

    Returns:
        dict mapping each ``'<min>$<max>'`` key to a fitted
        ``LogisticRegression(C=100, max_iter=500)``.
    """
    Z_train = transform_train_data(Z_train)

    # Gather rows in plain lists and build each array once at the end.
    # Growing an ndarray with np.append(..., axis=0) copies the whole array on
    # every call, which is quadratic in the number of rows per key.
    rows_by_pair = {}
    for challenge in Z_train:
        p = int(challenge[64])
        q = int(challenge[65])
        key = str(min(p, q)) + '$' + str(max(p, q))

        # np.delete returns a copy, so flipping the label below does not
        # touch Z_train.
        sample = np.delete(challenge, [64, 65])
        if p >= q:
            sample[-1] = 1.0 - sample[-1]
        rows_by_pair.setdefault(key, []).append(sample)

    model = {}
    for key, rows in rows_by_pair.items():
        data = np.array(rows, dtype=float)
        X = create_feature(data[:, :-1])
        y = data[:, -1]

        classifier = LogisticRegression(C=100, max_iter=500)
        classifier.fit(X, y)
        model[key] = classifier

    return model

In [8]:
def my_predict(X_tst, model):
################################
#  Non Editable Region Ending  #
################################
    """Predict one response per test row using the per-pair classifiers.

    Rows are re-encoded with ``transform_test_data`` so that columns 64 and 65
    hold the decoded integers p and q. The classifier stored under
    ``'<min>$<max>'`` is queried with the remaining 64 entries; when
    ``p < q`` its output is used directly, otherwise ``1.0 - output`` is
    returned.

    Args:
        X_tst: 2-D array of raw test rows.
        model: dict of fitted classifiers keyed by ``'<min>$<max>'``.

    Returns:
        1-D ndarray with one predicted response per input row.

    Raises:
        KeyError: if a row's pair has no entry in ``model``.
    """
    X_tst = transform_test_data(X_tst)

    responses = []
    for row in X_tst:
        p, q = int(row[64]), int(row[65])
        features = np.delete(row, [64, 65])

        in_order = p < q
        key = (str(p) + '$' + str(q)) if in_order else (str(q) + '$' + str(p))
        label = model[key].predict(create_feature([features]))[0]
        responses.append(label if in_order else 1.0 - label)

    return np.array(responses)

In [10]:
# Train on the full training set; `model` is the dict returned by my_fit,
# keyed by '<p>$<q>' strings, and is the second argument my_predict expects.
model = my_fit(Z_trn)