In [1]:
%matplotlib inline

In [2]:
import matplotlib.pyplot as plt

import scipy as sp
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

In [3]:
# Testing logistic regression as a binary classifier: p (label 0) vs. C and noise (both label 1)

# Data with no tilt

# Data with tilt

In [4]:
# Load one sparse matrix per event class from the tilt dataset and build a
# label vector with one entry per matrix row. The task is binary: p is the
# 0 class, while C and noise deliberately share the 1 class.
p_data = sp.sparse.load_npz('../data/tilt/20x20x20/pDisc_40000_20x20x20_tilt.npz')
p_labels = np.zeros(p_data.shape[0])

C_data = sp.sparse.load_npz('../data/tilt/20x20x20/CDisc_40000_20x20x20_tilt.npz')
C_labels = np.ones(C_data.shape[0])

noise_data = sp.sparse.load_npz('../data/tilt/20x20x20/noiseDisc_40000_20x20x20_tilt.npz')
noise_labels = np.ones(noise_data.shape[0])

In [5]:
# Stack the three classes row-wise into a single CSR matrix. The labels are
# joined in the same p -> C -> noise order so row i of full_data is described
# by full_labels[i].
class_blocks = [p_data, C_data, noise_data]
full_data = sp.sparse.vstack(class_blocks, format='csr')

class_labels = (p_labels, C_labels, noise_labels)
full_labels = np.concatenate(class_labels)

# Sanity print: the leading dimension of both shapes should agree.
for stacked in (full_data, full_labels):
    print(stacked.shape)

(120002, 8000)
(120002,)


In [6]:
# Hold out 25% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore every score computed later) repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    full_data,
    full_labels,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Single baseline fit at C=1 before sweeping the regularization strength.
# max_iter is not passed here, so the estimator's own default applies
# (an earlier experiment used: iterations = 1000).
reg = 1
nc = LogisticRegression(C=reg)
nc.fit(X_train, y_train)
y_pred_c = nc.predict(X_test)

In [None]:
# Solver iteration cap passed as max_iter to every fit in the C sweep.
iterations = 100

# One value per decade from 1e-5 up to 1e4. Written in 1eN form so the exponent
# is the actual power of ten (these are the same floats as 10e-6 ... 10e3).
C_vals = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4]
print(C_vals)

In [None]:
# Sweep the inverse regularization strength C and score every fit on the
# held-out split. The result lists are rebuilt from empty each time the cell
# runs, so re-running it does not append duplicates; entry i of each list
# corresponds to C_vals[i].
precisions = []
recalls = []
f1s = []
accuracies = []

for c in C_vals:
    model = LogisticRegression(C=c, max_iter=iterations)
    y_pred = model.fit(X_train, y_train).predict(X_test)

    # Compute accuracy once and reuse it for both the stored curve and the
    # progress line, instead of scoring the same predictions twice.
    accuracy = metrics.accuracy_score(y_test, y_pred)

    precisions.append(metrics.precision_score(y_test, y_pred))
    recalls.append(metrics.recall_score(y_test, y_pred))
    f1s.append(metrics.f1_score(y_test, y_pred))
    accuracies.append(accuracy)
    print("Charge and C=" + str(c) + " accuracy=" + str(accuracy))

In [None]:
# Highest held-out accuracy reached anywhere in the C sweep.
best_accuracy = max(accuracies)
print(best_accuracy)

In [None]:
# Accuracy as a function of C, drawn on the current axes. The x-axis is
# log-scaled because C_vals covers nine decades.
ax = plt.gca()
ax.plot(C_vals, accuracies)
ax.set_xscale('log')

ax.set_xlabel('C (inverse regularization constant)')
ax.set_ylabel('Accuracy')
ax.set_title('Accuracy by Regularization - p vs. C and noise')

# NOTE(review): the save path below points at the NO-tilt results folder, but
# this notebook loads its data from ../data/tilt/ - confirm before enabling.
#plt.savefig('../plots/results/NO-tilt/LogisticRegression_accuracyxC_pCnoise.pdf')