In [1]:
from sim_logistic import sim
sim(n = 1000)

ValueError: Found input variables with inconsistent numbers of samples: [1000, 2000]

In [2]:
import numpy as np
from sklearn.linear_model import LogisticRegression
n, s, p, k, theta, penalty = 10000, 2, 0.5, 0.5, np.pi/2, 'l1'

In [3]:
# parameters 

## model pars
d = int(n * k)
n0, n1 = n - int(n * p), int(n * p)
sigma = 0.2
n_test = 1000

In [4]:
## coefficients
beta0, beta1 = np.zeros(shape = (d, )), np.zeros(shape = (d, ))
beta0[0] = s
beta1[0], beta1[1] = s * np.cos(theta), s * np.sin(theta)

In [5]:
## regularizer 
if penalty == 'l1':
    regularizer = sigma * np.sqrt(np.log(d)/n)
elif penalty == 'l2':
    regularizer = sigma ** 2 * np.log(d)/n
else: 
    raise ValueError('Not implemented for ' + penalty + '.\n')

In [23]:
# data
x0, x1 = np.random.normal(size = (n0, d)), np.random.normal(size = (n1, d))
h0, h1 = x0 @ beta0.reshape((-1, 1)) + sigma * np.random.normal(size = (n0, 1)),\
         x1 @ beta1.reshape((-1, 1)) + sigma * np.random.normal(size = (n1, 1))
y0, y1 = (h0.reshape((-1, )) > 0).astype('float32'), (h1.reshape((-1, )) > 0).astype('float32')
x_train, y_train = np.concatenate((x0, x1), axis = 0), np.concatenate((y0, y1))

x0_test, x1_test = np.random.normal(size = (n_test, d)), np.random.normal(size = (n_test, d))
h0_test, h1_test = x0_test @ beta0.reshape((-1, 1)) + sigma * np.random.normal(size = (n_test, 1)),\
         x1_test @ beta1.reshape((-1, 1)) + sigma * np.random.normal(size = (n_test, 1))
y0_test, y1_test = (h0_test.reshape((-1, )) > 0).astype('float32'),\
         (h1_test.reshape((-1, )) > 0).astype('float32')

In [26]:
# model 
cl = LogisticRegression(penalty = penalty, C = 1/regularizer, fit_intercept = False, solver = 'liblinear')
cl.fit(x_train, y_train)

LogisticRegression(C=171.3254002981677, fit_intercept=False, penalty='l1',
                   solver='liblinear')

In [27]:
score0, score1 = cl.score(x0_test, y0_test), cl.score(x1_test, y1_test)
b = cl.coef_.T
norm_b = np.linalg.norm(b)

In [28]:
score0, score0, norm_b

(0.59, 0.59, 45.1105094533143)

In [7]:
from sim_logistic import sim
score, b, calibration_error = sim(n = 10000, k = 0.001, pi = 0)

In [8]:
score

[0.929, 0.504]

In [9]:
b

[9.26890801249023, 0.016435406024695315, 1.5704057344039022]

In [10]:
ce0, ce1 = calibration_error

In [11]:
ce0(0.1)

0.09999298152888829

In [12]:
ce1(0.1)

0.09999289154049579

In [16]:
import numpy as np
thetas = np.linspace(0, 1, 5)
thetas

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [19]:
job = 1
job_file = f'job_files/job_{job}.sh'

    # content of batch file
job_string = f'#!/bin/bash\n'
job_string += f'#SBATCH --job-name=sim{job}\n'
job_string += f'#SBATCH --output=logs/op-sim_%A_%a.out\n'
job_string += f'#SBATCH --array=0-4999\n'
job_string += f'#SBATCH --nodes=1\n#SBATCH --cpus-per-task=1\n#SBATCH --mem-per-cpu=6gb\n'
job_string += f'#SBATCH --time=1:00:00\n#SBATCH --account=yuekai1\n#SBATCH --mail-type=NONE\n'
job_string += f'#SBATCH --mail-user=smaity@umich.edu\n#SBATCH --partition=standard\n'
job_string += f'echo "SLURM_JOBID: " $SLURM_JOBID\necho "SLURM_ARRAY_TASK_ID: " $SLURM_ARRAY_TASK_ID\n'
job_string += f'echo "SLURM_ARRAY_JOB_ID: " $SLURM_ARRAY_JOB_ID\n'
job_string += f'python3 grid_sim.py $(($SLURM_ARRAY_TASK_ID+{job * 5000}))'


with open(job_file, 'w') as jf:
    jf.write(job_string)

In [21]:
np.linspace(0.1, 0.9, 9)

array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

In [22]:
rd = {'st': [0, 1, 2], 't': 2}

In [23]:
type(rd)

dict

In [25]:
a = str(rd)

In [26]:
eval(a)

{'st': [0, 1, 2], 't': 2}

In [1]:
from sim_logistic import grid
import numpy as np
import itertools
import subprocess
import os, sys

with open('parms.txt', 'r') as f:
    pars = f.readline()

pars = eval(pars)
parameters = grid(n_signal = pars['n_signal'], n_pi = pars['n_pi'],\
     n_kappa = pars['n_kappa'], n_theta = pars['n_theta'], n_sim = pars['n_sim'])

In [2]:
pars

{'n_signal': 2, 'n_pi': 2, 'n_kappa': 2, 'n_theta': 2, 'n_sim': 2}

In [3]:
parameters

[(0.5, 0.1, 0.1, 0.0, 'l1', 0),
 (0.5, 0.1, 0.1, 0.0, 'l1', 1),
 (0.5, 0.1, 0.1, 0.0, 'l2', 0),
 (0.5, 0.1, 0.1, 0.0, 'l2', 1),
 (0.5, 0.1, 0.1, 3.141592653589793, 'l1', 0),
 (0.5, 0.1, 0.1, 3.141592653589793, 'l1', 1),
 (0.5, 0.1, 0.1, 3.141592653589793, 'l2', 0),
 (0.5, 0.1, 0.1, 3.141592653589793, 'l2', 1),
 (0.5, 0.1, 10.0, 0.0, 'l1', 0),
 (0.5, 0.1, 10.0, 0.0, 'l1', 1),
 (0.5, 0.1, 10.0, 0.0, 'l2', 0),
 (0.5, 0.1, 10.0, 0.0, 'l2', 1),
 (0.5, 0.1, 10.0, 3.141592653589793, 'l1', 0),
 (0.5, 0.1, 10.0, 3.141592653589793, 'l1', 1),
 (0.5, 0.1, 10.0, 3.141592653589793, 'l2', 0),
 (0.5, 0.1, 10.0, 3.141592653589793, 'l2', 1),
 (0.5, 0.9, 0.1, 0.0, 'l1', 0),
 (0.5, 0.9, 0.1, 0.0, 'l1', 1),
 (0.5, 0.9, 0.1, 0.0, 'l2', 0),
 (0.5, 0.9, 0.1, 0.0, 'l2', 1),
 (0.5, 0.9, 0.1, 3.141592653589793, 'l1', 0),
 (0.5, 0.9, 0.1, 3.141592653589793, 'l1', 1),
 (0.5, 0.9, 0.1, 3.141592653589793, 'l2', 0),
 (0.5, 0.9, 0.1, 3.141592653589793, 'l2', 1),
 (0.5, 0.9, 10.0, 0.0, 'l1', 0),
 (0.5, 0.9, 10.0, 0.0, 