Commit

lots and lots and lots of work
slinderman committed Mar 3, 2016
1 parent d1c5e84 commit 7100a0e
Showing 3 changed files with 85 additions and 76 deletions.
9 changes: 5 additions & 4 deletions pyglm/internals/weights.py
@@ -112,13 +112,14 @@ def initialize_from_prior(self):
W[n1,n2] = np.random.multivariate_normal(Mu[n1,n2], Sigma[n1,n2])
self.W = W

def initialize_with_standard_model(self, standard_model, threshold=75):
def initialize_with_standard_model(self, standard_model, threshold=90):
"""
Initialize with the weights from a standard model
:param standard_model:
:param threshold: percentile [0,100] of minimum weight
:return:
"""
# import ipdb; ipdb.set_trace()
W_std = standard_model.W

# Make sure it is the correct shape before copying
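Note on the threshold change above: the default percentile for initialize_with_standard_model moves from 75 to 90, so fewer standard-model weights survive initialization. The body of the function is collapsed in this diff, so the following is only a hypothetical illustration of a percentile cutoff on weight magnitudes, not the repository's code.

import numpy as np

def sparsify_by_percentile(W_std, threshold=90):
    # Hypothetical sketch: keep only weights whose magnitude exceeds the
    # given percentile and zero out the rest.
    cutoff = np.percentile(np.abs(W_std), threshold)
    A = (np.abs(W_std) >= cutoff).astype(np.float64)   # adjacency mask
    return A, W_std * A                                # masked weights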
@@ -158,7 +159,7 @@ class _GibbsSpikeAndSlabGaussianWeights(_SpikeAndSlabGaussianWeightsBase):
def __init__(self, population):
super(_GibbsSpikeAndSlabGaussianWeights, self).__init__(population)

self.resample()
# self.resample()

def resample(self, augmented_data=[]):

@@ -282,7 +283,6 @@ def _resample_W(self, n_pre, n_post, stats):
class _CollapsedGibbsSpikeAndSlabGaussianWeights(_SpikeAndSlabGaussianWeightsBase):
def __init__(self, population):
super(_CollapsedGibbsSpikeAndSlabGaussianWeights, self).__init__(population)

self.collapsed_resample()

@property
@@ -298,7 +298,8 @@ def collapsed_resample(self, augmented_data=[]):

def _serial_collapsed_resample(self, augmented_data):
P = self.network.adjacency.P
for n in xrange(self.N):
from tqdm import tqdm
for n in tqdm(xrange(self.N)):
# Compute the prior and posterior sufficient statistics of W
J_prior, h_prior = self._prior_sufficient_statistics(n)
J_lkhd, h_lkhd = self._lkhd_sufficient_statistics(n, augmented_data)
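Aside on the tqdm usage added in _serial_collapsed_resample: wrapping any iterable in tqdm() prints a live progress bar with no other change to the loop body. A minimal, self-contained sketch (written for Python 3, whereas the repository uses Python 2's xrange):

from tqdm import tqdm
import time

for n in tqdm(range(100), desc="collapsed resample"):
    time.sleep(0.01)   # stand-in for the per-neuron collapsed Gibbs update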
37 changes: 23 additions & 14 deletions pyglm/models.py
@@ -235,7 +235,9 @@ def add_data(self, S, F=None, minibatchsize=None):
# Add to the data list
self.data_list.append(augmented_data)

def generate(self, keep=True, T=100, return_Psi=False, verbose=True):
def generate(self, keep=True, T=100, return_Psi=False, verbose=True,
max_spks_per_bin=10, print_intvl=10000,
background=None):
"""
Generate data from the model.
@@ -260,7 +262,9 @@ def generate(self, keep=True, T=100, return_Psi=False, verbose=True):
X = np.zeros((T+L, N))

# Precompute the impulse responses (LxNxN array)
H = np.tensordot(self.basis.basis, self.weight_model.W, axes=([1], [2]))
H = np.tensordot(self.basis.basis,
self.weight_model.W_effective,
axes=([1], [2]))
assert H.shape == (L,self.N, self.N)

# Transpose H so that it is faster for tensor mult
@@ -272,8 +276,13 @@ def generate(self, keep=True, T=100, return_Psi=False, verbose=True):
# Simulate the background
X += self.background_model.generate(T+L)

# If given a background, add that too
if background is not None:
T_bkgd = background.shape[0]
assert background.shape[1] == self.N
X[:T_bkgd] += background

# Cap the number of spikes in a time bin
max_spks_per_bin = 10
n_exceptions = 0

# Iterate over each time step and generate spikes
@@ -282,7 +291,7 @@ def generate(self, keep=True, T=100, return_Psi=False, verbose=True):

for t in np.arange(T):
if verbose:
if np.mod(t,10000)==0:
if np.mod(t,print_intvl)==0:
print "t=%d" % t

# Compute the rate for the given activation
@@ -298,11 +307,8 @@ def generate(self, keep=True, T=100, return_Psi=False, verbose=True):
# Check Spike limit
if np.any(S[t,:] >= max_spks_per_bin):
n_exceptions += 1

# if np.any(S[t,:]>100):
# print "More than 10 spikes in a bin! " \
# "Decrease variance on impulse weights or decrease simulation bin width."
# import pdb; pdb.set_trace()
if verbose:
print "Exception: more than %d spikes in bin" % max_spks_per_bin

# Cast S to int32
assert np.all(np.isfinite(S[t,:]))
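For context, the generate() changes above expose max_spks_per_bin, print_intvl, and an optional background array of shape (T_bkgd, N) that is added to the activation before spikes are drawn. A hedged usage sketch; the population object and the return value are assumptions here, since neither is shown in this hunk:

import numpy as np

T = 10000
background = np.zeros((T, population.N))   # population is assumed to be an existing model instance
background[:, 0] = 0.5                     # e.g. extra drive to neuron 0

# Keyword names follow the new signature in the diff; the return value is not
# visible in this hunk, so it is left unnamed here.
result = population.generate(T=T,
                             background=background,
                             max_spks_per_bin=10,
                             print_intvl=10000)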
@@ -458,13 +464,14 @@ class _GibbsPopulation(_BayesianPopulationBase, ModelGibbsSampling):
"""
Implement Gibbs sampling for the population model
"""
def initialize_with_standard_model(self, standard_model):
def initialize_with_standard_model(self, standard_model, N_network_iters=200):
super(_GibbsPopulation, self).\
initialize_with_standard_model(standard_model)

# Update the network model a few times
N_network_updates = 10
for itr in xrange(N_network_updates):
print "Initializing network"
from tqdm import tqdm
for itr in tqdm(xrange(N_network_iters)):
self.network.resample((self.weight_model.A, self.weight_model.W))

def resample_model(self, temperature=1.0):
@@ -481,7 +488,7 @@ def resample_model(self, temperature=1.0):
self.network.resample((self.weight_model.A, self.weight_model.W))

@line_profiled
def collapsed_resample_model(self, temperature=1.0):
def collapsed_resample_model(self, temperature=1.0, N_network_iters=1):
assert temperature >= 0.0 and temperature <= 1.0

# update model components one at a time
@@ -492,7 +499,9 @@ def collapsed_resample_model(self, temperature=1.0):
self.background_model.resample(self.data_list)

# Resample the network given the weight model
self.network.resample((self.weight_model.A, self.weight_model.W))
for i in xrange(N_network_iters):
self.network.resample((self.weight_model.A,
self.weight_model.W))

def ais(self, N_samples=100, B=1000, steps_per_B=1,
verbose=True, full_output=False, callback=None):
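Background on the ais() signature visible above (N_samples, B, steps_per_B): annealed importance sampling runs a temperature schedule from the prior (0) to the posterior (1) and accumulates a log importance weight along the way. The sketch below shows that bookkeeping generically, not pyglm's implementation; log_likelihood and resample_at_temperature are hypothetical callbacks.

import numpy as np

def ais_log_weight(log_likelihood, resample_at_temperature, B=1000, steps_per_B=1):
    # Generic AIS sketch: anneal an inverse temperature from 0 to 1,
    # adding (delta temperature) * log likelihood at each step.
    betas = np.linspace(0.0, 1.0, B + 1)
    log_w = 0.0
    for b_prev, b in zip(betas[:-1], betas[1:]):
        log_w += (b - b_prev) * log_likelihood()
        for _ in range(steps_per_B):
            resample_at_temperature(b)   # MCMC move targeting the tempered posterior
    return log_w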
115 changes: 57 additions & 58 deletions pyglm/simple_models.py
@@ -256,7 +256,7 @@ def heldout_log_likelihood(self, S=None, augmented_data=None):
return self.log_likelihood(augmented_data)


def fit(self, L1=True):
def fit(self, L1=True, cs=None):
"""
Use scikit-learn's LogisticRegression model to fit the data
@@ -268,67 +268,66 @@ def fit(self, L1=True):
F = np.vstack([d["F"] for d in self.data_list])
S = np.vstack([d["S"] for d in self.data_list])

if L1:
# Hold out some data for cross validation
offset = int(0.75 * S.shape[0])
T_xv = S.shape[0] - offset
F_xv = F[offset:, ...]
S_xv = S[offset:, ...]
augmented_xv_data = {"T": T_xv, "S": S_xv, "F": F_xv}

F = F[:offset, ...]
S = S[:offset, ...]

for n_post in xrange(self.N):
# Get a L1 regularization path for inverse penalty C
cs = l1_min_c(F, S[:,n_post], loss='log') * np.logspace(1, 4., 10)
# The intercept is also subject to penalization, even though
# we don't really want to penalize it. To counteract this effect,
# we scale the intercept by a large value
intercept_scaling = 10**6


print "Computing regularization path for neuron %d ..." % n_post
ints = []
coeffs = []
xv_scores = []
lr = LogisticRegression(C=1.0, penalty='l1',
fit_intercept=True, intercept_scaling=intercept_scaling,
tol=1e-6)
for c in cs:
print "Fitting for C=%.5f" % c
lr.set_params(C=c)
lr.fit(F, S[:,n_post])
ints.append(lr.intercept_.copy())
coeffs.append(lr.coef_.ravel().copy())
# xv_scores.append(lr.score(F_xv, S_xv[:,n_post]).copy())

# Temporarily set the weights and bias
self.b[n_post] = lr.intercept_
self.weights[n_post, :] = lr.coef_
xv_scores.append(self.heldout_log_likelihood(augmented_data=augmented_xv_data))
# Hold out some data for cross validation
offset = int(0.75 * S.shape[0])
T_xv = S.shape[0] - offset
F_xv = F[offset:, ...]
S_xv = S[offset:, ...]
augmented_xv_data = {"T": T_xv, "S": S_xv, "F": F_xv}

F = F[:offset, ...]
S = S[:offset, ...]

# Get regularization path for inverse penalty C
if cs is None:
if L1:
cs = l1_min_c(F, S[:,0], loss='log') * np.logspace(1, 6., 10)
else:
cs = np.logspace(-5,1,10)
# cs = sigmas
# The intercept is also subject to penalization, even though
# we don't really want to penalize it. To counteract this effect,
# we scale the intercept by a large value
intercept_scaling = 1

penalty = "l1" if L1 else "l2"

for n_post in xrange(self.N):
print "Computing regularization path for neuron %d ..." % n_post
ints = []
coeffs = []
xv_scores = []
lr = LogisticRegression(C=1.0, penalty=penalty,
fit_intercept=True, intercept_scaling=intercept_scaling,
tol=1e-6)
for c in cs:
print "Fitting for C=%.5f" % c
lr.set_params(C=c)
lr.fit(F, S[:,n_post])
ints.append(lr.intercept_.copy())
coeffs.append(lr.coef_.ravel().copy())
# xv_scores.append(lr.score(F_xv, S_xv[:,n_post]).copy())

# Temporarily set the weights and bias
self.b[n_post] = lr.intercept_
self.weights[n_post, :] = lr.coef_
xv_scores.append(self.heldout_log_likelihood(augmented_data=augmented_xv_data))

# Choose the regularization penalty with cross validation
print "XV Scores: "
for c,score in zip(cs, xv_scores):
print "\tc: %.5f\tscore: %.1f" % (c,score)
best = np.argmax(xv_scores)
print "Best c: ", cs[best]
# Choose the regularization penalty with cross validation
print "XV Scores: "
for c,score in zip(cs, xv_scores):
print "\tc: %.5f\tscore: %.1f" % (c,score)
best = np.argmax(xv_scores)
print "Best c: ", cs[best]

# Save the best weights
self.b[n_post] = ints[best]
self.weights[n_post, :] = coeffs[best]
# Save the best weights
self.b[n_post] = ints[best]
self.weights[n_post, :] = coeffs[best]

else:
# Just use standard L2 regularization
for n_post in xrange(self.N):
sys.stdout.write('.')
sys.stdout.flush()
print " Max w: ", self.weights[n_post].max(), \
" Min w: ", self.weights[n_post].min()

lr = LogisticRegression(fit_intercept=True)
lr.fit(F,S[:,n_post])
self.b[n_post] = lr.intercept_
self.weights[n_post,:] = lr.coef_
assert abs(self.weights[n_post]).max() > 1e-6

print ""

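Summary of the rewritten fit(): the L1 and L2 branches now share one path, sweeping an inverse-penalty grid cs (computed from l1_min_c for L1, a fixed log-spaced grid for L2) and picking the value by held-out log likelihood. A compact, self-contained sketch of the same pattern on synthetic data (Python 3; the plain held-out score stands in for the model's heldout_log_likelihood):

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.svm import l1_min_c

rng = np.random.RandomState(0)
F = rng.randn(500, 20)                          # features, e.g. basis-filtered spike history
S = (rng.rand(500) < 0.3).astype(int)           # binary spike indicator for one neuron

offset = int(0.75 * len(S))                     # hold out the last quarter for validation
F_tr, S_tr = F[:offset], S[:offset]
F_xv, S_xv = F[offset:], S[offset:]

cs = l1_min_c(F_tr, S_tr, loss='log') * np.logspace(1, 6, 10)   # L1 path, as in the diff
scores = []
for c in cs:
    lr = LogisticRegression(C=c, penalty='l1', solver='liblinear',
                            fit_intercept=True, tol=1e-6)
    lr.fit(F_tr, S_tr)
    scores.append(lr.score(F_xv, S_xv))         # stand-in for heldout_log_likelihood
best = int(np.argmax(scores))
print("best C:", cs[best])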
