In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

%matplotlib inline

In [2]:
def createMockCluster(rin,rout,nmean,k,smax,m,vmean,threeD=False):
    ntot = np.random.poisson(lam=nmean)
    
    nu0 = ntot/(4*np.pi*(np.log(rout/rin))) if k==3. else ntot*(3.-k)/(4*np.pi*(rout**(3.-k)-rin**(3.-k)))
    #rho = 4*np.pi*nu0*np.random.uniform(low=rin,high=rout,size=ntot)**(2.-k)
    rho = np.power(np.random.uniform(4*np.pi*nu0*rin**(2.-k),4*np.pi*nu0*rout**(2.-k),size=ntot)/(4*np.pi*nu0),1./(2-k))
    phi = np.random.uniform(0.,2*np.pi,ntot)
    theta = np.arccos(np.random.uniform(-1,1,ntot))
    
    x = rho*np.sin(theta)*np.cos(phi)
    y = rho*np.sin(theta)*np.sin(phi)
    
    vobs = np.array([np.random.normal(loc=vmean,scale=np.sqrt(3)*smax/(i**m)) for i in rho])
    
    if threeD:
        z = rho*np.cos(theta)
        return pd.DataFrame(np.hstack((x.reshape((ntot,1)),y.reshape((ntot,1)),z.reshape((ntot,1)),\
                                      vobs.reshape((ntot,1)),rho.reshape((ntot,1)))),columns=['x','y','z','vobs','rho'])
    else:
        r = np.sqrt(np.power(x,2.)+np.power(y,2.))
        return pd.DataFrame(np.hstack((r.reshape((ntot,1)),vobs.reshape((ntot,1)))),columns=['r','vobs'])

In [9]:
#load data

data=pd.read_csv('continuousMock0.csv')

def defineAnnuli(r,nAnn):
    return np.append(0.,np.sort(r)[np.linspace(0,len(r)-1,nAnn).astype(int)][1:])

nAnn = 10
annuli = defineAnnuli(data['r'],nAnn)

[   0.            0.73275537    1.0003572     1.15664037    1.3830914
    1.77197597    2.34198015    3.40917706    6.54028027  429.85607839]


In [10]:
def labelGalaxies(data,annuli):
    #adds a column to data indicating the galaxy's membership in an annulus
    bla = np.zeros(len(data['r']))
    for i in range(len(annuli)-1):
        bla += i*np.array(np.logical_and(data['r']>annuli[i],data['r']<=annuli[i+1]).astype(int))
    return bla

def galInAnn(data,annuli):
    #calculates the number of galaxies in all the annuli
    return np.array([(data['annuli']==i).sum() for i in range(len(annuli)-1)])

def sigInAnn(data,annuli):
    return [np.std(data['vobs'][data['annuli']==i],ddof=1) for i in range(len(annuli)-1)]

def getProfiles(annuli,rin,rout,nmean,k,smax,m,vmean):
    mock = createMockCluster(rin,rout,nmean,k,smax,m,vmean)
    mock['annuli'] = labelGalaxies(mock,annuli)
    return galInAnn(mock,annuli),sigInAnn(mock,annuli)

In [12]:
#all log likelihoods

#make a table of log(n!)-s for n=1,...
factorialTable = np.cumsum(np.append(0,[np.log(i) for i in range(1,len(data['r']))]))

def logPriorNu(k):
    #starting gaussian
    nu0 = 3. #mean prior density index
    sigma = 1.
    return -0.5*np.log(2*np.pi*sigma**2)-0.5*(k-nu0)**2/(2*sigma**2)

def logPriorSigma(k):
    #starting gaussian
    nu0 = -.2 #mean prior sigma index
    sigma = 0.1
    return -0.5*np.log(2*np.pi*sigma**2)-0.5*(k-nu0)**2/(2*sigma**2)

def logPriorSigmaNorm(k):
    #starting gaussian
    nu0 = 1000 #mean prior sigma index
    sigma = 500
    return -0.5*np.log(2*np.pi*sigma**2)-0.5*(k-nu0)**2/(2*sigma**2)
 

def logLikelihoodNu(prediction,data):
    return np.sum(data*np.log(prediction)-prediction-factorialTable[data])

def logLikelihoodSigma(vObs,vPred,nPred):
    # distribution of variances is chisq, as shown, for example, at
    # https://onlinecourses.science.psu.edu/stat414/node/174
    return np.sum(np.log(stats.chi2.pdf((nPred-1.)*np.power(vObs,2.)/np.power(vPred,2.),nPred-1)))

def logPosterior(vObs,vPred,nObs,nPred,kInd,sigmaInd,sigmaNorm):
    return logPriorNu(kInd)+logPriorSigma(sigmaInd)+logPriorSigmaNorm(sigmaNorm)+\
           logLikelihoodNu(nPred,nObs)+logLikelihoodSigma(vObs,vPred,nPred)

The slowest run took 6.10 times longer than the fastest. This could mean that an intermediate result is being cached 
10 loops, best of 3: 22.9 ms per loop


In [17]:
ntot=1000
bla1,bla2 = getProfiles(annuli,1,1000,1000*ntot,3,1000,0.2,0)
bla1/=ntot
bla2/=np.sqrt(ntot)

print bla1
print bla2

[ 99 116 106 111 124 108 104 110 118]
[ 51.37252208  52.09542935  52.20695866  50.39481053  48.439801
  45.76986303  42.83054526  39.02898351  30.96053301]


In [18]:
ntot=1000
bla1,bla2 = getProfiles(annuli,1,1000,1000*ntot,3,1000,0.2,0)
bla1/=ntot
bla2/=np.sqrt(ntot)

print bla1
print bla2

[ 99 116 106 111 124 108 105 109 118]
[ 51.27101747  52.02650253  52.03644484  50.47386708  48.22288653
  45.93556896  43.01930418  39.18609952  30.84876409]


In [19]:
ntot=1000
bla1,bla2 = getProfiles(annuli,1,1000,1000*ntot,3,1000,0.2,0)
bla1/=ntot
bla2/=np.sqrt(ntot)

print bla1
print bla2

[ 99 116 106 111 125 108 104 110 118]
[ 51.56502247  52.07016862  52.13777817  50.32216058  48.27531929
  45.9825211   43.14596094  39.11981342  30.79616311]
