<a href="https://colab.research.google.com/github/krislars/R-uncertainty/blob/master/MonteCarloModeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install astropy and astroquery into the environment; astroquery.vizier is
# imported by the import cell of this notebook.
!pip install astropy
!pip install astroquery



distributed 1.21.8 requires msgpack, which is not installed.


Collecting astroquery
  Downloading https://files.pythonhosted.org/packages/61/50/a7a08f9e54d7d9d97e69433cd88231e1ad2901811c9d1ae9ac7ccaef9396/astroquery-0.3.9.tar.gz (4.2MB)
Collecting keyring>=4.0 (from astroquery)
  Downloading https://files.pythonhosted.org/packages/a1/28/0058032477bfdf2003e605d175629963759220661615443e20711446bfa7/keyring-18.0.0-py2.py3-none-any.whl
Collecting pywin32-ctypes!=0.1.0,!=0.1.1; sys_platform == "win32" (from keyring>=4.0->astroquery)
  Downloading https://files.pythonhosted.org/packages/9e/4b/3ab2720f1fa4b4bc924ef1932b842edf10007e4547ea8157b0b9fc78599a/pywin32_ctypes-0.2.0-py2.py3-none-any.whl
Building wheels for collected packages: astroquery
  Running setup.py bdist_wheel for astroquery: started
  Running setup.py bdist_wheel for astroquery: finished with status 'done'
  Stored in directory: C:\Users\shumans\AppData\Local\pip\Cache\wheels\8a\d1\1e\4124d9ef35e2bbfbaa284c97dd49d9babbab42d966c4bea190
Successfully built astroquery
Installing collected pa

distributed 1.21.8 requires msgpack, which is not installed.
You are using pip version 10.0.1, however version 19.0.2 is available.
You should consider upgrading via the 'python -m pip install --upgrade pip' command.


In [3]:
import random
import numpy as np
import numpy.random as npRand
from matplotlib import pyplot as plt
import scipy.stats as stats
from astroquery.vizier import Vizier

from ipywidgets import interact
from ipywidgets.widgets import FloatSlider

gzip was not found on your system! You should solve this issue for astroquery.eso to be at its best!
On POSIX system: make sure gzip is installed and in your path!On Windows: same for 7-zip (http://www.7-zip.org)!


In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [24]:
#Defining some parameters:
numVals = 1000       # Monte Carlo draws per (catalogue row, colorDiff) pair
colorDiffmin = 0.1   # smallest value of the numerator A - B
colorDiffmax = 3     # largest value of the numerator A - B
numSteps = 100       # number of colorDiff grid points from min to max (inclusive)

data = 'J/MNRAS/440/3430'  # VizieR catalogue identifier passed to Vizier.get_catalogs
rowLimit = -1 #includes all rows

color1 = 'J-H'    # catalogue column used for B (uncertainty column is 'e_' + color1)
color2 = 'H-Ks'   # catalogue column used for D (uncertainty column is 'e_' + color2)
R_exp = 1.6       # the value of R that the synthetic A and C are constructed to give

# Seed numpy's global random stream so that the Monte Carlo draws (made through
# npRand.normal further down) are identical on every Restart & Run All.
randomSeed = 42
npRand.seed(randomSeed)

In [25]:
# Search VizieR for catalogues matching 'Davenport+'. The result is not used
# by any other cell shown in this notebook.
catalog_list = Vizier.find_catalogs('Davenport+')
# Set the row cap on the Vizier class before fetching; rowLimit is -1 in the
# parameter cell, annotated there as "includes all rows".
Vizier.ROW_LIMIT = rowLimit
# Fetch the tables of the catalogue identified by `data`.
catalog = Vizier.get_catalogs(data)

# Pick the table named '<data>/table1' out of the returned collection.
starData = catalog[data+'/table1']

In [26]:
#MultiDimensional linspace
def ndlinspace(start, end, steps):
    """Element-wise ``np.linspace`` between two arrays of identical shape.

    For every position in ``start``/``end`` a run of ``steps`` evenly spaced
    values is generated, and the runs are stacked along a new trailing axis.

    Parameters
    ----------
    start, end : array_like
        Lower and upper end points. Must have the same shape; scalars are
        accepted and give a plain 1-D ``np.linspace`` result.
    steps : int
        Number of samples generated for each start/end pair.

    Returns
    -------
    numpy.ndarray
        Array of shape ``start.shape + (steps,)``.

    Raises
    ------
    ValueError
        If ``start`` and ``end`` do not have the same shape.
    """
    start = np.asarray(start)
    end = np.asarray(end)
    if start.shape != end.shape:
        # Fail loudly: silently returning None only surfaces later as an
        # unrelated-looking error in the caller.
        raise ValueError(
            "Arrays must be same size: got shapes {} and {}".format(
                start.shape, end.shape
            )
        )
    if start.ndim == 0:
        # Base case: unwrap the 0-d arrays so np.linspace sees plain scalars.
        return np.linspace(start[()], end[()], steps)
    # Recurse over the leading axis; each sub-result gains the trailing axis.
    return np.array(
        [ndlinspace(s, e, steps) for s, e in zip(start, end)]
    )

The data file lists the intrinsic stellar colors and their uncertainties in the order H-K, sigma(H-K), J-H, sigma(J-H), ...
Davenport, J. R., Ivezic, Z., Becker, A. C., Ruan, J. J., Hunt-Walker, N. M., Covey, K. R., & Lewis, A. R. (2014, June). The SDSS-2MASS-WISE 10-dimensional stellar colour locus [Electronic version]. MNRAS, 440(4), 3430-3438.

$$
R = \frac{A-B}{C-D}
$$

We will choose values such that the numerator, $A-B$, equals colorDiff, so $A = \mathrm{colorDiff} + B$.  We also choose the value of $R$ to be $R_{exp}$, so $C-D = \mathrm{colorDiff}/R_{exp}$.  Therefore, $C = \mathrm{colorDiff}/R_{exp} + D$.

In [27]:
# Alternative data source kept for reference (commented out): a text file from the project repository.
#starData = np.loadtxt("https://raw.githubusercontent.com/krislars/R-uncertainty/master/Astro%20Lab%20Star%20Data.txt", unpack=True)

# B and D are the catalogue columns named by color1 / color2; dB and dD are
# the matching 'e_'-prefixed columns, used below as their uncertainties.
B, dB = np.array(starData[color1]), np.array(starData['e_'+color1])
D, dD = np.array(starData[color2]), np.array(starData['e_'+color2])

n = len(B)  # number of catalogue rows
# Grid of numerator values. ndlinspace on a 1-D input of length numSteps gives
# shape (numSteps, n); the transpose turns that into (n, numSteps).
colorDiff = np.linspace(colorDiffmin, colorDiffmax, numSteps)
colorDiff = np.transpose(ndlinspace(colorDiff, colorDiff, n))

# ndlinspace with identical start and end repeats each value numSteps times,
# so the 1-D columns become (n, numSteps) arrays that line up with colorDiff.
B, dB = ndlinspace(B,B,numSteps), ndlinspace(dB,dB,numSteps)
D, dD = ndlinspace(D,D,numSteps), ndlinspace(dD,dD,numSteps)

# Construct A and C so that A - B = colorDiff and C - D = colorDiff / R_exp,
# i.e. (A - B) / (C - D) = R_exp before any noise is added.
A = colorDiff + B
C = colorDiff/R_exp + D

# Constant uncertainty of 0.02 assigned to both A and C, first as a length-n
# vector and then expanded to (n, numSteps). dA and dC are bound to the same array.
dA = dC = np.linspace(0.02, 0.02, n)
dA = dC = ndlinspace(dA, dA, numSteps)

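The next cell applies standard first-order propagation of error (assuming independent uncertainties) to the ratio $R$:

$$
\sigma_R = R\sqrt{\frac{\sigma_A^2 + \sigma_B^2}{(A-B)^2} + \frac{\sigma_C^2 + \sigma_D^2}{(C-D)^2}}
$$

with $A-B = \mathrm{colorDiff}$ and $C-D = \mathrm{colorDiff}/R_{exp}$.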

In [28]:
# Theoretical dR from error propagation of R = (A - B) / (C - D), written with
# A - B = colorDiff and C - D = colorDiff / R_exp.
dR = R_exp * (
    (dA**2 + dB**2) / colorDiff**2
    + (dC**2 + dD**2) * (R_exp / colorDiff)**2
) ** 0.5

This function takes an array of values and an array of associated uncertainties (of the same shape) and, for each value, returns numVals samples drawn at random from a normal probability distribution centered on that value with the given uncertainty as its standard deviation.

In [29]:
#choosing random data sets for each star type
def ndNormalData(mean, sigma, numVals):
    """Draw normally distributed samples for every element of ``mean``.

    Parameters
    ----------
    mean : array_like
        Centres of the normal distributions.
    sigma : array_like
        Standard deviations; must have the same shape as ``mean``.
    numVals : int
        Number of samples drawn per element.

    Returns
    -------
    numpy.ndarray
        Array of shape ``mean.shape + (numVals,)``. Samples come from the
        global ``numpy.random`` stream, element by element in row-major
        order.

    Raises
    ------
    ValueError
        If ``mean`` and ``sigma`` do not have the same shape.
    """
    mean = np.asarray(mean)
    sigma = np.asarray(sigma)
    if mean.shape != sigma.shape:
        # Fail loudly: silently returning None only surfaces later as an
        # unrelated-looking error in the caller.
        raise ValueError(
            "Arrays must be same size: got shapes {} and {}".format(
                mean.shape, sigma.shape
            )
        )
    if mean.ndim == 0:
        # Base case: one distribution, numVals draws.
        return npRand.normal(mean[()], sigma[()], numVals)
    # Recurse over the leading axis; each sub-result gains the sample axis.
    return np.array(
        [ndNormalData(m, s, numVals) for m, s in zip(mean, sigma)]
    )

In [30]:
# Draw numVals noisy realisations of each quantity. The order B, D, A, C
# determines how the shared random stream is consumed, so it is kept fixed.
B_vals, D_vals, A_vals, C_vals = [
    ndNormalData(centre, spread, numVals)
    for centre, spread in ((B, dB), (D, dD), (A, dA), (C, dC))
]

In [31]:
# Monte Carlo samples of R = (A - B) / (C - D), one value per random draw.
R_vals = np.divide(A_vals - B_vals, C_vals - D_vals)
R_vals.shape

(135, 100, 1000)

Now, we can test the distributions of R.

In [32]:
# Collapse the sample axis (axis 2) into the Monte Carlo mean and spread of R.
R_mc = R_vals.mean(axis=2)
dR_mc = R_vals.std(axis=2)

In [36]:
# Initial slider position: the midpoint of the colorDiff range.
colorDiffinit = 0.5 * (colorDiffmin + colorDiffmax)

def findIndex(colorDiffval, lo=None, hi=None, steps=None):
    """Return the index of the colorDiff grid point nearest to ``colorDiffval``.

    The grid is the evenly spaced sequence of ``steps`` points from ``lo`` to
    ``hi`` inclusive.

    Parameters
    ----------
    colorDiffval : float
        Value to locate on the grid.
    lo, hi : float, optional
        Grid end points. Default to the notebook-level ``colorDiffmin`` and
        ``colorDiffmax``.
    steps : int, optional
        Number of grid points. Defaults to the notebook-level ``numSteps``.

    Returns
    -------
    int
        Index in ``[0, steps - 1]``. Values outside the grid are clamped to
        the nearest end.
    """
    lo = colorDiffmin if lo is None else lo
    hi = colorDiffmax if hi is None else hi
    steps = numSteps if steps is None else steps

    # A single-point (or zero-width) grid has only index 0; this also avoids
    # dividing by zero below.
    if steps < 2 or hi == lo:
        return 0

    step = (hi - lo) / (steps - 1)
    # Round rather than truncate: floating-point error can put an exact grid
    # value a hair below its integer position, and truncation would then
    # select the previous grid point.
    nearest = int(round((colorDiffval - lo) / step))
    return min(max(nearest, 0), steps - 1)

def plot_data(colorDiffval):
    """Plot the Monte Carlo R against g-i for one colorDiff grid point.

    Red points with error bars show ``R_mc`` and ``dR_mc`` at the grid column
    selected by ``findIndex(colorDiffval)``. Dashed blue lines show ``R_exp``
    and ``R_exp`` plus/minus the theoretical ``dR`` at the same column.
    """
    column = findIndex(colorDiffval)
    g_minus_i = np.array(starData['g-i'])
    R_line = np.linspace(R_exp, R_exp, n)

    figure = plt.figure(figsize=(8, 5))
    axes = figure.add_axes([0.1, 0.2, 0.8, 0.65])

    axes.set_ylim([1, 3])
    axes.set_title('Mean Reddening Across Star Types')
    axes.set_xlabel('g-i mag')
    axes.set_ylabel('R')

    # Monte Carlo result with its spread
    axes.errorbar(
        g_minus_i,
        R_mc[:, column],
        yerr=dR_mc[:, column],
        fmt='ro',
    )
    # Expected value and theoretical band
    theory_band = dR[:, column]
    axes.plot(g_minus_i, R_line, "b--")
    axes.plot(g_minus_i, R_line + theory_band, 'b--')
    axes.plot(g_minus_i, R_line - theory_band, 'b--')
    plt.show()
    
# Hook plot_data up to a slider that spans the colorDiff range.
interact(
    plot_data,
    colorDiffval=FloatSlider(
        value=colorDiffinit,
        min=colorDiffmin,
        max=colorDiffmax,
        step=0.05,
    ),
)


interactive(children=(FloatSlider(value=1.55, description='colorDiffval', max=3.0, min=0.1, step=0.05), Output…

<function __main__.plot_data(colorDiffval)>

Ideas going forward:
* Calculate the uncertainty in the mean.
* Try a weighted mean



In [18]:
def customBinning(data, numBins=20, dataWidth=8):
    """Build histogram bin edges that lump far outliers into single end bins.

    Bins are evenly spaced within ``dataWidth`` standard deviations of the
    mean. Data beyond that range on either side is collected into one wide
    outer bin on that side. This is meant to avoid bins with an expected
    count of 0 when calculating chi-squared.

    Parameters
    ----------
    data : array_like
        1-D sample values.
    numBins : int, optional
        Total number of bins, including any outer bins.
    dataWidth : float, optional
        Half-width of the evenly binned region, in standard deviations.

    Returns
    -------
    numpy.ndarray
        ``numBins + 1`` increasing bin edges running from ``min(data)`` to
        ``max(data)``.
    """
    data = np.asarray(data)
    # numpy reductions instead of the Python builtins: same values, but no
    # Python-level loop over every sample.
    dataMin = np.min(data)
    dataMax = np.max(data)
    mean = np.mean(data)
    stdev = np.std(data)

    lowCut = mean - dataWidth * stdev
    highCut = mean + dataWidth * stdev
    trimLow = bool(dataMin < lowCut)
    trimHigh = bool(dataMax > highCut)

    # Each outer bin uses up one of the requested bins, so the evenly spaced
    # region gets correspondingly fewer.
    innerBins = numBins - int(trimLow) - int(trimHigh)
    low = lowCut if trimLow else dataMin
    high = highCut if trimHigh else dataMax
    inner = np.linspace(low, high, innerBins + 1)

    pieces = []
    if trimLow:
        pieces.append([dataMin])
    pieces.append(inner)
    if trimHigh:
        pieces.append([dataMax])
    return np.concatenate(pieces)

def ndHistogramCounts(Vals, numBins):
    """Histogram every 1-D run along the last axis of ``Vals``.

    Bin edges for each run are computed separately with
    ``customBinning(run, numBins)``.

    Parameters
    ----------
    Vals : array_like
        Samples; the last axis is the sample axis.
    numBins : int
        Number of bins per histogram.

    Returns
    -------
    numpy.ndarray
        Bin counts of shape ``Vals.shape[:-1] + (numBins,)``.
    """
    Vals = np.asarray(Vals)
    if Vals.ndim == 1:
        counts, _ = np.histogram(Vals, customBinning(Vals, numBins))
        return counts

    # np.stack needs a real sequence; handing it a generator is deprecated
    # and rejected outright by current NumPy.
    return np.stack([ndHistogramCounts(vals, numBins) for vals in Vals])

def ndHistogramBins(Vals, numBins):
    """Compute bin edges for every 1-D run along the last axis of ``Vals``.

    Edges for each run come from ``customBinning(run, numBins)``.

    Parameters
    ----------
    Vals : array_like
        Samples; the last axis is the sample axis.
    numBins : int
        Number of bins per histogram.

    Returns
    -------
    numpy.ndarray
        Bin edges of shape ``Vals.shape[:-1] + (numBins + 1,)``.
    """
    Vals = np.asarray(Vals)
    if Vals.ndim == 1:
        return customBinning(Vals, numBins)

    # np.stack needs a real sequence; handing it a generator is deprecated
    # and rejected outright by current NumPy.
    return np.stack([ndHistogramBins(vals, numBins) for vals in Vals])

def ndNormalCounts(mean, sigma, Bins):
    """Probability mass of a normal distribution falling in each bin.

    For every element of ``mean``/``sigma`` the normal CDF is evaluated at
    the matching run of bin edges in ``Bins`` and differenced, giving the
    fraction of the distribution that lies in each bin.

    Parameters
    ----------
    mean : array_like
        Distribution means.
    sigma : array_like
        Distribution standard deviations; same shape as ``mean``.
    Bins : array_like
        Bin edges of shape ``mean.shape + (numEdges,)``.

    Returns
    -------
    numpy.ndarray
        Per-bin probabilities of shape ``mean.shape + (numEdges - 1,)``.
        Multiply by the sample size to get expected counts.

    Raises
    ------
    ValueError
        If ``mean`` and ``sigma`` differ in shape, or the leading axes of
        ``Bins`` do not match ``mean``.
    """
    mean = np.asarray(mean)
    sigma = np.asarray(sigma)
    Bins = np.asarray(Bins)

    if mean.shape != sigma.shape:
        raise ValueError(
            "Arrays must be same shape: got {} and {}".format(
                mean.shape, sigma.shape
            )
        )
    if Bins.ndim != mean.ndim + 1 or Bins.shape[:-1] != mean.shape:
        raise ValueError(
            "Bins must have shape mean.shape + (numEdges,): got {} for mean "
            "shape {}".format(Bins.shape, mean.shape)
        )

    # Broadcast each mean/sigma against its own run of edges along the last
    # axis; one vectorised cdf call replaces the per-element recursion.
    cumulative = stats.norm.cdf(
        Bins, mean[..., np.newaxis], sigma[..., np.newaxis]
    )
    return np.diff(cumulative, axis=-1)


In [19]:
#Next I need to test "Goodness of fit"
numBins = 20
Bins = ndHistogramBins(R_vals, numBins)
counts = ndHistogramCounts(R_vals, numBins)

# Expected counts per bin: the normal-distribution probability of each bin
# (from the cdf differences) scaled by the sample size.
exp = numVals * ndNormalCounts(R_mc, dR_mc, Bins)

# Chi^2 for every (row, colorDiff) histogram, summed over the bin axis.
chi2 = ((counts - exp)**2 / exp).sum(axis=2)

In [20]:
#Constraints in this case are the calculated mean, stdev, and total number of counts
c = 3
# Degrees of freedom: number of bins minus the number of constraints.
d = numBins - c
# Reduced chi-squared: chi2 divided by the degrees of freedom.
red_chi2 = chi2/d