In [36]:
import numpy as np
import cupy as cp
import nvtx
import pickle as pk
import os 
os.environ['picaso_refdata'] = '/nobackup/nbatalh1/reference_data/picaso/reference'
os.environ['PYSYN_CDBS'] ='/nobackup/nbatalh1/reference_data/grp/hst/cdbs'
#picaso
from picaso import justdoit as jdi 
from picaso import justplotit as jpi

from numba import vectorize, float64,int32,float32,int64
from numba import guvectorize,jit

## Get function inputs

In [37]:
opacity = jdi.opannection(
filename_db='/nobackup/nbatalh1/reference_data/picaso/reference/opacities/all_opacities_0.6_6_R60000.db'
)#wave_range=[0.3,1]) #We can use wave_range to control the number of wave points
start_case = jdi.inputs()
#phase angle 
start_case.phase_angle(0) #radians

#define gravity
start_case.gravity(radius=1, radius_unit=jdi.u.Unit('R_jup'),
                   mass=1, mass_unit=jdi.u.Unit('M_jup'),
                  ) #any astropy units available
start_case.inputs['surface_reflect'] = 0 
start_case.inputs['hard_surface'] = 0 
#define star 
start_case.star(opacity, 5000,0,4.0, radius=1, radius_unit=jdi.u.Unit('R_sun')) #opacity db, pysynphot database, temp, metallicity, logg 
start_case.atmosphere(filename=jdi.jupiter_pt(), delim_whitespace=True)
reflected_input, thermal_input, transmission_input = jdi.gpu_test(start_case, opacity)


KeyboardInterrupt



## Compare jit/nopython against cupy

In [34]:
#@jit(nopython=True, cache=True)
def get_transit_1d_cupy(z, dz,nlevel, nwno, rstar, mmw, k_b,amu,
                    player, tlayer, colden, DTAU):
    z = cp.asarray(z)
    dz = cp.asarray(dz)
    mmw = cp.asarray(mmw)
    player = cp.asarray(player)
    tlayer = cp.asarray(tlayer)
    colden= cp.asarray(colden)
    DTAU= cp.asarray(DTAU)
    
    mmw = mmw * amu #make sure mmw in grams

    delta_length=cp.zeros((nlevel,nlevel))
    for i in range(nlevel):
        for j in range(i):
            reference_shell = z[i]
            inner_shell = z[i-j]
            outer_shell = z[i-j-1]
            #this is the path length between two layers 
            #essentially tangent from the inner_shell and toward 
            #line of sight to the outer shell
            integrate_segment=(cp.power(cp.power(outer_shell,2)-cp.power(reference_shell,2), 0.5)-
                    cp.power(cp.power(inner_shell,2)-cp.power(reference_shell,2), 0.5))
            #make sure to use the pressure and temperature  
            #between inner and outer shell
            #this is the same index as outer shell because ind = 0 is the outer-
            #most layer 
            delta_length[i,j]=integrate_segment*player[i-j-1]/tlayer[i-j-1]/k_b
    #remove column density and mmw from DTAU which was calculated in 
    #optics because line of site integration is diff for transit
    #TAU = array([DTAU[:,i]  / colden * mmw  for i in range(nwno)])
    TAU = cp.zeros((nwno, nlevel-1))
    for i in range(nwno):
        TAU[i,:] = DTAU[:,i]  / colden * mmw 
    transmitted=cp.zeros((nwno, nlevel))+1.0
    for i in range(nlevel):
        TAUALL=cp.zeros(nwno)#0.
        for j in range(i):
            #two because symmetry of sphere
            TAUALL = TAUALL + 2*TAU[:,i-j-1]*delta_length[i,j]
        transmitted[:,i]=cp.exp(-TAUALL)

    F=(((min(z))/(rstar))**2 + 
        2./(rstar)**2.*cp.dot((1.-transmitted),z*dz))

    return F

array([0.01058588, 0.01058619, 0.01058676, ..., 0.01057601, 0.01057601,
       0.01057601])

In [35]:
%timeit jdi.get_transit_1d(*transmission_input)
%timeit get_transit_1d_cupy(*transmission_input)

1.87 s ± 4.09 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
5.17 s ± 19.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## guvectorize, vectorize, jit examples

In [38]:
@guvectorize([(float64[:], float64[:], float64[:])], '(n),(n)->(n)',target='cuda')#, nopython=True,target='parallel')
def g_vc(x, y, res):
    for i in range(x.shape[0]):
        res[i] = x[i] + y[i]

@jit(nopython=True)
def g_c(x, y):
    res = np.zeros(x.shape[0])
    for i in range(x.shape[0]):
        res[i] = x[i] + y[i]
    return res

@guvectorize(['void(float64[:], float64[:], float64[:])'],'(n),(n)->(n)')
def g_v(x, y, res):
    res[:] = x + y

In [39]:
t = np.random.randn(1000000)

In [41]:
%timeit u1 = g_vc(t,t)
%timeit u2 =  g_c(t,t)
%timeit u3 = g_v(t,t)
u1 = g_vc(t,t)
u2 =  g_c(t,t)
u3 = g_v(t,t)



128 ms ± 49.4 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
1.17 ms ± 309 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
1.6 ms ± 3.42 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


## Transform get_transit_1d using vectorize/guvectorize