# Chapter 5: Astronomical Data Analysis

## Spectral analysis

In [None]:
from astropy.io import fits
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Open the FITS file holding the spectrum and print a summary of its HDUs.
# NOTE(review): the handle stays open until fits_data.close() is called in a
# later cell.
file = "data_files/ADP.2014-10-29T09_42_08.747.fits"
fits_data = fits.open(file)
fits_data.info()

In [None]:
# Show the column definitions of the table stored in HDU 1.
print(fits_data[1].columns)

In [None]:
# Keep a reference to the data of HDU 1 before closing the file handle.
scidata = fits_data[1].data
fits_data.close()

In [None]:
# Inspect the Python type of the extracted data object.
type(scidata)

In [None]:
# Check whether the data object is an instance of (a subclass of) np.ndarray.
isinstance(scidata, np.ndarray)

In [None]:
# Show the dimensions of the data array.
scidata.shape

In [None]:
# Everything is read from the first (index 0) row of the table; the three
# quantities used below are picked by their field index within that row.
wavelength, flux, flux_err = (scidata[0][col] for col in (0, 4, 5))

In [None]:
# Number of elements in the wavelength array.
wavelength.size

In [None]:
# Scale the flux and its error by the peak flux, so the maximum becomes 1.
norm = np.max(flux)
flux, flux_err = flux/norm, flux_err/norm

# NOTE(review): the factor 0.1 together with the "nm" axis label used when
# plotting suggests an Angstrom -> nm conversion -- confirm the input unit.
wavelength = 0.1*wavelength

In [None]:
%matplotlib inline 

# Plot the spectrum as a line, restricted to the 587-590 window of the
# wavelength axis, and write the figure to a PDF file.
plt.plot(wavelength, flux, linestyle='-' , color='navy')
# Raw string: "\l" is not a valid escape sequence, so the non-raw literal
# triggers a warning on current Python versions. The resulting label text is
# unchanged.
plt.xlabel(r"$\lambda$ / nm")
# NOTE(review): flux was divided by its maximum in an earlier cell, so the
# "ADU" unit in this label may be stale -- confirm.
plt.ylabel("Flux / ADU")
plt.xlim(587,590)

plt.savefig("spectrum_full.pdf")

## Transit light curves

In [None]:
import numpy as np

# Read the light-curve table from a text file into a 2-D array.
data = np.loadtxt("data_files/tres2_data.dat")

# The first three columns are used as time stamp, flux and flux error
# (names as used below); each one is a column slice of data.
mjd, flux, err = data[:, 0], data[:, 1], data[:, 2]

In [None]:
# Last entry of the time column.
mjd[-1]

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline 

# Light curve as individual points with error bars (no connecting line).
point_style = dict(linestyle='none', marker='o', color='navy')
plt.errorbar(mjd, flux, yerr=err, ecolor='steelblue', **point_style)
plt.xlabel("MJD")
plt.ylabel("Flux / ADU")

plt.savefig("tres2_lightcurve.pdf")

In [None]:
# 100 divided by the number of minutes per day (24*60), i.e. 100 minutes
# expressed in days.
100/(24*60)

In [None]:
# Boundaries of the transit window on the mjd axis, written as offsets from
# 5.645e4: data before T1 / after T4 is treated as out of transit below.
T1, T4 = 5.645e4 + 0.445, 5.645e4 + 0.52

In [None]:
# Out-of-transit baseline: mean flux before T1 and after T4, then the
# average of the two means.
before_transit = mjd < T1
after_transit = mjd > T4
norm1 = np.mean(flux[before_transit])
norm2 = np.mean(flux[after_transit])
norm = 0.5*(norm1 + norm2)

print(f"Flux normalization factor: {norm:.3f}")

# rescale both series in place so the baseline level becomes 1
for series in (flux, err):
    series /= norm

In [None]:
# width and offset of sample window
offset = 7
width = 2*offset + 1

# The window must fit into the series at least once.
if flux.size < width:
    raise ValueError("flux has fewer samples than the averaging window")

# Moving average over `width` consecutive samples. Convolving with a uniform
# kernel in 'valid' mode keeps only fully overlapping windows, so the result
# has flux.size - width + 1 elements, one per window position.
flux_smoothed = np.convolve(flux, np.full(width, 1/width), mode='valid')

flux_min = np.min(flux_smoothed)
print(f"Minimum flux: {flux_min:.3f}")

In [None]:
# Normalized light curve as points with error bars.
plt.errorbar(mjd, flux, yerr=err, ecolor='steelblue', 
             linestyle='none', marker='o', color='navy', zorder=1)
plt.xlim(np.min(mjd), np.max(mjd))
plt.xlabel("MJD")
plt.ylabel("rel. flux")

# smoothed flux
# The moving average drops `offset` samples at each end. An explicit end
# index is used because mjd[offset:-offset] would be empty for offset = 0.
plt.plot(mjd[offset:mjd.size - offset], flux_smoothed, 
         lw=2, color='orange', zorder=2)

# ingress, egress, and minimum flux
plt.axvline(T1, color='crimson', lw=1, linestyle=':')
plt.axvline(T4, color='crimson', lw=1, linestyle=':')
plt.axhline(flux_min, lw=1, linestyle='--', color='black')

plt.savefig("tres2_lightcurve_smooth.pdf")

In [None]:
# pi times the ratio of the transit window length (T4 - T1) to the period P
# NOTE(review): P is presumably in days, like T1 and T4 -- confirm.
P = 2.47063
transit_duration = T4 - T1
x = np.pi*transit_duration/P

In [None]:
# a = M^(1/3) * T^(2/3), with the factor 0.98 as M and the period divided by
# 365.25 as T.
# NOTE(review): this looks like Kepler's third law in solar masses, years and
# AU -- confirm the meaning of 0.98.
mass_factor = 0.98
period_factor = P/365.25
a = mass_factor**(1/3)*period_factor**(2/3)
print(f"Semi-major axis: {a:.5f} AU")

In [None]:
from scipy.constants import au
from astropy.constants import R_jup

# depth of the smoothed minimum below the normalized baseline of 1
delta_flux = 1 - flux_min

# a*au*x is divided by 1 + 1/sqrt(depth) to obtain R_p, which is then
# printed in units of R_jup
denominator = 1 + (1/delta_flux)**(1/2)
R_p = a*au * x / denominator
print("Radius: {:.2f} R_jup".format(R_p/R_jup.value))

In [None]:
# sine of the inclination angle (83.6 degrees)
fct = np.sin(np.radians(83.6))
print(fct)

# Contact geometry for a circular orbit seen at inclination i:
#   (R_s + R_p)^2 = a^2 * (1 - sin(i)^2 * cos(x)^2),   x = pi*(T4 - T1)/P.
# For i = 90 degrees this reduces to a*sin(x), i.e. the edge-on estimate
# a*x for small x. The previous factor 1 - sin(i)*cos(2*x) does not have
# that limit (it gives sqrt(2)*sin(x) instead).
R_p = a*au * (1 - (fct*np.cos(x))**2)**(1/2) / (1 + (1/delta_flux)**(1/2))
print("Radius: {:.2f} R_jup".format(R_p/R_jup.value))

## Survey data sets

Retrieve a dataset via an [ADQL query](https://gea.esac.esa.int/archive-help/adql/index.html) from the database of the [Gaia Archive](https://gea.esac.esa.int/archive/). Navigate to the *Search* page of the archive's website and select the *Advanced (ADQL)* tab. Enter the following ADQL query and download the dataset in CSV format.

```
SELECT l, b, parallax, parallax_over_error, radial_velocity, phot_g_mean_mag
FROM gaiadr2.gaia_source
WHERE phot_g_mean_mag<12 AND ABS(radial_velocity)>0 AND parallax>=1.0 AND parallax_over_error>=10
```

Copy or move the downloaded file to your work directory.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# you might need to adjust the file name
gaia_csv = "gaia_12mag_1kpc-result.csv"

# Skip the header line and keep columns 0, 1, 2 and 4 of the CSV table.
# NOTE(review): with the column order of the ADQL query above these would be
# l, b, parallax and radial_velocity -- confirm against the downloaded file.
data = np.loadtxt(gaia_csv,
                  delimiter=',', skiprows=1,
                  usecols=(0, 1, 2, 4), dtype='float64')

In [None]:
# Number of rows and columns that were loaded.
data.shape

In [None]:
# distance as the reciprocal of column 2 of data
# NOTE(review): the 'd / kpc' label implies the parallax is in mas -- confirm.
parallax = data[:, 2]
d = 1/parallax

fig = plt.figure(figsize=(6, 4), dpi=300)

# histogram of the distances with 100 bins
plt.hist(d, bins=100)
plt.xlabel('d / kpc')
plt.ylabel('N')
plt.savefig('d_histogram.png')

In [None]:
bin_width = 2.5  # in km/s
rv_lim = 140     # upper limit

# Bin edges from -rv_lim upwards in steps of bin_width; the stop value lies
# one step beyond rv_lim so that rv_lim itself is still generated as an edge.
bins = np.arange(start=-rv_lim, stop=rv_lim + bin_width, step=bin_width)

fig = plt.figure(figsize=(6, 4), dpi=300)

# histogram of column 3 of data; the return value of plt.hist is kept for
# the fit further below
rv_histogram = plt.hist(data[:, 3], bins=bins)
plt.xlabel('radial velocity / km/s')
plt.ylabel('N')
plt.savefig('rv_histogram.png')

In [None]:
# bin centres: every edge except the last one, shifted by half a bin width
left_edges = bins[:-1]
x = left_edges + bin_width/2

# first element of the plt.hist return value, used as the fit data
y = rv_histogram[0]

In [None]:
# list the (x, y) pairs, one per line
for centre, count in zip(x, y):
    print(centre, count)

In [None]:
import scipy.optimize as opt

# model function for the fit
def gaussian(x, y0, x0, sigma_sqr):
    """Evaluate a Gaussian profile at x.

    Parameters
    ----------
    x : scalar or array
        Position(s) at which the profile is evaluated.
    y0 : float
        Value of the profile at x == x0.
    x0 : float
        Position of the peak.
    sigma_sqr : float
        Squared width parameter (appears as 2*sigma_sqr in the exponent).

    Returns
    -------
    y0 * exp(-(x - x0)**2 / (2*sigma_sqr)), with the same shape as x.
    """
    squared_offset = (x - x0)**2
    return y0*np.exp(-squared_offset/(2*sigma_sqr))

# Fit the Gaussian model to the (x, y) data; no initial parameter guess is
# passed to curve_fit.
params, params_covariance = opt.curve_fit(gaussian, xdata=x, ydata=y)

# square roots of the diagonal elements of the covariance matrix
params_std = np.sqrt(np.diag(params_covariance))

print("Parameters best-fit:", params)
print("Parameters standard deviation:", params_std)

In [None]:
# fitted model evaluated at the same x values as the fit data
y_gauss = gaussian(x, *params[:3])

fig = plt.figure(figsize=(6, 4), dpi=300)

# histogram of column 3 of data with the fitted curve drawn over it
plt.hist(data[:, 3], bins=bins)
plt.plot(x, y_gauss, color='red')
plt.xlim(-100, 100)
plt.xlabel('radial velocity / km/s')
plt.ylabel('N')
plt.savefig('rv_histo_fit.png')

In [None]:
from scipy.stats import ks_2samp

# Two-sample Kolmogorov-Smirnov test applied to the histogram values y and
# the fitted curve values y_gauss.
# NOTE(review): both inputs are per-bin values rather than raw samples --
# confirm this is the intended use of the test.
ks_2samp(y, y_gauss)

Plot density as a function of distance within the cone

In [None]:
# split the rows of data by the sign of column 3: positive values go to
# redshift, zero or negative values to blueshift
rv = data[:, 3]
redshift = data[rv > 0]
blueshift = data[rv <= 0]

print("Redshifted stars:", redshift.shape[0])
print("Blueshifted stars:", blueshift.shape[0])

In [None]:
# Number of entries left when only every 100th row of redshift is kept.
redshift[::100,1].size

In [None]:
fig = plt.figure(figsize=(10, 2.5), dpi=300)
ax = fig.add_subplot(111)

# only every stride-th row of each subset is drawn
stride = 10

# column 0 against column 1; the blueshifted subset is drawn first, the
# redshifted subset second
for subset, colour in ((blueshift, 'blue'), (redshift, 'red')):
    plt.scatter(subset[::stride, 0], subset[::stride, 1],
                s=1, marker='.', color=colour, alpha=0.1)

plt.xlabel('longitude [deg]')
plt.ylabel('lat. [deg]')
plt.xlim(0, 360)
plt.ylim(-30, 30)

# ticks every 30 degrees along x (0 ... 360), three ticks along y
plt.xticks(list(range(0, 361, 30)))
plt.yticks([-30, 0, 30])

# same scale for one degree along both axes
ax.set_aspect('equal')

plt.savefig('rv_map.png')

## Image processing

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from astropy.io import fits

In [None]:
# Open the first M51 image file and print a summary of its HDUs.
# NOTE(review): the handle stays open until m51r.close() is called in a
# later cell.
m51r_file = "data_files/h_m51_h_s20_drz_sci.fits"
m51r = fits.open(m51r_file)
m51r.info()

In [None]:
# Keep a reference to the data of the primary HDU (index 0) before closing
# the file handle.
m51r_data = m51r[0].data
m51r.close()

In [None]:
# maximum, minimum, mean, median and standard deviation of the pixel values,
# printed one per line in that order
for statistic in (np.max, np.min, np.mean, np.median, np.std):
    print(statistic(m51r_data))

In [None]:
# histogram of all pixel values (image collapsed to 1-D) with 100 bins and a
# logarithmic count axis
pixel_values = m51r_data.ravel()
plt.hist(pixel_values, bins=100, log=True)
plt.xlabel('Signal')
plt.ylabel('N')
plt.savefig('m51_histogram.png', dpi=300)

In [None]:
# grayscale view of the image with the colour scale limited to [0, 0.1]
image = plt.imshow(m51r_data, cmap='gray')
image.set_clim(0, 0.1)
plt.colorbar()
plt.savefig('m51r.png', dpi=300)

In [None]:
# Read the primary-HDU data of the two remaining image files. The `with`
# blocks close each file handle even if reading the data raises.
m51g_file = "data_files/h_m51_v_s20_drz_sci.fits"
with fits.open(m51g_file) as m51g:
    m51g_data = m51g[0].data

m51b_file = "data_files/h_m51_b_s20_drz_sci.fits"
with fits.open(m51b_file) as m51b:
    m51b_data = m51b[0].data

In [None]:
# brightness scaling applied to the mean-normalized channels
alpha = 0.15

# RGB cube: transposed image shape plus a trailing colour axis. The shape is
# derived from the data instead of being hard-coded (assumes 2-D images of
# identical shape).
m51rgb = np.zeros(m51r_data.shape[::-1] + (3,))

# each channel is the transposed image divided by its own mean
for channel, plane in enumerate((m51r_data, m51g_data, m51b_data)):
    m51rgb[:,:,channel] = plane.transpose() / np.mean(plane)

m51rgb *= 255*alpha
# Limit to the 8-bit range on both sides: without the lower bound, negative
# pixel values would not survive the later conversion to unsigned integers
# as 0.
m51rgb = np.clip(m51rgb, 0, 255)

m51rgb.dtype

In [None]:
from PIL import Image

# cast the array to 8-bit unsigned integers, wrap it in a PIL image, then
# display it and write it to a PNG file
rgb_bytes = m51rgb.astype(np.uint8)
img = Image.fromarray(rgb_bytes)
img.show()
img.save('m51rgb.png')