<h1 style="font-family:Impact,Arial;font-size:50px">Load SQL</h1>
<p>This notebook loads the raw data from the VAST database. It uses a PSQL connection to pull the raw tables and columns, then joins the tables and converts them to a usable format in Pandas, in conjunction with the scripts in git.</p>

In [87]:
import math
import pandas as pd
import numpy as np
import psycopg2 as pg
import pandas.io.sql as psql

In [88]:
# Connect to the database.
# The password is deliberately NOT embedded in the notebook: libpq (used by
# psycopg2) picks it up from the PGPASSWORD environment variable or from a
# ~/.pgpass entry. Set one of those before running this cell.
# NOTE(review): a password was previously committed in this cell — it should
# be rotated on the server.
connection = pg.connect("dbname=postgres user=postgres")

In [89]:
%%time
# Per-image metadata from vast_image. Columns that would be ambiguous once
# joined with the flux table are relabelled by name: id -> image_id,
# ra/dec -> im_ra/im_dec, name -> image.
image_table = psql.read_sql(
    "SELECT id, rms, time, jd, cube_id, ra, dec, name FROM vast_image",
    connection,
).rename(
    columns={
        'id': 'image_id',
        'ra': 'im_ra',
        'dec': 'im_dec',
        'name': 'image',
    }
)

CPU times: user 71.7 ms, sys: 41.1 ms, total: 113 ms
Wall time: 630 ms


In [90]:
%%time
# Per-measurement rows from vast_flux: identifiers, fit-quality flag,
# position with errors, beam parameters and total/peak flux with errors.
# The adjacent string literals concatenate into one single-line query.
dataframe = psql.read_sql(
    sql='SELECT id, source_id, image_id, good_fit, '
        'ra, err_ra, dec, err_dec, '
        'bmaj, bmin, pa, '
        'raw_total_flux, err_raw_total_flux, '
        'raw_peak_flux, err_raw_peak_flux '
        'FROM vast_flux',
    con=connection,
)

CPU times: user 2min 54s, sys: 5min 19s, total: 8min 14s
Wall time: 15min 44s


In [91]:
%%time
# Attach the image metadata to every flux measurement. This is an inner
# join on image_id, so measurements whose image_id has no row in
# image_table are dropped.
raw_data = dataframe.merge(
    image_table,
    how='inner',
    left_on='image_id',
    right_on='image_id',
)

CPU times: user 15.2 s, sys: 12 s, total: 27.2 s
Wall time: 34.1 s


In [92]:
def vectorized_distance_on_unit_sphere(df):
    """Great-circle separation between each source and its image centre.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``ra``/``dec`` (source position) and
        ``im_ra``/``im_dec`` (image-centre position), all in degrees.

    Returns
    -------
    pandas.Series
        Angular separation in degrees, one value per row of ``df``.
    """
    src_dec = np.radians(df['dec'])
    img_dec = np.radians(df['im_dec'])
    src_ra = np.radians(df['ra'])
    img_ra = np.radians(df['im_ra'])

    # Spherical law of cosines.
    cosine = (np.cos(src_dec) * np.cos(img_dec) * np.cos(src_ra - img_ra) +
              np.sin(src_dec) * np.sin(img_dec))

    # Floating-point rounding can push the cosine marginally outside
    # [-1, 1] (e.g. a source sitting exactly on the image centre), which
    # would make arccos return NaN; clamp before inverting.
    cosine = np.clip(cosine, -1.0, 1.0)

    # Convert with the exact value of pi rather than a truncated 3.142.
    return np.degrees(np.arccos(cosine))

## Conversions

In [93]:
# Scale the peak flux and its uncertainty by 1/1000 (conversion to Jy).
# NOTE(review): raw_total_flux / err_raw_total_flux are not rescaled in this
# cell — confirm that is intended.
raw_data['raw_peak_flux'] = raw_data['raw_peak_flux'].mul(1.0 / 1000.0)
raw_data['err_raw_peak_flux'] = raw_data['err_raw_peak_flux'].mul(1.0 / 1000.0)

# Parse the image time column into timezone-aware (UTC) timestamps.
raw_data['datetime'] = pd.to_datetime(raw_data['time'], utc=True)

## Calculate the distance between each source and its image centre (then store it as a new column)

In [94]:
%%time
# Store, for every row, the angular separation between the source position
# (ra, dec) and the centre of the image it was measured in (im_ra, im_dec),
# as returned by vectorized_distance_on_unit_sphere.
raw_data['distance'] = vectorized_distance_on_unit_sphere(raw_data)

CPU times: user 1.57 s, sys: 1.76 s, total: 3.33 s
Wall time: 1.73 s


## Add in the gains to the data file

In [95]:
# Keep only the part of the image name before the first "[" so that it can
# be matched against the names used in the gains table.
raw_data["image"] = raw_data["image"].str.split("[").str[0]

In [96]:
%%time
# Load the per-image gains (comma-separated text file). The separator is
# passed by keyword: recent pandas releases no longer accept it positionally.
gains = pd.read_table('../all_gains.txt', sep=',')

# Inner join on the image name: rows of raw_data whose image has no entry in
# the gains file are dropped. Both key columns ('image' and 'Image') are kept
# in the result.
raw_data = pd.merge(raw_data, gains, how='inner', left_on='image', right_on='Image')

CPU times: user 18.9 s, sys: 21.4 s, total: 40.3 s
Wall time: 56.5 s


In [97]:
# Remove the duplicate join key that came from the gains table. `columns=`
# replaces the positional axis argument, which recent pandas releases reject.
raw_data = raw_data.drop(columns='Image')

## Filter good fits only

In [98]:
# Keep only rows flagged as good fits. The explicit comparison with True is
# kept (rather than using the column directly as a mask) so that any
# non-True entry simply evaluates to False.
raw_data = raw_data.loc[raw_data['good_fit'].eq(True)]

## Save the reduced data file

In [99]:
%%time
# The filtering above leaves gaps in the row index; reset_index() moves the
# old index into a regular column and gives the frame a fresh default index
# before it is written out in Feather format.
raw_data = raw_data.reset_index(drop=False)
raw_data.to_feather('mwats_raw_data_Mar_SQL.fth')

CPU times: user 9.22 s, sys: 21.8 s, total: 31.1 s
Wall time: 37 s
