In [1]:
import datetime
import math
import os
import string

import numpy as np
import pylab
from pyspark.sql.types import *
from pyspark.sql import SQLContext
from pyspark.sql import Row

# Build the SQLContext that the cells below use to create DataFrames, read parquet files and run SQL.
# NOTE(review): `sc` is not defined anywhere in this notebook; presumably it is the SparkContext
# that the notebook kernel injects -- confirm before running on a plain Python kernel.
sqlContext = SQLContext(sc)

In [2]:
def config_softlayer_acct(name, auth_url, username, password):
    """Write the Swift object-store settings into the Hadoop configuration of `sc`.

    All keys are written under the fixed prefix
    "fs.swift.service.jstartobjectstore".

    Parameters:
        name: accepted for the caller's convenience; the body does not read it.
        auth_url: value stored under ".auth.url".
        username: value stored under both ".username" and ".tenant".
        password: value stored under both ".apikey" and ".password".

    Returns None; the only effect is the mutation of the Hadoop configuration.
    """
    prefix = "fs.swift.service.jstartobjectstore"
    # (setter method, key suffix, value) -- applied strictly in this order.
    settings = (
        ("set", ".auth.url", auth_url),
        ("set", ".username", username),
        ("set", ".tenant", username),
        ("set", ".auth.endpoint.prefix", "endpoints"),
        ("setInt", ".http.port", 8080),
        ("set", ".apikey", password),
        ("setBoolean", ".public", True),
        ("set", ".use.get.auth", "true"),
        ("setBoolean", ".location-aware", False),
        ("set", ".password", password),
    )
    hconf = sc._jsc.hadoopConfiguration()
    for setter, suffix, value in settings:
        getattr(hconf, setter)(prefix + suffix, value)

# Object-store credentials are taken from the environment so that no secret is
# stored in (or leaked through) the notebook file. Export SOFTLAYER_USERNAME and
# SOFTLAYER_API_KEY before starting the kernel; a missing variable raises
# KeyError naming the variable instead of failing later with an opaque auth error.
# Any API key that was previously pasted into this cell should be rotated.
config_softlayer_acct(
    "seti",
    "https://dal05.objectstorage.softlayer.net/auth/v1.0",
    os.environ["SOFTLAYER_USERNAME"],
    os.environ["SOFTLAYER_API_KEY"],
)

In [3]:
# The targets database file supplies four columns (TgtId, RA2000Hr, Dec2000Deg, TgtName);
# a fifth column, simbadURL, is derived from them below. Every column is a nullable string.
schemaString = "TgtId RA2000Hr Dec2000Deg TgtName simbadURL"
fields = list(map(lambda column: StructField(column, StringType(), True), schemaString.split()))
schema = StructType(fields)


def _to_target_row(p):
    """Turn the comma-split fields of one line into a (TgtId, RA2000Hr, Dec2000Deg, TgtName, simbadURL) tuple."""
    # The RA field is scaled by 15 before it goes into the SIMBAD coordinate query
    # (presumably hours -> degrees); the Dec field is appended unchanged.
    coord = str(15*float(p[1]))+'+'+p[2]
    # The target name has surrounding double quotes and spaces removed.
    return (p[0], p[1], p[2], p[3].strip('" '), 'http://simbad.u-strasbg.fr/simbad/sim-coo?Coord='+coord)


# Read the target database text file and split every line on commas.
lines = sc.textFile("swift://seti.jstartobjectstore/seti-targets-database.csv")
parts = lines.map(lambda row_text: row_text.split(","))
targets = parts.map(_to_target_row)

# Attach the schema to the RDD of tuples to obtain the targets DataFrame.
targetsDF = sqlContext.createDataFrame(targets, schema)

## TargetDB in Parquet
### The next cells show how to write the DataFrame created above out to a Parquet file, or how to read that Parquet file back in if it has already been created.

If targetDB.parquet has already been created, skip cells 2 and 3, uncomment the
read.parquet line below, and use the resulting DataFrame right away.

In [7]:
# uncomment the next line and run it to create the targetDB parquet for future use. If you get an error, the parquet probably already exists.
#targetsDF.write.parquet('targetDB.parquet')

In [10]:
# uncomment the next line and jump to here to just read parquet ... then search for targets near given coordinates.
# targetsDF = sqlContext.read.parquet('targetDB.parquet')

## Display clickable URLs to show SIMBAD information pages for a given target

In [8]:
# Display a clickable information link for a given target
# Example: target 140500 : 11 UMi b -- Extra-solar Confirmed Planet

t = '140500'

# Fetch only the URL column of the first matching row. first() returns None when
# nothing matches, so an unknown TgtId is reported instead of raising IndexError,
# and the column is addressed by name rather than by position.
match = targetsDF.filter(targetsDF.TgtId == t).select('simbadURL').first()
if match is None:
    print(t+'\t\t'+'no such target in the target database')
else:
    print(t+'\t\t'+match['simbadURL'])

140500		http://simbad.u-strasbg.fr/simbad/sim-coo?Coord=229.27453575+71.8239011


In [22]:
# This cell only runs when confirmedSignalDB.parquet is available. It is an example of how to
# collect a set of targets and pair each one with its information link. Replace the query below
# with your own signalDB query that isolates the targets of interest using the filter conditions
# you need, instead of the simple IN clause.

confirmed = sqlContext.read.parquet('confirmedSignalDB.parquet')
confirmed.registerTempTable("confirmed")
interest_query = "SELECT DISTINCT TgtId FROM confirmed WHERE TgtId IN ('161214', '160189','161975', '161848', '161790')"
targets_of_interest = sqlContext.sql(interest_query)

# Join the targets of interest with the target database on TgtId, keep only the
# target id and the URL link, and bring the result to the driver as a pandas DataFrame.
same_target = targets_of_interest.TgtId == targetsDF.TgtId
joined = targets_of_interest.join(targetsDF, same_target)
links_only = joined.select(targets_of_interest.TgtId, targetsDF.simbadURL)
targets_with_links = links_only.toPandas()

In [23]:
# Two-line header, then one tab-separated "target id / link" line per row.
print('Targets of Interest\n')
print('Target\t\tInformation link\n')

# Walk the two columns in parallel; the row index is not needed.
for tgt_id, url in zip(targets_with_links['TgtId'], targets_with_links['simbadURL']):
    print(tgt_id+'\t\t'+url)

Targets of Interest

Target		Information link

161214		http://simbad.u-strasbg.fr/simbad/sim-coo?Coord=281.61883335+46.81416667
161790		http://simbad.u-strasbg.fr/simbad/sim-coo?Coord=285.18285+43.8311
161848		http://simbad.u-strasbg.fr/simbad/sim-coo?Coord=284.90295+43.954
160189		http://simbad.u-strasbg.fr/simbad/sim-coo?Coord=284.92183335+45.97222222
161975		http://simbad.u-strasbg.fr/simbad/sim-coo?Coord=284.9058+43.9492
