# Data

**This notebook queries and downloads data from the SDSS Data Release 15 (DR15) database.**

In [2]:
import binascii
import io
import os

import matplotlib.image as mpimg
import numpy
import numpy as np
import pandas as pd
import PIL as pil
import requests

## Querying

**Query position, magnitude, and type information for 1,000 stars and 1,000 galaxies in each one-magnitude bin of visual magnitude from 15.0 to 20.0 (10,000 samples in total).** Note that the query below takes the first 1,000 matches ordered by `objid`, so each sample is deterministic rather than truly random. The expression transforming filter magnitudes to visual magnitude is from [Jester et al. (2005), Transformations between SDSS magnitudes and other systems](http://www.sdss3.org/dr8/algorithms/sdssUBVRITransform.php#Jester2005).

Submit the following SQL query on the [SDSS SQL Search](http://skyserver.sdss.org/dr15/en/tools/search/sql.aspx) page and download each result as a `.csv` file.

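Run the query once for every combination of `s.class` (`'GALAXY'` or `'STAR'`) and visual-magnitude bin; the `p.type` filter must match the class (the star table loaded later in this notebook has `type = 6`).  
The visual magnitude is `v = 0.42*p.g + 0.58*p.r`, with bins (15, 16), (16, 17), ..., (19, 20).  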
```SQL
-- Template query: returns at most 1000 photometric objects that have a
-- matching spectroscopic record, restricted to one class and one
-- visual-magnitude bin. Edit the marked lines for the other samples.
SELECT TOP 1000
   p.objid, p.ra, p.dec,                     -- object id and position
   p.u, p.g, p.r, p.i, p.z,                  -- per-band magnitudes
   p.run, p.rerun, p.camcol, p.field,
   p.score, p.type, s.class, s.subclass
FROM PhotoObj AS p
JOIN SpecObj AS s ON s.bestobjid = p.objid   -- inner join: only objects with a SpecObj row
WHERE
   p.score > 0.80
   AND p.CLEAN = 1
   AND p.type = 3                            -- NOTE(review): the STAR table shown in this notebook has type = 6; change together with s.class
   AND s.class = 'GALAXY'                    -- use 'STAR' for the star samples
   AND (0.42*p.g+0.58*p.r) BETWEEN 15 AND 16 -- visual-magnitude bin; shift the bounds for the other bins
ORDER BY p.objid                             -- fixed ordering, so TOP 1000 returns the lowest objids
```

## Downloading

**Use the position information to request and download image data from the [SDSS Image Cutout API](http://skyserver.sdss.org/dr15/en/help/docs/api.aspx#imgcutout). Save the images as a `NumPy` array in a `.npy` file.**

In [3]:
# Folder holding the downloaded query results, and the sample to load
# (the file name without its extension).
path_qry = "../data/query/"
fname = 's19'

# Read the selected query table into a DataFrame.
f = pd.read_csv(f"{path_qry}{fname}.csv")

# Preview the first ten rows.
f.head(10)

Unnamed: 0,objid,ra,dec,u,g,r,i,z,run,rerun,camcol,field,score,type,class,subclass
0,1237645879562862699,15.896126,1.264845,20.14134,19.28787,19.04397,18.96897,18.79573,94,301,6,274,0.80791,6,STAR,A0
1,1237645942904455278,54.683424,0.216919,20.79859,19.54183,19.04607,18.82743,18.74658,109,301,4,134,0.967092,6,STAR,F9
2,1237645942904520914,54.923634,0.221918,23.40751,20.02904,18.51576,17.8002,17.40633,109,301,4,135,0.968026,6,STAR,M0
3,1237645942905372939,56.812457,0.219559,23.27591,20.35437,18.82604,17.39119,16.5816,109,301,4,148,0.967338,6,STAR,M4
4,1237645942905897089,57.975144,0.175448,20.33587,19.2508,18.86774,18.7108,18.61204,109,301,4,156,0.949746,6,STAR,A0p
5,1237645942906028214,58.321007,0.145993,23.17228,20.57825,19.14963,18.55397,18.15417,109,301,4,158,0.939912,6,STAR,K5
6,1237645942906093728,58.521446,0.185547,22.81635,20.2268,18.74288,17.21303,16.4029,109,301,4,159,0.94558,6,STAR,M4
7,1237645942906290409,58.925466,0.036891,23.22584,20.86104,19.08917,17.84438,17.26167,109,301,4,162,0.965375,6,STAR,M2
8,1237645943435034856,40.292295,0.45806,21.32218,20.11019,19.63111,19.45875,19.27914,109,301,5,38,0.930998,6,STAR,G4V (32923)
9,1237645943973609570,44.1903,0.889276,20.75858,20.02213,19.67522,19.57206,19.56455,109,301,6,64,0.889469,6,STAR,L1


In [4]:
# Cutout request settings sent to the ImgCutout service.
width, height, scale = 64, 64, 0.8
# Side length (pixels) of the central square kept from each downloaded cutout.
crop_size = 32
cutout_url = "http://skyserver.sdss.org/dr15/SkyServerWS/ImgCutout/getjpeg"
request_timeout = 30  # seconds; fail instead of hanging on a stalled request


def fetch_cutout(session, ra, dec):
    """Download one JPEG cutout for (ra, dec) and return its central crop.

    Parameters
    ----------
    session : requests.Session
        Session used to send the GET request (reuses the connection).
    ra, dec : float
        Values of the ``ra`` and ``dec`` columns of one row of the query table.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(crop_size, crop_size, 3)`` with the RGB pixel values.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status code.
    """
    query = {'ra': ra, 'dec': dec, 'width': width, 'height': height, 'scale': scale}
    resp = session.get(cutout_url, params=query, timeout=request_timeout)
    # Stop here on an HTTP error rather than handing an error page to PIL.
    resp.raise_for_status()

    # Centre the crop box inside the requested width x height image.
    left = (width - crop_size) // 2
    top = (height - crop_size) // 2
    box = (left, top, left + crop_size, top + crop_size)

    with pil.Image.open(io.BytesIO(resp.content)) as pic:
        # Force three channels so the result always fits a slot of img_arr.
        tile = pic.convert("RGB").crop(box)
        return np.asarray(tile)


# Make sure the output folder exists before starting the long download loop.
path_arr = "../data/array/"
os.makedirs(path_arr, exist_ok=True)

# One (crop_size, crop_size, 3) slot per table row. The default float64 dtype
# is kept so the saved array has the same format as before.
img_arr = np.empty([len(f), crop_size, crop_size, 3])

with requests.Session() as session:
    for i, (ra, dec) in enumerate(zip(f['ra'], f['dec'])):
        img_arr[i, :] = fetch_cutout(session, ra, dec)

        # Progress report every 50 images: row index and array size in kB.
        if i % 50 == 0:
            print(i, img_arr.nbytes/1000, end=' | ')

# Written to "<path_arr><fname>.npy" (np.save appends the extension).
np.save(path_arr+fname, img_arr)

0 24576.0 | 50 24576.0 | 100 24576.0 | 150 24576.0 | 200 24576.0 | 250 24576.0 | 300 24576.0 | 350 24576.0 | 400 24576.0 | 450 24576.0 | 500 24576.0 | 550 24576.0 | 600 24576.0 | 650 24576.0 | 700 24576.0 | 750 24576.0 | 800 24576.0 | 850 24576.0 | 900 24576.0 | 950 24576.0 | 