<a href="https://colab.research.google.com/github/nmcardoso/splus-website/blob/master/sdss_stamps.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
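# Image options: `p` holds the one-letter flags (apparently the values accepted by the `opt` query parameter) and `f` the matching overlay names, in the same order.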
# p = ["G", "L", "P", "S", "O", "B", "F", "M", "Q", "I", "A", "X"]
# f = ["Grid", "Label", "PhotoObjs", "SpecObjs", "Outline",
#     "BoundingBox", "Fields", "Masks", "Plates", "InvertImage", "APOGEE", "2MASS Images"];

In [0]:
# url_format: http://skyserver.sdss.org/dr15/SkyServerWS/ImgCutout/getjpeg?TaskName=Skyserver.Chart.Navi&ra=229.525575753922&dec=42.7458537608544&scale=0.3&width=300&height=300&opt=X

In [0]:
import requests
from IPython.display import Image, display
import pandas as pd
import shutil
import os
import tarfile
from google.colab import drive
import copy

In [0]:
# Mount Google Drive at /gdrive; the dataset CSV is read from, and the final
# archive is written to, paths under this mount point.
drive.mount('/gdrive')

Mounted at /gdrive


In [0]:
# SkyServer (SDSS DR15) endpoint that every cutout request is sent to.
base_url = 'http://skyserver.sdss.org/dr15/SkyServerWS/ImgCutout/getjpeg'
# Local directory that receives one sub-directory of images per class.
downloads_path = '/content/downloads'
# CSV file read by load_dataset().
dataset_path = '/gdrive/My Drive/ml_datasets/splus_crossmatch.csv'
# Placeholder for the loaded DataFrame; populated by load_dataset().
data = None

In [0]:
def load_dataset(force=False):
  """Read the CSV at ``dataset_path`` into the module-level ``data`` frame.

  The file is parsed with ``pd.read_csv`` only when ``data`` has not been
  loaded yet or when ``force`` is true, so re-running the cell does not
  re-read the file.

  Args:
    force: when true, re-read the CSV even if ``data`` is already set.
  """
  global data
  # Compare against None explicitly: ``not data`` raises ValueError once
  # ``data`` holds a DataFrame, because a frame has no single truth value.
  if data is None or force:
    data = pd.read_csv(dataset_path)

def get_params(ra, dec, scale=0.2, width=200, height=200, opt=''):
  """Assemble the query parameters for a single cutout request.

  Args:
    ra: value sent as the ``ra`` parameter.
    dec: value sent as the ``dec`` parameter.
    scale: value sent as the ``scale`` parameter.
    width: value sent as the ``width`` parameter.
    height: value sent as the ``height`` parameter.
    opt: value sent as the ``opt`` parameter (empty string by default).

  Returns:
    A dict with the fixed ``TaskName`` entry followed by the arguments,
    keyed by their parameter names.
  """
  query = dict(TaskName='Skyserver.Chart.Navi')
  query.update(ra=ra, dec=dec, scale=scale, width=width, height=height, opt=opt)
  return query

def prepare_downloads_dir(classes):
  """Recreate ``downloads_path`` empty, with one sub-directory per class.

  Any existing content under ``downloads_path`` is deleted first.

  Args:
    classes: iterable of class labels; each becomes a sub-directory name.
  """
  # On a fresh runtime the directory does not exist yet, and rmtree on a
  # missing path raises FileNotFoundError, so only remove what is there.
  if os.path.isdir(downloads_path):
    shutil.rmtree(downloads_path)
  os.makedirs(downloads_path)
  for c in classes:
    # exist_ok keeps a repeated label from aborting the setup.
    os.makedirs(f'{downloads_path}/{c}', exist_ok=True)

def download_image(url, params, filename, output_path=downloads_path, timeout=30):
  """Fetch ``url`` with ``params`` and save the response body to disk.

  Args:
    url: address to request.
    params: query parameters passed to ``requests.get``.
    filename: name of the file written inside ``output_path``.
    output_path: destination directory. The default is the value
      ``downloads_path`` had when this function was defined.
    timeout: seconds to wait for the server before giving up, so a stalled
      connection cannot block the caller indefinitely.

  Raises:
    Exception: when the response status code is not 200.
    requests.exceptions.RequestException: on connection errors or timeout.
  """
  resp = requests.get(url, params=params, timeout=timeout)
  if resp.status_code != 200:
    raise Exception(f'Error: {resp.status_code} [{filename}]')

  with open(f'{output_path}/{filename}', 'wb') as f:
    f.write(resp.content)

def make_tarfile(source, output):
  """Pack ``source`` into a gzip-compressed tar archive at ``output``.

  Inside the archive the content is stored under the last path component of
  ``source``. A confirmation line is printed once the archive is closed.

  Args:
    source: path of the file or directory to archive.
    output: path of the ``.tar.gz`` file to create.
  """
  with tarfile.open(output, "w:gz") as archive:
    _, top_level_name = os.path.split(source)
    archive.add(source, arcname=top_level_name)
  print(f'Tarfile created successfully [{output}]')

def batch_download(data, classes_column, limit=None):
  """Download one image per row of ``data`` into per-class directories.

  The download directory is reset via ``prepare_downloads_dir`` with the
  distinct values of ``classes_column``. Each row is then requested from
  ``base_url`` using its ``RA_2`` and ``Dec_2`` values and saved as
  ``<ID>.jpg`` under the directory named after the row's class. A failed
  download is printed and skipped, so one bad row does not stop the batch.

  Args:
    data: DataFrame with ``ID``, ``RA_2``, ``Dec_2`` and ``classes_column``.
    classes_column: name of the column holding the class label.
    limit: maximum number of rows to process; a falsy value means all rows.
  """
  # Derive the classes from the frame that was passed in, not from a
  # notebook-level global, so the function works for any caller.
  classes = list(data.groupby(classes_column).indices.keys())
  prepare_downloads_dir(classes)

  # Cap the progress denominator at the row count so the percentage reaches
  # 100% even when ``limit`` is larger than the frame.
  n_rows = data.shape[0]
  total = min(limit, n_rows) if limit else n_rows

  for i, (_, row) in enumerate(data.iterrows()):
    if (limit and i >= limit):
      break

    try:
      download_image(
          base_url,
          get_params(row['RA_2'], row['Dec_2'], scale=0.15),
          f'{row["ID"]}.jpg',
          output_path=f'{downloads_path}/{row[classes_column]}'
      )
      print(f'Success [{row["ID"]}] ({(((i + 1)/total)*100):.2f}%)')
    except Exception as error:
      # Best effort: report the failure and move on to the next row.
      print(error)

In [0]:
# Load the CSV (no-op if it is already in memory) and list its columns,
# one name per line.
load_dataset()

print('\n'.join(map(str, data.columns)))

In [0]:
# Tally how many rows carry each gz2class label. Only that one column is
# walked: data.iterrows() would build a full Series for every row of this
# very wide frame just to read a single value.
kmap = {}
for klass in data['gz2class']:
  kmap[klass] = kmap.get(klass, 0) + 1

# Most frequent classes first; sorted() is stable, so labels with equal
# counts stay in first-seen order. Only the top 50 are printed.
sorted_kmap = sorted(kmap.items(), key=lambda x: x[1], reverse=True)
print(*[f'{x[0]}: {x[1]}' for x in sorted_kmap[:50]], sep='\n')

Ei: 6166
Er: 4294
Ec: 1683
Ser: 388
Sc2m: 316
Sc?t: 245
Ei(m): 157
SBc2m: 139
Er(m): 130
Sen: 126
Sc2t: 123
Sc2l: 121
Ei(i): 103
Sc?m: 91
Er(o): 90
SBc2l: 71
Sb2m: 69
Ei(o): 68
Sb: 66
Sb(r): 56
Sc: 52
SBb2m: 50
Sc3m: 44
Sd(i): 40
Sb2l: 40
Sb?t: 35
SBb: 32
Sb2t: 31
SBb2t: 30
Sc3t: 29
SBc: 29
SBc2t: 26
Sc1t: 22
Sc(i): 22
Sc1m: 21
Sd2m: 20
Sd2l: 18
Er(i): 17
Ei(r): 16
A: 13
SBb2l: 12
Sd?t: 12
Sd: 12
Er(r): 12
SBd2m: 12
SBb?t: 11
Sc1l: 11
Ec(m): 11
Sb?m: 11
Sc+t: 10


In [0]:
# Restrict the sample to the five most frequent gz2class labels in the
# printed class counts.
valid_classes = ['Ei', 'Er', 'Ec', 'Ser', 'Sc2m']

final_data = data.loc[data['gz2class'].isin(valid_classes)]
final_data.describe()

Unnamed: 0,specobjid,dr8objid,dr7objid,ra_1,dec_1,total_classifications,total_votes,t01_smooth_or_features_a01_smooth_count,t01_smooth_or_features_a01_smooth_weight,t01_smooth_or_features_a01_smooth_fraction,t01_smooth_or_features_a01_smooth_weighted_fraction,t01_smooth_or_features_a01_smooth_debiased,t01_smooth_or_features_a01_smooth_flag,t01_smooth_or_features_a02_features_or_disk_count,t01_smooth_or_features_a02_features_or_disk_weight,t01_smooth_or_features_a02_features_or_disk_fraction,t01_smooth_or_features_a02_features_or_disk_weighted_fraction,t01_smooth_or_features_a02_features_or_disk_debiased,t01_smooth_or_features_a02_features_or_disk_flag,t01_smooth_or_features_a03_star_or_artifact_count,t01_smooth_or_features_a03_star_or_artifact_weight,t01_smooth_or_features_a03_star_or_artifact_fraction,t01_smooth_or_features_a03_star_or_artifact_weighted_fraction,t01_smooth_or_features_a03_star_or_artifact_debiased,t01_smooth_or_features_a03_star_or_artifact_flag,t02_edgeon_a04_yes_count,t02_edgeon_a04_yes_weight,t02_edgeon_a04_yes_fraction,t02_edgeon_a04_yes_weighted_fraction,t02_edgeon_a04_yes_debiased,t02_edgeon_a04_yes_flag,t02_edgeon_a05_no_count,t02_edgeon_a05_no_weight,t02_edgeon_a05_no_fraction,t02_edgeon_a05_no_weighted_fraction,t02_edgeon_a05_no_debiased,t02_edgeon_a05_no_flag,t03_bar_a06_bar_count,t03_bar_a06_bar_weight,t03_bar_a06_bar_fraction,...,t11_arms_number_a32_2_debiased,t11_arms_number_a32_2_flag,t11_arms_number_a33_3_count,t11_arms_number_a33_3_weight,t11_arms_number_a33_3_fraction,t11_arms_number_a33_3_weighted_fraction,t11_arms_number_a33_3_debiased,t11_arms_number_a33_3_flag,t11_arms_number_a34_4_count,t11_arms_number_a34_4_weight,t11_arms_number_a34_4_fraction,t11_arms_number_a34_4_weighted_fraction,t11_arms_number_a34_4_debiased,t11_arms_number_a34_4_flag,t11_arms_number_a36_more_than_4_count,t11_arms_number_a36_more_than_4_weight,t11_arms_number_a36_more_than_4_fraction,t11_arms_number_a36_more_than_4_weighted_fraction,t11_arms_number_a36_
more_than_4_debiased,t11_arms_number_a36_more_than_4_flag,t11_arms_number_a37_cant_tell_count,t11_arms_number_a37_cant_tell_weight,t11_arms_number_a37_cant_tell_fraction,t11_arms_number_a37_cant_tell_weighted_fraction,t11_arms_number_a37_cant_tell_debiased,t11_arms_number_a37_cant_tell_flag,RA_2,Dec_2,X,Y,z_petro,i_petro,r_petro,g_petro,FWHM,MUMAX,A,B,PROB_GAL,Separation
count,12847.0,12560.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,...,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0,12847.0
mean,5.631999e+17,1.237658e+18,5.878301e+17,149.446839,-0.010881,44.918113,159.104149,34.667082,34.491701,0.769213,0.779385,0.766252,0.546275,8.043512,7.959876,0.181884,0.183328,0.19989,0.018604,2.207519,1.653197,0.048886,0.037282,0.036352,0.0,2.416907,2.362078,0.196941,0.190042,0.190042,0.033237,5.628007,5.598889,0.784222,0.788475,0.788475,0.041255,0.892037,0.880144,0.115315,...,0.15472,0.015957,0.107963,0.106604,0.014136,0.013008,0.013008,7.8e-05,0.03495,0.03384,0.007226,0.006584,0.006584,0.0,0.034405,0.033533,0.013184,0.012598,0.012598,0.0,0.902156,0.898174,0.297659,0.295435,0.295435,0.007161,149.446813,-0.010848,5812.741465,5586.06935,15.826534,16.359206,16.842162,17.385443,7.151059,15.420005,5.906142,3.98678,0.905303,0.280811
std,2.879587e+17,1397811000000.0,135128200000000.0,152.374336,0.725399,4.769397,32.200189,8.574477,8.560189,0.164905,0.167074,0.168027,0.497873,7.488746,7.469278,0.170016,0.17255,0.176194,0.135125,1.803407,1.623738,0.039423,0.036301,0.035621,0.0,5.659352,5.638153,0.302751,0.301679,0.301679,0.179263,5.98819,5.980586,0.320503,0.322285,0.322285,0.198887,2.099006,2.093152,0.206667,...,0.303697,0.125314,0.712713,0.710703,0.083965,0.077477,0.077477,0.008823,0.254694,0.251714,0.064629,0.060273,0.060273,0.0,0.207817,0.205017,0.097449,0.09493,0.09493,0.0,1.60093,1.598441,0.405004,0.404335,0.404335,0.084324,152.374345,0.725397,2626.627041,2410.454187,6.919599,3.736211,0.695474,5.586092,8.207547,0.829699,4.068108,1.865983,0.117474,0.163322
min,-9999.0,1.237657e+18,5.877308e+17,0.006464,-1.257865,25.0,79.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0065,-1.2578,886.006,853.308,-99.0,-99.0,7.86,-99.0,2.37,12.05,1.87,1.16,0.5,0.001855
25%,4.357255e+17,1.237657e+18,5.877312e+17,19.36378,-0.64292,42.0,141.0,31.0,30.375,0.7115,0.722,0.702052,0.0,3.0,3.0,0.068,0.069,0.079,0.0,1.0,0.3705,0.022,0.009,0.008,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.667,0.667,0.667,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19.36375,-0.6429,3579.011,3533.2265,15.92,16.18,16.55,17.3,4.08,14.88,4.25,3.04,0.863,0.163245
50%,4.481241e+17,1.237657e+18,5.877315e+17,45.51903,-0.016915,45.0,152.0,36.0,36.0,0.818,0.83,0.814,1.0,6.0,5.321,0.125,0.124,0.143493,0.0,2.0,1.099,0.043,0.027,0.027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45.519,-0.0169,5849.279,5552.522,16.42,16.66,17.04,17.8,5.31,15.45,5.09,3.6,0.957,0.253305
75%,4.627887e+17,1.23766e+18,5.880155e+17,328.68161,0.615204,48.0,168.0,40.0,40.0,0.875,0.888,0.879,1.0,10.0,10.0,0.238,0.239,0.27,0.0,3.0,2.167,0.068,0.052,0.051,0.0,2.0,1.544,0.297,0.2595,0.2595,0.0,7.0,7.0,1.0,1.0,1.0,0.0,1.0,1.0,0.167,...,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.667,0.625,0.625,0.0,328.6816,0.61525,8049.6105,7578.158,16.72,16.96,17.34,18.15,8.21,15.98,6.37,4.38,0.99,0.365629
max,2.971319e+18,1.23766e+18,5.880155e+17,359.98883,1.258636,66.0,448.0,61.0,61.0,1.0,1.0,1.0,1.0,53.0,53.0,1.0,1.0,1.0,1.0,24.0,24.0,0.5,0.484,0.484,0.0,50.0,50.0,1.0,1.0,1.0,1.0,52.0,52.0,1.0,1.0,1.0,1.0,25.0,25.0,1.0,...,1.0,1.0,19.0,19.0,1.0,1.0,1.0,1.0,6.0,6.0,1.0,1.0,1.0,0.0,5.0,5.0,1.0,1.0,1.0,0.0,16.0,16.0,1.0,1.0,1.0,1.0,359.9888,1.2587,10649.478,10398.768,17.53,17.74,18.18,19.17,324.34,17.95,112.8,39.99,1.0,0.998808


In [0]:
# Fetch one image per selected row, grouped into directories by gz2class.
batch_download(data=final_data, classes_column='gz2class')

Success [SPLUS.STRIPE82-0099.13710.griz] (0.01%)
Success [SPLUS.STRIPE82-0099.18853.griz] (0.02%)
Success [SPLUS.STRIPE82-0101.10970.griz] (0.02%)
Success [SPLUS.STRIPE82-0101.13310.griz] (0.03%)
Success [SPLUS.STRIPE82-0101.17108.griz] (0.04%)
Success [SPLUS.STRIPE82-0101.18989.griz] (0.05%)
Success [SPLUS.STRIPE82-0101.19089.griz] (0.05%)
Success [SPLUS.STRIPE82-0103.17908.griz] (0.06%)
Success [SPLUS.STRIPE82-0103.11839.griz] (0.07%)
Success [SPLUS.STRIPE82-0103.18950.griz] (0.08%)
Success [SPLUS.STRIPE82-0103.09685.griz] (0.09%)
Success [SPLUS.STRIPE82-0103.18223.griz] (0.09%)
Success [SPLUS.STRIPE82-0103.13744.griz] (0.10%)
Success [SPLUS.STRIPE82-0103.15392.griz] (0.11%)
Success [SPLUS.STRIPE82-0103.15231.griz] (0.12%)
Success [SPLUS.STRIPE82-0103.12462.griz] (0.12%)
Success [SPLUS.STRIPE82-0103.15574.griz] (0.13%)
Success [SPLUS.STRIPE82-0103.10375.griz] (0.14%)
Success [SPLUS.STRIPE82-0103.15655.griz] (0.15%)
Success [SPLUS.STRIPE82-0105.11679.griz] (0.16%)
Success [SPLUS.STRIP

In [0]:
# Bundle the downloaded images into a single archive stored on Drive.
make_tarfile(
    source=downloads_path,
    output='/gdrive/My Drive/ml_datasets/sdss_stripe82.tar.gz',
)

Tarfile created successfully [/gdrive/My Drive/ml_datasets/sdss_stripe82.tar.gz]
