In [1]:
from urllib.request import urlopen
import pandas as pd
import os
HOME = os.environ['HOME']

In [2]:
url = 'http://pds-rings.seti.org/volumes/COISS_2xxx/COISS_2001/index/index.tab'
base_url = 'http://pds-rings.seti.org/volumes/COISS_2xxx/'

In [6]:
labelfname = os.path.join(HOME, 'data/ciss/index.lbl')
tabfname = os.path.join(HOME, 'data/ciss/index.tab')

In [8]:
df = pd.read_csv(tabfname, sep=',', skipinitialspace=True, header=None)

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3745 entries, 0 to 3744
Columns: 139 entries, 0 to 138
dtypes: float64(64), int64(25), object(50)
memory usage: 4.0+ MB


# Parsing label file

In [10]:
def tokenize(line):
    tokens = line.split('=')
    return [i.strip() for i in tokens]

In [11]:
class PDSColumn:
    def __init__(self, buffer):
        d = {}
        i = 0
        for line in buffer:
            if i > 10:
                break
            tokens = tokenize(line)
            if tokens[0] == 'OBJECT':
                continue
            elif tokens[0] == 'END_OBJECT':
                break
            try:
                d[tokens[0].strip()] = tokens[1].strip()
            except IndexError:
                continue
            i += 1
        self.d = d

In [12]:
class PDSLabelReader:
    def __init__(self, fname):
        f = open(fname)
        # pre-parse the stuff away we don't need
        while True:
            tokens = tokenize(f.readline())
            try:
                if tokens[1] == 'COLUMN':
                    break
            except IndexError:
                continue
        self.f = f
        self.parse_columns()
        self.expand_columns()
    def parse_columns(self):
        columns = []
        while True:
            col = PDSColumn(self.f)
            if len(col.d) != 0:
                columns.append(col)
            else:
                break
        self.columns = columns
    def expand_columns(self):
        final_cols = []
        for pdscol in self.columns:
            dic = pdscol.d
            if 'ITEMS' in dic:
                no_items = int(dic['ITEMS'])
                for i in range(no_items):
                    final_cols.append("{}_{}".format(dic['NAME'],i))
            else:
                final_cols.append(dic['NAME'])
        self.final_cols = final_cols
labelfile = PDSLabelReader(labelfname)
df.columns = labelfile.final_cols

In [13]:
def map_string_to_bool(x):
    if x.strip() == 'YES':
        return True
    else:
        return False

In [14]:
df.RINGS_FLAG = df.RINGS_FLAG.map(map_string_to_bool)

In [15]:
rings = df[df.RINGS_FLAG]

In [16]:
rings.TARGET_NAME.value_counts()

SATURN                      2935
SKY                           43
EPIMETHEUS                     7
ATLAS                          7
ENCELADUS                      7
JANUS                          6
PANDORA                        6
PROMETHEUS                     5
DIONE                          3
CALYPSO                        2
HELENE                         1
dtype: int64

In [86]:
s = str(res.read())

In [87]:
from html.parser import HTMLParser

In [93]:
class MyHTMLParser(HTMLParser):
    pdfs = []
    def handle_data(self, data):
        if data.endswith('.pdf'):
            self.pdfs.append(data)
parser = MyHTMLParser()
parser.feed(s)

In [103]:
from urllib.request import urlretrieve
import sys
def report(blocknr, blocksize, size):
    current = blocknr*blocksize
    sys.stdout.write("\r{0:.2f}%".format(100.0*current/size))

def downloadFile(url):
    print("\n",url)
    fname = url.split('/')[-1]
    print(fname)
    urlretrieve(url, fname, report)

In [106]:
for url in parser.pdfs:
    dlurl = "http://lasp.colorado.edu/media/projects/workshops/2014planetaryrings/Presentations/"
    downloadFile(dlurl + url)


 http://lasp.colorado.edu/media/projects/workshops/2014planetaryrings/Presentations/Albers-NewMoonInducedStructure.pdf
Albers-NewMoonInducedStructure.pdf
100.13%
 http://lasp.colorado.edu/media/projects/workshops/2014planetaryrings/Presentations/Becker-MeasuringSubCentimeterParticles.pdf
Becker-MeasuringSubCentimeterParticles.pdf
100.01%
 http://lasp.colorado.edu/media/projects/workshops/2014planetaryrings/Presentations/Bloom-HousekeepingAnnouncement.pdf
Bloom-HousekeepingAnnouncement.pdf
100.41%
 http://lasp.colorado.edu/media/projects/workshops/2014planetaryrings/Presentations/Bradley-UVISReflectanceSpectraComparison.pdf
Bradley-UVISReflectanceSpectraComparison.pdf
100.33%
 http://lasp.colorado.edu/media/projects/workshops/2014planetaryrings/Presentations/Brooks-TowardsUnderstandingThermalThroughput.pdf
Brooks-TowardsUnderstandingThermalThroughput.pdf
100.03%
 http://lasp.colorado.edu/media/projects/workshops/2014planetaryrings/Presentations/Colwell-Studying_Sizes_and_Shapes.pdf
Col

# Using JSON

In [185]:
import json

In [186]:
json.__version__

'2.0.9'

In [5]:
url = 'http://pds-rings-tools.seti.org/opus/api/files.json?&primaryfilespec=N1695760475_1'

In [12]:
from urllib.request import urlopen

In [22]:
urlopen(url).read().decode()

'{"data": {"S_IMG_CO_ISS_1695760475_N": {"RAW_IMAGE": ["http://pds-rings.seti.org/volumes/COISS_2xxx/COISS_2069/data/1695427520_1695761375/N1695760475_1.LBL", "http://pds-rings.seti.org/volumes/COISS_2xxx/COISS_2069/data/1695427520_1695761375/N1695760475_1.IMG", "http://pds-rings.seti.org/volumes/COISS_2xxx//COISS_2069/data/1695427520_1695761375/N1695760475_1.IMG", "http://pds-rings.seti.org/volumes/COISS_2069/LABEL/TLMTAB.FMT", "http://pds-rings.seti.org/volumes/COISS_2069/LABEL/PREFIX3.FMT"], "CALIBRATED": ["http://pds-rings.seti.org/volumes/COISS_2xxx/COISS_2069/LABEL/TLMTAB.FMT", "http://pds-rings.seti.org/volumes/COISS_2xxx/COISS_2069/LABEL/PREFIX3.FMT", "http://pds-rings.seti.org/derived/COISS_2xxx/COISS_2069/data/1695427520_1695761375/N1695760475_1_CALIB.LBL", "http://pds-rings.seti.org/derived/COISS_2xxx/COISS_2069/data/1695427520_1695761375/N1695760475_1_CALIB.IMG", "http://pds-rings.seti.org/derived/COISS_2xxx//COISS_2069/data/1695427520_1695761375/N1695760475_1_CALIB.IMG"]}}

In [190]:
import json
from urllib.request import urlopen
from urllib.parse import urlparse

class OPUSImage(object):

  def __init__(self, jsonlist):
    self.jsonlist = jsonlist
    for item in jsonlist:
      parsed = urlparse(item)
      if '//' in parsed.path:
        continue
      if item.upper().endswith(".LBL"):
        self.label_url = item
      elif item.upper().endswith('.IMG'):
        self.image_url = item
  def __repr__(self):
    s = "Label:\n{}\nImage:\n{}".format(self.label_url,
                                      self.image_url)
    return s
  
class OPUSObsID(object):

  def __init__(self, jsondic):
    self.jsondic = jsondic
    self.raw = OPUSImage(jsondic['RAW_IMAGE'])
    self.calib = OPUSImage(jsondic['CALIBRATED'])

  def __call__(self):
    s = "Raw:\n{}\nCalibrated:\n{}".format(self.raw, self.calib)
    return s

  def __repr__(self):
    return self.__call__()

  
class OPUS(object):
  scheme = 'http'
  netloc = 'pds-rings.seti.org'
  base_api = '/opus/api'

  def __init__(self, url):
    self.data = json.loads(urlopen(url).read().decode())['data']
    

In [135]:
for i in imgdata['raw_image'.upper()]:
  print(urlparse(i))

ParseResult(scheme='http', netloc='pds-rings.seti.org', path='/volumes/COISS_2xxx/COISS_2017/data/1512175979_1512194653/N1512191062_1.LBL', params='', query='', fragment='')
ParseResult(scheme='http', netloc='pds-rings.seti.org', path='/volumes/COISS_2xxx/COISS_2017/data/1512175979_1512194653/N1512191062_1.IMG', params='', query='', fragment='')
ParseResult(scheme='http', netloc='pds-rings.seti.org', path='/volumes/COISS_2xxx//COISS_2017/data/1512175979_1512194653/N1512191062_1.IMG', params='', query='', fragment='')
ParseResult(scheme='http', netloc='pds-rings.seti.org', path='/derived/COISS_2xxx/COISS_2017/LABEL/TLMTAB.FMT', params='', query='', fragment='')
ParseResult(scheme='http', netloc='pds-rings.seti.org', path='/derived/COISS_2xxx/COISS_2017/LABEL/PREFIX3.FMT', params='', query='', fragment='')


In [86]:
url

'http://pds-rings-tools.seti.org/opus/api/image/small/S_IMG_CO_ISS_1695760475_N.html'

In [87]:
from IPython.display import HTML

In [91]:
HTML(url)

In [103]:
url = 'http://pds-rings-tools.seti.org/opus/api/files.json?&target=S+RINGS&instrumentid=Cassini+ISS&projectedradialresolution1=&projectedradialresolution2=0.5'

In [110]:
data = get_url(url)

In [198]:
for obsid, item in data.items():
  

{'RAW_IMAGE': ['http://pds-rings.seti.org/volumes/COISS_2xxx/COISS_2017/data/1512175979_1512194653/N1512191062_1.LBL', 'http://pds-rings.seti.org/volumes/COISS_2xxx/COISS_2017/data/1512175979_1512194653/N1512191062_1.IMG', 'http://pds-rings.seti.org/volumes/COISS_2xxx//COISS_2017/data/1512175979_1512194653/N1512191062_1.IMG', 'http://pds-rings.seti.org/derived/COISS_2xxx/COISS_2017/LABEL/TLMTAB.FMT', 'http://pds-rings.seti.org/derived/COISS_2xxx/COISS_2017/LABEL/PREFIX3.FMT'], 'CALIBRATED': ['http://pds-rings.seti.org/volumes/COISS_2xxx/COISS_2017/LABEL/TLMTAB.FMT', 'http://pds-rings.seti.org/volumes/COISS_2xxx/COISS_2017/LABEL/PREFIX3.FMT', 'http://pds-rings.seti.org/derived/COISS_2xxx/COISS_2017/data/1512175979_1512194653/N1512191062_1_CALIB.LBL', 'http://pds-rings.seti.org/derived/COISS_2xxx/COISS_2017/data/1512175979_1512194653/N1512191062_1_CALIB.IMG', 'http://pds-rings.seti.org/derived/COISS_2xxx//COISS_2017/data/1512175979_1512194653/N1512191062_1_CALIB.IMG']}
{'RAW_IMAGE': ['ht

In [192]:

obsid.raw.label_url

'http://pds-rings.seti.org/volumes/COISS_2xxx/COISS_2017/data/1512175979_1512194653/N1512191062_1.LBL'

In [189]:
obsid

Raw:
Label:
http://pds-rings.seti.org/volumes/COISS_2xxx/COISS_2017/data/1512175979_1512194653/N1512191062_1.LBLImage:
http://pds-rings.seti.org/volumes/COISS_2xxx/COISS_2017/data/1512175979_1512194653/N1512191062_1.IMG
Calibrated:
Label:
http://pds-rings.seti.org/derived/COISS_2xxx/COISS_2017/data/1512175979_1512194653/N1512191062_1_CALIB.LBLImage:
http://pds-rings.seti.org/derived/COISS_2xxx/COISS_2017/data/1512175979_1512194653/N1512191062_1_CALIB.IMG

In [177]:
for item in obsid.raw.jsonlist:
  if item.upper().endswith('.LBL'):
    print(type(item))

<class 'str'>


In [147]:
obsid.raw.image_url

'http://pds-rings.seti.org/volumes/COISS_2xxx//COISS_2017/data/1512175979_1512194653/N1512191062_1.IMG'

In [133]:
from urllib.parse import urlparse

In [138]:
from urllib.request import urlunparse

In [139]:
urlunparse?