# Asteroids, Comets, and Small Body Spitzer Dataset

In [1]:
import os
import urllib.request as urllib

import pandas as pd
from bs4 import BeautifulSoup

# Download and parse the two source pages: the Spitzer listing page and the
# object-class page.
# Each response is opened in a `with` block so the HTTP connection is closed as
# soon as its HTML has been read, and the parser is named explicitly so the
# resulting tree does not depend on which optional parsers are installed.
with urllib.urlopen("https://pdssbn.astro.umd.edu/data_other/Spitzer.shtml") as bodies:
    soup = BeautifulSoup(bodies.read(), "html.parser")
with urllib.urlopen("https://pdssbn.astro.umd.edu/data_other/objclass.shtml") as classifications:
    soup2 = BeautifulSoup(classifications.read(), "html.parser")

# Empty frames declaring the target columns; later cells assign each column.
# (bodydf is rebuilt with a body-type index once the links have been labelled.)
bodydf = pd.DataFrame(columns = ["Name", "NAIF ID Number", "Classification", "Distance", "Records", "Link"])
classificationdf = pd.DataFrame(columns = ["Abbreviation", "Title", "Distance", "Description"])


In [2]:
# Produce one body-type label per link whose href names one of the five data
# directories, then use those labels as the index of a fresh bodydf.
atags = soup.find_all("a")

# (directory marker, label) pairs, tested in this order; the first hit wins.
category_by_marker = (
    ("sptz_01_COMET", "Comet"),
    ("sptz_02_INNER", "Inner Solar System Asteroids"),
    ("sptz_03_MAIN", "Main Belt Asteroids"),
    ("sptz_04_OUTER", "Outer Solar System Asteroids"),
    ("sptz_05_SAT", "Satellite"),
)

bodytypes = []
for atag in atags:
    href = atag.get("href")
    if href is None:
        # Anchor without an href: nothing to classify.
        continue
    for marker, label in category_by_marker:
        if marker in href:
            bodytypes.append(label)
            break

bodydf = pd.DataFrame(
    index=bodytypes,
    columns=["Name", "NAIF ID Number", "Classification", "Records", "Link"],
)


In [3]:
# Collect the object name (text of the first <td>) from every table row of the
# Spitzer page and store it as the "Name" column.
trtags = soup.find_all("tr")
names = []

for tag in trtags:
    first_cell = tag.find("td")
    # find() returns None when a row contains no <td> at all; skip such rows
    # instead of failing with AttributeError on None.get_text().
    if first_cell is None:
        continue
    cell_text = first_cell.get_text()
    # A first cell reading "Object Name" is a column heading, not a body.
    if cell_text != "Object Name":
        names.append(cell_text)

bodydf["Name"] = names

In [4]:
# Read the NAIF ID from each row: it is the text of the first link in the row
# that carries the onclick attribute below. Rows with no such link are skipped.
trtags = soup.find_all("tr")
atags = []
idnumbers = []
id_link_attrs = {"onclick":"this.target='_blank'"}
for tag in trtags:
    id_link = tag.find("a", id_link_attrs)
    if id_link is None:
        continue
    idnumbers.append(id_link.get_text())

bodydf["NAIF ID Number"] = idnumbers

In [5]:
# The classification abbreviation is the text of every link whose href
# contains "objclass"; links without an href are ignored.
atags = soup.find_all("a")
classifications = [
    atag.get_text()
    for atag in atags
    if atag.get("href") is not None and "objclass" in atag.get("href")
]

bodydf["Classification"] = classifications


In [32]:
# Derive the "Distance" column from the third cell of each row on the
# object-class page: keep only the text between the first "(" and the next ")".
trtags = soup2.find_all("tr")
classdistance = []

for trtag in trtags:
    cells = trtag.find_all("td")
    if len(cells) < 3:
        # No third cell in this row, so there is nothing to extract.
        continue
    description = cells[2].get_text()
    if description == "Description":
        # Column heading, not a data row.
        continue
    # partition() yields "" when the delimiter is absent, so a description
    # without parentheses contributes an empty string.
    after_open = description.partition("(")[2]
    classdistance.append(after_open.partition(")")[0])

classificationdf["Distance"] = classdistance

In [33]:
# Gather the text of every cell from the fourth one onward in each row of the
# Spitzer page, dropping cells that read "Records" (the column heading).
trtags = soup.find_all("tr")
records = [
    cell.get_text()
    for trtag in trtags
    for cell in trtag.find_all("td")[3:]
    if cell.get_text() != "Records"
]

bodydf["Records"] = records

In [34]:
# Build the absolute data link for each body from the href of the link inside
# the first <td> of its row.
trtags = soup.find_all("tr")
links = []

for tag in trtags:
    first_cell = tag.find("td")
    # find() returns None for a row without any <td>; skip it rather than
    # raise AttributeError on the chained lookup.
    if first_cell is None:
        continue
    anchor = first_cell.find("a")
    # Rows whose first cell holds no link (presumably heading rows) are skipped.
    if anchor is not None:
        links.append("https://pdssbn.astro.umd.edu/data_other/" + anchor.get("href"))

bodydf["Link"] = links

In [35]:
# Read each class abbreviation from the `name` attribute of the link inside
# the first <td> of every row on the object-class page.
trtags = soup2.find_all("tr")
abbreviations = []

for trtag in trtags:
    first_cell = trtag.find("td")
    # find() returns None for a row without any <td>; skip it rather than
    # raise AttributeError on the chained lookup.
    if first_cell is None:
        continue
    anchor = first_cell.find("a")
    # Rows whose first cell holds no link are skipped.
    if anchor is not None:
        abbreviations.append(anchor.get("name"))

classificationdf["Abbreviation"] = abbreviations

In [36]:
# The class title is the text of the second cell of each row on the
# object-class page; a cell reading "Title" is the column heading and is dropped.
trtags = soup2.find_all("tr")
titles = [
    cell.get_text()
    for trtag in trtags
    for cell in trtag.find_all("td")[1:2]
    if cell.get_text() != "Title"
]

classificationdf["Title"] = titles

In [37]:
# The class description is the full text of the third cell of each row on the
# object-class page; the "Description" heading cell is excluded.
trtags = soup2.find_all("tr")
descriptions = []

for trtag in trtags:
    cells = trtag.find_all("td")
    if len(cells) < 3:
        # Row has no third cell.
        continue
    description = cells[2].get_text()
    if description == "Description":
        continue
    descriptions.append(description)

classificationdf["Description"] = descriptions

In [119]:
# Give every body the "Distance" text of its orbit class by looking up the
# body's classification abbreviation in classificationdf.
abbrevstrings = list(classificationdf["Abbreviation"])
distancestrings = list(classificationdf["Distance"])
alltuples = list(zip(abbrevstrings, distancestrings))

# Abbreviation -> distance lookup. setdefault keeps the first occurrence, so a
# repeated abbreviation can never contribute more than one value per body.
distance_by_abbrev = {}
for abbrev, distance in alltuples:
    distance_by_abbrev.setdefault(abbrev, distance)

# Exactly one entry per body, so the list always lines up with bodydf's rows:
# an empty classification, or one that is missing from the class table, maps
# to "N/A" instead of silently contributing nothing. The lookup is
# case-sensitive, so abbreviations differing only in case stay distinct.
bodydistance = [
    distance_by_abbrev.get(classification, "N/A") if classification != "" else "N/A"
    for classification in bodydf["Classification"]
]

bodydf["Distance"] = bodydistance


In [120]:
# Show the assembled classification table as this cell's output.
classificationdf

Unnamed: 0,Abbreviation,Title,Description,Distance
0,COM,Comet,Comet orbit not matching any defined orbit class.,
1,CTc,Chiron-type Comet,"Chiron-type comet, as defined by Levison and D...",TJupiter > 3; a > aJupiter
2,ETc,Encke-type Comet,"Encke-type comet, as defined by Levison and Du...",TJupiter > 3; a < aJupiter
3,HTC,Halley-type Comet*,"Halley-type comet, classical definition (20 y ...",20 y < P < 200 y
4,HYP,Hyperbolic Comet,Comets on hyperbolic orbits (e > 1.0).,e > 1.0
5,JFc,Jupiter-family Comet,"Jupiter-family comet, as defined by Levison an...",2 < TJupiter < 3
6,JFC,Jupiter-family Comet*,"Jupiter-family comet, classical definition (P ...",P < 20 y
7,PAR,Parabolic Comet,Comets on parabolic orbits (e = 1.0).,e = 1.0
8,AMO,Amor,Near-Earth asteroid orbits similar to that of ...,a > 1.0 AU; 1.017 AU < q < 1.3 AU
9,APO,Apollo,Near-Earth asteroid orbits which cross the Ear...,a > 1.0 AU; q < 1.017 AU


In [121]:
# Show the assembled bodies table as this cell's output.
bodydf

Unnamed: 0,Name,NAIF ID Number,Classification,Records,Link,Distance
Comet,148P/Anderson-LINEAR 1 (1963 W1),1000328,JFc,3,https://pdssbn.astro.umd.edu/data_other/sptz_0...,2 < TJupiter < 3
Comet,50P/Arend 1 (1951 T1),1000001,JFc,2,https://pdssbn.astro.umd.edu/data_other/sptz_0...,2 < TJupiter < 3
Comet,49P/Arend-Rigaux 1 (1951 C2),1000002,JFc,4,https://pdssbn.astro.umd.edu/data_other/sptz_0...,2 < TJupiter < 3
Comet,47P/Ashbrook-Jackson 1 (1948 Q1),1000003,JFc,2,https://pdssbn.astro.umd.edu/data_other/sptz_0...,2 < TJupiter < 3
Comet,19P/Borrelly 1 (1904 Y2),1000005,JFc,2,https://pdssbn.astro.umd.edu/data_other/sptz_0...,2 < TJupiter < 3
Comet,16P/Brooks 2 (1889 N1),1000008,JFc,3,https://pdssbn.astro.umd.edu/data_other/sptz_0...,2 < TJupiter < 3
Comet,P/Broughton 1 (2005 T5),1001674,JFc,3,https://pdssbn.astro.umd.edu/data_other/sptz_0...,2 < TJupiter < 3
Comet,C/Cardinal (2008 T2),1003000,HYP,3,https://pdssbn.astro.umd.edu/data_other/sptz_0...,e > 1.0
Comet,P/Catalina 3 (2005 JQ5),1000567,JFc,2,https://pdssbn.astro.umd.edu/data_other/sptz_0...,2 < TJupiter < 3
Comet,P/Catalina-NEAT 1 (2005 JD108),1001650,JFc,3,https://pdssbn.astro.umd.edu/data_other/sptz_0...,2 < TJupiter < 3


In [122]:
# Write the bodies table to disk. to_csv does not create missing directories,
# so make sure the output folder exists before writing.
os.makedirs('res', exist_ok=True)
bodydf.to_csv('res/asteroids_comets_bodies_dataset1.csv')

In [123]:
# Write the classification table to disk. to_csv does not create missing
# directories, so make sure the output folder exists before writing.
os.makedirs('res', exist_ok=True)
classificationdf.to_csv('res/classifications_dataset2.csv')