# Adding Caldwell Catalogue

In [132]:
import pandas as pd
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
import numpy as np
pd.set_option('display.max_columns', None)
pd.set_option('display.max_row', None)

Add:
1. Raw list from wikipedia
1. Images
1. Dec RA from NGC/IC catalogue
1. params
1. Rename types

In [143]:
caldwell_df = pd.read_csv('raw_data/caldwell_raw.csv') 
caldwell_df = caldwell_df[:-1]
del caldwell_df['Image']
caldwell_df.at[13, "NGC number"] = "NGC 869"
caldwell_df.rename({"Caldwell number": "params", "NGC number": "ngcic", "Distance(1000 ly)": "dist", "Common name": "common"}, axis=1, inplace=True)
caldwell_df = caldwell_df.fillna('-')
caldwell_df.head()

Unnamed: 0,params,ngcic,common,Type,dist,Constellation,Magnitude
0,C1,NGC 188,-,Open Cluster,4.8,Cepheus,8.1
1,C2,NGC 40,Bow-Tie Nebula,Planetary Nebula,3.5,Cepheus,11.0
2,C3,NGC 4236,-,Barred Spiral Galaxy,7000.0,Draco,9.7
3,C4,NGC 7023,Iris Nebula,Open Cluster and Nebula,1.4,Cepheus,7.0
4,C5,IC 342,Hidden Galaxy,Spiral Galaxy,10000.0,Camelopardalis,9.0


In [151]:
for i in range(len(caldwell_df.Type)):
    if caldwell_df["Type"][i].find("Galaxy") != -1:
        caldwell_df["Type"][i] = "Galaxy"
    if caldwell_df["Type"][i] == "Nebula":
        caldwell_df["Type"][i] = "Emission nebula"
    if caldwell_df["Type"][i] == "Open Cluster and Nebula":
        caldwell_df["Type"][i] = "Open Cluster"
        
caldwell_df.head()

Unnamed: 0,params,ngcic,common,Type,dist,Constellation,Magnitude
0,C1,NGC 188,-,Open Cluster,4.8,Cepheus,8.1
1,C2,NGC 40,Bow-Tie Nebula,Planetary Nebula,3.5,Cepheus,11.0
2,C3,NGC 4236,-,Galaxy,7000.0,Draco,9.7
3,C4,NGC 7023,Iris Nebula,Open Cluster,1.4,Cepheus,7.0
4,C5,IC 342,Hidden Galaxy,Galaxy,10000.0,Camelopardalis,9.0


In [147]:
html = urlopen('https://en.wikipedia.org/wiki/Caldwell_catalogue')
bs = BeautifulSoup(html, 'html.parser')
images = bs.find_all('img', {'src':re.compile(r'(jpg|png|jpeg)')} )

all_images = []

for image in images: 
    bigger_image = image['src'].replace("60px","500px").replace("//","")
    all_images.append("https://" + bigger_image)

all_images.pop(0)
all_images.pop(0)
all_images.pop(0)
all_images.pop(0)
all_images.pop(0)
all_images.pop(0)
all_images.pop(0)

all_images.pop()
all_images.pop()
all_images.pop()
all_images.pop()
all_images.pop()
all_images.pop()

all_images.insert(61,"https://upload.wikimedia.org/wikipedia/commons/thumb/a/a8/Wide_Field_Imager_view_of_the_spiral_galaxy_NGC_247.jpg/500px-Wide_Field_Imager_view_of_the_spiral_galaxy_NGC_247.jpg")

In [66]:
image_df = pd.DataFrame(all_images, columns =['image'])
image_df.head()

Unnamed: 0,image
0,https://upload.wikimedia.org/wikipedia/commons...
1,https://upload.wikimedia.org/wikipedia/commons...
2,https://upload.wikimedia.org/wikipedia/commons...
3,https://upload.wikimedia.org/wikipedia/commons...
4,https://upload.wikimedia.org/wikipedia/commons...


In [121]:
caldwell_coords = pd.read_csv('raw_data/caldwell_coords_clean.csv') 
caldwell_coords.head()

Unnamed: 0,C,NGC,Type,Mag,Size,Distance (ly),RA,Dec,Con,Viewing Season
0,1,188,Oc,8.1,14,4800,00 44.4,+85 20,Cep,autumn
1,2,40,Pl,11.6,0.6,3500,00 13.0,+72 32,Cep,autumn
2,3,4236,Sb,9.7,21x7,7 million,12 16.7,+69 28,Dra,spring
3,4,7023,Bn,6.8,18x18,1400,21 01.8,+68 12,Cep,autumn
4,5,IC 342,Sp,9.2,18x17,13 million,03 46.8,+68 06,Cam,winter


In [118]:
example_str = caldwell_coords["Dec"][0]
example_str[4:6]

'20'

In [120]:
caldwell_dec = []

for x in caldwell_coords["Dec"]:
    degree = x[0:3]+"d"
    arcmin = x[4:6]+"m"
    arcsec = "00s"
    caldwell_dec.append(degree+arcmin+arcsec)

In [112]:
caldwell_ra = []

for x in caldwell_coords["RA"]:
    hours = x[0:2]+"h"
    mins = x[3:5]+"m"
    seconds = str(float("0" + x[5:7])*60)+"s"
    caldwell_ra.append(hours+mins+seconds)

# caldwell_ra

In [157]:
ra_df = pd.DataFrame(caldwell_ra, columns =['ra'])
dec_df = pd.DataFrame(caldwell_dec, columns =['dec'])
caldwell_coords_processed = pd.concat([ra_df.reset_index(drop=True),dec_df.reset_index(drop=True)],axis=1)
caldwell_coords_processed.head()

Unnamed: 0,ra,dec
0,00h44m24.0s,+85d20m00s
1,00h13m0.0s,+72d32m00s
2,12h16m42.0s,+69d28m00s
3,21h01m48.0s,+68d12m00s
4,03h46m48.0s,+68d06m00s


Add name

In [207]:
name_list = ["Caldwell " + str(i+1) for i in range(109)]
name_df = pd.DataFrame(name_list, columns =['name'])
# name_df

In [208]:
caldwell_with_coords = pd.concat([caldwell_df.reset_index(drop=True),caldwell_coords_processed.reset_index(drop=True)],axis=1)
caldwell_with_names = pd.concat([caldwell_with_coords.reset_index(drop=True),name_df.reset_index(drop=True)],axis=1)
complete_caldwell = pd.concat([caldwell_with_names.reset_index(drop=True),image_df.reset_index(drop=True)],axis=1)

complete_caldwell = complete_caldwell[["params", "name", "ngcic", "common", "Type", "ra", "dec", "Magnitude", "dist", "Constellation", "image"]]
complete_caldwell.loc[complete_caldwell.shape[0]] = ['sh2248', 'Sharpless 248', 'IC 443', 'Jellyfish Nebula', 'Supernova remnant', '06h17m13s', '+22d31m05s', 12, 5, 'Gemini', 'https://upload.wikimedia.org/wikipedia/commons/7/78/IC443.jpeg']

In [209]:
complete_caldwell.head()

Unnamed: 0,params,name,ngcic,common,Type,ra,dec,Magnitude,dist,Constellation,image
0,C1,Caldwell 1,NGC 188,-,Open Cluster,00h44m24.0s,+85d20m00s,8.1,4.8,Cepheus,https://upload.wikimedia.org/wikipedia/commons...
1,C2,Caldwell 2,NGC 40,Bow-Tie Nebula,Planetary Nebula,00h13m0.0s,+72d32m00s,11.0,3.5,Cepheus,https://upload.wikimedia.org/wikipedia/commons...
2,C3,Caldwell 3,NGC 4236,-,Galaxy,12h16m42.0s,+69d28m00s,9.7,7000.0,Draco,https://upload.wikimedia.org/wikipedia/commons...
3,C4,Caldwell 4,NGC 7023,Iris Nebula,Open Cluster,21h01m48.0s,+68d12m00s,7.0,1.4,Cepheus,https://upload.wikimedia.org/wikipedia/commons...
4,C5,Caldwell 5,IC 342,Hidden Galaxy,Galaxy,03h46m48.0s,+68d06m00s,9.0,10000.0,Camelopardalis,https://upload.wikimedia.org/wikipedia/commons...


Final process

In [210]:
for i in range(len(complete_caldwell.params)):
    complete_caldwell.params[i] = complete_caldwell.params[i].lower()
    
for i in range(len(complete_caldwell.Type)):
    complete_caldwell.Type[i] = complete_caldwell.Type[i].capitalize()
    
for i in range(len(complete_caldwell.common)):
    complete_caldwell.common[i] = complete_caldwell.common[i].replace(",", " ")
    
for i in range(len(complete_caldwell.dist)):
    complete_caldwell.dist[i] = complete_caldwell.dist[i].replace(",", "")

AttributeError: 'int' object has no attribute 'replace'

In [211]:
complete_caldwell

Unnamed: 0,params,name,ngcic,common,Type,ra,dec,Magnitude,dist,Constellation,image
0,c1,Caldwell 1,NGC 188,-,Open cluster,00h44m24.0s,+85d20m00s,8.1,4.8,Cepheus,https://upload.wikimedia.org/wikipedia/commons...
1,c2,Caldwell 2,NGC 40,Bow-Tie Nebula,Planetary nebula,00h13m0.0s,+72d32m00s,11,3.5,Cepheus,https://upload.wikimedia.org/wikipedia/commons...
2,c3,Caldwell 3,NGC 4236,-,Galaxy,12h16m42.0s,+69d28m00s,9.7,7000,Draco,https://upload.wikimedia.org/wikipedia/commons...
3,c4,Caldwell 4,NGC 7023,Iris Nebula,Open cluster,21h01m48.0s,+68d12m00s,7,1.4,Cepheus,https://upload.wikimedia.org/wikipedia/commons...
4,c5,Caldwell 5,IC 342,Hidden Galaxy,Galaxy,03h46m48.0s,+68d06m00s,9,10000,Camelopardalis,https://upload.wikimedia.org/wikipedia/commons...
5,c6,Caldwell 6,NGC 6543,Cat's Eye Nebula,Planetary nebula,17h58m36.0s,+66d38m00s,9,3,Draco,https://upload.wikimedia.org/wikipedia/commons...
6,c7,Caldwell 7,NGC 2403,-,Galaxy,07h36m54.0s,+65d36m00s,8.4,14000,Camelopardalis,https://upload.wikimedia.org/wikipedia/commons...
7,c8,Caldwell 8,NGC 559,-,Open cluster,01h29m30.0s,+63d18m00s,9.5,3.7,Cassiopeia,https://upload.wikimedia.org/wikipedia/commons...
8,c9,Caldwell 9,Sh2-155,Cave Nebula,Emission nebula,22h56m48.0s,+62d37m00s,7.7,2.8,Cepheus,https://upload.wikimedia.org/wikipedia/commons...
9,c10,Caldwell 10,NGC 663,-,Open cluster,01h46m0.0s,+61d15m00s,7.1,7.2,Cassiopeia,https://upload.wikimedia.org/wikipedia/commons...


Load time!

In [212]:
complete_caldwell.to_csv('complete_caldwell.csv', index=False, header=False)