In [3]:
# Import the relevant modules:

# Import the modules for vector calculations
import pandas as pd
import polars as pL
import numpy as np
import scipy as sp
import math

# Import the modules for data preprocessing
import os
from astroquery.sdss import SDSS
from astroquery.vizier import Vizier
from astropy import coordinates as coords
import asyncio 
import aiohttp
from astropy.io import fits
from GalaxyHelperFunc import *
from tqdm import tqdm
import time

# Import the modules for data visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Import the modules for Deep-Learning
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as data_utils
import torch.distributions as distributions
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint

In [4]:
# Download the galaxy data from the SDSS database

astro = """
SELECT TOP 10000
    p.objid, p.ra, p.dec, p.u, p.g, p.r, p.i, p.z,
    p.run, p.rerun, p.camcol, p.field, p.field, p.specobjid, 
    s.z AS redshift, s.plate, s.mjd, s.fiberid
FROM PhotoObj AS p
JOIN SpecObj AS s 
    ON s.bestobjid = p.objid
WHERE
    p.type = 3
    AND s.class = 'GALAXY'
    AND p.clean = 1
"""

Galaxy_Dataset = SDSS.query_sql(astro, timeout = 600)

# Convert the data into a pandas dataframe
Galaxy_Dataframe = Galaxy_Dataset.to_pandas()

# Convert data into a polars dataframe
Galaxy_Polars = pL.DataFrame(Galaxy_Dataframe)

# Display the first 5 rows of the dataframe
Galaxy_Polars.head()

objid,ra,dec,u,g,r,i,z,run,rerun,camcol,field,field1,specobjid,redshift,plate,mjd,fiberid
u64,f64,f64,f64,f64,f64,f64,f64,i32,i32,i32,i32,i32,u64,f64,i32,i32,i32
1237648703509037704,196.428899,-0.630877,22.59034,22.28877,21.18809,20.0661,19.74049,752,301,2,353,353,4561064317748008960,0.6838333,4051,55337,159
1237648703509037971,196.389789,-0.694249,23.15419,22.82531,20.76846,19.84812,19.58682,752,301,2,353,353,4562238875456788480,0.4224373,4052,55600,336
1237648703509037997,196.417728,-0.667848,22.90089,22.56379,20.98486,19.68926,18.92513,752,301,2,353,353,4561063218236381184,0.6842009,4051,55337,155
1237648703509038045,196.479788,-0.718012,23.89636,22.43867,20.76097,19.67946,19.32264,752,301,2,353,353,4562228704789682176,0.5035014,4052,55589,299
1237648703509038048,196.482531,-0.745879,24.28602,23.22475,20.93204,19.74528,19.13798,752,301,2,353,353,4561052223120103424,0.6223917,4051,55337,115


In [38]:
spectra = SDSS.get_spectra(coordinates = coords.SkyCoord(ra = Galaxy_Dataframe.iloc[:1]['ra'], dec = Galaxy_Dataframe.iloc[:1]['dec'], unit = 'deg'))
spectra[0][1].data

FITS_rec([(-7.2427044 , 3.5523, 0.04732653, 0,         0, 1.4336151, 19.032124, -0.06347468),
          (-2.9550261 , 3.5524, 0.        , 0,         0, 1.4331353, 18.532917, -0.06267089),
          ( 1.3320469 , 3.5525, 0.04835141, 0,         0, 1.4328188, 17.931519, -0.06174889),
          ...,
          ( 1.692989  , 4.0118, 0.7944306 , 0,  76349440, 0.7782283, 21.122545,  1.4698546 ),
          ( 1.513173  , 4.0119, 0.8329747 , 0, 209715200, 0.7790329, 11.859117,  1.466913  ),
          (-0.19774014, 4.012 , 0.5363163 , 0, 201326592, 0.7790949, 11.937514,  1.4649478 )],
         dtype=(numpy.record, [('flux', '>f4'), ('loglam', '>f4'), ('ivar', '>f4'), ('and_mask', '>i4'), ('or_mask', '>i4'), ('wdisp', '>f4'), ('sky', '>f4'), ('model', '>f4')]))