In [1]:
!pip install numpy pandas astropy torch torchvision matplotlib


Collecting torchvision
  Downloading torchvision-0.21.0-cp311-cp311-win_amd64.whl.metadata (6.3 kB)
Collecting torch
  Downloading torch-2.6.0-cp311-cp311-win_amd64.whl.metadata (28 kB)
Downloading torchvision-0.21.0-cp311-cp311-win_amd64.whl (1.6 MB)
   ---------------------------------------- 0.0/1.6 MB ? eta -:--:--
   ---------------------------------------- 1.6/1.6 MB 13.8 MB/s eta 0:00:00
Downloading torch-2.6.0-cp311-cp311-win_amd64.whl (204.2 MB)
   ---------------------------------------- 0.0/204.2 MB ? eta -:--:--
    --------------------------------------- 2.6/204.2 MB 15.1 MB/s eta 0:00:14
   - -------------------------------------- 5.5/204.2 MB 13.4 MB/s eta 0:00:15
   - -------------------------------------- 7.9/204.2 MB 12.5 MB/s eta 0:00:16
   -- ------------------------------------- 10.2/204.2 MB 12.5 MB/s eta 0:00:16
   -- ------------------------------------- 13.4/204.2 MB 12.7 MB/s eta 0:00:16
   --- ------------------------------------ 16.5/204.2 MB 13.2 MB/s eta 0


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import os
from glob import glob
from astropy.io import fits

class BLLacQSO_Dataset(Dataset):
    def __init__(self, bl_path, qso_path, max_len=1024):
        self.samples = []
        self.labels = []
        self.max_len = max_len

        # BL Lac loading
        for file in glob(os.path.join(bl_path, "*.txt")):
            self.samples.append((file, "txt"))
            self.labels.append(1)

        # QSO
        for file in glob(os.path.join(qso_path, "*.fits")):
            self.samples.append((file, "fits"))
            self.labels.append(0)

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        file_path, file_type = self.samples[idx]
        label = self.labels[idx]

        try:
            if file_type == "txt":
                df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
                df = df.dropna()
                df = df.apply(pd.to_numeric, errors='coerce').dropna()
                flux = df.iloc[:, 1].values  # assuming flux is in the second column
            else:
                with fits.open(file_path) as hdul:
                    flux = np.array(hdul[1].data["flux"], dtype=np.float32)
        except Exception as e:
            print(f"Error reading file: {file_path} | {e}")
            flux = np.zeros(self.max_len)  # fallback dummy data

        # Normalize
        flux = (flux - np.mean(flux)) / (np.std(flux) + 1e-6)

        # Pad/truncate
        if len(flux) > self.max_len:
            flux = flux[:self.max_len]
        else:
            flux = np.pad(flux, (0, self.max_len - len(flux)), 'constant')

        return torch.tensor(flux, dtype=torch.float32).unsqueeze(0), torch.tensor(label)



In [6]:
import torch.nn as nn

class SpectrumCNN(nn.Module):
    def __init__(self):
        super(SpectrumCNN, self).__init__()
        self.conv1 = nn.Conv1d(1, 16, 7, padding=3)
        self.conv2 = nn.Conv1d(16, 32, 5, padding=2)
        self.pool = nn.MaxPool1d(2)
        self.dropout = nn.Dropout(0.25)
        self.fc1 = nn.Linear(32 * 256, 64)
        self.fc2 = nn.Linear(64, 2)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))  # [B, 16, 512]
        x = self.pool(torch.relu(self.conv2(x)))  # [B, 32, 256]
        x = self.dropout(x.view(x.size(0), -1))   # Flatten
        x = torch.relu(self.fc1(x))
        return self.fc2(x)


In [None]:
#loading
bl_path = r"C:\Users\jimmy\OneDrive\Astron100\BLL"
qso_path = r"C:\Users\jimmy\OneDrive\Astron100\qso_spectra"

dataset = BLLacQSO_Dataset(bl_path, qso_path)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SpectrumCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# training
#need to change epoch format
epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0
    correct = 0

    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        correct += (outputs.argmax(1) == labels).sum().item()

    acc = correct / len(dataset)
    print(f"Epoch {epoch+1} | Loss: {total_loss:.4f} | Accuracy: {acc:.4f}")


  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
  df = pd.read_csv(file_path, delim_whitespace=Tru

Epoch 1 | Loss: 6.2039 | Accuracy: 0.7636


  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, o

Epoch 2 | Loss: 1.9576 | Accuracy: 0.9500


  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True,

Epoch 3 | Loss: 1.4307 | Accuracy: 0.9636


  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True,

Epoch 4 | Loss: 0.6797 | Accuracy: 0.9818


  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True,

Epoch 5 | Loss: 0.3092 | Accuracy: 0.9909


  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = 

Epoch 6 | Loss: 0.1614 | Accuracy: 1.0000


  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=Tru

Epoch 7 | Loss: 0.1381 | Accuracy: 1.0000


  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=Tru

Epoch 8 | Loss: 0.1481 | Accuracy: 1.0000


  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=Tru

Epoch 9 | Loss: 0.0692 | Accuracy: 1.0000


  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True,

Epoch 10 | Loss: 0.0542 | Accuracy: 1.0000


  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)
  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', header=None)


In [8]:
def predict_single(file_path, file_type, model, max_len=1024):
    if file_type == "txt":
        df = pd.read_csv(file_path, delim_whitespace=True, comment='#', names=["Wavelength", "Flux"])
        flux = df["Flux"].values
    else:
        with fits.open(file_path) as hdul:
            flux = np.array(hdul[1].data["flux"])

    flux = (flux - np.mean(flux)) / (np.std(flux) + 1e-6)
    if len(flux) > max_len:
        flux = flux[:max_len]
    else:
        flux = np.pad(flux, (0, max_len - len(flux)), 'constant')

    flux_tensor = torch.tensor(flux, dtype=torch.float32).unsqueeze(0).unsqueeze(0).to(device)

    model.eval()
    with torch.no_grad():
        pred = model(flux_tensor)
        predicted_class = torch.argmax(pred).item()
    return "BL Lac" if predicted_class == 1 else "QSO"


In [11]:
file = r"C:\Users\jimmy\OneDrive\Astron100\qso_bottom5\bottom_qso_004_z3.00.fits"
print(predict_single(file, "fits", model))

file2 = r"C:\Users\jimmy\Downloads\ZBLLAC_PKS_B1256-229_1_spectrum.txt"
print(predict_single(file2, "txt", model))


QSO
BL Lac


  df = pd.read_csv(file_path, delim_whitespace=True, comment='#', names=["Wavelength", "Flux"])
