In [5]:
import os
from astropy.io import fits
import pandas as pd

# Define paths
dataset_path = '/home/parlange/datasets/mini_sample/images'
metadata_path = '/home/parlange/datasets/mini_sample/'

# Function to analyze FITS files
def analyze_fits_files(directory):
    fits_data = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.fits'):
                file_path = os.path.join(root, file)
                try:
                    with fits.open(file_path) as hdul:
                        header = hdul[0].header
                        fits_data.append({
                            'File': file_path,
                            'Dimensions': hdul[0].data.shape if hdul[0].data is not None else None,
                            'Bitpix': header.get('BITPIX'),
                            'NAXIS': header.get('NAXIS'),
                            'Object': header.get('OBJECT', 'Unknown'),
                            'Instrument': header.get('INSTRUME', 'Unknown'),
                            'Date': header.get('DATE-OBS', 'Unknown')
                        })
                except Exception as e:
                    print(f"Error reading {file_path}: {e}")
    return fits_data

# Analyze FITS files in both directories
images_fits_data = analyze_fits_files(dataset_path)
metadata_fits_data = analyze_fits_files(metadata_path)

# Combine results and create a DataFrame
fits_analysis_df = pd.DataFrame(images_fits_data)
metadata_analysis_df = pd.DataFrame(metadata_fits_data)

# Save the analysis results to a CSV file
output_csv_path = 'fits_files_analysis.csv'
fits_analysis_df.to_csv(output_csv_path, index=False)
metadata_analysis_df.to_csv(output_csv_path, index=False)

# Display the DataFrame
fits_analysis_df
metadata_analysis_df

Unnamed: 0,File,Dimensions,Bitpix,NAXIS,Object,Instrument,Date
0,/home/parlange/datasets/mini_sample/metadata_c...,,8,0,Unknown,Unknown,Unknown
1,/home/parlange/datasets/mini_sample/images/ima...,"(20000, 61, 61)",-64,3,Unknown,Unknown,Unknown
2,/home/parlange/datasets/mini_sample/images/ima...,"(20000, 61, 61)",-64,3,Unknown,Unknown,Unknown
3,/home/parlange/datasets/mini_sample/images/ima...,"(20000, 61, 61)",-64,3,Unknown,Unknown,Unknown
4,/home/parlange/datasets/mini_sample/images/ima...,"(20000, 61, 61)",-64,3,Unknown,Unknown,Unknown
5,/home/parlange/datasets/mini_sample/images/ima...,"(20000, 61, 61)",-64,3,Unknown,Unknown,Unknown


In [10]:
print("Images DataFrame:")
print(images_df['File'].head())

print("\nMetadata DataFrame:")
print(metadata_df['File'].head())


Images DataFrame:
0    image_catalog_y.fits
1    image_catalog_i.fits
2    image_catalog_z.fits
3    image_catalog_r.fits
4    image_catalog_g.fits
Name: File, dtype: object

Metadata DataFrame:
0    metadata_catalog.fits
1     image_catalog_y.fits
2     image_catalog_i.fits
3     image_catalog_z.fits
4     image_catalog_r.fits
Name: File, dtype: object


In [2]:
fits.info('/home/parlange/datasets/mini_sample/metadata_catalog.fits')

Filename: /home/parlange/datasets/mini_sample/metadata_catalog.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU       4   ()      
  1                1 BinTableHDU     24   20000R x 8C   [D, D, D, D, D, D, D, D]   


In [3]:
hdul = fits.open('/home/parlange/datasets/mini_sample/metadata_catalog.fits')
print(hdul[1].columns)

ColDefs(
    name = 'label'; format = 'D'
    name = 'z_l'; format = 'D'
    name = 'z_s'; format = 'D'
    name = 'vel_disp'; format = 'D'
    name = 'logM'; format = 'D'
    name = 'theta_e'; format = 'D'
    name = 'source_mag'; format = 'D'
    name = 'lens_mag'; format = 'D'
)


In [7]:
fits.info('/home/parlange/datasets/mini_sample/images/image_catalog_g.fits')
fits.info('/home/parlange/datasets/mini_sample/images/image_catalog_r.fits')
fits.info('/home/parlange/datasets/mini_sample/images/image_catalog_i.fits')
fits.info('/home/parlange/datasets/mini_sample/images/image_catalog_z.fits')
fits.info('/home/parlange/datasets/mini_sample/images/image_catalog_y.fits')

Filename: /home/parlange/datasets/mini_sample/images/image_catalog_g.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU       7   (61, 61, 20000)   float64   
Filename: /home/parlange/datasets/mini_sample/images/image_catalog_r.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU       7   (61, 61, 20000)   float64   
Filename: /home/parlange/datasets/mini_sample/images/image_catalog_i.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU       7   (61, 61, 20000)   float64   
Filename: /home/parlange/datasets/mini_sample/images/image_catalog_z.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU       7   (61, 61, 20000)   float64   
Filename: /home/parlange/datasets/mini_sample/images/image_catalog_y.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU     

In [4]:
# Load the FITS file
fits_file = '/home/parlange/datasets/mini_sample/metadata_catalog.fits'
with fits.open(fits_file) as hdul:
    # Access the table data
    data = hdul[1].data

# Convert all columns to a pandas DataFrame
columns = data.columns.names  # Get all column names
data_dict = {col: data[col] for col in columns}  # Extract data for all columns
df = pd.DataFrame(data_dict)

# Save all parameters to a CSV file
output_csv = '/home/parlange/datasets/mini_sample/labels_parameters.csv'
df.to_csv(output_csv, index=False)

print(f"All parameters extracted and saved as '{output_csv}'")

All parameters extracted and saved as '/home/parlange/datasets/mini_sample/labels_parameters.csv'


In [18]:
import os
import numpy as np
from astropy.io import fits

# Define paths
images_dir = "/home/parlange/datasets/mini_sample/images"
metadata_file = "/home/parlange/datasets/mini_sample/metadata_catalog.fits"

# Bands and corresponding files
bands = ['g', 'r', 'i', 'z', 'y']
image_files = {
    'g': 'image_catalog_g.fits',
    'r': 'image_catalog_r.fits',
    'i': 'image_catalog_i.fits',
    'z': 'image_catalog_z.fits',
    'y': 'image_catalog_y.fits'
}

band_data = []
shapes = []

for b in bands:
    file_path = os.path.join(images_dir, image_files[b])
    with fits.open(file_path) as hdul:
        data = hdul[0].data
        print(f"Shape for {b} band: {data.shape}")
        band_data.append(data)
        shapes.append(data.shape)

# Check if all arrays have the same shape
unique_shapes = set(shapes)
if len(unique_shapes) != 1:
    raise ValueError(f"Inconsistent shapes among bands: {unique_shapes}")

# Since all have the same shape, we can stack them
X = np.stack(band_data, axis=1)
print("Final X shape:", X.shape)

# Load labels from the metadata file
with fits.open(metadata_file) as hdul:
    meta_data = hdul[1].data
    y = np.array(meta_data['label'])
    print("y shape:", y.shape)

Shape for g band: (20000, 61, 61)
Shape for r band: (20000, 61, 61)
Shape for i band: (20000, 61, 61)
Shape for z band: (20000, 61, 61)
Shape for y band: (20000, 61, 61)
Final X shape: (20000, 5, 61, 61)
y shape: (20000,)
