In [None]:
from google.colab import drive
import os
import shutil

MOUNT_POINT = '/content/drive'

# Clear the mount point only when it is a stale, plain directory.
# If Drive is currently mounted there, shutil.rmtree would descend into the
# mount and delete the user's real Drive files, so a live mount (or anything
# that still exposes a MyDrive folder) must never be removed.
drive_is_live = os.path.ismount(MOUNT_POINT) or os.path.exists(os.path.join(MOUNT_POINT, 'MyDrive'))
if os.path.isdir(MOUNT_POINT) and not drive_is_live:
    shutil.rmtree(MOUNT_POINT)
    print("Removed existing /content/drive directory.")

# force_remount=True already takes care of re-mounting an existing mount.
drive.mount(MOUNT_POINT, force_remount=True)

## Bibliotecas

In [None]:
!pip install astropy;
!pip install healpy;
!pip install numpy;
!pip install mathplotlib-venn;
!pip install pandas;
!pip install dask[complete];
!pip install SciencePlots;

In [None]:
# Shared imports for the notebook. Some later cells use Table, np, pd, plt and os
# without importing them, so they rely on this (or another earlier) cell having run.
from astropy.cosmology import Planck15 as cosmo
import matplotlib.pyplot as plt
import numpy as np
import healpy as hp
import astropy.units as u
import astropy.cosmology.units as cu
import os
import dask.dataframe as dd
import pandas as pd
# Value 1100 tagged with astropy's redshift unit.
# NOTE(review): `z` is not referenced by any cell visible in this section — confirm it is still needed.
z = 1100 * cu.redshift
from astropy.coordinates import SkyCoord
from astropy.cosmology import FlatLambdaCDM
from astropy.table import Table

# Processamento de gráficos

Processamento de dados em fits, formação de gráficos para parâmetros MFMTK's em eixos de M* vs fil_dist e M* vs redshift (nos limites impostos).

In [None]:
from astropy.table import Table
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# Load the FITS catalogue into a DataFrame with lower-case column names.
tabela = Table.read("/content/drive/MyDrive/FURG/Mestrado - Física/Pré-projeto - FURG/Amostragem e datasets/Database/MFMTK_SDSS_clusters_z_logmstars.fits", format="fits")
df = tabela.to_pandas()
df.columns = [c.lower() for c in df.columns]

# Columns used for sectoring and as the x-axis
col_logMstars = "logmstars"
col_fildist = "fil_dist"
col_redshift = "redshift_1"
# Parameters drawn on the y-axis (one figure per parameter)
parametros_restantes = ["a1", "a3", "c1", "c2", "s1", "s3", "h", "nfit2d", "redshift_1", "l1.0", "l0.5", "ng.5", "ng1", "len", "g", "m20", "sigma_psi", "logsfr"]

# Sector limits as (start, stop, step)
lim_logmstars = (9.5, 12.5, 1.0)
lim_fildist = (0.0, 10.0, 1.0)
lim_redshift = (0.01, 0.05, 0.01)

# Bin edges for each sectoring variable
bins_logMstars = np.arange(lim_logmstars[0], lim_logmstars[1] + lim_logmstars[2], lim_logmstars[2])
bins_fildist = np.arange(lim_fildist[0], lim_fildist[1] + lim_fildist[2], lim_fildist[2])
bins_redshift = np.arange(lim_redshift[0], lim_redshift[1] + lim_redshift[2], lim_redshift[2])

# Sector (bin) membership columns
df["setor_logMstars"] = pd.cut(df[col_logMstars], bins=bins_logMstars, include_lowest=True)
df["setor_fildist"] = pd.cut(df[col_fildist], bins=bins_fildist, include_lowest=True)
df["setor_redshift"] = pd.cut(df[col_redshift], bins=bins_redshift, include_lowest=True)

# Fixed error (not used by the plots in this cell)
erro_constante = 0.001

# Output folders, created once instead of on every loop iteration
base_dir = "/content/drive/MyDrive/analises_galaxias_setore_novo"
os.makedirs(base_dir, exist_ok=True)
combined_plots_dir = os.path.join(base_dir, "combined_plots_subplots")
os.makedirs(combined_plots_dir, exist_ok=True)

# All sector combinations (kept as a notebook-level name; not used by the plots below)
sectors = df.groupby(["setor_logMstars", "setor_fildist", "setor_redshift"], observed=False).groups.keys()

# Sectors that actually contain data, in ascending order
logmstars_sectors = sorted(df["setor_logMstars"].dropna().unique())
fildist_sectors = sorted(df["setor_fildist"].dropna().unique())

# One solid colour per logMstars sector. Solid colours were chosen because a
# density colouring made the sectors blend together; a separate density axis
# split into three parts (one per sector) is a possible follow-up.
color_map = {}
default_cmap = plt.colormaps.get_cmap('tab10')
num_logmstars_sectors = len(logmstars_sectors)

for i, sector in enumerate(logmstars_sectors):
    sector_str = str(sector)
    if "(10.5, 11.5]" in sector_str:
        color_map[sector_str] = 'green'
    elif "(11.5, 12.5]" in sector_str:
        color_map[sector_str] = 'red'
    else:
        color_map[sector_str] = default_cmap(i / (num_logmstars_sectors - 1) if num_logmstars_sectors > 1 else 0)

num_rows = len(logmstars_sectors)
num_cols = len(fildist_sectors)

# One figure per parameter: rows are logMstars sectors, columns are fil_dist sectors.
for param in parametros_restantes:
    print(f"Geração de: {param}")

    if num_rows == 0 or num_cols == 0:
        print(f"Pulo de '{param}' indefinido.")
        continue

    fig, axes = plt.subplots(num_rows, num_cols, figsize=(num_cols * 5, num_rows * 5), squeeze=False)
    fig.suptitle(f"{param} vs fil_dist por setor de logMstars e fildist", fontsize=16)

    for i, logmstars_sector in enumerate(logmstars_sectors):
        in_mass_sector = df["setor_logMstars"] == logmstars_sector
        color = color_map.get(str(logmstars_sector), 'gray')

        for j, fildist_sector in enumerate(fildist_sectors):
            ax = axes[i, j]

            # Rows belonging to this (logMstars, fil_dist) sector with a valid parameter value
            df_sector = df[in_mass_sector & (df["setor_fildist"] == fildist_sector)].dropna(subset=[param])

            # Decoration is the same for empty and populated panels (N=0 when empty).
            ax.set_title(f"logMstars: {logmstars_sector}\nfildist: {fildist_sector}\n(N={len(df_sector)})")
            ax.set_xlabel("fil_dist")
            ax.set_ylabel(param)
            ax.grid(alpha=0.3)
            ax.minorticks_on()

            if df_sector.empty:
                continue

            # Draw the points once (they were previously scattered twice).
            ax.scatter(df_sector[col_fildist], df_sector[param], color=color, s=10)

            # Use a log axis only when every plotted value on that axis is
            # strictly positive; otherwise non-positive points would silently
            # disappear from the panel.
            if (df_sector[col_fildist] > 0).all():
                ax.set_xscale('log')
            if (df_sector[param] > 0).all():
                ax.set_yscale('log')

    fig.tight_layout(rect=[0, 0.03, 1, 0.95])

    # Save and release the figure so memory does not grow across parameters.
    plot_filename = os.path.join(combined_plots_dir, f"{param}_combinado.png")
    fig.savefig(plot_filename, dpi=300)
    plt.close(fig)


## Unir os gráficos de diferentes valores logMstars e fildist

In [None]:
# Inspect the output folder: list its sub-folders and show the files of the first one.
base_dir = "/content/drive/MyDrive/analises_galaxias_setore_novo"

# os.listdir raises FileNotFoundError when the folder does not exist yet
# (e.g. the plotting cells have not been run), so treat that as "nothing found".
if os.path.isdir(base_dir):
    subdirectories = sorted(d for d in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, d)))
else:
    subdirectories = []

if subdirectories:
    example_subdir = subdirectories[0]
    example_subdir_path = os.path.join(base_dir, example_subdir)
    files_in_subdir = os.listdir(example_subdir_path)
    print(f"\nFiles in '{example_subdir}':")
    print(files_in_subdir)
else:
    print("Não encontrado.")


In [None]:
from astropy.table import Table
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# Load the FITS catalogue into a DataFrame with lower-case column names.
tabela = Table.read("/content/drive/MyDrive/FURG/Mestrado - Física/Pré-projeto - FURG/Amostragem e datasets/Database/MFMTK_SDSS_clusters_z_logmstars.fits", format="fits")
df = tabela.to_pandas()
df.columns = [c.lower() for c in df.columns]

# Column names
col_logMstars = "logmstars"
col_fildist = "fil_dist"
col_ng_5 = "ng.5"
col_ng1 = "ng1"
col_len = "len"
col_redshift = "redshift_1"

# Sector limits as (start, stop, step) and the resulting bin edges
lim_logmstars = (9.5, 12.5, 1.0)
lim_fildist = (0.0, 10.0, 1.0)

bins_logMstars = np.arange(lim_logmstars[0], lim_logmstars[1] + lim_logmstars[2], lim_logmstars[2])
bins_fildist = np.arange(lim_fildist[0], lim_fildist[1] + lim_fildist[2], lim_fildist[2])

df["setor_logMstars"] = pd.cut(df[col_logMstars], bins=bins_logMstars, include_lowest=True)
df["setor_fildist"] = pd.cut(df[col_fildist], bins=bins_fildist, include_lowest=True)

# Density columns (galaxy counts divided by a cylinder / shell volume).
# They are stored on df but are not used by the plots in this cell.
r_fil = 0.5  # h^-1 Mpc
r_parede = 1.0
df["rho_fil"] = df[col_ng_5] / (np.pi * (r_fil**2) * df[col_len])
df["rho_wall"] = (df[col_ng1] - df[col_ng_5]) / (np.pi * ((r_parede**2 - r_fil**2)) * df[col_len])

# Parameters drawn on the y-axis (one figure per parameter)
parametros_restantes = ["a1", "a3", "c1", "c2", "s1", "s3", "h", "nfit2d", "redshift_1", "l1.0", "l0.5", "ng.5", "ng1", "len", "g", "m20", "sigma_psi", "logsfr",]

# Consistent colour per logMstars sector
logmstars_sectors = sorted(df["setor_logMstars"].dropna().unique())
color_map = {}
default_cmap = plt.colormaps.get_cmap('tab10')
for i, sector in enumerate(logmstars_sectors):
    sector_str = str(sector)
    if "(10.5, 11.5]" in sector_str:
        color_map[sector_str] = 'green'
    elif "(11.5, 12.5]" in sector_str:
        color_map[sector_str] = 'red'
    else:
        color_map[sector_str] = default_cmap(i / (len(logmstars_sectors)-1) if len(logmstars_sectors) > 1 else 0)

# Output folder
base_dir = "/content/drive/MyDrive/analises_galaxias_setore_novo"
output_dir = os.path.join(base_dir, "combined_plots_log")
os.makedirs(output_dir, exist_ok=True)

# One figure per parameter with every (logMstars, fil_dist) sector overlaid.
for param in parametros_restantes:
    print(f"Gerando gráfico logarítmico para: {param}")

    fig, ax1 = plt.subplots(figsize=(12, 8))

    ax1.set_title(f"{param} vs fil_dist por setor de logMstars (log y-escala)")
    ax1.set_xlabel("fil_dist")
    ax1.set_ylabel(param)
    ax1.grid(alpha=0.3)
    ax1.minorticks_on()

    # y-values that were actually drawn; used below to decide on the log scale
    plotted_values = []

    for logmstars_sector in logmstars_sectors:
        df_logm = df[df["setor_logMstars"] == logmstars_sector]
        color = color_map[str(logmstars_sector)]

        for fildist_sector in sorted(df_logm["setor_fildist"].dropna().unique(), key=lambda x: x.left):
            # Rows need both the parameter and the redshift to be present.
            df_sector = df_logm[df_logm["setor_fildist"] == fildist_sector].dropna(subset=[param, col_redshift])
            if df_sector.empty:
                continue
            ax1.scatter(df_sector["fil_dist"], df_sector[param], color=color, s=10, label=f"logMstars: {logmstars_sector}, fildist: {fildist_sector} (N={len(df_sector)})")
            plotted_values.append(df_sector[param])

    # Log y-axis only when every plotted value is strictly positive. The
    # previous `.any()` test enabled it as soon as one value was positive,
    # which contradicted the message below and hid the non-positive points.
    if plotted_values and (pd.concat(plotted_values) > 0).all():
        ax1.set_yscale("log")
    else:
        print(f"Skipping log scale for {param} on primary y-axis as it contains non-positive values.")

    # Legend placed outside the axes
    lines1, labels1 = ax1.get_legend_handles_labels()
    ax1.legend(lines1, labels1, title="Setores", bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=8)

    fig.tight_layout(rect=[0, 0, 0.75, 1])

    # Save and release the figure
    filename = os.path.join(output_dir, f"{param}_log.png")
    fig.savefig(filename, dpi=300)
    plt.close(fig)

## Massa estelar vs redshift (mesmo processo acima, não quis misturar)

In [None]:
from astropy.table import Table
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# Load the FITS catalogue into a DataFrame with lower-case column names.
tabela = Table.read("/content/drive/MyDrive/FURG/Mestrado - Física/Pré-projeto - FURG/Amostragem e datasets/Database/MFMTK_SDSS_clusters_z_logmstars.fits", format="fits")
df = tabela.to_pandas()
df.columns = [c.lower() for c in df.columns]

# Columns used for sectoring and as the x-axis
col_logMstars = "logmstars"
col_redshift = "redshift_1"
# Parameters drawn on the y-axis (one figure per parameter)
parametros_restantes = ["a1", "a3", "c1", "c2", "s1", "s3", "h", "nfit2d", "fil_dist", "l1.0", "l0.5", "ng.5", "ng1", "len", "g", "m20", "sigma_psi", "logsfr"]

# Sector limits as (start, stop, step); the redshift step is coarser than in
# the fil_dist analysis.
lim_logmstars = (9.5, 12.5, 1.0)
lim_redshift = (0.01, 0.05, 0.02)

# Bin edges
bins_logMstars = np.arange(lim_logmstars[0], lim_logmstars[1] + lim_logmstars[2], lim_logmstars[2])
bins_redshift = np.arange(lim_redshift[0], lim_redshift[1] + lim_redshift[2], lim_redshift[2])

# Sector (bin) membership columns
df["setor_logMstars"] = pd.cut(df[col_logMstars], bins=bins_logMstars, include_lowest=True)
df["setor_redshift"] = pd.cut(df[col_redshift], bins=bins_redshift, include_lowest=True)

# Fixed error (not used by the plots in this cell)
erro_constante = 0.001

# Output folders, created once instead of on every loop iteration
base_dir = "/content/drive/MyDrive/analises_galaxias_setore_novo"
os.makedirs(base_dir, exist_ok=True)
combined_plots_dir = os.path.join(base_dir, "combined_plots_subplots")
os.makedirs(combined_plots_dir, exist_ok=True)

# All sector combinations (kept as a notebook-level name; not used by the plots below)
sectors = df.groupby(["setor_logMstars", "setor_redshift"], observed=False).groups.keys()

# Sectors that actually contain data, in ascending order
logmstars_sectors = sorted(df["setor_logMstars"].dropna().unique())
redshift_sectors = sorted(df["setor_redshift"].dropna().unique())
# Legacy name: this cell used to store the redshift sectors under
# `fildist_sectors`; the alias is kept in case another cell reads it.
fildist_sectors = redshift_sectors

# One solid colour per logMstars sector
color_map = {}
default_cmap = plt.colormaps.get_cmap('tab10')
num_logmstars_sectors = len(logmstars_sectors)

for i, sector in enumerate(logmstars_sectors):
    sector_str = str(sector)
    if "(10.5, 11.5]" in sector_str:
        color_map[sector_str] = 'green'
    elif "(11.5, 12.5]" in sector_str:
        color_map[sector_str] = 'red'
    else:
        color_map[sector_str] = default_cmap(i / (num_logmstars_sectors - 1) if num_logmstars_sectors > 1 else 0)

# Only parameters whose column is entirely finite are plotted. Report the
# ones that are dropped so the omission is not silent.
valid_parametros = [param for param in parametros_restantes if np.isfinite(df[param]).all()]
skipped_parametros = [param for param in parametros_restantes if param not in valid_parametros]
if skipped_parametros:
    print(f"Parâmetros ignorados (valores não finitos): {skipped_parametros}")

num_rows = len(logmstars_sectors)
num_cols = len(redshift_sectors)

# One figure per parameter: rows are logMstars sectors, columns are redshift sectors.
for param in valid_parametros:
    print(f"Geração de gráfico: {param}")

    if num_rows == 0 or num_cols == 0:
        print(f"'{param}' não definido.")
        continue

    fig, axes = plt.subplots(num_rows, num_cols, figsize=(num_cols * 5, num_rows * 5), squeeze=False)
    fig.suptitle(f"{param} vs redshift", fontsize=16)

    for i, logmstars_sector in enumerate(logmstars_sectors):
        in_mass_sector = df["setor_logMstars"] == logmstars_sector
        color = color_map.get(str(logmstars_sector), 'gray')

        for j, redshift_sector in enumerate(redshift_sectors):
            ax = axes[i, j]

            # Rows belonging to this (logMstars, redshift) sector with a valid parameter value
            df_sector = df[in_mass_sector & (df["setor_redshift"] == redshift_sector)].dropna(subset=[param])

            # Decoration is the same for empty and populated panels (N=0 when empty).
            ax.set_title(f"logMstars: {logmstars_sector}\nredshift: {redshift_sector}\n(N={len(df_sector)})")
            ax.set_xlabel("redshift_1")
            ax.set_ylabel(param)
            ax.grid(alpha=0.3)
            ax.minorticks_on()

            if df_sector.empty:
                continue

            # Draw the points once (they were previously scattered twice).
            ax.scatter(df_sector[col_redshift], df_sector[param], color=color, s=10)

            # Log axes only where every plotted value is strictly positive
            if (df_sector[col_redshift] > 0).all():
                ax.set_xscale('log')
            if (df_sector[param] > 0).all():
                ax.set_yscale('log')

    fig.tight_layout(rect=[0, 0.03, 1, 0.95])

    # Save and release the figure so memory does not grow across parameters.
    plot_filename = os.path.join(combined_plots_dir, f"{param}_combinado_redshift.png")
    fig.savefig(plot_filename, dpi=300)
    plt.close(fig)

In [None]:
from astropy.table import Table
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# Load the FITS catalogue into a DataFrame with lower-case column names.
tabela = Table.read("/content/drive/MyDrive/FURG/Mestrado - Física/Pré-projeto - FURG/Amostragem e datasets/Database/MFMTK_SDSS_clusters_z_logmstars.fits", format="fits")
df = tabela.to_pandas()
df.columns = [c.lower() for c in df.columns]

# Column names
col_logMstars = "logmstars"
col_redshift = "redshift_1"

# Sector limits as (start, stop, step) and the resulting bin edges
lim_logmstars = (9.5, 12.5, 1.0)
lim_redshift = (0.01, 0.05, 0.02)

bins_logMstars = np.arange(lim_logmstars[0], lim_logmstars[1] + lim_logmstars[2], lim_logmstars[2])
bins_redshift = np.arange(lim_redshift[0], lim_redshift[1] + lim_redshift[2], lim_redshift[2])

df["setor_logMstars"] = pd.cut(df[col_logMstars], bins=bins_logMstars, include_lowest=True)
df["setor_redshift"] = pd.cut(df[col_redshift], bins=bins_redshift, include_lowest=True)

# Parameters drawn on the y-axis (one figure per parameter)
parametros_restantes = ["a1", "a3", "c1", "c2", "s1", "s3", "h", "nfit2d", "fil_dist", "l1.0", "l0.5", "ng.5", "ng1", "len", "g", "m20", "sigma_psi", "logsfr",]

# Consistent colour per logMstars sector
logmstars_sectors = sorted(df["setor_logMstars"].dropna().unique())
color_map = {}
default_cmap = plt.colormaps.get_cmap('tab10')
for i, sector in enumerate(logmstars_sectors):
    sector_str = str(sector)
    if "(10.5, 11.5]" in sector_str:
        color_map[sector_str] = 'green'
    elif "(11.5, 12.5]" in sector_str:
        color_map[sector_str] = 'red'
    else:
        color_map[sector_str] = default_cmap(i / (len(logmstars_sectors)-1) if len(logmstars_sectors) > 1 else 0)

# Only parameters whose column is entirely finite are plotted.
valid_parametros = [param for param in parametros_restantes if np.isfinite(df[param]).all()]

# Output folder (shared with the fil_dist version of this plot)
base_dir = "/content/drive/MyDrive/analises_galaxias_setore_novo"
output_dir = os.path.join(base_dir, "combined_plots_log")
os.makedirs(output_dir, exist_ok=True)

# One figure per parameter with every (logMstars, redshift) sector overlaid.
for param in valid_parametros:
    print(f"Gerando gráfico logarítmico para: {param}")

    fig, ax1 = plt.subplots(figsize=(12, 8))

    ax1.set_title(f"{param} vs redshift por setor de logMstars (log y-scale)")
    ax1.set_xlabel("redshift_1")
    ax1.set_ylabel(param)
    ax1.grid(alpha=0.3)
    ax1.minorticks_on()

    # x/y values that were actually drawn; used below to decide on the log scale
    plotted_x = []
    plotted_y = []

    for logmstars_sector in logmstars_sectors:
        df_logm = df[df["setor_logMstars"] == logmstars_sector]
        color = color_map[str(logmstars_sector)]

        for redshift_sector in sorted(df_logm["setor_redshift"].dropna().unique(), key=lambda x: x.left):
            df_sector = df_logm[df_logm["setor_redshift"] == redshift_sector].dropna(subset=[param, col_redshift])
            if df_sector.empty:
                continue
            ax1.scatter(df_sector["redshift_1"], df_sector[param], color=color, s=10, label=f"logMstars: {logmstars_sector}, redshift: {redshift_sector} (N={len(df_sector)})")
            plotted_x.append(df_sector[col_redshift])
            plotted_y.append(df_sector[param])

    # Decide on the log scale from ALL plotted points. The previous test read
    # `df_sector` after the loops, i.e. only the last sector drawn (or a stale
    # frame from an earlier parameter, or nothing at all).
    all_positive = (
        bool(plotted_y)
        and (pd.concat(plotted_x) > 0).all()
        and (pd.concat(plotted_y) > 0).all()
    )
    if all_positive:
        ax1.set_xscale('log')
        ax1.set_yscale('log')
    else:
        print(f"Skipping log scale for {param} on primary y-axis as it contains non-positive values.")

    # Legend placed outside the axes
    lines1, labels1 = ax1.get_legend_handles_labels()
    ax1.legend(lines1, labels1, title="Setores", bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=8)

    fig.tight_layout(rect=[0, 0, 0.75, 1])

    # The fil_dist analysis writes "<param>_log.png" into this same folder, so
    # a distinct suffix is used here to avoid overwriting those figures.
    filename = os.path.join(output_dir, f"{param}_log_redshift.png")
    fig.savefig(filename, dpi=300)
    plt.close(fig)

# Comparação cor-fil_dist

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

from astropy.table import Table

# Load the FITS catalogue into a DataFrame with lower-case column names.
tabela = Table.read("/content/drive/MyDrive/FURG/Mestrado - Física/Pré-projeto - FURG/Amostragem e datasets/Database/MFMTK_SDSS_clusters_z_logmstars.fits", format="fits")
df = tabela.to_pandas()
df.columns = [c.lower() for c in df.columns]

# Output folder
base_dir = "/content/drive/MyDrive/analises_galaxias_setore_novo"
cor_fildist_dir = os.path.join(base_dir, "cor-cor-fildist")
os.makedirs(cor_fildist_dir, exist_ok=True)

mag_cols = ['mag_u_1', 'mag_g_1', 'mag_r_1']

# Check for the magnitude columns BEFORE touching them: calling dropna on a
# missing column raises KeyError, which made the else branch unreachable.
if all(col in df.columns for col in mag_cols):
    # df is modified in place on purpose: the next cell keeps working on it.
    df.dropna(subset=mag_cols, inplace=True)

    # Colour indices
    df['u_g'] = df['mag_u_1'] - df['mag_g_1']
    df['g_r'] = df['mag_g_1'] - df['mag_r_1']

    M_sun = 4.64
    # Quantity selected as "colour data": computed from lumr when available,
    # otherwise taken from 'm-r', otherwise the r-band magnitude (always
    # present in this branch). Not used by the histogram below.
    if 'lumr' in df.columns:
        df['M_r'] = -2.5 * np.log10(df['lumr']) + M_sun
        color_data = df['M_r']
        c_label = 'M_r (from lumr)'
    elif 'm-r' in df.columns:
        color_data = df['m-r']
        c_label = 'M_r (from m-r)'
    else:
        color_data = df['mag_r_1']
        c_label = 'r-band magnitude'

    # Colour-range filtered copy.
    # NOTE(review): dfp is not used by the classification or the histogram
    # below, which work on the unfiltered df — confirm whether that is intended.
    dfp = df[
        (df['u_g'] > -0.5) & (df['u_g'] < 3.0) &
        (df['g_r'] > -0.2) & (df['g_r'] < 2.0)
    ].copy()

    # Blue/red classification by g-r (adjust the threshold if needed)
    limite = 0.6
    df['tipo'] = np.where(df['g_r'] < limite, 'Azul', 'Vermelha')

    azul = df[df['tipo'] == 'Azul']
    vermelha = df[df['tipo'] == 'Vermelha']

    # Shared bin edges: with bins=30 passed separately, each histogram would
    # get its own edges and the two overlaid distributions would not be
    # comparable bar by bar.
    bin_edges = np.histogram_bin_edges(df['fil_dist'].dropna(), bins=30)

    # Histogram of fil_dist for both galaxy types in a single plot
    fig, ax = plt.subplots(figsize=(12, 6))

    ax.hist(azul['fil_dist'].dropna(), bins=bin_edges, alpha=0.6, label=f'Galáxias Azuis (N={len(azul)})', color='blue')
    ax.hist(vermelha['fil_dist'].dropna(), bins=bin_edges, alpha=0.6, label=f'Galáxias Vermelhas (N={len(vermelha)})', color='red')

    ax.set_xlabel('fil_dist')
    ax.set_ylabel('Número de galáxias')
    ax.set_title('Distribuição de fildist — Galáxias Azuis e Vermelhas')
    ax.legend()
    ax.grid(alpha=0.3)
    ax.minorticks_on()

    fig.tight_layout()
    plot_filename = os.path.join(cor_fildist_dir, "cor-cor-fildist_combined_histogram.png")
    fig.savefig(plot_filename, dpi=300)
    plt.show()

else:
    print("Magnitude columns (mag_u_1, mag_g_1, mag_r_1) not found in the DataFrame after dropping NaNs. Cannot create color or tipo columns.")

# Comparando a classificação por setores com a cor

In [None]:
# NOTE: this cell works on the `df` left behind by the previous cell and
# relies on np, pd and os having been imported by an earlier cell.
mag_cols = ['mag_u_1', 'mag_g_1', 'mag_r_1']

# Check for the columns before dropna: dropna on a missing column raises KeyError.
if all(col in df.columns for col in mag_cols):
    df.dropna(subset=mag_cols, inplace=True)
    df['u_g'] = df['mag_u_1'] - df['mag_g_1']
    df['g_r'] = df['mag_g_1'] - df['mag_r_1']
    # Blue/red classification by g-r
    limite = 0.6
    df['tipo'] = np.where(df['g_r'] < limite, 'Azul', 'Vermelha')
else:
    print("Sem valores")

# Sectoring columns and bin edges, limits given as (start, stop, step)
col_logMstars = "logmstars"
col_fildist = "fil_dist"
lim_logmstars = (9.5, 12.5, 1.0)
lim_fildist = (0.0, 10.0, 1.0)
bins_logMstars = np.arange(lim_logmstars[0], lim_logmstars[1] + lim_logmstars[2], lim_logmstars[2])
bins_fildist = np.arange(lim_fildist[0], lim_fildist[1] + lim_fildist[2], lim_fildist[2])
df["setor_logMstars"] = pd.cut(df[col_logMstars], bins=bins_logMstars, include_lowest=True)
df["setor_fildist"] = pd.cut(df[col_fildist], bins=bins_fildist, include_lowest=True)

# Output folder
base_dir = "/content/drive/MyDrive/analises_galaxias_setore_novo"
cor_fildist_dir = os.path.join(base_dir, "cor-cor-fildist")
os.makedirs(cor_fildist_dir, exist_ok=True)

if 'tipo' in df.columns:
    # Human-readable label combining both sectors and the colour type
    df["setor_cor"] = df.apply(
        lambda row: f"logMstars: {row['setor_logMstars']}, fildist: {row['setor_fildist']}, tipo: {row['tipo']}",
        axis=1
    )

    # Galaxy counts per (logMstars sector, fil_dist sector), one column per type
    sector_color_counts = df.groupby(["setor_logMstars", "setor_fildist", "tipo"], observed=False).size().unstack(fill_value=0)

    # Both CSVs go to the Drive output folder. The per-galaxy table used to be
    # written to the current working directory, which is lost when the Colab
    # session ends.
    df.to_csv(os.path.join(cor_fildist_dir, 'RGB-galáxias-setorM*fildist.csv'), index=False)
    sector_color_counts.to_csv(os.path.join(cor_fildist_dir, 'galaxy_color_counts_per_sector.csv'))
    # "salvo" was previously printed in the branch where nothing is saved.
    print("salvo")
else:
    print("Coluna 'tipo' ausente; nada foi salvo.")

# Galáxia por cor - parâmetros - M* - fildist

In [None]:
# NOTE: relies on Table, np, pd, plt and os having been imported by an earlier cell.
tabela = Table.read("/content/drive/MyDrive/FURG/Mestrado - Física/Pré-projeto - FURG/Amostragem e datasets/Database/MFMTK_SDSS_redshift_logMstars.fits", format="fits")
df = tabela.to_pandas()
df.columns = [c.lower() for c in df.columns]

# Magnitudes and colour indices. The columns are checked BEFORE dropna, since
# dropna on a missing column raises KeyError and the else branch could never run.
mag_cols = ['mag_u', 'mag_g', 'mag_r']
if all(col in df.columns for col in mag_cols):
    df.dropna(subset=mag_cols, inplace=True)
    df['u_g'] = df['mag_u'] - df['mag_g']
    df['g_r'] = df['mag_g'] - df['mag_r']
    # Blue/red classification by g-r
    limite = 0.6
    df['tipo'] = np.where(df['g_r'] < limite, 'Azul', 'Vermelha')
else:
    print("Magnitude columns (mag_u, mag_g, mag_r) not found in the DataFrame after dropping NaNs. Cannot create color or tipo columns.")


base_dir = '/content/drive/MyDrive/analises_galaxias_setore_novo'
cor_fildist_dir = os.path.join(base_dir, "RGB-fildist-M*")  # not used in this cell
col_fildist = 'fil_dist'
col_logMstars = 'logmstars'
# Sector limits as (start, stop, step)
lim_fildist = (0.0, 10.0, 0.5)
lim_logMstars = (9.5, 11.5, 0.5)

# Fixed error (not used by the plots in this cell)
erro_constante = 0.001

# Sector (bin) membership columns
bins_logMstars = np.arange(lim_logMstars[0], lim_logMstars[1] + lim_logMstars[2], lim_logMstars[2])
bins_fildist = np.arange(lim_fildist[0], lim_fildist[1] + lim_fildist[2], lim_fildist[2])

df["setor_logMstars"] = pd.cut(df[col_logMstars], bins=bins_logMstars, include_lowest=True)
df["setor_fildist"] = pd.cut(df[col_fildist], bins=bins_fildist, include_lowest=True)

if 'tipo' in df.columns:
    # Human-readable label combining both sectors and the colour type
    df["setor_cor"] = df.apply(
        lambda row: f"logMstars: {row['setor_logMstars']}, fildist: {row['setor_fildist']}, tipo: {row['tipo']}",
        axis=1
    )

# All sector combinations (kept as a notebook-level name; not used by the plots below)
sectors = df.groupby(["setor_logMstars", "setor_fildist"], observed=False).groups.keys()

# logMstars sectors that contain data, ascending; rows outside every bin (NaN) are left out.
logmstars_sectors = sorted(df["setor_logMstars"].dropna().unique(), key=lambda x: x.left)
num_logmstars_sectors = len(logmstars_sectors)

# Colours: blue galaxies stay blue and red galaxies stay red, but the shade
# gets darker with increasing logMstars sector. Previously a per-sector colour
# was computed and never used, so every sector was drawn in the same two
# colours and the per-sector legend entries could not be told apart.
blue_cmap = plt.colormaps.get_cmap('Blues')
red_cmap = plt.colormaps.get_cmap('Reds')
color_map = {}
for i, sector in enumerate(logmstars_sectors):
    # Shades run from 0.35 (light, still visible on white) to 1.0 (darkest).
    shade = 0.35 + 0.65 * (i / (num_logmstars_sectors - 1) if num_logmstars_sectors > 1 else 1.0)
    color_map[str(sector)] = {'Azul': blue_cmap(shade), 'Vermelha': red_cmap(shade)}

# Parameters drawn on the y-axis (one figure per parameter)
parametros_restantes = ["a1", "a3", "c1", "c2", "s1", "s3", "h", "nfit2d", "redshift", "l1.0", "l0.5", "ng.5", "ng1", "g", "m20", "sigma_psi", "logsfr"]

# Output folder
combined_plots_dir = os.path.join(base_dir, "RGB-fildist-M*_combined")
os.makedirs(combined_plots_dir, exist_ok=True)

# Without the 'tipo' column there is nothing to split on, so skip plotting
# instead of failing with a KeyError inside the loop.
if 'tipo' in df.columns:
    parametros_a_plotar = parametros_restantes
else:
    parametros_a_plotar = []
    print("Coluna 'tipo' ausente; nenhum gráfico foi gerado.")

for param in parametros_a_plotar:
    print(f"Generating combined plot for parameter: {param}")
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.set_title(f"{param} vs fil_dist por setor de logMstars e tipo de galáxia")
    ax.set_xlabel("fil_dist")
    ax.set_ylabel(param)
    ax.grid(alpha=0.3)
    ax.minorticks_on()

    for logmstars_sector in logmstars_sectors:
        df_logmstars_sector = df[df["setor_logMstars"] == logmstars_sector].dropna(subset=[param])
        if df_logmstars_sector.empty:
            continue

        sector_colors = color_map[str(logmstars_sector)]

        # Blue galaxies: filled circles
        df_blue = df_logmstars_sector[df_logmstars_sector['tipo'] == 'Azul']
        ax.scatter(df_blue["fil_dist"], df_blue[param], color=sector_colors['Azul'], marker='o', s=10, label=f"logMstars: {logmstars_sector}, Azul (N={len(df_blue)})")

        # Red galaxies: crosses
        df_red = df_logmstars_sector[df_logmstars_sector['tipo'] == 'Vermelha']
        ax.scatter(df_red["fil_dist"], df_red[param], color=sector_colors['Vermelha'], marker='x', s=10, label=f"logMstars: {logmstars_sector}, Vermelha (N={len(df_red)})")

    # Legend placed outside the axes
    ax.legend(title="Setores e Tipo", bbox_to_anchor=(1.05, 1), loc='upper left')
    fig.tight_layout(rect=[0, 0, 0.85, 1])

    # Save and release the figure
    plot_filename = os.path.join(combined_plots_dir, f"{param}_fildist_logMstars.png")
    fig.savefig(plot_filename, dpi=300)
    plt.close(fig)


# Comparando dados morfológicos com morfométricos

In [None]:
from astropy.table import Table
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# Load the FITS catalogue into a DataFrame with lower-case column names.
tabela = Table.read("/content/drive/MyDrive/FURG/Mestrado - Física/Pré-projeto - FURG/Amostragem e datasets/Database/MFMTK_SDSS_clusters_z_logmstars.fits", format="fits")
df = tabela.to_pandas()
df.columns = [c.lower() for c in df.columns]

# Column names
col_morf = 'morf'
col_fildist = 'fil_dist'
col_len = 'len'
col_logMstars = 'logmstars'
col_cor = 'tipo'

# Sector limits as (start, stop, step)
lim_fildist = (0.0, 10.0, 0.5)
lim_logMstars = (9.5, 11.5, 0.5)

bins_logMstars = np.arange(lim_logMstars[0], lim_logMstars[1] + lim_logMstars[2], lim_logMstars[2])
bins_fildist = np.arange(lim_fildist[0], lim_fildist[1] + lim_fildist[2], lim_fildist[2])

# Sector (bin) membership columns
df["setor_logMstars"] = pd.cut(df[col_logMstars], bins=bins_logMstars, include_lowest=True)
df["setor_fildist"] = pd.cut(df[col_fildist], bins=bins_fildist, include_lowest=True)

if col_morf in df.columns:
    # Morphological classes labelled 0, 1, 2. The class column is built once
    # (it used to be computed twice with identical arguments).
    # NOTE(review): the middle edge is 1.7 rather than 1.5 — confirm this is intentional.
    bins_morf = np.array([-0.5, 0.5, 1.7, 2.5])
    df["setor_morf"] = pd.cut(df[col_morf], bins=bins_morf, include_lowest=True, right=True, labels=[0, 1, 2])
else:
    print("Sem valores")
    bins_morf = None

# MFMTK parameters to histogram
parametros_to_plot = ["fil_dist", "logmstars", "s1", "s3", "h", "nfit2d", "redshift_1", "l1.0", "l0.5", "ng.5", "ng1", "len", "g", "m20", "sigma_psi", "logsfr"]

# Output folder
base_dir = "/content/drive/MyDrive/analises_galaxias_setore_novo"
morf_plots_dir = os.path.join(base_dir, "morfologia_comparacao_histograms")
os.makedirs(morf_plots_dir, exist_ok=True)

if "setor_morf" in df.columns:
    morf_sectors = sorted(df["setor_morf"].dropna().unique())

    for param in parametros_to_plot:
        print(f"Historograma morfologia vs {param}")
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.set_title(f"Distribuição de {param} por setor morfológico")
        ax.set_xlabel(param)
        ax.set_ylabel("Frequência")
        ax.grid(alpha=0.3)
        ax.minorticks_on()

        # Shared bin edges across the classes: with bins=30 passed per class,
        # each histogram would get its own edges and the overlaid
        # distributions would not be directly comparable.
        classified_values = df.loc[df["setor_morf"].notna(), param].dropna()
        finite_values = classified_values[np.isfinite(classified_values)]
        hist_bins = np.histogram_bin_edges(finite_values, bins=30) if len(finite_values) else 30

        # One histogram per morphological class
        for sector in morf_sectors:
            data_to_plot = df[df["setor_morf"] == sector].dropna(subset=[param])[param]
            if not data_to_plot.empty:
                ax.hist(data_to_plot, bins=hist_bins, alpha=0.5, label=f"Morfologia: {sector} (N={len(data_to_plot)})")

        ax.legend(title="Setores Morfológicos")
        fig.tight_layout()

        # Save and release the figure
        plot_filename = os.path.join(morf_plots_dir, f"morfologia_vs_{param}_histogram.png")
        fig.savefig(plot_filename, dpi=300)
        plt.close(fig)

# Cálculo de densidade ρ(r) dos filamentos

O raio do filamento (Bisous) é $r = 0.5\,h^{-1}\,\mathrm{Mpc}$; essa é a região interna do filamento modelado. Para $d = 1.0\,h^{-1}\,\mathrm{Mpc}$ estamos na parede ou no campo. Assim, a densidade do filamento deve ser calculada para a sua região interna: relacionarei o número de galáxias com o comprimento (Len) e com a área $\pi r^{2}$, por se tratar de um modelo cilíndrico.

In [None]:

# NOTE: relies on Table and np having been imported by an earlier cell.
tabela = Table.read("/content/drive/MyDrive/FURG/Mestrado - Física/Pré-projeto - FURG/Amostragem e datasets/Database/MFMTK_SDSS_clusters_z_logmstars.fits", format="fits")
df = tabela.to_pandas()
df.columns = [c.lower() for c in df.columns]

# Bisous radii
r_fil = 0.5  # h^-1 Mpc
r_parede = 1.0

# Parameters compared against the densities in the morphometry-density cell
parametros_restantes = ["s1", "s3","redshift_1", "g", "m20", "sigma_psi"]

# Columns used for the filament / wall densities
col_ng_5 = "ng.5"
col_ng1 = "ng1"
col_len = "len"

# fil_dist ranges (lower, upper) used to count galaxies per environment
limit_filamento = (0.0, 0.5)
limit_parede = (0.5, 1.0)
limit_campo = (1.0, 10)

# A zero length would turn the densities into +/-inf and make the means
# below meaningless, so such rows get NaN instead.
len_fil = df[col_len].replace(0, np.nan)

# Filament: count within r_fil divided by the cylinder volume pi * r_fil^2 * len
df["rho_fil"] = df[col_ng_5] / (np.pi * (r_fil**2) * len_fil)
# Wall: count between r_fil and r_parede divided by the shell volume
df["rho_wall"] = (df[col_ng1] - df[col_ng_5]) / (np.pi * ((r_parede**2 - r_fil**2)) * len_fil)
# Sample-wide mean density within r_parede
rho_med = df[col_ng1].sum() / (np.pi * (r_parede**2) * df[col_len].sum())
# Field: mean density minus the filament and wall densities
df['rho_field'] = rho_med - (df['rho_fil'] + df['rho_wall'])

# Means (NaN rows are skipped by pandas)
mean_rho_fil = df["rho_fil"].mean()
mean_rho_wall = df["rho_wall"].mean()
mean_rho_field = df["rho_field"].mean()

# Galaxy counts per environment. The filament range includes its lower
# bound so that galaxies lying exactly at fil_dist == 0 are counted; the
# other ranges are (lower, upper] so no galaxy is counted twice.
fil_dist = df['fil_dist']
galaxias_na_filamento = int(((fil_dist >= limit_filamento[0]) & (fil_dist <= limit_filamento[1])).sum())
galaxias_na_parede = int(((fil_dist > limit_parede[0]) & (fil_dist <= limit_parede[1])).sum())
galaxias_no_campo = int(((fil_dist > limit_campo[0]) & (fil_dist <= limit_campo[1])).sum())

# Total number of galaxies in the sample
total_galaxias = len(df)

print(f"Média rho_fil: {mean_rho_fil}")
print(f"Média rho_wall: {mean_rho_wall}")
print(f"Média rho_field: {mean_rho_field}")
print(f"Número de galáxias no {limit_filamento} h^-1 Mpc (filamento): {galaxias_na_filamento}")
print(f"Número de galáxias na {limit_parede} h^-1 Mpc (parede): {galaxias_na_parede}")
print(f"Número de galáxias no {limit_campo} h^-1 Mpc (campo): {galaxias_no_campo}")
print(f"Total de galáxias (amostra): {total_galaxias}")

In [None]:
import matplotlib.pyplot as plt

# Overlay the three density columns of `df` against fil_dist on log-log axes.
fig, ax = plt.subplots(figsize=(10, 6))
for density_name in ('rho_fil', 'rho_wall', 'rho_field'):
    ax.scatter(df['fil_dist'], df[density_name], label=density_name, s=10)

ax.set_xlabel('fil_dist')
ax.set_ylabel('Densidade')
ax.set_title('Densidade vs fil_dist')
ax.set_yscale('log')
ax.set_xscale('log')
ax.legend()
ax.grid(alpha=0.3)
ax.minorticks_on()
plt.show()

## Morfometria-densidade

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os

# NOTE: uses `df` (with the rho_* columns) and `parametros_restantes` from the
# density-calculation cell.

# Output folder
base_dir = "/content/drive/MyDrive/analises_galaxias_setore_novo"

density_plots_dir = os.path.join(base_dir, "morfometria_densidade")
os.makedirs(density_plots_dir, exist_ok=True)

# Density columns to compare against. This name was never defined anywhere,
# so the loop below failed with a NameError.
density_columns = ["rho_fil", "rho_wall", "rho_field"]

for param in parametros_restantes:
    print(f"Geração de gráfico de densidade: {param}")
    for density_col in density_columns:
        # Keep only rows where both values are finite; NaN/inf cannot be drawn.
        pair = df[[density_col, param]].replace([np.inf, -np.inf], np.nan).dropna()

        fig, ax = plt.subplots(figsize=(10, 6))
        ax.scatter(pair[density_col], pair[param], s=5, alpha=0.5)

        ax.set_xlabel(density_col)
        ax.set_ylabel(param)
        ax.set_title(f"{param} vs {density_col}")

        # Log axes only when every plotted value on that axis is strictly
        # positive; otherwise non-positive points would silently disappear.
        if not pair.empty and (pair[density_col] > 0).all():
            ax.set_xscale('log')
        if not pair.empty and (pair[param] > 0).all():
            ax.set_yscale('log')

        ax.grid(alpha=0.3)
        ax.minorticks_on()
        fig.tight_layout()

        # Save and release the figure
        plot_filename = os.path.join(density_plots_dir, f"{param}_vs_{density_col}.png")
        fig.savefig(plot_filename, dpi=300)
        plt.close(fig)


Galáxias azuis compõem 9225 do total da população, já as vermelhas totalizam 14068 galáxias. Para a morfologia temos: 12192 espirais, 3720 elípticas e 7381 indefinidas. Fazendo uma simples comparação, chegamos à estimativa de que, entre as indefinidas, 5505 serão azuis e 1876 vermelhas. %A confirmar.



## Densidade-largura do filamento