In [None]:
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt

# --- Load the distance table and derive the arrays used by the fit ---

# Input file, resolved relative to the notebook's working directory
file_name = "NED_Redshift_Independent_Galaxy_Distances.txt"

# Skip the first 13 lines of the file and read the rest without a header row
data = pd.read_csv(file_name, skiprows=13, header=None)

# Column names are assigned by hand.
# NOTE(review): two columns share the empty name "" - confirm this matches the file layout.
data.columns = ["", "Index", "Exclusion Code", "Galaxy ID", "m-M", "err", "D (Mpc)", "Method", "REFCODE", "", "Redshift", "Hubble const.", "Adopted LMC modulus", "Date (Yr. - 1980)", "Notes"]

# The two columns every later step depends on
fit_columns = ["D (Mpc)", "Hubble const."]

# Keep only rows that provide both values. The explicit .copy() yields an
# independent frame, so the column assignments below do not write into a
# filtered result that pandas may flag with SettingWithCopyWarning.
data = data.dropna(subset=fit_columns).copy()
unique_methods = data.Method.unique()

# Coerce both columns to numbers; entries that cannot be parsed become NaN
for column_name in fit_columns:
    data[column_name] = pd.to_numeric(data[column_name], errors='coerce')

# Discard the rows whose conversion produced NaN (copied for the same reason as above)
data = data.dropna(subset=fit_columns).copy()

# z = (Hubble constant * Distance) / speed of light
# NOTE(review): this overwrites the "Redshift" column that was loaded from the
# file with a value derived from each row's own Hubble constant and distance.
speed_of_light = 299792.458  # in km/s
data["Redshift"] = (data["Hubble const."] * data["D (Mpc)"]) / speed_of_light

# Drop rows where the derived redshift is NaN
data = data.dropna(subset=["Redshift"])

# Plain NumPy arrays for the fitting step
redshift = data["Redshift"].values
distance = data["D (Mpc)"].values

# Quick look at the cleaned table and at the extremes of both arrays
print(data.head())
print("Max Redshift:", redshift.max())
print("Max Distance:", distance.max())

# Keep only entries with z < 1; the mask is built once and applied to both
# arrays so they stay aligned element by element.
low_redshift = redshift < 1
redshift_filtered = redshift[low_redshift]
distance_filtered = distance[low_redshift]

# Model passed to curve_fit: distance as a function of redshift
def linear_model(z, H0):
    """Return ``z * speed_of_light / H0``.

    Parameters
    ----------
    z : scalar or array
        Independent variable (the fit passes the redshift values here).
    H0 : scalar
        The single free parameter of the fit.

    Reads the module-level ``speed_of_light`` when it is called.
    """
    numerator = z * speed_of_light
    return numerator / H0

# Least-squares fit of linear_model to the filtered sample; H0 is the only free parameter
popt, pcov = curve_fit(linear_model, redshift_filtered, distance_filtered)

# The first (and only) fitted parameter is the Hubble constant
Hubble_constant = popt[0]

print(f"Extracted Hubble Constant: {Hubble_constant:.2f} km/s/Mpc")

# Draw the filtered data and the fitted line on one set of axes
fit_label = f'Fit: H0={Hubble_constant:.2f} km/s/Mpc'
fitted_distance = linear_model(redshift_filtered, *popt)

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(redshift_filtered, distance_filtered, label='Data', color='blue')
ax.plot(redshift_filtered, fitted_distance, label=fit_label, color='red')
ax.set_xlabel('Redshift (z)')
ax.set_ylabel('Distance (Mpc)')
ax.set_title('Redshift vs. Distance')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt

# --- Reload the distance table so this cell can run on its own ---

# Input file, resolved relative to the notebook's working directory
file_name = "NED_Redshift_Independent_Galaxy_Distances.txt"

# Skip the first 13 lines of the file and read the rest without a header row
data = pd.read_csv(file_name, skiprows=13, header=None)

# Column names are assigned by hand.
# NOTE(review): two columns share the empty name "" - confirm this matches the file layout.
data.columns = ["", "Index", "Exclusion Code", "Galaxy ID", "m-M", "err", "D (Mpc)", "Method", "REFCODE", "", "Redshift", "Hubble const.", "Adopted LMC modulus", "Date (Yr. - 1980)", "Notes"]

# The two columns every later step depends on
fit_columns = ["D (Mpc)", "Hubble const."]

# Keep only rows that provide both values. The explicit .copy() yields an
# independent frame, so the column assignments below do not write into a
# filtered result that pandas may flag with SettingWithCopyWarning.
data = data.dropna(subset=fit_columns).copy()

# Coerce both columns to numbers; entries that cannot be parsed become NaN
for column_name in fit_columns:
    data[column_name] = pd.to_numeric(data[column_name], errors='coerce')

# Discard the rows whose conversion produced NaN (copied for the same reason as above)
data = data.dropna(subset=fit_columns).copy()

# z = (Hubble constant * Distance) / speed of light
# NOTE(review): this overwrites the "Redshift" column that was loaded from the
# file with a value derived from each row's own Hubble constant and distance.
speed_of_light = 299792.458  # in km/s
data["Redshift"] = (data["Hubble const."] * data["D (Mpc)"]) / speed_of_light

# Drop rows where the derived redshift is NaN
data = data.dropna(subset=["Redshift"])

# Distinct values of the "Method" column among the remaining rows
unique_methods = data["Method"].unique()

# Model passed to curve_fit: distance as a function of redshift.
# NOTE(review): the first cell defines the same function; it is defined again
# here so that this cell does not depend on the first one having been run.
def linear_model(z, H0):
    """Return ``z * speed_of_light / H0``.

    Parameters
    ----------
    z : scalar or array
        Independent variable (the fit passes the redshift values here).
    H0 : scalar
        The single free parameter of the fit.

    Reads the module-level ``speed_of_light`` when it is called.
    """
    numerator = z * speed_of_light
    return numerator / H0

# One shared figure for every method
fig, ax = plt.subplots(figsize=(14, 8))

# One viridis colour per entry of unique_methods, evenly spaced over the colormap
colors = plt.cm.viridis(np.linspace(0, 1, len(unique_methods)))

# Fitted Hubble constant keyed by method
hubble_constants = {}

# Fit and draw each method separately
for method, method_color in zip(unique_methods, colors):
    method_data = data[data["Method"] == method]
    redshift = method_data["Redshift"].values
    distance = method_data["D (Mpc)"].values

    # Keep only entries with z < 1, using the same mask for both arrays
    below_one = redshift < 1
    redshift_filtered = redshift[below_one]
    distance_filtered = distance[below_one]

    # Nothing to fit for this method
    if len(redshift_filtered) == 0:
        continue

    # Least-squares fit with H0 as the only free parameter
    popt, pcov = curve_fit(linear_model, redshift_filtered, distance_filtered)
    Hubble_constant = popt[0]
    hubble_constants[method] = Hubble_constant

    # Points and dashed fit line share the method's colour
    ax.scatter(
        redshift_filtered,
        distance_filtered,
        label=f'{method} Data',
        color=method_color,
    )
    ax.plot(
        redshift_filtered,
        linear_model(redshift_filtered, *popt),
        label=f'{method} Fit: H0={Hubble_constant:.2f} km/s/Mpc',
        color=method_color,
        linestyle='--',
    )

# Axis labels, title, legend and grid for the combined figure
ax.set_xlabel('Redshift (z)')
ax.set_ylabel('Distance (Mpc)')
ax.set_title('Redshift vs. Distance by Method')
ax.legend()
ax.grid(True)
plt.show()

# Two-column table: one row per fitted method
hubble_rows = list(hubble_constants.items())
hubble_constants_df = pd.DataFrame(
    hubble_rows,
    columns=["Method", "Hubble Constant (km/s/Mpc)"],
)

# Print the table of fitted Hubble constants
print(hubble_constants_df)



In [None]:
# Show only the columns that take part in the redshift/distance fit
preview_columns = ["Galaxy ID", "Method", "D (Mpc)", "Redshift", "Hubble const."]
data[preview_columns]

In [None]:
# Scratch check of the ratio (70 * 1700) / (0.396941 * c).
# NOTE(review): presumably 70 is a Hubble constant in km/s/Mpc, 1700 a distance
# in Mpc and 0.396941 a redshift taken from the table - TODO confirm.
ratio_numerator = 70 * 1700
ratio_denominator = 0.396941 * speed_of_light
ratio_numerator / ratio_denominator