In [6]:
import pandas as pd

def read_csv_to_dataframe(file_path):
    """
    Load a comma-separated CSV file into a pandas DataFrame.

    Parameters:
        file_path (str): Location of the CSV file to read.

    Returns:
        pd.DataFrame: The parsed contents of the file.
    """
    # No separator is passed, so pandas falls back to its default delimiter (",").
    return pd.read_csv(file_path)
    

In [2]:
# read_csv_to_dataframe must already exist in the kernel; run its definition cell first if it does not.
terminos_df = read_csv_to_dataframe(
    r"C:\git_RobertVZ\CursoPython\bases_datos\Terminos_lagoon_TA_DIC_2023_RawData.csv"
)
# Preview the first five rows of the loaded frame.
print(terminos_df.head(n=5))

   sample      date     estuary   area station layer_depth season  \
0  CDL01S  5/3/2020  Candelaria  River   CDL01     Surface    Dry   
1  CDL01F  5/3/2020  Candelaria  River   CDL01      Bottom    Dry   
2  CDL02S  5/3/2020  Candelaria  River   CDL02     Surface    Dry   
3  CDL02F  5/3/2020  Candelaria  River   CDL02      Bottom    Dry   
4  CDL03S  5/3/2020  Candelaria  River   CDL03     Surface    Dry   

   chlorophy_microg_l  cond_microsiemens_cm  depth_m  ...  do_mg_l  sal_psu  \
0                0.36                7015.4    0.464  ...     7.12     3.56   
1                4.19               29886.1    7.792  ...     4.90    16.97   
2                0.92               16691.1    0.453  ...     6.99     8.94   
3                2.23               24847.4    1.261  ...     6.52    13.87   
4                0.58               46341.6    0.465  ...     6.24    28.06   

   sp_cond_microsiemens_cm  turbidity_fnu  temp_c  latitude  longitude  \
0                   6547.7          

In [3]:
# Select the "dic_micromol_kg" column (all rows) from the loaded frame and print it.
DIC_data = terminos_df.loc[:, "dic_micromol_kg"]
print(DIC_data)

0      3915
1      3698
2      3724
3      3667
4      2928
       ... 
101    2715
102    2638
103    2608
104    2605
105    2542
Name: dic_micromol_kg, Length: 106, dtype: int64


In [4]:
import pandas as pd
from scipy.stats import shapiro

# Shapiro-Wilk normality test on the DIC column
# (H0: the sample was drawn from a normal distribution).
stat, p_value = shapiro(DIC_data)

print("Shapiro-Wilk Test")
# The p-value uses a general ("g") format: a fixed ".4f" prints very small
# p-values as "0.0000", which reads as an exact zero. With ".4g" they appear
# in scientific notation (e.g. 1.234e-08) while ordinary values stay decimal.
print(f"Statistic = {stat:.4f}, p-value = {p_value:.4g}")

# Interpretation: compare the p-value against the chosen significance level.
alpha = 0.05
if p_value > alpha:
    print("✅ The data looks Gaussian (fail to reject H0)")
else:
    print("❌ The data does not look Gaussian (reject H0)")


Shapiro-Wilk Test
Statistic = 0.8600, p-value = 0.0000
❌ The data does not look Gaussian (reject H0)


In [9]:
# Select the "sal_psu" column (all rows) from the loaded frame and print it.
salinity_data = terminos_df.loc[:, "sal_psu"]
print(salinity_data)

0       3.56
1      16.97
2       8.94
3      13.87
4      28.06
       ...  
101    22.81
102    22.57
103    24.53
104    22.57
105    27.52
Name: sal_psu, Length: 106, dtype: float64


In [11]:
import pandas as pd
from scipy.stats import shapiro

# Shapiro-Wilk normality test on the salinity column
# (H0: the sample was drawn from a normal distribution).
stat, p_value = shapiro(salinity_data)

print("Shapiro-Wilk Test")
# The p-value uses a general ("g") format: a fixed ".4f" prints very small
# p-values as "0.0000", which reads as an exact zero. With ".4g" they appear
# in scientific notation (e.g. 1.234e-08) while ordinary values stay decimal.
print(f"Statistic = {stat:.4f}, p-value = {p_value:.4g}")

# Interpretation: compare the p-value against the chosen significance level.
alpha = 0.05
if p_value > alpha:
    print("✅ The data looks Gaussian (fail to reject H0)")
else:
    print("❌ The data does not look Gaussian (reject H0)")


Shapiro-Wilk Test
Statistic = 0.8867, p-value = 0.0000
❌ The data does not look Gaussian (reject H0)
