In [27]:
import pandas as pd
from sklearn.experimental import enable_iterative_imputer  # To enable the IterativeImputer
from sklearn.impute import IterativeImputer

# Fill missing values in the selected measurement columns with MICE
# (scikit-learn's IterativeImputer), using the phytoplankton count as an extra
# predictor, then save the completed table to a new CSV file.

# Load the dataset
file_path = 'Complete.csv'
df = pd.read_csv(file_path)

# List of columns to fill based on their correlation with Phytoplankton (cells/ml)
columns_to_fill = [
    'pH (units)',
    'Ammonia (mg/L)',
    'Nitrate (mg/L)',
    'Inorganic Phosphate (mg/L)',
    'BOD (mg/l)',
    'Dissolved Oxygen (mg/l)',
]

# Define the predictors to be used. The predictor column takes part in the
# imputation model, but its own imputed values are NOT written back to `df`.
predictors = ['Phytoplankton (cells/ml)']

# Create a subset of the DataFrame including the predictors and the columns to fill
df_subset = df[predictors + columns_to_fill]

# By default IterativeImputer drops features that have no observed values at
# all, so the returned array would have fewer columns than `df_subset` and the
# relabelling below would fail with an opaque shape error. Fail early instead.
empty_columns = [col for col in df_subset.columns if df_subset[col].isna().all()]
if empty_columns:
    raise ValueError(
        f"Cannot impute columns with no observed values: {empty_columns}"
    )

# Initialize the MICE (Iterative Imputer); random_state fixes the seed so the
# result is reproducible between runs.
# NOTE(review): imputed values are not constrained to a physical range (e.g.
# concentrations >= 0) -- consider min_value/max_value if that matters; confirm.
mice_imputer = IterativeImputer(max_iter=10, random_state=0)

# Apply MICE imputation to the relevant subset of the DataFrame.
# fit_transform returns a plain NumPy array, so restore both the column labels
# and the original row index; the assignment below aligns on index labels and
# would otherwise silently misplace values if `df` ever has a non-default index.
df_imputed = pd.DataFrame(
    mice_imputer.fit_transform(df_subset),
    columns=df_subset.columns,
    index=df_subset.index,
)

# Replace the original columns with the imputed ones
df[columns_to_fill] = df_imputed[columns_to_fill]

# Check if missing values are filled (every count should be 0)
print(df[columns_to_fill].isnull().sum())

# Save the updated dataset with imputed values to a new CSV file
df.to_csv('Complete_MICE_Imputed.csv', index=False)


pH (units)                    0
Ammonia (mg/L)                0
Nitrate (mg/L)                0
Inorganic Phosphate (mg/L)    0
BOD (mg/l)                    0
Dissolved Oxygen (mg/l)       0
dtype: int64
