In [None]:
import pandas as pd

# Load the raw sleep-efficiency dataset.
data = pd.read_csv("/content/Sleep_Efficiency.csv")

# Count the missing entries in every column before touching anything.
missing_values = data.isnull().sum()
print("Missing values before imputation:\n", missing_values)

# Mean imputation: fill each numeric column's gaps with that column's mean.
# numeric_only=True restricts the mean to numeric columns. Without it, a frame
# that also holds non-numeric columns (presumably Gender, Bedtime, ... here)
# triggers a FutureWarning on older pandas and a TypeError on pandas >= 2.0.
# Non-numeric columns get no fill value, so any gaps in them are left as-is.
column_means = data.mean(numeric_only=True)
data_imputed = data.fillna(column_means)

# Re-count missing entries to confirm the imputation covered the gaps.
missing_values_imputed = data_imputed.isnull().sum()
print("\nMissing values after imputation:\n", missing_values_imputed)

# Persist the imputed frame next to the original (row index not written).
data_imputed.to_csv("/content/Sleep_Efficiency_Imputed.csv", index=False)


Missing values before imputation:
 ID                         0
Age                        0
Gender                     0
Bedtime                    0
Wakeup time                0
Sleep duration             0
Sleep efficiency           0
REM sleep percentage       0
Deep sleep percentage      0
Light sleep percentage     0
Awakenings                20
Caffeine consumption      25
Alcohol consumption       14
Smoking status             0
Exercise frequency         6
dtype: int64

Missing values after imputation:
 ID                        0
Age                       0
Gender                    0
Bedtime                   0
Wakeup time               0
Sleep duration            0
Sleep efficiency          0
REM sleep percentage      0
Deep sleep percentage     0
Light sleep percentage    0
Awakenings                0
Caffeine consumption      0
Alcohol consumption       0
Smoking status            0
Exercise frequency        0
dtype: int64


  data_imputed = data.fillna(data.mean())


In [1]:
import pandas as pd
from sklearn.impute import KNNImputer
from sklearn.preprocessing import OrdinalEncoder

# Load the raw sleep-efficiency dataset.
data = pd.read_csv("/content/Sleep_Efficiency.csv")

# Ordinal-encode every column so that all strategies (including KNN) operate
# on a fully numeric frame. Each column's sorted distinct values become the
# codes 0, 1, 2, ...
# NOTE(review): this relies on OrdinalEncoder passing missing values through
# as NaN (recent scikit-learn behaviour) - confirm for the installed version.
encoder = OrdinalEncoder()
data_encoded = pd.DataFrame(encoder.fit_transform(data), columns=data.columns)

# Imputation strategies, keyed by the label used in the output file name.
# Every strategy receives the encoded frame and returns a frame of codes.
# Caveat: statistics such as the mean are computed on ordinal codes, i.e. on
# the rank of a value among the column's distinct values, not on the raw value.
imputation_methods = {
    # DataFrame.bfill()/ffill() replace the deprecated fillna(method=...).
    # A gap with no later (bfill) or earlier (ffill) observation stays NaN.
    'Next Value': lambda df: df.bfill(),
    'Previous Value': lambda df: df.ffill(),
    'K Nearest Neighbors': lambda df: pd.DataFrame(KNNImputer().fit_transform(df), columns=df.columns),
    'Maximum Value': lambda df: df.fillna(df.max()),
    'Minimum Value': lambda df: df.fillna(df.min()),
    'Most Frequent Value': lambda df: df.apply(lambda x: x.fillna(x.value_counts().index[0])),
    # 'Average Value' and 'Mean' are the same strategy; both keys are kept so
    # the same set of output files is produced.
    'Average Value': lambda df: df.fillna(df.mean().round()),
    'Linear Interpolation': lambda df: df.interpolate(method='linear', axis=0, limit_direction='both'),
    'Mean': lambda df: df.fillna(df.mean().round()),
    'Median Value': lambda df: df.fillna(df.median()),
    # The fill value is an ordinal CODE, not a raw value: code 0 decodes to
    # the smallest category of each column. Change it to another valid code.
    'Fixed Value': lambda df: df.fillna(0)
}

# Impute, decode and save, one strategy at a time.
imputed_data = {}
for method_name, imputation_method in imputation_methods.items():
    imputed_codes = imputation_method(data_encoded.copy())

    # KNN, interpolation and the median can yield fractional codes.
    # inverse_transform looks categories up by integer-cast code, which would
    # truncate (2.9 -> category 2), so snap to the nearest valid code first.
    imputed_codes = imputed_codes.round()

    # Map the codes back to the original category values.
    imputed_df = pd.DataFrame(
        encoder.inverse_transform(imputed_codes),
        columns=imputed_codes.columns,
    )
    imputed_data[method_name] = imputed_df

    # One CSV per strategy; the strategy label is part of the file name.
    imputed_df.to_csv(f"/content/Sleep_Efficiency_Imputed_{method_name}.csv", index=False)
