# Missing Value Estimation

## Data Exploration

In [5]:
import pandas as pd
import numpy as np

# Location of each missing-data file, keyed by dataset number
file_paths = {
    1: "../data/MissingData1.txt",
    2: "../data/MissingData2.txt",
    3: "../data/MissingData3.txt",
}

# Loaded frames, keyed by the same dataset number as file_paths
missing_datasets = {}

# Read every file, turn the 1e99 missing-value placeholder into NaN,
# keep the frame, and report how much of it is missing.
for dataset_index, file_path in file_paths.items():
    print(f"Loading Dataset {dataset_index}...")

    # Headerless, tab-separated text; 1e99 is the placeholder the files
    # use for a missing entry, so it is swapped for NaN right after parsing.
    data = pd.read_csv(file_path, sep="\t", header=None).replace(1e99, np.nan)
    missing_datasets[dataset_index] = data

    # Cell counts: everything vs. the NaN cells only
    total_values = data.size
    missing_values = data.isna().sum().sum()
    missing_percentage = missing_values / total_values * 100

    # One report per dataset, closed by a 40-dash separator line
    print(
        f"Dataset {dataset_index} loaded:",
        f" - Shape: {data.shape}",
        f" - Total Values: {total_values}",
        f" - Missing Values: {missing_values} ({missing_percentage:.2f}%)",
        "-" * 40,
        sep="\n",
    )

Loading Dataset 1...
Dataset 1 loaded:
 - Shape: (242, 14)
 - Total Values: 3388
 - Missing Values: 118 (3.48%)
----------------------------------------
Loading Dataset 2...
Dataset 2 loaded:
 - Shape: (758, 50)
 - Total Values: 37900
 - Missing Values: 3762 (9.93%)
----------------------------------------
Loading Dataset 3...
Dataset 3 loaded:
 - Shape: (273, 79)
 - Total Values: 21567
 - Missing Values: 17752 (82.31%)
----------------------------------------
