# Data Cleaning File 
- Cleans and Removes the negative values 
- Checking the Null Values 
- Drops the rows which cotains the null or 0 values

#### Removing all the negative value rows from the outputs:

{'voltage_rise_time_pulse1', 'voltage_rise_time_pulse2',
'voltage_fall_time_pulse1', 'voltage_fall_time_pulse2',
'current_rise_time_pulse1', 'current_rise_time_pulse2',
'current_fall_time_pulse1', 'current_fall_time_pulse2'}

In [3]:

import os
import pandas as pd

# Step 1: for every MOSFET CSV in `input_dir`, drop the rows whose rise/fall
# time columns are not valid non-negative numbers, and write the result to
# `output_dir` for manual inspection.

# Input and output paths
input_dir = "merged_mosfets_cleaned"
output_dir = "mosfets_step1_no_negative"
os.makedirs(output_dir, exist_ok=True)

# Time-based EMI target columns to check for negatives.
# Rise and fall times cannot be negative, so rows containing one are removed.
time_columns = [
    'voltage_rise_time_pulse1', 'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1', 'voltage_fall_time_pulse2',
    'current_rise_time_pulse1', 'current_rise_time_pulse2',
    'current_fall_time_pulse1', 'current_fall_time_pulse2'
]

print(" Removing negative rise/fall times from each MOSFET file \n")

# os.listdir() returns entries in arbitrary order; sorting keeps the
# processing order (and therefore the printed log) reproducible.
for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith(".csv"):
        continue

    path = os.path.join(input_dir, filename)
    df = pd.read_csv(path)

    print(f" {filename}")
    print(f"   Initial rows: {len(df)}")

    times = df[time_columns]

    # Keep only rows where every time column is >= 0.
    # NOTE: NaN fails the `>= 0` comparison, so rows with a missing time value
    # are dropped here as well. They are counted separately below so that the
    # log does not report them as "negative".
    keep = (times >= 0).all(axis=1)
    has_negative = (times < 0).any(axis=1)
    df_clean = df[keep]

    removed_negative = int(has_negative.sum())
    # Rows dropped only because of a missing time value (no negative in the row).
    removed_missing = int((~keep & ~has_negative).sum())

    # Checking for NaNs / null values in the cleaned df (all columns)
    null_counts = df_clean.isnull().sum()
    total_nulls = null_counts.sum()

    # Printing the removal summary and the final checks
    print(f"   Removed rows with negative time values: {removed_negative}")
    if removed_missing > 0:
        print(f"   Removed rows with missing (NaN) time values: {removed_missing}")
    print(f"   Remaining rows: {len(df_clean)}")
    print(f"   Null values remaining: {total_nulls}")
    if total_nulls > 0:
        # Per-column breakdown of the nulls that are still present
        print(null_counts[null_counts > 0])

    # Saving the cleaned data into CSV files in the output directory for manual checks
    output_path = os.path.join(output_dir, filename.replace("_merged.csv", "_no_negative.csv"))
    df_clean.to_csv(output_path, index=False)
    print(f" Saved cleaned file: {output_path}\n")

print(" All files processed (Step 1 complete).\nNow ready for outlier removal in Step 2.")


 Removing negative rise/fall times from each MOSFET file 

 C2M0025120D_merged.csv
   Initial rows: 158082
   Removed rows with negative time values: 38296
   Remaining rows: 119786
   Null values remaining: 0
 Saved cleaned file: mosfets_step1_no_negative\C2M0025120D_no_negative.csv

 C2M0040120D_merged.csv
   Initial rows: 404934
   Removed rows with negative time values: 106516
   Remaining rows: 298418
   Null values remaining: 0
 Saved cleaned file: mosfets_step1_no_negative\C2M0040120D_no_negative.csv

 C2M0080120D_merged.csv
   Initial rows: 410961
   Removed rows with negative time values: 6018
   Remaining rows: 404943
   Null values remaining: 0
 Saved cleaned file: mosfets_step1_no_negative\C2M0080120D_no_negative.csv

 C2M0160120D_merged.csv
   Initial rows: 158538
   Removed rows with negative time values: 268
   Remaining rows: 158270
   Null values remaining: 0
 Saved cleaned file: mosfets_step1_no_negative\C2M0160120D_no_negative.csv

 C2M0280120D_merged.csv
   Initial 

**Now that all the negatives are removed we need to remove the rows which have the zero or null values and save the files or csv.**

In [5]:
import os
import pandas as pd

# Step 2: for every step-1 CSV in `input_dir`, drop the rows whose rise/fall
# time columns contain a zero or a null value, and write the result to
# `output_dir` for manual inspection.

# === Setup ===
input_dir = "mosfets_step1_no_negative"
output_dir = "mosfets_step2_no_zeros"
os.makedirs(output_dir, exist_ok=True)

# Time-based EMI target columns to check for zero / null values
time_columns = [
    'voltage_rise_time_pulse1', 'voltage_rise_time_pulse2',
    'voltage_fall_time_pulse1', 'voltage_fall_time_pulse2',
    'current_rise_time_pulse1', 'current_rise_time_pulse2',
    'current_fall_time_pulse1', 'current_fall_time_pulse2'
]

print(" Removing rows with ZERO rise/fall times \n")

for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith(".csv"):
        continue

    path = os.path.join(input_dir, filename)
    df = pd.read_csv(path)

    print(f" {filename}")
    print(f" Initial rows: {len(df)}")

    times = df[time_columns]

    # Rows with any zero time value.
    has_zero = (times == 0).any(axis=1)
    # Rows with any null time value. `NaN != 0` evaluates to True, so a plain
    # "!= 0" filter would silently keep these rows; they are dropped explicitly.
    has_null = times.isna().any(axis=1)
    df_clean = df[~(has_zero | has_null)]

    removed_zero = int(has_zero.sum())
    # Rows dropped only because of a null time value (no zero in the row).
    removed_null = int((has_null & ~has_zero).sum())

    # Null values still present in the cleaned df (all columns, not only time columns)
    null_counts = df_clean.isnull().sum()
    total_nulls = null_counts.sum()

    print(f"   Removed rows with 0.0 time values: {removed_zero}")
    if removed_null > 0:
        print(f"   Removed rows with null time values: {removed_null}")
    print(f"   Remaining rows: {len(df_clean)}")
    print(f"   Null values remaining: {total_nulls}")
    if total_nulls > 0:
        # Per-column breakdown of the nulls that are still present
        print(null_counts[null_counts > 0])

    # Saving the cleaned files again for the manual checks
    output_path = os.path.join(output_dir, filename.replace("_no_negative.csv", "_no_zeros.csv"))
    df_clean.to_csv(output_path, index=False)
    print(f" Saved cleaned file: {output_path}\n")

print(" All files processed — Step 2 complete.\n Next steps: Visualizations.")


 Removing rows with ZERO rise/fall times 

 C2M0025120D_no_negative.csv
 Initial rows: 119786
   Removed rows with 0.0 time values: 0
   Remaining rows: 119786
   Null values remaining: 0
 Saved cleaned file: mosfets_step2_no_zeros\C2M0025120D_no_zeros.csv

 C2M0040120D_no_negative.csv
 Initial rows: 298418
   Removed rows with 0.0 time values: 0
   Remaining rows: 298418
   Null values remaining: 0
 Saved cleaned file: mosfets_step2_no_zeros\C2M0040120D_no_zeros.csv

 C2M0080120D_no_negative.csv
 Initial rows: 404943
   Removed rows with 0.0 time values: 0
   Remaining rows: 404943
   Null values remaining: 0
 Saved cleaned file: mosfets_step2_no_zeros\C2M0080120D_no_zeros.csv

 C2M0160120D_no_negative.csv
 Initial rows: 158270
   Removed rows with 0.0 time values: 0
   Remaining rows: 158270
   Null values remaining: 0
 Saved cleaned file: mosfets_step2_no_zeros\C2M0160120D_no_zeros.csv

 C2M0280120D_no_negative.csv
 Initial rows: 429245
   Removed rows with 0.0 time values: 0
   Rem