In [5]:
import os
import pandas as pd

In [6]:
# Define the main directory containing the CSV files
main_folder = r"C:\UM\Project\PyBaMM\src\pybamm\Diffusion_Fit\PulseTest\TVS_21700_HybridPulse_40Deg"  # Change this to your folder path

# Define the new child directory for processed data
processed_folder = os.path.join(main_folder, "processed_data")
filtered_folder = os.path.join(main_folder, "filtered_data")
trial_folder = os.path.join(main_folder, "trial_data")
# Create the processed_data directory if it doesn't exist
os.makedirs(processed_folder, exist_ok=True)
# Create the filtered_data directory if it doesn't exist
os.makedirs(filtered_folder, exist_ok=True)
os.makedirs(trial_folder, exist_ok=True)

In [7]:
# Loop through all CSV files in the main folder
for file in os.listdir(main_folder):
    if file.endswith(".csv"):
        file_path = os.path.join(main_folder, file)

        try:
            # Open the file and detect the correct header row
            with open(file_path, "r", encoding="utf-8") as f:
                lines = f.readlines()

            # Find the header row index where "Prog Time" appears
            header_row_index = None
            for i, line in enumerate(lines):
                if "Prog Time" in line and "Current" in line and "Voltage" in line and "AhAccu" in line:
                    header_row_index = i
                    break
            
            if header_row_index is None:
                print(f"Skipping {file}: No valid header found.")
                continue  # Skip this file

            # Read the CSV file starting from the detected header row
            df = pd.read_csv(
                file_path,
                encoding="utf-8",
                engine="python",
                skiprows=header_row_index,  # Skip metadata rows
                
            )

            # Strip spaces from column names
            df.columns = df.columns.str.strip()

            # Extract required columns if they exist
            required_columns = ["Prog Time", "Current", "Voltage", "AhAccu"]
            if all(col in df.columns for col in required_columns):
                extracted_data = df[required_columns]

                # Save the cleaned data in the processed folder
                new_file_path = os.path.join(processed_folder, f"processed_{file}")
                extracted_data.to_csv(new_file_path, index=False)
                print(f"Processed and saved: {new_file_path}")
            else:
                print(f"Skipping {file}: Required columns not found in detected data.")

        except pd.errors.ParserError as e:
            print(f"Skipping {file} due to parsing error: {e}")
        except Exception as e:
            print(f"Error processing {file}: {e}")

print("Processing complete. Extracted files are saved in 'processed_data' folder.")

Processed and saved: C:\UM\Project\PyBaMM\src\pybamm\Diffusion_Fit\PulseTest\TVS_21700_HybridPulse_40Deg\processed_data\processed_57511_TS055759.csv
Processed and saved: C:\UM\Project\PyBaMM\src\pybamm\Diffusion_Fit\PulseTest\TVS_21700_HybridPulse_40Deg\processed_data\processed_57513_TS055761.csv
Processed and saved: C:\UM\Project\PyBaMM\src\pybamm\Diffusion_Fit\PulseTest\TVS_21700_HybridPulse_40Deg\processed_data\processed_57514_TS055762.csv
Processing complete. Extracted files are saved in 'processed_data' folder.


In [8]:
# Function to clean data and remove initial charge/discharge
def clean_and_filter_data(df):
    # Drop the second row if it contains text (units like [A], [V])
    if not df.iloc[1].str.isnumeric().all():
        df = df.iloc[2:].reset_index(drop=True)  # Skip the second row
    
    # Convert columns to numeric
    df["Prog Time"] = pd.to_numeric(df["Prog Time"], errors="coerce")
    df["Current"] = pd.to_numeric(df["Current"], errors="coerce")
    df["Voltage"] = pd.to_numeric(df["Voltage"], errors="coerce")
    
    # Drop any rows with NaN values (if conversion failed)
    df = df.dropna()

    # Remove initial long charge/discharge phase
    threshold = 1.2  # Small threshold to detect current fluctuations
    steady_time = 5000  # Ignore first 5000 points if current is steady

    cycling_start_idx = (df["Current"].diff().abs() > threshold).idxmax()
    if cycling_start_idx > steady_time:
        df = df.iloc[cycling_start_idx:].reset_index(drop=True)

    return df

In [9]:
# Process each CSV file
for file in os.listdir(processed_folder):
    if file.endswith(".csv"):
        file_path = os.path.join(processed_folder, file)

        try:
            # Read the CSV file
            df = pd.read_csv(file_path)

            # Clean and filter data
            df_cleaned = clean_and_filter_data(df)

            # Save the cleaned data
            new_file_path = os.path.join(filtered_folder, f"filtered_{file}")
            df_cleaned.to_csv(new_file_path, index=False)
            print(f"Filtered and saved: {new_file_path}")

        except Exception as e:
            print(f"Error processing {file}: {e}")

print("Filtering complete. Cleaned files are saved in 'filtered_data' folder.")

  df = pd.read_csv(file_path)


Filtered and saved: C:\UM\Project\PyBaMM\src\pybamm\Diffusion_Fit\PulseTest\TVS_21700_HybridPulse_40Deg\filtered_data\filtered_processed_57511_TS055759.csv


  df = pd.read_csv(file_path)


Filtered and saved: C:\UM\Project\PyBaMM\src\pybamm\Diffusion_Fit\PulseTest\TVS_21700_HybridPulse_40Deg\filtered_data\filtered_processed_57513_TS055761.csv


  df = pd.read_csv(file_path)


Filtered and saved: C:\UM\Project\PyBaMM\src\pybamm\Diffusion_Fit\PulseTest\TVS_21700_HybridPulse_40Deg\filtered_data\filtered_processed_57514_TS055762.csv
Filtering complete. Cleaned files are saved in 'filtered_data' folder.


In [10]:
# Process each CSV file
for file in os.listdir(filtered_folder):
    if file.endswith(".csv"):
        file_path = os.path.join(filtered_folder, file)

        try:
            # Read the CSV file
            df = pd.read_csv(file_path)

            filtered_df = df[df["Prog Time"] >= 215000]

            # Save the cleaned data
            new_file_path = os.path.join(trial_folder, f"trial_{file}")
            filtered_df.to_csv(new_file_path, index=False)
            print(f"saved: {new_file_path}")

        except Exception as e:
            print(f"Error processing {file}: {e}")

print("Filtering complete. Cleaned files are saved in 'filtered_data' folder.")

saved: C:\UM\Project\PyBaMM\src\pybamm\Diffusion_Fit\PulseTest\TVS_21700_HybridPulse_40Deg\trial_data\trial_filtered_processed_57511_TS055759.csv
saved: C:\UM\Project\PyBaMM\src\pybamm\Diffusion_Fit\PulseTest\TVS_21700_HybridPulse_40Deg\trial_data\trial_filtered_processed_57513_TS055761.csv
saved: C:\UM\Project\PyBaMM\src\pybamm\Diffusion_Fit\PulseTest\TVS_21700_HybridPulse_40Deg\trial_data\trial_filtered_processed_57514_TS055762.csv
Filtering complete. Cleaned files are saved in 'filtered_data' folder.
