Python Script
1) Convert .txt files to .csv files
2) Remove header lines starting with %
3) Remove unnecessary columns
4) Save all files in .csv format


In [None]:
import os
import pandas as pd

def remove_columns(data, columns_to_remove):
    """Return a new DataFrame equal to ``data`` minus the listed columns.

    Labels in ``columns_to_remove`` that do not exist in ``data`` are
    ignored instead of raising ``KeyError``. ``data`` itself is not modified.
    """
    return data.drop(labels=columns_to_remove, axis=1, errors='ignore')

def remove_header(file_path):
    """Return the lines of ``file_path`` that do not start with '%'.

    Lines are returned in file order with their trailing newline kept.
    Only a '%' in the very first column marks a header line; a '%'
    preceded by whitespace does not.
    """
    with open(file_path, 'r') as source:
        return [row for row in source if not row.startswith('%')]

def convert_to_csv(input_directory, output_directory):
    """Write a header-free ``.csv`` copy of every ``.txt`` file.

    Each ``<name>.txt`` directly inside ``input_directory`` is written to
    ``<output_directory>/<name>.csv`` with its '%'-prefixed header lines
    removed (via ``remove_header``). The remaining lines are written
    unchanged; no parsing or re-formatting is done.

    Args:
        input_directory: Directory scanned (non-recursively) for ``.txt``
            files.
        output_directory: Destination directory; created if missing.

    Raises:
        FileNotFoundError: If ``input_directory`` does not exist.
    """
    # exist_ok=True replaces the separate "exists?" check, which could race
    # with another process creating the directory in between.
    os.makedirs(output_directory, exist_ok=True)

    for filename in os.listdir(input_directory):
        txt_file_path = os.path.join(input_directory, filename)

        # Skip other extensions, and also a sub-directory that happens to
        # be named "*.txt" (opening it as a file would raise).
        if not filename.endswith(".txt") or not os.path.isfile(txt_file_path):
            continue

        stem = os.path.splitext(filename)[0]
        csv_file_path = os.path.join(output_directory, stem + ".csv")

        # Remove header lines
        lines = remove_header(txt_file_path)

        # newline='' writes the '\n' line endings as-is instead of
        # translating them to the platform default.
        with open(csv_file_path, 'w', newline='') as csv_file:
            csv_file.writelines(lines)

        print(f"Converted '{filename}' to CSV.")

# --- Directory configuration ---
# Folder containing the source .txt files.
input_txt_directory = 'Marathi Trial-1 (76-100 words)'
# Intermediate folder for the header-stripped CSV copies; convert_to_csv
# creates it when it is missing.
converted_csv_directory = 'converted_csv_files'
# Final destination for the column-trimmed CSV files. pandas does not create
# missing directories, so this path has to exist by the time files are written.
output_csv_directory = 'Marathi Trial-1 (76-100 words) CSV Files'

# --- Columns dropped from every DataFrame ---
# Note that each label starts with a single space.
columns_to_remove = [
    ' Accel Channel 0',
    ' Accel Channel 1',
    ' Accel Channel 2',
    ' Other',
    ' Other.1',
    ' Other.2',
    ' Other.3',
    ' Other.4',
    ' Other.5',
    ' Other.6',
    ' Analog Channel 0',
    ' Analog Channel 1',
    ' Analog Channel 2',
    ' Timestamp',
    ' Other.7',
    ' Timestamp (Formatted)',
]

# Pipeline: (1) copy each .txt file to an intermediate CSV without its '%'
# header lines, then (2) drop the unwanted columns and save the result.
try:
    # Stage 1: convert text files to header-free CSV files.
    convert_to_csv(input_txt_directory, converted_csv_directory)

    # pandas does not create missing directories, so make sure the
    # destination exists before any file is written to it.
    os.makedirs(output_csv_directory, exist_ok=True)

    csv_filenames = [name for name in os.listdir(converted_csv_directory)
                     if name.endswith(".csv")]
    if not csv_filenames:
        print(f"Error: No CSV files found in '{converted_csv_directory}'.")

    # Stage 2: trim the columns of every intermediate CSV file.
    for filename in csv_filenames:
        csv_file_path = os.path.join(converted_csv_directory, filename)

        # The first remaining line of the file is used as the column header.
        try:
            data = pd.read_csv(csv_file_path)
        except pd.errors.EmptyDataError:
            # Nothing was left after header removal. Skip only this file
            # instead of aborting the files that come after it.
            print(f"Skipped '{filename}': file contains no data.")
            continue

        # Remove specified columns
        data_modified = remove_columns(data, columns_to_remove)

        # Save the modified DataFrame to a new CSV file
        output_csv_path = os.path.join(output_csv_directory, f"modified_{filename}")
        data_modified.to_csv(output_csv_path, index=False)

        print(f"Modified data saved to '{output_csv_path}'.")

except FileNotFoundError as e:
    # Name the path that is actually missing; it is not necessarily the
    # input directory.
    print(f"Error: Path not found: '{e.filename or e}'.")
except Exception as e:
    # Top-level boundary of the script: report the problem instead of
    # ending with a traceback.
    print(f"An error occurred: {e}")
