**Raw Data Cleaning**

In [39]:
import pandas as pd
import os 

Adjust the file path to specific file location

In [40]:
file_path = r"C:\Users\U1078446\Downloads\TestingMFGData.csv"

In [41]:

df = pd.read_csv(file_path)  

df.head()



Unnamed: 0.1,Unnamed: 0,A201-BCP001-BIO0001-TT00026.BIO-BIO-TEM-CTR,A201-BCP001-BIO0001-AIT0024.BIO-DOX-DOX-CTR,A201-BCP001-BIO0001-AIT0022.BIO-PH1-PHZ-CTR,Unnamed: 4
0,Time,Value | First,Value | First,Value | First,
1,6/17/2025 11:45:00 PM,36.93,61.39,6.86,
2,6/18/2025 12:00:00 AM,36.82,58.43,6.87,
3,6/18/2025 12:15:00 AM,36.95,62.15,6.87,
4,6/18/2025 12:30:00 AM,36.94,63.31,6.89,


To remove the 1st row and remove unnecessary columns

In [42]:
# Remove the first row 
df = df.iloc[1:]

# Remove columns that are empty
df = df.loc[:, ~((df.isna() | df.isnull() | (df == '')).all())]

df.head()

Unnamed: 0.1,Unnamed: 0,A201-BCP001-BIO0001-TT00026.BIO-BIO-TEM-CTR,A201-BCP001-BIO0001-AIT0024.BIO-DOX-DOX-CTR,A201-BCP001-BIO0001-AIT0022.BIO-PH1-PHZ-CTR
1,6/17/2025 11:45:00 PM,36.93,61.39,6.86
2,6/18/2025 12:00:00 AM,36.82,58.43,6.87
3,6/18/2025 12:15:00 AM,36.95,62.15,6.87
4,6/18/2025 12:30:00 AM,36.94,63.31,6.89
5,6/18/2025 12:45:00 AM,37.06,59.93,6.89


Rename and parse the time column 

In [43]:
# Rename the first column to 'Time'
df = df.rename(columns={df.columns[0]: 'Time'})

# Parse the 'Time' column
df['Time'] = pd.to_datetime(df['Time'], format='%m/%d/%Y %I:%M:%S %p', errors='coerce')
df.set_index('Time', inplace=True)
df.sort_index(inplace=True)

df.head()


Unnamed: 0_level_0,A201-BCP001-BIO0001-TT00026.BIO-BIO-TEM-CTR,A201-BCP001-BIO0001-AIT0024.BIO-DOX-DOX-CTR,A201-BCP001-BIO0001-AIT0022.BIO-PH1-PHZ-CTR
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-06-17 23:45:00,36.93,61.39,6.86
2025-06-18 00:00:00,36.82,58.43,6.87
2025-06-18 00:15:00,36.95,62.15,6.87
2025-06-18 00:30:00,36.94,63.31,6.89
2025-06-18 00:45:00,37.06,59.93,6.89


**OPTIONAL** to rename the column head into its respective parameter names instead of its equipment ID name for easy viewing

In [44]:
# rename the columns
df.columns = [f'Parameter {i+1}' for i in range(len(df.columns))]

final check 

In [45]:
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1154 entries, 2025-06-17 23:45:00 to 2025-06-30 00:00:00
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Parameter 1  1154 non-null   object
 1   Parameter 2  1154 non-null   object
 2   Parameter 3  1154 non-null   object
dtypes: object(3)
memory usage: 36.1+ KB


Unnamed: 0_level_0,Parameter 1,Parameter 2,Parameter 3
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-06-17 23:45:00,36.93,61.39,6.86
2025-06-18 00:00:00,36.82,58.43,6.87
2025-06-18 00:15:00,36.95,62.15,6.87
2025-06-18 00:30:00,36.94,63.31,6.89
2025-06-18 00:45:00,37.06,59.93,6.89


SAVING THE FILE


In [46]:
original_filename = os.path.basename(file_path)
processed_filename = f"Processed_{original_filename}"
processed_filepath = os.path.join(os.path.dirname(file_path), processed_filename)

df.to_csv(processed_filepath, index=True)