In [2]:
import pandas as pd
import glob
import os

### Load the Lxx data once and cache it in `lxx_data` to avoid re-reading the CSV files

In [3]:
# Build `lxx_data`: the Lxx rows of every CSV file in the data directory,
# concatenated into a single DataFrame.
directory = 'Data/qw 16 unit oper data 14 may 24/'


def condition(x):
    """Return True where PD1 is not the literal string '0.00'.

    Accepts a single row (returns a bool) or a whole DataFrame (returns a
    boolean Series), so it can be applied vectorized.
    """
    # NOTE(review): assumes PD1 is read as text; if pandas parses the column as
    # numeric, this comparison is always True and nothing is filtered — confirm.
    return x['PD1'] != '0.00'


# Columns excluded while reading each file.
skip_cols = ['STATE', 'CRC', 'PF-F', 'TC-F', 'PBIT-F', 'CRC-F', 'CNT']

# Keep only rows whose "Lxx" value starts with "L" followed by at least one digit.
pattern = r'^L\d+'

# glob returns paths in a filesystem-dependent order; sorting makes the row
# order of the concatenated frame reproducible between runs and machines.
csv_paths = sorted(glob.glob(os.path.join(directory, '*.csv')))
if not csv_paths:
    # Without this check pd.concat fails later with "No objects to concatenate",
    # which hides the real cause (wrong or empty directory).
    raise FileNotFoundError(f"No CSV files found in {directory!r}")

processed_dfs = []

for file_path in csv_paths:
    # skiprows=1: the first line of each file is skipped before the header is read.
    initial_df = pd.read_csv(
        file_path,
        skiprows=1,
        usecols=lambda col: col not in skip_cols,
        low_memory=False,
    )

    # Give the first column a fixed name regardless of its header in the file.
    initial_df = initial_df.rename(columns={initial_df.columns[0]: 'Date_Time'})

    # Vectorized filter: one column comparison instead of a Python call per row.
    filtered_df = initial_df[condition(initial_df)].reset_index(drop=True)

    # na=False drops rows where "Lxx" is missing instead of propagating NaN.
    final_df = filtered_df[filtered_df["Lxx"].str.match(pattern, na=False)].reset_index(drop=True)

    processed_dfs.append(final_df)

lxx_data = pd.concat(processed_dfs, ignore_index=True)

### Load the PSU data once and cache it in `psu_data` to avoid re-reading the CSV files

In [4]:
# Build `psu_data`: the PSU rows of every CSV file in the data directory,
# concatenated into a single DataFrame.
directory = 'Data/qw 16 unit oper data 14 may 24/'

# Columns excluded while reading each file.
skip_cols = ['V_LIM', 'I_LIM', 'V_OP', 'I_OP', 'V_MEAS', 'O_REG', 'Q_REG', 'CNT']

# Keep only rows whose "PSUx" value starts with "PSU" followed by at least one digit.
pattern = r'^PSU\d+'

# glob returns paths in a filesystem-dependent order; sorting makes the row
# order of the concatenated frame reproducible between runs and machines.
csv_paths = sorted(glob.glob(os.path.join(directory, '*.csv')))
if not csv_paths:
    # Without this check pd.concat fails later with "No objects to concatenate",
    # which hides the real cause (wrong or empty directory).
    raise FileNotFoundError(f"No CSV files found in {directory!r}")

processed_dfs = []

for file_path in csv_paths:
    # skiprows=3: the first three lines of each file are skipped before the
    # header is read (presumably the PSU header sits on line 4 — TODO confirm).
    initial_df = pd.read_csv(
        file_path,
        skiprows=3,
        usecols=lambda col: col not in skip_cols,
        low_memory=False,
    )

    # Give the first column a fixed name regardless of its header in the file.
    initial_df = initial_df.rename(columns={initial_df.columns[0]: 'Date_Time'})

    # na=False drops rows where "PSUx" is missing instead of propagating NaN.
    final_df = initial_df[initial_df["PSUx"].str.match(pattern, na=False)].reset_index(drop=True)

    processed_dfs.append(final_df)

psu_data = pd.concat(processed_dfs, ignore_index=True)

In [5]:
# Step 1: remove every Lxx row that has at least one missing value (in place).
lxx_data.dropna(inplace=True)

# Step 2: collect the index labels of rows whose reading is the literal string
# '0.00' — PD2 for the Lxx frame, I_MEAS for the PSU frame.
zero_pd2_labels = lxx_data.index[(lxx_data['PD2'] == '0.00').to_numpy()]
zero_i_meas_labels = psu_data.index[(psu_data['I_MEAS'] == '0.00').to_numpy()]

# Step 3: drop those rows. Both frames are modified in place, so the existing
# objects (and any other names bound to them) see the change.
lxx_data.drop(index=zero_pd2_labels, inplace=True)
psu_data.drop(index=zero_i_meas_labels, inplace=True)

In [6]:
# Bind the "final" names to the cleaned frames. These are aliases, not copies:
# anything later done through final_lxx_data / final_psu_data is also visible
# through lxx_data / psu_data.
final_lxx_data, final_psu_data = lxx_data, psu_data

In [7]:
# Layout of the Date_Time strings; note the ':' (not '.') before the
# fractional-seconds field.
datetime_format = "%Y-%m-%d %H:%M:%S:%f"

# Parse the Date_Time column of both frames, Lxx first and then PSU.
for _frame in (final_lxx_data, final_psu_data):
    _frame['Date_Time'] = pd.to_datetime(_frame['Date_Time'], format=datetime_format)

In [9]:
# Export both cleaned frames to CSV without the index column.
lxx_csv_path = 'Created_files/final_lxx_data.csv'
psu_csv_path = 'Created_files/final_psu_data.csv'

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before writing (no-op when it is already there).
for _csv_path in (lxx_csv_path, psu_csv_path):
    os.makedirs(os.path.dirname(_csv_path), exist_ok=True)

final_lxx_data.to_csv(lxx_csv_path, index=False)
final_psu_data.to_csv(psu_csv_path, index=False)

### The datasets are now cleaned and exported to CSV; they are pushed to MongoDB via a batch file