## Import Packages and Load Data

In [1]:
import csv

def load_data(file_path):
    """ Load (text, label) pairs from a comma-separated text file.

    Each non-blank line is split on commas. The first field is kept as-is as
    the text, and the second field, stripped of surrounding whitespace, is the
    label. Any further comma-separated fields on the line are ignored.

    Args:
        file_path: Path of the UTF-8 encoded file to read.

    Returns:
        A list of ``(text, label)`` tuples in file order.

    Raises:
        IndexError: If a non-blank line contains no comma.
    """
    data = []
    with open(file_path, 'r', encoding='utf-8') as file:
        # Iterate the file directly instead of materialising every line first
        for line in file:
            # Blank lines (e.g. stray trailing newlines) carry no record
            if not line.strip():
                continue
            # Split once and reuse the fields
            fields = line.split(',')
            data.append((fields[0], fields[1].strip()))
    return data

# Load the three dataset splits; each one is a list of (text, label) tuples.
train = load_data('../data/FMC-MWO2KG/train.txt')
test = load_data('../data/FMC-MWO2KG/test.txt')
# The validation split is read from dev.txt.
val = load_data('../data/FMC-MWO2KG/dev.txt')
# Concatenate all splits into one list of records.
data = train + test + val

In [2]:
# Count how often each failure mode code occurs in the labelled records
def get_codes(lines):
    """ Count the failure mode codes in ``lines``, most frequent first.

    Args:
        lines: Iterable of indexable records whose element at index 1 is the
            failure mode code (e.g. ``(text, code)`` tuples).

    Returns:
        A dict mapping each code to its number of occurrences, ordered by
        descending count. Codes with equal counts keep the order in which
        they were first seen.
    """
    failure_mode_codes = {}
    for line in lines:
        code = line[1]
        failure_mode_codes[code] = failure_mode_codes.get(code, 0) + 1
    # sorted() is stable, so ties keep their first-seen order
    ranked = sorted(failure_mode_codes.items(), key=lambda item: item[1], reverse=True)
    return dict(ranked)

# Print a table of failure mode codes and their counts, then the grand total
def show_codes(lines):
    """ Print every failure mode code with its count, followed by the total. """
    counts = get_codes(lines)
    for code, occurrences in counts.items():
        # Left-align the code in a 30-character column so the counts line up
        print("{:<30} {}".format(code, occurrences))
    print("Total:", sum(counts.values()))

In [3]:
# FMC-MWO2KG raw dataset analysis and alignment
def raw_mwo2kg_analysis(obs_data,
                        raw_path='../data/FMC-MWO2KG/raw.csv',
                        aligned_path='../data/FMC-MWO2KG/aligned.txt'):
    """ Align labelled observations with the full texts of the raw MWO2KG CSV.

    Every raw row whose column 2 is not "Not an observation" contributes a
    ``(full_text, label)`` pair, where ``full_text`` is column 3 cut at the
    first run of three spaces with double quotes removed. A pair is kept when
    some observation has the same label and its text is a substring of
    ``full_text``. Each distinct pair is kept once, in the order it is first
    matched.

    Args:
        obs_data: Iterable of indexable records with the observation text at
            index 0 and the failure mode label at index 1.
        raw_path: Path of the raw CSV file to read.
        aligned_path: Path of the CSV file the aligned pairs are written to.

    Returns:
        None. The number of aligned pairs is printed and the pairs are written
        to ``aligned_path``.
    """
    with open(raw_path, 'r', encoding='utf-8') as file:
        rows = list(csv.reader(file))

    raw_data = []
    for row in rows:
        # Blank or truncated rows have no label/text columns to read
        if len(row) < 4:
            continue
        if row[2] != "Not an observation":
            # Keep only the text before the first triple-space separator
            full = row[3].split("   ")[0].replace('"', '')
            raw_data.append((full, row[2]))

    aligned_data = []
    # Set used for de-duplication so membership tests stay constant-time
    seen = set()
    for obs in obs_data:
        for candidate in raw_data:
            if candidate in seen:
                continue
            if obs[1] == candidate[1] and obs[0] in candidate[0]:
                seen.add(candidate)
                aligned_data.append(candidate)

    # Report how many full sentences were aligned, then persist them as CSV
    print(f"Aligned Data: {len(aligned_data)}")
    with open(aligned_path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerows(aligned_data)

## FMC-MWO2KG Dataset Analysis

In [4]:
# Print each failure mode code with its number of occurrences across the
# combined train, test and validation records, followed by the overall total
show_codes(data)

Minor in-service problems      137
Breakdown                      81
Structural deficiency          69
Electrical                     58
Plugged / choked               50
Leaking                        44
Failure to start on demand     28
Abnormal instrument reading    23
Overheating                    20
Fail to function               20
Vibration                      17
Low output                     17
Other                          12
Spurious stop                  10
Noise                          8
Contamination                  8
Erratic output                 8
Failure to stop on demand      7
High output                    5
Fail to open                   2
Fail to close                  1
Failure to rotate              1
Total: 626


In [5]:
# Align each full MWO sentence with its failure mode via its labelled
# observations; prints the number of aligned pairs and writes them to aligned.txt
raw_mwo2kg_analysis(data)

Aligned Data: 570
