In [None]:
import scipy.io

filepath='data/raw/'
matfile = scipy.io.loadmat(filepath)
print(matfile.keys())

In [2]:
import os
def find_all_mat_files_in_order(root_dir):   
    mat_files = []
    for dirpath, _, filenames in sorted(os.walk(root_dir)):
        for file in filenames:
            if file.endswith('.mat'):
                mat_files.append(os.path.join(dirpath, file))
    return mat_files

In [11]:
import math

def load_torque_to_hp(torque_nm, rpm):
    power_watts = (2 * math.pi * torque_nm * rpm) / 60
    power_hp = power_watts / 745.7
    return power_hp


In [69]:
def specific_info(base_info):
    info_mapping = {
        'KI04': {'extent_damage': 1, 'damage_method': 'LFT', 'manufacturer':'MTK'},
        'KI05': {'extent_damage': 1, 'damage_method': 'ENG', 'manufacturer':'IBU'},
        'KI07': {'extent_damage': 2, 'damage_method': 'ENG', 'manufacturer':'IBU'},
        'KI08': {'extent_damage': 2, 'damage_method': 'ENG', 'manufacturer':'IBU'},
        'KI14': {'extent_damage': 1, 'damage_method': 'LFT', 'manufacturer':'MTK'},
        'KI16': {'extent_damage': 3, 'damage_method': 'LFT', 'manufacturer':'FAG'},
        'KI17': {'extent_damage': 1, 'damage_method': 'LFT', 'manufacturer':'MTK'},
        'KI18': {'extent_damage': 2, 'damage_method': 'LFT', 'manufacturer':'MTK'},
        'KI21': {'extent_damage': 1, 'damage_method': 'LFT', 'manufacturer':'FAG'},
    }
    return info_mapping[base_info]

specific_info('KI04')

{'extent_damage': 1, 'damage_method': 'LFT', 'manufacturer': 'MTK'}

<ul>
<li>ENG - Electric Engraver
<li>EDM - Electrical Discharge Machining
<li>DRL - Drilled
<li>LFT - Lifetime Test
</ul>

<p>The KA08 bearing data were not used, because part of the data from one of the working condition is corrupted.</p>


In [68]:
import os
import re

dir = 'data/raw/paderborn/KI21'

list_files = find_all_mat_files_in_order(dir)
for file in list_files:
    base = re.findall(r'_(K[0AI]\d{2})_', file)[0]
    s_info = specific_info(base)
    
    key = re.findall(r'_K[0AI]\d{2}_', file)[0][2]
    dataset_name = 'Paderborn'
    filename = os.path.basename(file).split('.')[0]
    label = {'0':'N', 'I':'I', 'A':'O'}[key]
    sampling_rate = 64000
    rpm = {'09':'900', '15':'1500'}[re.findall(r'N(\d{2})', file)[0]]
    load = {'04':'400', '10':'1000'}[re.findall(r'_F(\d{2})_', file)[0]]
    load_torque = re.findall(r'_M(\d{2})_', file)[0][1]
    hp = round(load_torque_to_hp(int(load_torque)*0.1, int(rpm)), 2)
    extent_damage = s_info['extent_damage'] # verify
    damage_method = s_info['damage_method'] # verify
    bearing_type = 6203
    condition_bearing_health = {'0':'healthy', 'A':'faulty', 'I':'faulty'}[key]
    manufacturer = s_info['manufacturer'] # verify
    print(f'{dataset_name},{filename},{label},{sampling_rate},{rpm},{hp},{load},{extent_damage},{damage_method},{bearing_type},{condition_bearing_health},{manufacturer}')

Paderborn,N15_M07_F04_KI21_8,I,64000,1500,0.15,400,1,LFT,6203,faulty,FAG
Paderborn,N15_M07_F04_KI21_13,I,64000,1500,0.15,400,1,LFT,6203,faulty,FAG
Paderborn,N09_M07_F10_KI21_3,I,64000,900,0.09,1000,1,LFT,6203,faulty,FAG
Paderborn,N15_M01_F10_KI21_4,I,64000,1500,0.02,1000,1,LFT,6203,faulty,FAG
Paderborn,N09_M07_F10_KI21_20,I,64000,900,0.09,1000,1,LFT,6203,faulty,FAG
Paderborn,N15_M07_F10_KI21_9,I,64000,1500,0.15,1000,1,LFT,6203,faulty,FAG
Paderborn,N15_M07_F04_KI21_7,I,64000,1500,0.15,400,1,LFT,6203,faulty,FAG
Paderborn,N15_M01_F10_KI21_3,I,64000,1500,0.02,1000,1,LFT,6203,faulty,FAG
Paderborn,N09_M07_F10_KI21_17,I,64000,900,0.09,1000,1,LFT,6203,faulty,FAG
Paderborn,N15_M07_F10_KI21_7,I,64000,1500,0.15,1000,1,LFT,6203,faulty,FAG
Paderborn,N15_M01_F10_KI21_13,I,64000,1500,0.02,1000,1,LFT,6203,faulty,FAG
Paderborn,N09_M07_F10_KI21_7,I,64000,900,0.09,1000,1,LFT,6203,faulty,FAG
Paderborn,N15_M07_F10_KI21_18,I,64000,1500,0.15,1000,1,LFT,6203,faulty,FAG
Paderborn,N09_M07_F10_KI21_12,I,64000,90

In [43]:
import yaml
import re

def load_filter():
    with open('config/filters_config.yaml', 'r') as file:
        return yaml.safe_load(file)

list_values = []
for dataset, config in load_filter().items():
    values = list(config.items())[0][1]
    if isinstance(values, list):
        list_values += values

list_values        

['N', 'I', 'O', 'B', 'N', 'I', 'O', 'B', 'N', 'I', 'O', 'B']

In [2]:
import csv

data = []
with open('data/annotation_file.csv', mode='r') as file:
    reader = csv.DictReader(file)
    for row in reader:
        data.append(row)
data

[{'dataset_name': 'CWRU',
  'filename': '97',
  'label': 'N',
  'sampling_rate': '12000',
  'rpm': '1797',
  'hp': '0',
  'load': 'no',
  'extent_damage': '000',
  'damage_method': 'no',
  'bearing_type': '6203',
  'condition_bearing_health': 'healthy',
  'manufacturer': 'manu_cwru'},
 {'dataset_name': 'CWRU',
  'filename': '98',
  'label': 'N',
  'sampling_rate': '12000',
  'rpm': '1772',
  'hp': '1',
  'load': 'no',
  'extent_damage': '000',
  'damage_method': 'no',
  'bearing_type': '6203',
  'condition_bearing_health': 'healthy',
  'manufacturer': 'manu_cwru'},
 {'dataset_name': 'CWRU',
  'filename': '99',
  'label': 'N',
  'sampling_rate': '12000',
  'rpm': '1750',
  'hp': '2',
  'load': 'no',
  'extent_damage': '000',
  'damage_method': 'no',
  'bearing_type': '6203',
  'condition_bearing_health': 'healthy',
  'manufacturer': 'manu_cwru'},
 {'dataset_name': 'CWRU',
  'filename': '100',
  'label': 'N',
  'sampling_rate': '12000',
  'rpm': '1730',
  'hp': '3',
  'load': 'no',
  'ex

In [3]:
import yaml

def load_config(file_path):
    with open(file_path, 'r') as file:
        return yaml.safe_load(file)

def filter_data(data, filters):
    filtered_data = []

    for dataset, config in filters.items():
        for item in data:
            matches = all(
                item.get(key) in value and item.get("dataset_name")==dataset if isinstance(value, list) 
                else item.get(key) == value and item.get("dataset_name")==dataset
                for key, value in config.items()
            )
            if matches:
                filtered_data.append(item)
    
    return filtered_data

config = load_config('config/filters_config.yaml')
result = filter_data(data, config)
result

[{'dataset_name': 'CWRU',
  'filename': '105',
  'label': 'I',
  'sampling_rate': '12000',
  'rpm': '1797',
  'hp': '0',
  'load': 'no',
  'extent_damage': '007',
  'damage_method': 'EDM',
  'bearing_type': '6205',
  'condition_bearing_health': 'faulty',
  'manufacturer': 'manu_cwru'},
 {'dataset_name': 'CWRU',
  'filename': '106',
  'label': 'I',
  'sampling_rate': '12000',
  'rpm': '1772',
  'hp': '1',
  'load': 'no',
  'extent_damage': '007',
  'damage_method': 'EDM',
  'bearing_type': '6205',
  'condition_bearing_health': 'faulty',
  'manufacturer': 'manu_cwru'},
 {'dataset_name': 'CWRU',
  'filename': '107',
  'label': 'I',
  'sampling_rate': '12000',
  'rpm': '1750',
  'hp': '2',
  'load': 'no',
  'extent_damage': '007',
  'damage_method': 'EDM',
  'bearing_type': '6205',
  'condition_bearing_health': 'faulty',
  'manufacturer': 'manu_cwru'},
 {'dataset_name': 'CWRU',
  'filename': '108',
  'label': 'I',
  'sampling_rate': '12000',
  'rpm': '1730',
  'hp': '3',
  'load': 'no',
  

In [None]:
from src.data_processing.dataset_manager import DatasetManager

dm = DatasetManager()
dm.filter_dataset(filename='97')

True