In [22]:
import pandas as pd
import os

In [23]:
# Names of the four training files: train_FD001.txt ... train_FD004.txt.
file_names = ['train_FD00{}.txt'.format(subset_id) for subset_id in (1, 2, 3, 4)]

# Each file is expected under ../data/raw, relative to the notebook's working directory.
file_paths = [os.path.join('..', 'data', 'raw', name) for name in file_names]

In [None]:
# Print the first row and per-unit cycle-length statistics for every training file.
#
# Column layout shared by all files: 2 index columns, 3 operational settings and
# 21 sensor measurements. It does not depend on the file, so it is built once.
index_cols = ['unit number', 'cycle time']
setting_cols = [f'operational setting {num}' for num in range(1, 4)]
sensor_cols = [f'sensor measurement {num}' for num in range(1, 22)]
column_names = index_cols + setting_cols + sensor_cols

for path in file_paths:
    try:
        # Split on runs of whitespace so leading/trailing padding on a line never
        # yields phantom empty columns. The raw file is only read here; it is no
        # longer rewritten on disk to strip that padding.
        df = pd.read_csv(path, sep=r'\s+', header=None, names=column_names, index_col=False)
        # Defensive: discard any column that contains no data at all.
        df = df.dropna(axis=1, how='all')

        if not df.empty:
            print(df.iloc[0, :])

        # Cycle length of a unit = the largest 'cycle time' recorded for it.
        # NOTE(review): groupby collects all rows of a unit even if they are not
        # contiguous; this matches a run-based scan as long as each unit's rows
        # form a single consecutive run in the file — confirm for new data.
        cycle_lengths = df.groupby('unit number', sort=False)['cycle time'].max().tolist()

        print('--- Engine Statistics ---')
        print(f'Current engine: {path}')

        if cycle_lengths:
            # Max/min/average are all taken over the per-unit cycle lengths, so
            # "Min cycle length" is the shortest run, not the first cycle index.
            print(f"Max cycle length: {max(cycle_lengths)}")
            print(f"Min cycle length: {min(cycle_lengths)}")
            average_cycle_length = sum(cycle_lengths) / len(cycle_lengths)
            print(f'Average cycle length: {average_cycle_length}')
        else:
            print('Average cycle length: No cycles found')

    except FileNotFoundError:
        print(f"Error: Training file not found at {path}")
        print("Please ensure the data is placed correctly in the data/raw directory.")
    except Exception as e:
        # Best-effort: report the problem and continue with the next file.
        print(f"An error occurred: {e}")

unit number                 1.0000
cycle time                  1.0000
operational setting 1      -0.0007
operational setting 2      -0.0004
operational setting 3     100.0000
sensor measurement 1      518.6700
sensor measurement 2      641.8200
sensor measurement 3     1589.7000
sensor measurement 4     1400.6000
sensor measurement 5       14.6200
sensor measurement 6       21.6100
sensor measurement 7      554.3600
sensor measurement 8     2388.0600
sensor measurement 9     9046.1900
sensor measurement 10       1.3000
sensor measurement 11      47.4700
sensor measurement 12     521.6600
sensor measurement 13    2388.0200
sensor measurement 14    8138.6200
sensor measurement 15       8.4195
sensor measurement 16       0.0300
sensor measurement 17     392.0000
sensor measurement 18    2388.0000
sensor measurement 19     100.0000
sensor measurement 20      39.0600
sensor measurement 21      23.4190
Name: 0, dtype: float64
--- Engine Statistics ---
Current engine: ../data/raw/train_FD001.

In [15]:
# Toy frame with explicit integer index labels (10, 20, 30) instead of the
# default 0..n-1 range, used to demonstrate selecting rows by index label.
df = pd.DataFrame(
    data={
        'Name': ['Alice', 'Bob', 'Aritra'],
        'Age': [25, 30, 35],
        'Location': ['Seattle', 'New York', 'Kona'],
    },
    index=[10, 20, 30],
)

# Boolean mask over the index: keeps only the row whose label equals 10.
df.loc[df.index == 10]

Unnamed: 0,Name,Age,Location
10,Alice,25,Seattle
