In [8]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [9]:
# Define file paths
train_file_path = 'train1_update.csv'
test_file_path = 'test1_filterd.csv'
rul_file_path = 'RUL_FD001.txt'

# Load the data
train_df = pd.read_csv(train_file_path, sep=' ', header=None)
test_df = pd.read_csv(test_file_path, sep=' ', header=None)
rul_df = pd.read_csv(rul_file_path, sep=' ', header=None)

# Display the first few rows to inspect the structure
print(train_df.head())
print(test_df.head())
print(rul_df.head())

# Display the number of columns to confirm the structure
print(f"Train DataFrame columns: {train_df.shape[1]}")
print(f"Test DataFrame columns: {test_df.shape[1]}")
print(f"RUL DataFrame columns: {rul_df.shape[1]}")


                                                   0
0  id,cycle,setting1,setting2,T24_Total_temperatu...
1  1,1,-0.0007,-0.0004,641.82,1589.7,1400.6,554.3...
2  1,2,0.0019,-0.0003,642.15,1591.82,1403.14,553....
3  1,3,-0.0043,0.0003,642.35,1587.99,1404.2,554.2...
4  1,4,0.0007,0.0,642.35,1582.79,1401.87,554.45,2...
                                                   0
0  id,cycle,setting1,setting2,setting3,T2_Total_t...
1  1,1,0.0023,0.0003,100.0,518.67,643.02,1585.29,...
2  1,2,-0.0027,-0.0003,100.0,518.67,641.71,1588.4...
3  1,3,0.0003,0.0001,100.0,518.67,642.46,1586.94,...
4  1,4,0.0042,0.0,100.0,518.67,642.44,1584.12,140...
     0   1
0  112 NaN
1   98 NaN
2   69 NaN
3   82 NaN
4   91 NaN
Train DataFrame columns: 1
Test DataFrame columns: 1
RUL DataFrame columns: 2


In [10]:
# Check and drop the last two columns if they are all NaN due to trailing space in files
if train_df.shape[1] > 26:
    train_df.drop(train_df.columns[[26, 27]], axis=1, inplace=True)
if test_df.shape[1] > 26:
    test_df.drop(test_df.columns[[26, 27]], axis=1, inplace=True)

# Assign column names
column_names = [
    'id',
    'cycle',
    'setting1',
    'setting2',
    'setting3',
    'T2_Total_temperature_at_fan_inlet',
    'T24_Total_temperature_at_LPC_outlet',
    'T30_Total_temperature_at_HPC_outlet',
    'T50_Total_temperature_at_LPT_outlet',
    'P2_Pressure_at_fan_inlet',
    'P15_Total_pressure_in_bypass_duct',
    'P30_Total_pressure_at_HPC_outlet',
    'Nf_Physical_fan_speed',
    'Nc_Physical_core_speed',
    'epr_Engine_pressure_ratio',
    'Ps30_Static_pressure_at_HPC_outlet',
    'phi_Ratio_of_fuel_flow_to_Ps30',
    'NRf_Corrected_fan_speed',
    'NRc_Corrected_core_speed',
    'BPR_Bypass_Ratio',
    'farB_Burner_fuel_air_ratio',
    'htBleed_Bleed_Enthalpy',
    'Nf_dmd_Demanded_fan_speed',
    'PCNfR_dmd_Demanded_corrected_fan_speed',
    'W31_HPT_coolant_bleed',
    'W32_LPT_coolant_bleed'
]

train_df.columns = column_names
test_df.columns = column_names
rul_df.columns = ['remaining_useful_life']

ValueError: Length mismatch: Expected axis has 1 elements, new values have 26 elements

In [None]:
# Normalize the data
scaler = MinMaxScaler()
train_df.iloc[:, 2:] = scaler.fit_transform(train_df.iloc[:, 2:])
test_df.iloc[:, 2:] = scaler.transform(test_df.iloc[:, 2:])

# Add remaining useful life to the training data
rul = pd.DataFrame(train_df.groupby('id')['cycle'].max()).reset_index()
rul.columns = ['id', 'max_cycle']
train_df = train_df.merge(rul, on=['id'], how='left')
train_df['remaining_useful_life'] = train_df['max_cycle'] - train_df['cycle']
train_df.drop(columns=['max_cycle'], inplace=True)

In [None]:
# Select features and target
features = train_df.drop(columns=['id', 'cycle', 'remaining_useful_life'])
target = train_df['remaining_useful_life']

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(features, target, test_size=0.2, random_state=42)

In [None]:
# Initialize and train the linear regression model
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)


In [None]:
# Make predictions on the validation set
y_pred = lr_model.predict(X_val)

# Calculate performance metrics
mse = mean_squared_error(y_val, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_val, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")
print(f"R^2 Score: {r2}")