In [1]:
import random
import yaml
import pickle
# Third Party
from transformers import (
    PatchTSMixerForPrediction,
    Trainer,
    TrainingArguments,
)
import numpy as np
import pandas as pd
import torch
from tsfm_public.toolkit.dataset import ForecastDFDataset
from read_data import loadData

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
SEED = 42
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)
with open("./config.yaml", "r") as file:
    configs = yaml.safe_load(file)

num_workers = configs['num_workers'] 
batch_size = configs['batch_size']  
context_length = configs['context_length'] 
forecast_horizon = configs['forecast_horizon'] 
patch_length = configs['patch_length'] 
target_col = None
timestamp_column = "cycle" 

In [3]:
data_train, testDatasets, expectedRulDatasets = loadData()

In [4]:
drop_col = ['setting_1', 'setting_2', 'setting_3'] 
data_clean = [data.drop(columns = drop_col) for data in data_train]

In [5]:
df_all = pd.DataFrame()
for i in range(4):
    df_all =pd.concat([df_all, data_clean[i]])

In [6]:
for engine in list(set(df_all['engine'])):
    max_cycle = df_all[df_all['engine'] == engine]['cycle'].max() 
    condition = (df_all['engine'] == engine) & (df_all['cycle'] > max_cycle - 25)
    df_all.loc[condition, 'label'] = 1

In [7]:
df_all = df_all.fillna(0)

In [8]:
df_engine_1 = df_all.drop(columns=['index'], errors='ignore')

In [9]:
correlation_matrix = df_engine_1.drop(columns=['engine']).corr()
high_corr_columns = correlation_matrix[abs(correlation_matrix['label'] )> 0.03].index.tolist()
# high_corr_columns = correlation_matrix[correlation_matrix['label'] > 0.05].index.tolist()
df_sample = df_engine_1[high_corr_columns]

In [10]:
col_train = ['cycle', 'Fan_inlet_temperature_R', 'Fan_inlet_Pressure_psia',
       'bypass_duct_pressure_psia', 'HPC_outlet_pressure_psia',
       'Ratio_of_fuel_flow_to_Ps30_pps_psia', 'Corrected_core_speed_rpm',
       'High_pressure_turbines_Cool_air_flow',
       'Low_pressure_turbines_Cool_air_flow', 'index']

In [11]:
df_sample.drop(columns=['label'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_sample.drop(columns=['label'], inplace=True)


In [12]:
df_sample.shape, df_engine_1.shape

((160359, 9), (160359, 24))

In [13]:
target_col = None 

In [14]:
# df_sample2 = df_sample[100000:160359]
df_sample2 = df_sample[:10000]

In [15]:
class PatchTSFeatureExtraction():
    data = df_sample2
    id_columns = []
    forecast_columns = [col for col in data.columns if col not in timestamp_column]
    with open("../model/patch_model/timeseriesprocessor.pkl", "rb") as file:
        tsp = pickle.load(file)
    all_dataset = ForecastDFDataset(
        tsp.preprocess(data),
        id_columns=id_columns,
        input_columns=forecast_columns,
        output_columns=forecast_columns,
        context_length=context_length,
        prediction_length=forecast_horizon,
    )
    print("Loading pretrained model")
    inference_forecast_model = PatchTSMixerForPrediction.from_pretrained(configs['foundation_model_path'])
    print("Done")
    all_trainer = Trainer(args= TrainingArguments(output_dir="./output",per_device_eval_batch_size =batch_size), model=inference_forecast_model, 
                        eval_dataset=all_dataset)
    all_dls = all_trainer.get_eval_dataloader()
    def return_con_ten(all_dls,inference_forecast_model):
        concatenated_tensors = []
        for x in all_dls:
            output = inference_forecast_model(past_values=x['past_values'])        
            model_output = output.last_hidden_state
            all_channels_compact_embedding = model_output.mean(dim = (2,3))
            concatenated_tensors.append(all_channels_compact_embedding.unsqueeze(0))
        return concatenated_tensors
    concatenated_tensors = return_con_ten(all_dls, inference_forecast_model)
    print('1')
    concatenated_tensors_sm = concatenated_tensors[:-1]
    print('2')
    result_tensor = torch.cat(concatenated_tensors_sm, dim=1)
    print('3')
    final_df=pd.DataFrame(result_tensor[0].detach().numpy(),columns=forecast_columns,index=data.index[:len(result_tensor[0])])
    print('4')
    final_df.to_csv(f'./output/data_patch_1.csv', index=False)
    print('Saved dataframe named data_patch with context aware features at output folder')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[INTERNAL_ID_COLUMN] = INTERNAL_ID_VALUE
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False)


Loading pretrained model
Done
1
2
3
4
Saved dataframe named data_patch with context aware features at output folder


In [12]:
df = pd.read_csv("./output/data_patch.csv")

In [13]:
df

Unnamed: 0,label
0,0.051166
1,0.051166
2,0.051166
3,0.051166
4,0.051166
...,...
160321,-0.182871
160322,-0.185774
160323,-0.185280
160324,-0.141838


In [10]:
df_sample.shape

(921, 11)

In [11]:
921-888

33