# lifelines - predictive maintenance

## Importing the libraries

In [1]:
import os
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


## Importing the dataset

In [2]:
# pyspark_df = spark.table("sd_bdc_demo.sarima_time_series_forecasting.1_service_now_survival_data")
# pyspark_df.display()
# df = pyspark_df.toPandas()

In [3]:
# Load the survival dataset from the CSV export that sits next to the notebook.
csv_path = "2_service_now_lifelines_updated_data/2_service_now_lifelines_updated_data.csv"
df = pd.read_csv(csv_path)


In [4]:
# Preview the first five rows of the loaded frame.
print(df.head(n=5))

   Asset_Number  start  stop  event  Category      usage  temperature  \
0        100080      0    14      0  Security  89.242856    21.072078   
1        100080     14    17      1  Security  18.028311    52.449810   
2        100080     17    19      1  Security  56.002279    75.497191   
3        100080     19    21      0  Security  13.719146    12.130803   
4        100080     21    25      0  Security  34.996827    10.222960   

        load  health_bar  
0  18.921408   75.926079  
1  30.837617   46.889084  
2  14.388191   46.312371  
3  54.861030  100.000000  
4  68.653680   43.616789  


In [5]:
# Print the column labels of the loaded frame.
print(df.columns)

Index(['Asset_Number', 'start', 'stop', 'event', 'Category', 'usage',
       'temperature', 'load', 'health_bar'],
      dtype='object')


In [6]:
# Print the dtype pandas inferred for each column.
print(df.dtypes)

Asset_Number      int64
start             int64
stop              int64
event             int64
Category         object
usage           float64
temperature     float64
load            float64
health_bar      float64
dtype: object


In [7]:
# Print the (rows, columns) shape of the loaded frame.
print(df.shape)

(11100, 9)


### Drop category column

In [8]:
# Drop the 'Category' column by rebinding `df` instead of mutating in place.
# errors='ignore' keeps this cell idempotent: re-running it after the column
# is already gone no longer raises a KeyError.
df = df.drop(columns=['Category'], errors='ignore')

In [9]:
# Print the (rows, columns) shape again, after the column drop in the previous cell.
print(df.shape)

(11100, 8)


## Separate DataFrame for each asset

In [10]:
# Collect the distinct asset identifiers, in order of first appearance.
unique_assets = df.loc[:, 'Asset_Number'].unique()
print(unique_assets)

[100080 100081 300006 300007 300008 300009 300010 300011 300012 300013
 300014 300015 300035 300036 300016 300037 300017 300018 300019 300020
 300021 300022 300038 300039 300023 300024 300025 300026 300027 300028
 300029 300030 300031 300032 300033 300034 600053 600054 600055 600056
 600057 600058 600059 600060 600061 600062 600063 600064 600065 600066
 600067 600068 600069 600070 600071 600072 600073 600074 600075 600076
 600077 600078 600079 600080 600081 600082 600083 600084 600085 600086
 600087 600088 600089 600090 600091 600092 600093 600094 600095 600096
 600097 600098 600099 600100 600101 600102 600103 600104 600105 600106
 600107 600108 600109 600110 600111 600112 600113 600114 600115 600116
 600117 600118 600119 600120 600121 600122 600123 600124 600125 600126
 600127]


In [11]:
# Split the frame into one sub-frame per asset in a single grouping pass,
# instead of re-scanning the whole frame with a boolean mask for every asset.
# sort=False keeps the assets in first-appearance order, i.e. the same order
# that df['Asset_Number'].unique() yields; each group keeps its original row
# order and index.
asset_dfs = {
    asset_id: asset_df
    for asset_id, asset_df in df.groupby('Asset_Number', sort=False)
}

In [12]:
# Pull out the rows of a single asset to inspect what one per-asset frame looks like.
example_asset_id = 100080
individual_asset = asset_dfs[example_asset_id]
print(individual_asset)

    Asset_Number  start  stop  event      usage  temperature       load  \
0         100080      0    14      0  89.242856    21.072078  18.921408   
1         100080     14    17      1  18.028311    52.449810  30.837617   
2         100080     17    19      1  56.002279    75.497191  14.388191   
3         100080     19    21      0  13.719146    12.130803  54.861030   
4         100080     21    25      0  34.996827    10.222960  68.653680   
..           ...    ...   ...    ...        ...          ...        ...   
95        100080    447   448      0  82.623957    79.928071  85.666082   
96        100080    448   453      0  52.185868    50.653635  60.664292   
97        100080    453   458      1  87.750317    41.481725  98.926973   
98        100080    458   473      1  38.249971    97.978681  59.125432   
99        100080    473   497      0  92.668396    58.518503  98.768340   

    health_bar  
0    75.926079  
1    46.889084  
2    46.312371  
3   100.000000  
4    43.616789

In [13]:
# Print the (rows, columns) shape of the single-asset frame.
print(individual_asset.shape)

(100, 8)


## CoxTimeVaryingFitter on each asset dataset

In [14]:
from lifelines import CoxTimeVaryingFitter
from lifelines.utils import concordance_index

def run_ctv_pipeline(asset_df, test_size=20):
    """Fit a CoxTimeVaryingFitter on the early rows of ``asset_df`` and score it on the last rows.

    Rows are ordered by ``start``; the final ``test_size`` rows are held out,
    the model is fitted on the remainder, and the hold-out is scored with the
    concordance index of ``stop`` against the negated predicted partial hazard.

    Parameters
    ----------
    asset_df : pandas.DataFrame
        Long-format interval data with the columns ``Asset_Number`` (id),
        ``start``, ``stop`` and ``event``. The frame is passed to the fitter
        as-is, so any further columns are part of the model input. The input
        frame is not modified.
    test_size : int, default 20
        Number of trailing rows to hold out. Must be positive.

    Returns
    -------
    tuple
        ``(model, metrics)`` on success, where ``metrics`` has the keys
        ``"Concordance_Index"``, ``"Train_Size"`` and ``"Test_Size"``.
        ``(None, {"error": message})`` when a precondition fails or when
        fitting/scoring raises.

    Notes
    -----
    NOTE(review): when ``asset_df`` holds a single ``Asset_Number`` with
    non-overlapping intervals, every event time appears to have exactly one
    row at risk. That would make the partial likelihood flat and the Newton
    step singular ("Matrix is singular.") -- confirm, and consider fitting on
    rows pooled across several assets instead of one asset at a time.
    """
    # ``iloc[:-0]`` is an empty slice, so a zero hold-out would silently
    # produce an empty training set (and a negative one a meaningless split).
    if test_size <= 0:
        return None, {"error": "test_size must be a positive integer"}

    if asset_df['event'].sum() == 0:
        return None, {"error": "No observed events"}

    if len(asset_df) <= test_size:
        return None, {"error": "Insufficient data for train-test split"}

    # The split below is positional; it is only a "latest intervals" hold-out
    # when the rows are in chronological order. A stable sort leaves an
    # already ordered frame untouched.
    ordered_df = asset_df.sort_values("start", kind="mergesort")

    # Split. The hold-out is copied so that adding the prediction column
    # below never writes into a view of the caller's frame.
    train_df = ordered_df.iloc[:-test_size]
    test_df = ordered_df.iloc[-test_size:].copy()

    if test_df["event"].sum() == 0:
        return None, {"error": "Test set has no events"}

    # Without any event in the training window there is nothing to fit.
    if train_df["event"].sum() == 0:
        return None, {"error": "Training set has no events"}

    try:
        # Fit
        ctv = CoxTimeVaryingFitter()
        ctv.fit(train_df, id_col="Asset_Number", start_col="start", stop_col="stop", event_col="event")

        # Predict partial hazard on test
        test_df["predicted_hazard"] = ctv.predict_partial_hazard(test_df)

        # Evaluate
        c_index = concordance_index(
            test_df["stop"],
            -test_df["predicted_hazard"],  # Higher hazard → lower survival
            test_df["event"]
        )

        return ctv, {
            "Concordance_Index": round(c_index, 4),
            "Train_Size": len(train_df),
            "Test_Size": len(test_df)
        }

    except Exception as e:
        # Best-effort: report fitting/scoring failures to the caller instead
        # of aborting a loop over many assets.
        return None, {"error": str(e)}


In [15]:
# Run the pipeline once per asset and keep only the assets that produced a
# fitted model; every other asset is reported with the reason it was skipped.
results = {}

for asset_id, df_asset in asset_dfs.items():
    model, metrics = run_ctv_pipeline(df_asset)

    if model is not None:
        print(f"[{asset_id}] ✅ Concordance Index: {metrics['Concordance_Index']}")
        results[asset_id] = {"model": model, "metrics": metrics}
        continue

    print(f"[{asset_id}] Skipped: {metrics['error']}")


[100080] Skipped: Matrix is singular.
[100081] Skipped: Matrix is singular.
[300006] Skipped: Matrix is singular.
[300007] Skipped: Matrix is singular.
[300008] Skipped: Matrix is singular.
[300009] Skipped: Matrix is singular.
[300010] Skipped: Matrix is singular.
[300011] Skipped: Matrix is singular.
[300012] Skipped: Matrix is singular.
[300013] Skipped: Matrix is singular.
[300014] Skipped: Matrix is singular.
[300015] Skipped: Matrix is singular.
[300035] Skipped: Matrix is singular.
[300036] Skipped: Matrix is singular.
[300016] Skipped: Matrix is singular.
[300037] Skipped: Matrix is singular.
[300017] Skipped: Matrix is singular.
[300018] Skipped: Matrix is singular.
[300019] Skipped: Matrix is singular.
[300020] Skipped: Matrix is singular.
[300021] Skipped: Matrix is singular.
[300022] Skipped: Matrix is singular.
[300038] Skipped: Matrix is singular.
[300039] Skipped: Matrix is singular.
[300023] Skipped: Matrix is singular.
[300024] Skipped: Matrix is singular.
[300025] Ski