# Data Preprocessing Tools

## Importing the libraries

In [127]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


## Importing the maintenance dataset

In [128]:
# Load Excel files
df_maintenance = pd.read_csv("0_maintenance_raw_data/maintenance.csv", sep=";")

In [129]:
print(df_maintenance.columns)

Index(['lifetime', 'broken', 'pressureInd', 'moistureInd', 'temperatureInd',
       'team', 'provider'],
      dtype='object')


In [130]:
print(df_maintenance.head())

   lifetime  broken  pressureInd  moistureInd  temperatureInd   team  \
0        56       0    92.178854   104.230204       96.517159  TeamA   
1        81       1    72.075938   103.065701       87.271062  TeamC   
2        60       0    96.272254    77.801376      112.196170  TeamA   
3        86       1    94.406461   108.493608       72.025374  TeamC   
4        34       0    97.752899    99.413492      103.756271  TeamB   

    provider  
0  Provider4  
1  Provider4  
2  Provider1  
3  Provider2  
4  Provider1  


In [131]:
print(df_maintenance.shape)

(1000, 7)


## Importing service now dataset

In [132]:
# Load Excel files
df_service = pd.read_csv("0_maintenance_raw_data/service_now_only.csv")

In [133]:
print(df_service.columns)

Index(['Asset Number', 'Incident_Number', 'Short_Description', 'Status',
       'Priority', 'Category', 'Subcategory', 'Assigned_To', 'Assigned_Group',
       'Created_Date'],
      dtype='object')


In [134]:
print(df_service.head())

   Asset Number Incident_Number           Short_Description   Status  \
0        100080      INC0010047    Access badge not working   Closed   
1        100081      INC0010085   Data center cooling alert   Closed   
2        300006      INC0010048   Printer toner replacement     Open   
3        300007      INC0010032  VoIP phone not registering  Stalled   
4        300008      INC0010058      Network switch failure  Stalled   

       Priority    Category      Subcategory   Assigned_To   Assigned_Group  \
0  3   Moderate    Security  Physical Access  Chris Parker    Security Team   
1  1   Critical  Facilities      Data Center  Michael Wong       Facilities   
2       4   Low    Hardware          Printer    Unassigned  Desktop Support   
3  3   Moderate    Hardware            Phone  Sarah Miller  Telecom Support   
4  1   Critical     Network           Switch  Michael Wong     Network Team   

          Created_Date  
0  2024 04 09T15:58:22  
1  2024 04 17T08:05:22  
2  2024 04 10T08:

In [135]:
print(df_service.shape)

(111, 10)


## Insert Asset Number in `df_maintenance` from `df_service`

In [136]:
unique_assets = df_service["Asset Number"].unique()
repeated_assets = np.resize(unique_assets, df_maintenance.shape[0])

df_maintenance_with_asset_num = df_maintenance.copy()
df_maintenance_with_asset_num.insert(0, "Asset_Number", repeated_assets)


In [137]:
print(df_maintenance_with_asset_num.head)

<bound method NDFrame.head of      Asset_Number  lifetime  broken  pressureInd  moistureInd  temperatureInd  \
0          100080        56       0    92.178854   104.230204       96.517159   
1          100081        81       1    72.075938   103.065701       87.271062   
2          300006        60       0    96.272254    77.801376      112.196170   
3          300007        86       1    94.406461   108.493608       72.025374   
4          300008        34       0    97.752899    99.413492      103.756271   
..            ...       ...     ...          ...          ...             ...   
995        600124        88       1    88.589759   112.167556       99.861456   
996        600125        88       1   116.727075   110.871332       95.075631   
997        600126        22       0   104.026778    88.212873       83.221220   
998        600127        78       0   104.911649   104.257296       83.421491   
999        100080        63       0   116.901354    99.998694       47.641493  

## Export Updated Excel

In [138]:
df_with_index = df_maintenance_with_asset_num.copy()
df_with_index.insert(0, 'Observation_Index', range(len(df_maintenance_with_asset_num)))

In [139]:
# Create the folder if it doesn't exist
os.makedirs("1_maintenance_survival_data", exist_ok=True)

# Save the Excel file inside the folder
df_with_index.to_csv("1_maintenance_survival_data/1_maintenance_survival_data.csv", index=False)