In [1]:
import pandas as pd
import pprint

# Clustering tool data fetching and processing

Work order **2100058590** is usable as an example

## Useful functions

In [2]:
def read_csv_file(file_name, columns_dict):
    """Load selected columns of an ``E1P_010_<file_name>.csv`` extract.

    Args:
        file_name: Table part of the file name; the file is read from
            ``./data/E1P_010_<file_name>.csv`` using ``;`` as separator.
        columns_dict: Mapping of source column name -> new column name.
            Only the keys are read from the file.

    Returns:
        DataFrame holding the requested columns, renamed per ``columns_dict``.
    """
    csv_path = f"./data/E1P_010_{file_name}.csv"
    wanted_columns = list(columns_dict)
    raw_frame = pd.read_csv(csv_path, sep=';', usecols=wanted_columns)
    return raw_frame.rename(columns=columns_dict)

def write_mid_level_csv_file(df, file_name):
    """Write ``df`` to ``./data/mid_<file_name>.csv``.

    Args:
        df: DataFrame to persist (the index is written, as ``to_csv`` does by default).
        file_name: Target name, with or without a trailing ``.csv``.

    The extension is appended only when it is missing. The previous one-liner
    applied the conditional to the whole path expression, so a name that already
    contained ``.csv`` produced an empty path and nothing was written.
    """
    extension = '' if file_name.endswith('.csv') else '.csv'
    df.to_csv('./data/mid_' + file_name + extension)

## 1. Work Center tables


Tables:
- CRHD
- INOB
- AUSP

### Read needed tables

#### Read CRHD data

Work center header data

In [3]:
# CRHD columns to load, mapped to the names used in this notebook
crhd_columns = {
    'OBJID': 'CRHD_WBS_ID',
    'ARBPL': 'CRHD_WBS_Name',
    'WERKS': 'CRHD_WBS_Plant',
}

# Load the work center headers and derive the full name as the plant code
# immediately followed by the work center name (e.g. DK10 + DRILLING).
crhd_df = read_csv_file("CRHD_V1", crhd_columns).assign(
    CRHD_WBS_Full_Name=lambda frame: frame["CRHD_WBS_Plant"] + frame["CRHD_WBS_Name"]
)

print(crhd_df)


     CRHD_WBS_ID CRHD_WBS_Name CRHD_WBS_Plant CRHD_WBS_Full_Name
0       10001014      DRILLING           DK10       DK10DRILLING
1       10001016      WELLMAIN           DK10       DK10WELLMAIN
2       10001017      WELLSUPV           DK10       DK10WELLSUPV
3       10001018      WELLTECH           DK10       DK10WELLTECH
4       10001019      MAINONSH           DK10       DK10MAINONSH
..           ...           ...            ...                ...
405     10001445      MTN-ROPE           DK90       DK90MTN-ROPE
406     10001424      VEN-INST           DK90       DK90VEN-INST
407     10001431      INP-SITE           DK90       DK90INP-SITE
408     10001978      VEN-TURB           DK90       DK90VEN-TURB
409     10001974      MTN-PIPF           DK90       DK90MTN-PIPF

[410 rows x 4 columns]


#### Read INOB data

Work center type 

In [4]:
# INOB columns to load, mapped to the names used in this notebook
inob_columns = {
    'CUOBJ': 'INOB_Object_Number',
    'KLART': 'INOB_Class_Type',
    'OBJEK': 'INOB_Object_Key'
}

# Read INOB table
inob_df = read_csv_file('INOB', inob_columns)

# Filter on KLART == 19
inob_df = inob_df[inob_df['INOB_Class_Type'] == 19]

# Filter on INOB_Object_Key starting with DK.
# na=False treats missing / non-string keys as "no match"; without it the
# mask contains NaN for such rows and the boolean indexing raises.
inob_df = inob_df[inob_df['INOB_Object_Key'].str.startswith("DK", na=False)]

print(inob_df)

         INOB_Object_Number  INOB_Class_Type INOB_Object_Key
1148811             2588124               19    DK90MTN-PIPF
1148812             2588126               19    DK30VEN-TURB
1150273             2588125               19    DK20VEN-TURB
1150274             2588127               19    DK70VEN-TURB
1150275             2588128               19    DK90VEN-TURB
...                     ...              ...             ...
2116723             1663533               19    DK30MTN-TURB
2116724             1663751               19    DK80MTN-PAIN
2119394             1664209               19     DK70CON-NPT
2119395             1664210               19     DK80CON-NPT
2119396             1664211               19     DK90CON-NPT

[410 rows x 3 columns]


  df =  pd.read_csv('./data/E1P_010_' + file_name + '.csv', delimiter = ';', usecols = columns_dict.keys())


#### Read AUSP data

This table contains objects that are accessed using an object key; here we are looking for the department order.

__

After a closer look, no work order in Denmark gets any data from this table.

In [5]:
# AUSP columns to load, mapped to the names used in this notebook
ausp_columns = {
    'OBJEK': 'AUSP_Object_Number',
    'KLART': 'AUSP_Class_Type',
    'ATWRT': 'AUSP_Description'
}

# Read AUSP table
ausp_df = read_csv_file('AUSP', ausp_columns)

# Filter on KLART == 19.
# The class type column can come back with mixed types (numbers and text), in
# which case a plain `== 19` silently skips every row whose value is stored
# as text. Coercing to numeric first makes the comparison independent of how
# each value was parsed; values that are not numbers become NaN and are dropped.
ausp_class_type = pd.to_numeric(ausp_df['AUSP_Class_Type'], errors='coerce')
ausp_df = ausp_df[ausp_class_type == 19].copy()  # explicit copy: a column is assigned below

# Object numbers are cast to int so they can be compared with INOB_Object_Number
ausp_df["AUSP_Object_Number"] = ausp_df["AUSP_Object_Number"].astype(int)

print(ausp_df)

  df =  pd.read_csv('./data/E1P_010_' + file_name + '.csv', delimiter = ';', usecols = columns_dict.keys())


         AUSP_Object_Number AUSP_Class_Type AUSP_Description
1891919              235125              19             ELEC
1891920              235132              19            OTHER
1892005              235137              19            OTHER
1892131              235156              19             INSP
1892132              235182              19            OTHER
...                     ...             ...              ...
5516681               18305              19             FABM
5516682               18306              19            PAINT
5516683               18307              19             SCAF
5516684               18308              19             TELE
5516685               18308              19            OTHER

[1365 rows x 3 columns]


In [6]:
# List the distinct description values present in the filtered AUSP rows
print(ausp_df.loc[:, "AUSP_Description"].unique())

['ELEC' 'OTHER' 'INSP' 'MAINT' 'PROD' 'MECH' 'CONS' 'FABM' 'DRILL' 'INST'
 'TELECOM' 'PERF' 'INTG' 'TELE' 'MNGT' 'PAINT' 'SCAF']


### Merge tables

#### Merge INOB and AUSP

In [7]:
# Left-join AUSP onto INOB by object number. In the rows shown by the output
# the AUSP columns are all NaN, i.e. no INOB row finds a match.
df_work_center_merged = pd.merge(
    inob_df,
    ausp_df,
    how='left',
    left_on=['INOB_Object_Number'],
    right_on=['AUSP_Object_Number'],
)

# Alternative kept for reference (skip the AUSP join entirely):
#df_work_center_merged = inob_df.copy()
#df_work_center_merged = df_work_center_merged[df_work_center_merged['INOB_Object_Key'].str.startswith('DK')]
print(df_work_center_merged)

# Distinct AUSP keys that made it through the join
print(df_work_center_merged['AUSP_Object_Number'].unique())

     INOB_Object_Number  INOB_Class_Type INOB_Object_Key  AUSP_Object_Number  \
0               2588124               19    DK90MTN-PIPF                 NaN   
1               2588126               19    DK30VEN-TURB                 NaN   
2               2588125               19    DK20VEN-TURB                 NaN   
3               2588127               19    DK70VEN-TURB                 NaN   
4               2588128               19    DK90VEN-TURB                 NaN   
..                  ...              ...             ...                 ...   
405             1663533               19    DK30MTN-TURB                 NaN   
406             1663751               19    DK80MTN-PAIN                 NaN   
407             1664209               19     DK70CON-NPT                 NaN   
408             1664210               19     DK80CON-NPT                 NaN   
409             1664211               19     DK90CON-NPT                 NaN   

    AUSP_Class_Type AUSP_Description  


#### Merge CRHD and df_work_center_merged

In [8]:
# Attach the CRHD header data: the INOB object key is matched against the
# full work center name built from plant + name.
df_work_center_merged = pd.merge(
    df_work_center_merged,
    crhd_df,
    how='left',
    left_on=['INOB_Object_Key'],
    right_on=['CRHD_WBS_Full_Name'],
)

print(df_work_center_merged)

     INOB_Object_Number  INOB_Class_Type INOB_Object_Key  AUSP_Object_Number  \
0               2588124               19    DK90MTN-PIPF                 NaN   
1               2588126               19    DK30VEN-TURB                 NaN   
2               2588125               19    DK20VEN-TURB                 NaN   
3               2588127               19    DK70VEN-TURB                 NaN   
4               2588128               19    DK90VEN-TURB                 NaN   
..                  ...              ...             ...                 ...   
405             1663533               19    DK30MTN-TURB                 NaN   
406             1663751               19    DK80MTN-PAIN                 NaN   
407             1664209               19     DK70CON-NPT                 NaN   
408             1664210               19     DK80CON-NPT                 NaN   
409             1664211               19     DK90CON-NPT                 NaN   

    AUSP_Class_Type AUSP_Description  C

### Filter and rename columns to get the final Work Center merge

Columns kept:
* CRHD_WBS_ID -> WBS_ID
* CRHD_WBS_Name -> WBS_Name
* CRHD_WBS_Plant -> WBS_Plant
* CRHD_WBS_Full_Name -> WBS_Full_name
* AUSP_Description -> WBS_Description (currently not selected by the code below, as the AUSP merge returns no match)

In [9]:
# Keep the CRHD-derived columns and strip the table prefix from their names.
# "AUSP_Description" is not part of the selection, so its rename entry
# currently has no effect.
df_work_center_merged = df_work_center_merged[
    ["CRHD_WBS_ID", "CRHD_WBS_Name", "CRHD_WBS_Plant", "CRHD_WBS_Full_Name"]
].rename(
    columns={
        "CRHD_WBS_ID": "WBS_ID",
        "CRHD_WBS_Name": "WBS_Name",
        "CRHD_WBS_Plant": "WBS_Plant",
        "CRHD_WBS_Full_Name": "WBS_Full_name",
        "AUSP_Description": "WBS_Description",
    }
)

### Save results in csv file

In [10]:
# Persist the work center table as ./data/mid_work_center.csv
write_mid_level_csv_file(df=df_work_center_merged, file_name="work_center")

## 2. Status tables

Tables:
- TJ30T
- JSTO
- JEST
- TJ02T

### Read needed tables

#### Read TJ30T data

This table contains all available status in E - not linked to system

To fetch a status text, both the status code (**ESTAT**) and the status profile of the object, i.e. of the work order (**STSMA**), must be used.

__

This table contains only status in E, not I, displayed in the "Status with Status Number" and "Status Without Status No." tables.

Data for the status contained in the "Syst. Status" table are stored in E1P_010_TJ02T

__

The **SPRAS** column is the language used, E for English, D for Danish... only English is kept.





In [11]:
# TJ30T columns to load, mapped to the names used in this notebook
tj30t_columns = {
    'ESTAT': 'TJ30T_User_Status_ID',
    'STSMA': 'TJ30T_User_Status_Profile',
    'SPRAS': 'TJ30T_User_Language',
    'TXT04': 'TJ30T_Textual_Status',
    'TXT30': 'TJ30T_Long_Textual_Status',
}

# Load the status texts and restrict them to the English rows (language "E")
tj30t_df = read_csv_file('TJ30T', tj30t_columns)
tj30t_df = tj30t_df.loc[tj30t_df["TJ30T_User_Language"] == "E"]

print(tj30t_df)

# Persist the filtered table as an intermediate CSV
write_mid_level_csv_file(df=tj30t_df, file_name="tj30t_filtered")

     TJ30T_User_Status_Profile TJ30T_User_Status_ID TJ30T_User_Language  \
1                     00000002                E0002                   E   
8                     00000002                E0003                   E   
15                    00000003                E0001                   E   
22                    00000003                E0002                   E   
29                    00000003                E0003                   E   
...                        ...                  ...                 ...   
1970                      TASK                E0002                   E   
1977                      TASK                E0003                   E   
1984                      TASK                E0004                   E   
1991                      TEST                E0001                   E   
1998                      WARN                E0001                   E   

     TJ30T_Textual_Status TJ30T_Long_Textual_Status  
1                     LKD                    

#### Read JSTO data

This table mainly contains the user profile **STSMA** to use for a specific object id i.e. work order defined by **OBJNR**

In [12]:
# JSTO columns to load, mapped to the names used in this notebook
jsto_columns = {
    'OBJNR': 'JSTO_Object_Number',
    'STSMA': 'JSTO_Status_Profile',
}

# Read JSTO table (object number -> status profile)
jsto_df = read_csv_file(file_name='JSTO', columns_dict=jsto_columns)

print(jsto_df)

  df =  pd.read_csv('./data/E1P_010_' + file_name + '.csv', delimiter = ';', usecols = columns_dict.keys())


           JSTO_Object_Number JSTO_Status_Profile
0              OR002400035375            PMMTNWOH
1        OV100009028900000001            PMMTNWOO
2        OV100009028900000002            PMMTNWOO
3        OV100009028900000003            PMMTNWOO
4              QM001400035843             PMNOTNP
...                       ...                 ...
6214767  OV100068832100000011            PMMTNWOO
6214768  OV100068832100000012            PMMTNWOO
6214769  OV100068832100000013            PMMTNWOO
6214770        QM001400375837             PMNOTNP
6214771        QM001100159714            PMNOTALL

[6214772 rows x 2 columns]


#### Read JEST data (E1P_JEST_OR_V1 file)

This table mainly contains all current status **STAT** for a specific object id i.e. work order defined by **OBJNR**

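Statuses starting with **E** and statuses starting with **I** are both kept: the E-only filter in the cell below is currently commented out, and the **I** statuses are matched against TJ02T further down.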

In [13]:
# JEST columns to load, mapped to the names used in this notebook
jest_columns = {
    'OBJNR': 'JEST_Object_Number',
    'STAT': 'JEST_Object_Status',
}

# Read JEST table (one row per object number / status pair)
jest_df = read_csv_file(file_name='JEST_OR_V1', columns_dict=jest_columns)

# Disabled filter: would keep only the statuses starting with E
#jest_df = jest_df[jest_df["JEST_Object_Status"].str.startswith("E")]

print(jest_df)

        JEST_Object_Number JEST_Object_Status
0           OR002900001515              E0003
1           OR002900001515              E0017
2           OR002900001515              I0002
3           OR002900001515              I0016
4           OR002900001515              I0028
...                    ...                ...
1147877     OR002900000516              I0028
1147878     OR002900000516              I0046
1147879     OR002900000516              I0215
1147880     OR002900000516              I0321
1147881     OR002900000516              I0420

[1147882 rows x 2 columns]


#### Read TJ02T data

This table contains all statuses starting with an **I**, stored in the column **ISTAT**.

Keep only english version.

In [14]:
# TJ02T columns to load, mapped to the names used in this notebook
tj02t_columns = {
    'ISTAT': 'TJ02T_User_Status_ID',
    'SPRAS': 'TJ02T_User_Language',
    'TXT04': 'TJ02T_Textual_Status',
    'TXT30': 'TJ02T_Long_Textual_Status',
}

# Read the TJ02T file
tj02t_df = read_csv_file("TJ02T", tj02t_columns)

# Keep the English texts only; the language column is then constant, so drop it
tj02t_df = (
    tj02t_df
    .loc[tj02t_df["TJ02T_User_Language"] == "E"]
    .drop(columns="TJ02T_User_Language")
)

print(tj02t_df)

# Persist the filtered table as an intermediate CSV
write_mid_level_csv_file(df=tj02t_df, file_name="tj02t_filtered")

     TJ02T_User_Status_ID TJ02T_Textual_Status     TJ02T_Long_Textual_Status
4                   I0158                 OSTS  Outstanding task(s) exist(s)
11                  I0159                 ATCO           All Tasks Completed
18                  I0160                 NOTI       Notification (internal)
25                  I0161                 NOTE       Notification (external)
32                  I0162                 DEFR             Defects Recording
...                   ...                  ...                           ...
7844                I0038                 CONI      Configuration incomplete
7851                I0042                 PREL            Partially released
7860                I0043                  LKD                        Locked
7868                I0045                 TECO         Technically completed
7876                I0046                 CLSD                        Closed

[1112 rows x 3 columns]


### Merge tables

#### Merge JEST and JSTO tables

In [15]:
# Attach the status profile (JSTO) to every object status row (JEST)
df_wo_status_merged = pd.merge(
    jest_df,
    jsto_df,
    how='left',
    left_on=['JEST_Object_Number'],
    right_on=['JSTO_Object_Number'],
)

In [16]:
# The JSTO key duplicates JEST_Object_Number after the join, so remove it
df_wo_status_merged = df_wo_status_merged.drop("JSTO_Object_Number", axis="columns")

print(df_wo_status_merged)

        JEST_Object_Number JEST_Object_Status JSTO_Status_Profile
0           OR002900001515              E0003            PMMTNWOH
1           OR002900001515              E0017            PMMTNWOH
2           OR002900001515              I0002            PMMTNWOH
3           OR002900001515              I0016            PMMTNWOH
4           OR002900001515              I0028            PMMTNWOH
...                    ...                ...                 ...
1147877     OR002900000516              I0028            PMMTNWOH
1147878     OR002900000516              I0046            PMMTNWOH
1147879     OR002900000516              I0215            PMMTNWOH
1147880     OR002900000516              I0321            PMMTNWOH
1147881     OR002900000516              I0420            PMMTNWOH

[1147882 rows x 3 columns]


#### Merge df_wo_status_merged and TJ30T table

Get **status** corresponding to **status code** and **user profile**.

In [17]:
# Look up the TJ30T texts using the (status profile, status code) pair
df_wo_status_merged = pd.merge(
    df_wo_status_merged,
    tj30t_df,
    how="left",
    left_on=['JSTO_Status_Profile', 'JEST_Object_Status'],
    right_on=['TJ30T_User_Status_Profile', 'TJ30T_User_Status_ID'],
)

In [18]:
# Spot-check the example work order after the TJ30T join
print(df_wo_status_merged.loc[df_wo_status_merged["JEST_Object_Number"] == "OR002100058590"])

       JEST_Object_Number JEST_Object_Status JSTO_Status_Profile  \
494674     OR002100058590              E0007            PMMTNWOH   
494675     OR002100058590              E0030            PMMTNWOH   
494676     OR002100058590              I0007            PMMTNWOH   
494677     OR002100058590              I0009            PMMTNWOH   
494678     OR002100058590              I0016            PMMTNWOH   
494679     OR002100058590              I0028            PMMTNWOH   
494680     OR002100058590              I0046            PMMTNWOH   
494681     OR002100058590              I0215            PMMTNWOH   
494682     OR002100058590              I0321            PMMTNWOH   
494683     OR002100058590              I0420            PMMTNWOH   

       TJ30T_User_Status_Profile TJ30T_User_Status_ID TJ30T_User_Language  \
494674                  PMMTNWOH                E0007                   E   
494675                  PMMTNWOH                E0030                   E   
494676              

#### Merge df_wo_status_merged and TJ02T Table

Get **I Status** corresponding to **I status** code.

In [19]:
# Look up the TJ02T texts using the status code alone
df_wo_status_merged = pd.merge(
    df_wo_status_merged,
    tj02t_df,
    how="left",
    left_on=['JEST_Object_Status'],
    right_on=['TJ02T_User_Status_ID'],
)

In [20]:
# Spot-check the example work order after the TJ02T join
print(df_wo_status_merged.loc[df_wo_status_merged["JEST_Object_Number"] == "OR002100058590"])

       JEST_Object_Number JEST_Object_Status JSTO_Status_Profile  \
494674     OR002100058590              E0007            PMMTNWOH   
494675     OR002100058590              E0030            PMMTNWOH   
494676     OR002100058590              I0007            PMMTNWOH   
494677     OR002100058590              I0009            PMMTNWOH   
494678     OR002100058590              I0016            PMMTNWOH   
494679     OR002100058590              I0028            PMMTNWOH   
494680     OR002100058590              I0046            PMMTNWOH   
494681     OR002100058590              I0215            PMMTNWOH   
494682     OR002100058590              I0321            PMMTNWOH   
494683     OR002100058590              I0420            PMMTNWOH   

       TJ30T_User_Status_Profile TJ30T_User_Status_ID TJ30T_User_Language  \
494674                  PMMTNWOH                E0007                   E   
494675                  PMMTNWOH                E0030                   E   
494676              

### Filter and Rename columns to get the final Work Order Status merge

Columns kept:
* JEST_Object_Number -> WO_Object_Number
* JEST_Object_Status -> WO_Status_ID
* JSTO_Status_Profile -> WO_Status_Profile
* TJ30T_Textual_Status -> WO_E_Status_Code
* TJ30T_Long_Textual_Status -> WO_E_Status_Message
* TJ02T_Textual_Status -> WO_I_Status_Code
* TJ02T_Long_Textual_Status -> WO_I_Status_Message

In [21]:
# Keep the final status columns, give them their WO_* names, and replace
# missing values with empty strings.
df_wo_status_merged = (
    df_wo_status_merged[
        [
            "JEST_Object_Number",
            "JEST_Object_Status",
            "JSTO_Status_Profile",
            "TJ30T_Textual_Status",
            "TJ30T_Long_Textual_Status",
            "TJ02T_Textual_Status",
            "TJ02T_Long_Textual_Status",
        ]
    ]
    .rename(
        columns={
            "JEST_Object_Number": "WO_Object_Number",
            "JEST_Object_Status": "WO_Status_ID",
            "JSTO_Status_Profile": "WO_Status_Profile",
            "TJ30T_Textual_Status": "WO_E_Status_Code",
            "TJ30T_Long_Textual_Status": "WO_E_Status_Message",
            "TJ02T_Textual_Status": "WO_I_Status_Code",
            "TJ02T_Long_Textual_Status": "WO_I_Status_Message",
        }
    )
    .replace(pd.NA, "")
)

### Save the file

In [22]:
# Persist the status table as ./data/mid_work_orders_status.csv
write_mid_level_csv_file(df=df_wo_status_merged, file_name="work_orders_status")

## 3. Functional location

Tables:
- ILOA_V1
- IFLOS

### Read and format data

#### Read ILOA data

In [24]:
# ILOA columns to load, mapped to the names used in this notebook
iloa_columns = {
    'ILOAN': 'ILOAN_Technical_Location_ID',
    'TPLNR': 'ILOAN_Functional_Location_ID',
}

# Read ILOA table
iloa_df = read_csv_file(file_name="ILOA_V1", columns_dict=iloa_columns)

print(iloa_df)

        ILOAN_Technical_Location_ID ILOAN_Functional_Location_ID
0                           2512161         ?0100000000000632852
1                           2512180         ?0100000000000634098
2                           1180579         ?0100000000000580069
3                           1180580         ?0100000000000580070
4                           1180581         ?0100000000000580071
...                             ...                          ...
481348                      2613005         ?0100000000000578966
481349                      2613008         ?0100000000000578941
481350                      2613007         ?0100000000000578941
481351                      2613114         ?0100000000000833446
481352                      2613113         ?0100000000000833446

[481353 rows x 2 columns]


#### Read IFLOS data

In [25]:
# IFLOS columns to load, mapped to the names used in this notebook
iflos_columns = {
    'TPLNR': 'IFLOS_Functional_Location_ID',
    'STRNO': 'IFLOS_Functional_Location_Name',
}

# Read IFLOS table
iflos_df = read_csv_file(file_name="IFLOS", columns_dict=iflos_columns)

print(iflos_df)

       IFLOS_Functional_Location_ID     IFLOS_Functional_Location_Name
0              ?0100000000000594008  HB /A /10/HBA-6-PTS-030-4103-FE-U
1              ?0100000000000594009  HB /A /10/HBA-6-PTS-030-4293-FC-T
2              ?0100000000000594011           HB /A /10/HBAA-XCV-30109
3              ?0100000000000594014           HB /A /10/HBAA-XCV-30158
4              ?0100000000000594016  HB /A /10/HBA-6-PTS-033-4007-FC-U
...                             ...                                ...
241260         ?0100000000000974190             VB /A /84/VBA-FL-18183
241261         ?0100000000000974122           HD /A /34/HDAY-PL-674302
241262         ?0100000000000974164     DF /E /84/ELECTRICAL-SAFETY-EQ
241263         ?0100000000000974165     DF /F /84/ELECTRICAL-SAFETY-EQ
241264         ?0100000000000974191             VB /A /84/VBA-FL-18182

[241265 rows x 2 columns]


### Merge the data - ILOA and IFLOS

In [26]:
# Attach the functional location name (IFLOS) to every ILOA row
df_functional_location_merged = pd.merge(
    iloa_df,
    iflos_df,
    how="left",
    left_on=["ILOAN_Functional_Location_ID"],
    right_on=["IFLOS_Functional_Location_ID"],
)

print(df_functional_location_merged)

        ILOAN_Technical_Location_ID ILOAN_Functional_Location_ID  \
0                           2512161         ?0100000000000632852   
1                           2512180         ?0100000000000634098   
2                           1180579         ?0100000000000580069   
3                           1180580         ?0100000000000580070   
4                           1180581         ?0100000000000580071   
...                             ...                          ...   
496309                      2613005         ?0100000000000578966   
496310                      2613008         ?0100000000000578941   
496311                      2613007         ?0100000000000578941   
496312                      2613114         ?0100000000000833446   
496313                      2613113         ?0100000000000833446   

       IFLOS_Functional_Location_ID      IFLOS_Functional_Location_Name  
0              ?0100000000000632852               HW /A /07/HWAA-P-5902  
1              ?0100000000000634098

### Filter and Rename columns to get the final Functional Location merge

Columns kept:
* ILOAN_Technical_Location_ID -> FLOC_Technical_ID
* ILOAN_Functional_Location_ID -> FLOC_Functional_ID
* IFLOS_Functional_Location_Name -> FLOC_Name

In [27]:
# Select the final columns, rename them to FLOC_*, de-duplicate and renumber
# the rows in one pass.
df_functional_location_merged = (
    df_functional_location_merged[
        [
            "ILOAN_Technical_Location_ID",
            "ILOAN_Functional_Location_ID",
            "IFLOS_Functional_Location_Name",
        ]
    ]
    .rename(
        columns={
            "ILOAN_Technical_Location_ID": "FLOC_Technical_ID",
            "ILOAN_Functional_Location_ID": "FLOC_Functional_ID",
            "IFLOS_Functional_Location_Name": "FLOC_Name",
        }
    )
    # when a (technical ID, functional ID) pair appears several times,
    # only its last row is kept
    .drop_duplicates(subset=['FLOC_Technical_ID', 'FLOC_Functional_ID'], keep='last')
    # fresh 0..n-1 index; the old one is discarded rather than kept as a column
    .reset_index(drop=True)
)

print(df_functional_location_merged)

        FLOC_Technical_ID    FLOC_Functional_ID  \
0                 2512161  ?0100000000000632852   
1                 2512180  ?0100000000000634098   
2                 1180579  ?0100000000000580069   
3                 1180580  ?0100000000000580070   
4                 1180581  ?0100000000000580071   
...                   ...                   ...   
481348            2613005  ?0100000000000578966   
481349            2613008  ?0100000000000578941   
481350            2613007  ?0100000000000578941   
481351            2613114  ?0100000000000833446   
481352            2613113  ?0100000000000833446   

                                 FLOC_Name  
0                    HW /A /07/HWAA-P-5902  
1            HW /A /10/HEA06-MANUAL-VALVES  
2                 HD /A /23/HDAD-BDV-33035  
3                HD /A /23/HDAD-ESDV-33032  
4       HD /A /28/HDA-0.5-DP-041-0002-AS-U  
...                                    ...  
481348         HD /A /37/LIFT-GAS-MANIFOLD  
481349              HD /A /

### Save the data

In [48]:
# Persist the functional location table as ./data/mid_functional_locations.csv
write_mid_level_csv_file(df=df_functional_location_merged, file_name="functional_locations")

## 4. Work Order tables

Tables:
* AFKO
* AUFK
* AFIH
* YTLXXPM_BICNOLTX_V1 (notifications)
* QMEL (notifications)

### Read and format data

#### Read AFIH data

In [28]:
# AFIH columns to load, mapped to the names used in this notebook
afih_columns = {
    'AUFNR': 'AFIH_WO_Number',
    'GEWRK': 'AFIH_WO_WBS_ID',
    'PRIOK': 'AFIH_WO_Priority',
    'REVNR': 'AFIH_WO_Revision',
    'INGPR': 'AFIH_WO_Planner_Group',
    'ILART': 'AFIH_WO_Activity_Type',
    'QMNUM': 'AFIH_WO_Notification_Number',
    'ILOAN': 'AFIH_WO_Functional_Location_Number',  # Main functional location
    'OBKNR': 'AFIH_WO_Plan_Maintenance_Number',  # Secondary functional location
}

# Read AFIH table
afih_df = read_csv_file(file_name="AFIH_V1", columns_dict=afih_columns)
#print(afih_df)

# Open question: should orders without a WBS / order number be kept?
# The disabled lines below would drop them if needed.
#afih_df = afih_df[afih_df['AFIH_WO_WBS_ID'].notna()]
#afih_df = afih_df[afih_df['AFIH_WO_Number'].notna()]

print(afih_df)

        AFIH_WO_Number AFIH_WO_Priority  AFIH_WO_Functional_Location_Number  \
0           2300009145                3                             2558055   
1           2400352096                A                             2610303   
2           2100114335                3                             2571638   
3           2100114433                3                             2604131   
4           2800031347                2                             2529524   
...                ...              ...                                 ...   
116889      2100087418                4                             2273323   
116890      2100086200                2                             2260911   
116891      2100086201                2                             2260913   
116892      2800024552                3                             2260977   
116893      2900000760                3                             2261013   

       AFIH_WO_Planner_Group  AFIH_WO_WBS_ID  AFIH_

In [None]:
# Debug check: show the AFIH rows linked to notification 1100025915
print(afih_df[afih_df["AFIH_WO_Notification_Number"] == 1100025915])

#### Read AFKO data

In [29]:
# Needed columns: source column in the AFKO CSV export -> column name used in this notebook
afko_columns = {
    'AUFNR': 'AFKO_WO_Number',
    'AUFPL': 'AFKO_WO_Operation_ID',
    'GSTRS': 'AFKO_WO_Scheduled_Start_Date'
}

# Read AFKO table (only the columns listed above, already renamed)
afko_df = read_csv_file("AFKO_V1", afko_columns)
print(afko_df)


       AFKO_WO_Number  AFKO_WO_Scheduled_Start_Date  AFKO_WO_Operation_ID
0          2300008768                      20240501            1000658262
1          2300008769                      20240501            1000658263
2          2300008770                      20240501            1000658264
3          2300008771                      20240501            1000658265
4          2300008772                      20240501            1000658266
...               ...                           ...                   ...
568086     2400266964                      20230821            1000519723
568087     2400266966                      20230807            1000519725
568088     2400266967                      20230630            1000519726
568089     2400266971                      20230816            1000519730
568090     2400266972                      20230822            1000519731

[568091 rows x 3 columns]


  df =  pd.read_csv('./data/E1P_010_' + file_name + '.csv', delimiter = ';', usecols = columns_dict.keys())


#### Read AUFK data

In [30]:
# Needed columns: source column in the AUFK CSV export -> column name used in this notebook
aufk_columns = {
    'AUFNR': 'AUFK_WO_Number', 
    'KTEXT': 'AUFK_WO_Header_Description', 
    'AUART': 'AUFK_WO_Order_Type', 
    'PHAS0': 'AUFK_WO_Phase_Order_Created', 
    'PHAS1': 'AUFK_WO_Phase_Order_Released', 
    'ZZ_OLAFD': 'AUFK_WO_Original_Deadline', 
    'OBJNR': 'AUFK_WO_Object_Number'
}

# Read AUFK table (only the columns listed above, already renamed)
# NOTE(review): the printed output of this cell shows AUFK_WO_Number both zero-padded
# (002900001591) and unpadded (2300005429), which suggests the column is read with mixed
# types - confirm the dtype before joining on it.
aufk_df = read_csv_file("AUFK_V1", aufk_columns)
print(aufk_df)

       AUFK_WO_Number AUFK_WO_Order_Type  \
0        002900001591                WSO   
1        002900001587                WSO   
2        002900001515                WSO   
3        002900001542                WSO   
4        002900001585                WSO   
...               ...                ...   
117452     2300005429                WRO   
117453     2300005420                WRO   
117454     2300005425                WRO   
117455     2300005428                WRO   
117456     2900000516                WSO   

                     AUFK_WO_Header_Description AUFK_WO_Phase_Order_Created  \
0                       Rolf Scheduled WHM 2024                         NaN   
1                 DFB01,Plug retrieval,WL,19862                         NaN   
2                        Scheduled WHM SAN 2024                         NaN   
3                         Scheduled WHM 2024 DD                           X   
4                        Scheduled WHM KRA 2024                         NaN 

  df =  pd.read_csv('./data/E1P_010_' + file_name + '.csv', delimiter = ';', usecols = columns_dict.keys())


#### Read YTLXXPM_BICNOLTX_V1 table

Keep only notifications related to a maintenance work order

In [31]:
# Needed columns: source column in the notification long-text CSV export -> column name used in this notebook
notif_columns = {
    'QMNUM': 'NOTIF_Number',
    'YROWNUM': 'NOTIF_Row_Number',
    'LONGTEXT': 'NOTIF_Row_Text'
}

# Read notifications table
notif_df = read_csv_file("YTLXXPM_BICNOLTX_V1", notif_columns)

# The clean-up steps are chained and each returns a new frame. The previous version called
# drop_duplicates(inplace = True) on a filtered slice, which can trigger pandas'
# SettingWithCopyWarning.
notif_df = (
    # Keep only notifications linked to a work order in AFIH
    notif_df[notif_df["NOTIF_Number"].isin(afih_df["AFIH_WO_Notification_Number"].unique())]
    # Drop duplicated rows (same notification number and row number)
    .drop_duplicates(subset = ["NOTIF_Number", "NOTIF_Row_Number"])
    # Sort by notification number and row number so the text rows are in reading order
    .sort_values(["NOTIF_Number", "NOTIF_Row_Number"])
)

print(notif_df)


          NOTIF_Number  NOTIF_Row_Number  \
1123541     1100025915                 1   
1123542     1100025915                 2   
1123543     1100025915                 3   
1123544     1100025915                 4   
1123545     1100025915                 5   
...                ...               ...   
11959814    1700005861                 1   
11959815    1700005861                 2   
11959816    1700005861                 3   
11959817    1700005863                 1   
11959818    1700005863                 2   

                                             NOTIF_Row_Text  
1123541     .05.2013 05:55:41 Mikael Borre Thomsen (MBT009)  
1123542   oolstykke samt checkventil nedstrøms GC-HCV-09...  
1123543   O ifbm kommende frysejob. Spoolstykke (mat: 20...  
1123544   bpakkelager og ny checkventil bestilles på den...  
1123545                                             bpakke.  
...                                                     ...  
11959814  30.07.2024 12:55:07 CET Ren

In [32]:
# Group by with string concatenation - each row of the notification text is contained in a row
# of the file, all linked by the notification number and the row number.
# Missing text rows are replaced by an empty string before the cast: a bare astype(str) turns
# NaN into the literal text "nan", which would then end up inside the joined notification text.
# assign() returns a new frame, so re-running the cell gives the same result.
notif_df = notif_df.assign(NOTIF_Row_Text = notif_df["NOTIF_Row_Text"].fillna("").astype(str))

# One entry per notification number: its text rows joined with newlines (a Series indexed by NOTIF_Number)
notif_processed_df = notif_df[["NOTIF_Number", "NOTIF_Row_Text"]].groupby(["NOTIF_Number"])["NOTIF_Row_Text"].agg('\n'.join)

print(notif_processed_df)

NOTIF_Number
1100025915    .05.2013 05:55:41 Mikael Borre Thomsen (MBT009...
1100025916    Work to be done\n-06-2009 hras\nr kom pludseli...
1100025917    .02.2018 07:43:39 UTC Morten Jensen (MJE099) P...
1100025918    .12.2011 18:41:26 Hans-Jørgen Andreasen (HJA03...
1100025919    .03.2012 10:35:35 Frank Lemmiche (FLE008)\n co...
                                    ...                        
1700005858    24.07.2024 14:42:29 CET Peter THIELLESEN (J051...
1700005859    30.07.2024 12:49:42 CET Rene GRINDERSLEV (L051...
1700005860    30.07.2024 12:52:57 CET Rene GRINDERSLEV (L051...
1700005861    30.07.2024 12:55:07 CET Rene GRINDERSLEV (L051...
1700005863    31.07.2024 14:36:42 CET Peter THIELLESEN (J051...
Name: NOTIF_Row_Text, Length: 51167, dtype: object


In [None]:
# Debug check: show the concatenated text of notification 1100025915
print(notif_processed_df[1100025915])

In [33]:
# Save the concatenated notification texts as ./data/mid_notif_processed.csv
write_mid_level_csv_file(notif_processed_df, "notif_processed")

#### Read QMEL data

Keep only notifications related to a maintenance work order

In [34]:
# Needed columns: source column in the QMEL CSV export -> column name used in this notebook
qmel_columns = {
    'QMNUM': 'QMEL_Notification_Number',
    'QMDAT': 'QMEL_Malfunction_Started',
    'ERDAT': 'QMEL_Notification_Created'
}

# Read the QMEL table
qmel_df = read_csv_file("QMEL_V1", qmel_columns)

# Keep only the notifications whose number is referenced by a work order in AFIH
qmel_df = qmel_df.loc[
    lambda frame: frame["QMEL_Notification_Number"].isin(
        afih_df["AFIH_WO_Notification_Number"].unique()
    )
]

print(qmel_df)

        QMEL_Notification_Number  QMEL_Notification_Created  \
0                     1400352484                   20240426   
1                     1100147446                   20240426   
2                     1100147453                   20240426   
3                     1100147461                   20240426   
4                     1100147462                   20240426   
...                          ...                        ...   
103221                1400284084                   20230501   
103222                1400284090                   20230501   
103223                1100118367                   20230501   
103224                1100117478                   20230419   
103226                1100117500                   20230420   

        QMEL_Malfunction_Started  
0                       20240426  
1                       20240426  
2                       20240426  
3                       20240426  
4                       20240426  
...                          ... 

### Merge the data

#### Merge AFIH and AFKO

In [35]:
# Merge AFIH and AFKO: left join on the work order number, so every AFIH row is kept
df_work_order_merged = pd.merge(
    afih_df,
    afko_df,
    how = 'left',
    left_on = 'AFIH_WO_Number',
    right_on = 'AFKO_WO_Number',
)

print(df_work_order_merged)

       AFIH_WO_Number AFIH_WO_Priority  AFIH_WO_Functional_Location_Number  \
0          2300009145                3                             2558055   
1          2400352096                A                             2610303   
2          2100114335                3                             2571638   
3          2100114433                3                             2604131   
4          2800031347                2                             2529524   
...               ...              ...                                 ...   
116889     2100087418                4                             2273323   
116890     2100086200                2                             2260911   
116891     2100086201                2                             2260913   
116892     2800024552                3                             2260977   
116893     2900000760                3                             2261013   

       AFIH_WO_Planner_Group  AFIH_WO_WBS_ID  AFIH_WO_Plan_Main

#### Merge AUFK and df_work_order_merged

In [36]:
# Merge AUFK into the AFIH/AFKO merge result (left join on the work order number).
# The AUFK output above shows AUFK_WO_Number as a mix of zero-padded strings ("002900001591")
# and plain integers, so the zero-padded rows can never match the integer AFIH_WO_Number.
# The key is therefore normalised to int64 on a copy that is used only for this join; aufk_df
# itself is left untouched. Order numbers that are not numeric cannot match an AFIH work order
# and are left out of that copy.
aufk_wo_number = pd.to_numeric(aufk_df["AUFK_WO_Number"], errors = "coerce").dropna().astype("int64")
aufk_join_df = aufk_df.loc[aufk_wo_number.index].assign(AUFK_WO_Number = aufk_wo_number)

df_work_order_merged = df_work_order_merged.merge(aufk_join_df, how = 'left', left_on = ['AFIH_WO_Number'], right_on = ['AUFK_WO_Number'])

print(df_work_order_merged)

       AFIH_WO_Number AFIH_WO_Priority  AFIH_WO_Functional_Location_Number  \
0          2300009145                3                             2558055   
1          2400352096                A                             2610303   
2          2100114335                3                             2571638   
3          2100114433                3                             2604131   
4          2800031347                2                             2529524   
...               ...              ...                                 ...   
116889     2100087418                4                             2273323   
116890     2100086200                2                             2260911   
116891     2100086201                2                             2260913   
116892     2800024552                3                             2260977   
116893     2900000760                3                             2261013   

       AFIH_WO_Planner_Group  AFIH_WO_WBS_ID  AFIH_WO_Plan_Main

In [None]:
# Debug check: list the columns available after the AFIH / AFKO / AUFK merges
print(df_work_order_merged.columns)

#### Merge df_work_order_merged and notifications

In [37]:
# Attach the concatenated notification text to each work order (left join: work orders
# without a matching notification are kept). NOTIF_Number is the index of notif_processed_df.
df_work_order_merged = pd.merge(
    df_work_order_merged,
    notif_processed_df,
    how = 'left',
    left_on = 'AFIH_WO_Notification_Number',
    right_on = 'NOTIF_Number',
)

print(df_work_order_merged)

       AFIH_WO_Number AFIH_WO_Priority  AFIH_WO_Functional_Location_Number  \
0          2300009145                3                             2558055   
1          2400352096                A                             2610303   
2          2100114335                3                             2571638   
3          2100114433                3                             2604131   
4          2800031347                2                             2529524   
...               ...              ...                                 ...   
116889     2100087418                4                             2273323   
116890     2100086200                2                             2260911   
116891     2100086201                2                             2260913   
116892     2800024552                3                             2260977   
116893     2900000760                3                             2261013   

       AFIH_WO_Planner_Group  AFIH_WO_WBS_ID  AFIH_WO_Plan_Main

#### Merge df_work_order_merged and QMEL

In [38]:
# Attach the QMEL notification dates to each work order (left join on the notification number)
df_work_order_merged = pd.merge(
    df_work_order_merged,
    qmel_df,
    how = 'left',
    left_on = 'AFIH_WO_Notification_Number',
    right_on = 'QMEL_Notification_Number',
)

print(df_work_order_merged)

       AFIH_WO_Number AFIH_WO_Priority  AFIH_WO_Functional_Location_Number  \
0          2300009145                3                             2558055   
1          2400352096                A                             2610303   
2          2100114335                3                             2571638   
3          2100114433                3                             2604131   
4          2800031347                2                             2529524   
...               ...              ...                                 ...   
116889     2100087418                4                             2273323   
116890     2100086200                2                             2260911   
116891     2100086201                2                             2260913   
116892     2800024552                3                             2260977   
116893     2900000760                3                             2261013   

       AFIH_WO_Planner_Group  AFIH_WO_WBS_ID  AFIH_WO_Plan_Main

#### Filter and Rename columns to get the final Work Orders merge

Columns kept:
* AFIH_WO_Number -> WO_Number
* AFIH_WO_Priority -> WO_Priority
* AFIH_WO_Functional_Location_Number -> WO_Functional_Location_Number
* AFIH_WO_Plan_Maintenance_Number -> WO_Plan_Maintenance_Number
* AFIH_WO_Planner_Group -> WO_Planner_Group
* AFIH_WO_WBS_ID -> WO_WBS_ID
* AFIH_WO_Revision -> WO_Revision
* AFIH_WO_Activity_Type -> WO_Activity_Type
* AFKO_WO_Scheduled_Start_Date -> WO_Scheduled_Start_Date
* AFKO_WO_Operation_ID -> WO_Operation_ID
* AUFK_WO_Order_Type -> WO_Order_Type
* AUFK_WO_Header_Description -> WO_Header_Description
* AUFK_WO_Phase_Order_Created -> WO_Phase_Order_Created 
* AUFK_WO_Phase_Order_Released -> WO_Phase_Order_Released
* AUFK_WO_Object_Number -> WO_Status_ID
* AUFK_WO_Original_Deadline -> WO_Original_Deadline
* AFIH_WO_Notification_Number -> WO_Notification_Number
* NOTIF_Row_Text -> WO_Notification
* QMEL_Malfunction_Started -> WO_Notification_Malfunction_Started
* QMEL_Notification_Created -> WO_Notification_Created

In [39]:
# Merged column -> final column name.
# This mapping is the single source of truth: its keys are the columns that are kept and its
# order is the column order of the final frame.
work_order_final_columns = {
    "AFIH_WO_Number": "WO_Number",
    "AFIH_WO_Priority": "WO_Priority",
    "AFIH_WO_Functional_Location_Number": "WO_Functional_Location_Number",
    "AFIH_WO_Plan_Maintenance_Number": "WO_Plan_Maintenance_Number",
    "AFIH_WO_Planner_Group": "WO_Planner_Group",
    "AFIH_WO_WBS_ID": "WO_WBS_ID",
    "AFIH_WO_Revision": "WO_Revision",
    "AFIH_WO_Activity_Type": "WO_Activity_Type",
    "AFKO_WO_Scheduled_Start_Date": "WO_Scheduled_Start_Date",
    "AFKO_WO_Operation_ID": "WO_Operation_ID",
    "AUFK_WO_Order_Type": "WO_Order_Type",
    "AUFK_WO_Header_Description": "WO_Header_Description",
    "AUFK_WO_Phase_Order_Created": "WO_Phase_Order_Created",
    "AUFK_WO_Phase_Order_Released": "WO_Phase_Order_Released",
    "AUFK_WO_Object_Number": "WO_Status_ID",
    "AUFK_WO_Original_Deadline": "WO_Original_Deadline",
    "AFIH_WO_Notification_Number": "WO_Notification_Number",
    "QMEL_Malfunction_Started": "WO_Notification_Malfunction_Started",
    "QMEL_Notification_Created": "WO_Notification_Created",
    "NOTIF_Row_Text": "WO_Notification"
}

# Drop the other columns and rename in one chained expression. rename() returns a new frame;
# the previous rename(inplace = True) ran on a column slice, which can trigger pandas'
# SettingWithCopyWarning.
df_work_order_merged = df_work_order_merged[list(work_order_final_columns)].rename(columns = work_order_final_columns)

print(df_work_order_merged)

         WO_Number WO_Priority  WO_Functional_Location_Number  \
0       2300009145           3                        2558055   
1       2400352096           A                        2610303   
2       2100114335           3                        2571638   
3       2100114433           3                        2604131   
4       2800031347           2                        2529524   
...            ...         ...                            ...   
116889  2100087418           4                        2273323   
116890  2100086200           2                        2260911   
116891  2100086201           2                        2260913   
116892  2800024552           3                        2260977   
116893  2900000760           3                        2261013   

        WO_Plan_Maintenance_Number WO_Planner_Group  WO_WBS_ID WO_Revision  \
0                           572308              MIN   10001172         NaN   
1                           580945              INP   10001416 

### Save data into a CSV file

In [40]:
# Persist the final work order table as ./data/mid_work_orders.csv
write_mid_level_csv_file(df_work_order_merged, "work_orders")

## 5. Secondary functional locations

The secondary functional locations are processed only after the work orders, to avoid loading the same table multiple times.

Table:
* OBJK

### Read OBJK data

In [41]:
# Needed columns: source column in the OBJK CSV export -> column name used in this notebook
objk_columns = {
    'OBKNR': 'PM_Object_Number',
    'SORTF': 'PM_Object_Sorting',
    'OBJVW': 'PM_Object_Usage',
    'ILOAN': 'PM_Functional_Location'
}

# Read the OBJK table, drop the rows without a functional location,
# then renumber the remaining rows from 0
objk_df = (
    read_csv_file("OBJK", objk_columns)
    .dropna(subset = ["PM_Functional_Location"])
    .reset_index(drop = True)
)

print(objk_df)

  df =  pd.read_csv('./data/E1P_010_' + file_name + '.csv', delimiter = ';', usecols = columns_dict.keys())


         PM_Object_Number  PM_Functional_Location PM_Object_Sorting  \
0                  572073               2127560.0              0020   
1                  572084               2126675.0              0010   
2                  572084               2127974.0              0020   
3                  572084               2125953.0              0030   
4                  572084               2125961.0              0040   
...                   ...                     ...               ...   
1425553            598358               2608974.0               NaN   
1425554            598367               2608974.0               NaN   
1425555            598271               2609038.0               NaN   
1425556            598377               2609038.0               NaN   
1425557            598270               2609038.0               NaN   

        PM_Object_Usage  
0                     A  
1                     A  
2                     A  
3                     A  
4                

### Save data

In [42]:
# Persist the secondary functional locations as ./data/mid_secondary_locations.csv
write_mid_level_csv_file(objk_df, 'secondary_locations')

In [None]:
# Debug check: show the OBJK rows of object list 572084
print(objk_df[objk_df["PM_Object_Number"] == 572084])

## 6. Operations Tables

Tables:
* AFVC
* AFVV

### Read data

#### Read AFVC data

In [43]:
# Needed columns: source column in the AFVC CSV export -> column name used in this notebook
afvc_columns = {
    "ARBID": 'AFVC_WBS_ID',
    "AUFPL": 'AFVC_Operation_Routing_Number',
    "APLZL": 'AFVC_Operation_Counter',
    "ANZZL": 'AFVC_Workers_Number',
    "LTXA1": 'AFVC_Operation_Short_Text',
}

# Read the AFVC table (only the columns listed above, already renamed)
afvc_df = read_csv_file("AFVC_V1", afvc_columns)

In [None]:
# Debug check: preview the AFVC data
print(afvc_df)

#### Read AFVV data

In [44]:
# Needed columns: source column in the AFVV CSV export -> column name used in this notebook
afvv_columns = {
    "AUFPL": 'AFVV_Operation_Routing_Number',
    "APLZL": 'AFVV_Operation_Counter',
    "ARBEI": 'AFVV_Planned_Work',
    "ISMNW": 'AFVV_Actual_Work',
    "FSSBD": 'AFVV_Work_Start_Date',
    "FSSAD": 'AFVV_Work_End_Date'
}

# Read the AFVV table (only the columns listed above, already renamed)
afvv_df = read_csv_file("AFVV_V1", afvv_columns)

In [None]:
# Debug check: preview the AFVV data
print(afvv_df)

### Merge AFVC and AFVV

In [45]:
# Left-join AFVV onto AFVC: an operation is identified by the pair (routing number, counter)
df_operations_merged = pd.merge(
    afvc_df,
    afvv_df,
    how = 'left',
    left_on = ["AFVC_Operation_Routing_Number", "AFVC_Operation_Counter"],
    right_on = ["AFVV_Operation_Routing_Number", "AFVV_Operation_Counter"],
)

print(df_operations_merged)

        AFVC_Operation_Routing_Number  AFVC_Operation_Counter  AFVC_WBS_ID  \
0                          1000589075                       3     10001079   
1                          1000589075                       4     10001080   
2                          1000589075                       5     10001081   
3                          1000589075                       6     10001080   
4                          1000589075                       7     10001079   
...                               ...                     ...          ...   
502504                     1000515360                       1     10001080   
502505                     1000515357                       1     10001440   
502506                     1000515358                       1     10001168   
502507                     1000515359                       1            0   
502508                     1000197463                      13     10001566   

                   AFVC_Operation_Short_Text  AFVC_Workers_Numb

In [None]:
# Debug check: show the merged operations of routing number 1000126414
print(df_operations_merged[df_operations_merged["AFVC_Operation_Routing_Number"] == 1000126414])

### Filter and Rename columns to get the final Operations merge

Columns kept:
* AFVC_Operation_Routing_Number -> OPR_Routing_Number
* AFVC_Operation_Counter -> OPR_Counter
* AFVC_WBS_ID -> OPR_WBS_ID
* AFVC_Workers_Number -> OPR_Workers_Numbers
* AFVV_Planned_Work -> OPR_Planned_Work
* AFVV_Actual_Work -> OPR_Actual_Work
* AFVV_Work_Start_Date -> OPR_Start_Date
* AFVV_Work_End_Date -> OPR_End_Date
* AFVC_Operation_Short_Text -> OPR_Description

In [46]:
# Merged column -> final column name.
# This mapping is the single source of truth: its keys are the columns that are kept and its
# order is the column order of the final frame.
operations_final_columns = {
    "AFVC_Operation_Routing_Number": "OPR_Routing_Number",
    "AFVC_Operation_Counter": "OPR_Counter",
    "AFVC_WBS_ID": "OPR_WBS_ID",
    "AFVC_Workers_Number": "OPR_Workers_Numbers",
    "AFVV_Planned_Work": "OPR_Planned_Work",
    "AFVV_Actual_Work": "OPR_Actual_Work",
    "AFVV_Work_Start_Date": "OPR_Start_Date",
    "AFVV_Work_End_Date": "OPR_End_Date",
    "AFVC_Operation_Short_Text": "OPR_Description"
}

# Drop the other columns and rename in one chained expression. rename() returns a new frame;
# the previous rename(inplace = True) ran on a column slice, which can trigger pandas'
# SettingWithCopyWarning.
df_operations_merged = df_operations_merged[list(operations_final_columns)].rename(columns = operations_final_columns)


print(df_operations_merged)

        OPR_Routing_Number  OPR_Counter  OPR_WBS_ID  OPR_Workers_Numbers  \
0               1000589075            3    10001079                    1   
1               1000589075            4    10001080                    2   
2               1000589075            5    10001081                    2   
3               1000589075            6    10001080                    2   
4               1000589075            7    10001079                    1   
...                    ...          ...         ...                  ...   
502504          1000515360            1    10001080                    0   
502505          1000515357            1    10001440                    0   
502506          1000515358            1    10001168                    0   
502507          1000515359            1           0                    0   
502508          1000197463           13    10001566                    0   

        OPR_Planned_Work  OPR_Actual_Work  OPR_Start_Date  OPR_End_Date  \
0           

In [50]:
# Persist the final operations table as ./data/mid_work_operations.csv
write_mid_level_csv_file(df_operations_merged, "work_operations")

## Functions to get all data related to one Work Order

### Read mid csv files

Simulates an API that gets its data by reading the intermediate (`mid_`) CSV files.

In [51]:
def read_mid_csv_file(file_name):
    """Load ``./data/mid_<file_name>.csv`` and echo it to stdout.

    A banner containing ``file_name`` is printed first, then the loaded frame.

    Args:
        file_name: name of the intermediate file, without the ``mid_`` prefix
            and without the ``.csv`` extension.

    Returns:
        The DataFrame read from the file; the first CSV column is used as index.
    """
    # Banner: three separator lines, the file name, then a blank gap
    for _ in range(3):
        print("-----------------------------------")
    print("------ " + file_name + " ----------")
    print("\n")

    csv_path = './data/mid_' + file_name + '.csv'
    frame = pd.read_csv(csv_path, delimiter = ',', index_col=0)
    print(frame)
    return frame

# Locations
functional_locations_df = read_mid_csv_file("functional_locations")
# PM_Functional_Location is read back as a float (e.g. 2127560.0); cast it to int so that it
# has the same form as FLOC_Technical_ID, which it is joined against later
plan_maintenance_location_df = read_mid_csv_file("secondary_locations").astype({"PM_Functional_Location": int})

# Work orders
work_orders_df = read_mid_csv_file("work_orders")

# Operations
operations_df = read_mid_csv_file("work_operations")

# WBS (work centers)
wbs_df = read_mid_csv_file("work_center")

# Work order status
status_df = read_mid_csv_file("work_orders_status")

-----------------------------------
-----------------------------------
-----------------------------------
------ functional_locations ----------


        FLOC_Technical_ID    FLOC_Functional_ID  \
0                 2512161  ?0100000000000632852   
1                 2512180  ?0100000000000634098   
2                 1180579  ?0100000000000580069   
3                 1180580  ?0100000000000580070   
4                 1180581  ?0100000000000580071   
...                   ...                   ...   
481348            2613005  ?0100000000000578966   
481349            2613008  ?0100000000000578941   
481350            2613007  ?0100000000000578941   
481351            2613114  ?0100000000000833446   
481352            2613113  ?0100000000000833446   

                                 FLOC_Name  
0                    HW /A /07/HWAA-P-5902  
1            HW /A /10/HEA06-MANUAL-VALVES  
2                 HD /A /23/HDAD-BDV-33035  
3                HD /A /23/HDAD-ESDV-33032  
4       HD /A

  df = pd.read_csv('./data/mid_' + file_name + '.csv', delimiter = ',', index_col=0)


         PM_Object_Number  PM_Functional_Location PM_Object_Sorting  \
0                  572073               2127560.0              0020   
1                  572084               2126675.0              0010   
2                  572084               2127974.0              0020   
3                  572084               2125953.0              0030   
4                  572084               2125961.0              0040   
...                   ...                     ...               ...   
1425553            598358               2608974.0               NaN   
1425554            598367               2608974.0               NaN   
1425555            598271               2609038.0               NaN   
1425556            598377               2609038.0               NaN   
1425557            598270               2609038.0               NaN   

        PM_Object_Usage  
0                     A  
1                     A  
2                     A  
3                     A  
4                

  df = pd.read_csv('./data/mid_' + file_name + '.csv', delimiter = ',', index_col=0)


         WO_Number WO_Priority  WO_Functional_Location_Number  \
0       2300009145           3                        2558055   
1       2400352096           A                        2610303   
2       2100114335           3                        2571638   
3       2100114433           3                        2604131   
4       2800031347           2                        2529524   
...            ...         ...                            ...   
116889  2100087418           4                        2273323   
116890  2100086200           2                        2260911   
116891  2100086201           2                        2260913   
116892  2800024552           3                        2260977   
116893  2900000760           3                        2261013   

        WO_Plan_Maintenance_Number WO_Planner_Group  WO_WBS_ID WO_Revision  \
0                           572308              MIN   10001172         NaN   
1                           580945              INP   10001416 

In [52]:
def get_int_data(data):
    """Render a possibly-missing value as text, without a trailing ``.0``.

    Whole numbers that were loaded as floats (e.g. ``20240501.0``) are shown
    as ``"20240501"``. Only a ``.0`` at the very end is removed: the previous
    ``str.replace(".0", "")`` removed it anywhere in the text and turned
    ``10.05`` into ``"105"``.

    Args:
        data: any scalar value (number, string, ``None``, ``NaN``).

    Returns:
        ``"-"`` when ``data`` is null according to ``pd.isnull``, otherwise
        ``str(data)`` with one trailing ``".0"`` stripped.
    """
    if pd.isnull(data):
        return "-"

    text = str(data)
    return text[:-2] if text.endswith(".0") else text

def get_wbs_data(wbs_id):
    """Describe one work center (WBS) taken from the module-level ``wbs_df``.

    Args:
        wbs_id: value to look for in the ``WBS_ID`` column.

    Returns:
        A dict with the keys ``ID``, ``Name``, ``Plant`` and ``Full_Name``
        built from the first matching row, or the string ``"None"`` (not the
        ``None`` object) when no row matches.
    """
    matches = wbs_df.loc[wbs_df["WBS_ID"] == wbs_id]
    if matches.empty:
        return "None"

    # Only the first match is used, even if several rows share the ID
    first_match = matches.iloc[0]
    return {
        "ID": wbs_id,
        "Name": first_match["WBS_Name"],
        "Plant": first_match["WBS_Plant"],
        "Full_Name": first_match["WBS_Full_name"],
    }

def get_operations_data(wo_operation_id):
    """List the operations of one work order, ordered by operation counter.

    Args:
        wo_operation_id: routing number looked up in the
            ``OPR_Routing_Number`` column of the module-level ``operations_df``.

    Returns:
        A list with one dict per operation (empty when nothing matches). The
        dates are formatted with ``get_int_data`` and the work center is
        resolved with ``get_wbs_data``.
    """
    routing_mask = operations_df["OPR_Routing_Number"] == wo_operation_id
    ordered_operations = operations_df[routing_mask].sort_values(["OPR_Counter"])

    # One JSON-like record per operation row
    return [
        {
            "Counter": operation["OPR_Counter"],
            "Workers_Numbers": operation["OPR_Workers_Numbers"],
            "Planned_Work": operation["OPR_Planned_Work"],
            "Actual_Work": operation["OPR_Actual_Work"],
            "Work_Start_Date": get_int_data(operation["OPR_Start_Date"]),
            "Work_End_Date": get_int_data(operation["OPR_End_Date"]),
            "Operation_Short_Text": operation["OPR_Description"],
            "Work_Center": get_wbs_data(operation["OPR_WBS_ID"]),
        }
        for _, operation in ordered_operations.iterrows()
    ]

def get_location_data(location_number):
    """Describe one functional location from the module-level ``functional_locations_df``.

    Args:
        location_number: value to look for in the ``FLOC_Technical_ID`` column.

    Returns:
        A dict with the keys ``ID`` and ``Name``, or the string ``"None"``
        (not the ``None`` object) when no row matches.

    Raises:
        Exception: when more than one row matches ``location_number``.
    """
    candidates = functional_locations_df.loc[
        functional_locations_df["FLOC_Technical_ID"] == location_number
    ]
    match_count = len(candidates)

    # A technical ID is expected to identify at most one location
    if match_count > 1:
        raise Exception("Too much locations have been found for this id ", location_number)
    if match_count == 0:
        return "None"

    location = candidates.iloc[0]
    return {"ID": location["FLOC_Technical_ID"], "Name": location["FLOC_Name"]}

def get_plan_maintenance(plan_number):
    """List the secondary functional locations attached to one object list.

    The rows of the module-level ``plan_maintenance_location_df`` whose
    ``PM_Object_Number`` equals ``plan_number`` are left-joined with
    ``functional_locations_df`` and ordered by ``PM_Object_Sorting``.

    Args:
        plan_number: object list number to look up.

    Returns:
        A list of dicts with the keys ``ID``, ``Counter`` and ``Name``
        (empty when nothing matches).
    """
    plan_rows = plan_maintenance_location_df[
        plan_maintenance_location_df["PM_Object_Number"] == plan_number
    ]
    # Left join: a row whose location is unknown is kept, with missing FLOC_* values
    located_rows = plan_rows.merge(
        functional_locations_df,
        how = "left",
        left_on=["PM_Functional_Location"],
        right_on = ["FLOC_Technical_ID"],
    )
    located_rows = located_rows.sort_values(["PM_Object_Sorting"])

    return [
        {
            "ID": location["FLOC_Technical_ID"],
            "Counter": location["PM_Object_Sorting"],
            "Name": location["FLOC_Name"],
        }
        for _, location in located_rows.iterrows()
    ]

def get_wo_status_data(wo_status_number):
    """Collect the statuses of one work order, split by status ID prefix.

    Args:
        wo_status_number: value looked up in the ``WO_Object_Number`` column
            of the module-level ``status_df``.

    Returns:
        ``{"E_Status": [...], "I_Status": [...]}``. Each entry is a dict with
        the keys ``ID``, ``Profile``, ``Code`` and ``Message``. Statuses whose
        ID starts with neither ``E`` nor ``I`` are ignored.
    """
    related_statuses = status_df[status_df["WO_Object_Number"] == wo_status_number]

    e_status = []
    i_status = []
    # (ID prefix, target list, code column, message column); the first matching prefix wins
    prefix_table = (
        ("E", e_status, "WO_E_Status_Code", "WO_E_Status_Message"),
        ("I", i_status, "WO_I_Status_Code", "WO_I_Status_Message"),
    )

    for _, status in related_statuses.iterrows():
        for prefix, target, code_column, message_column in prefix_table:
            if status["WO_Status_ID"].startswith(prefix):
                target.append({
                    "ID": status["WO_Status_ID"],
                    "Profile": status["WO_Status_Profile"],
                    "Code": status[code_column],
                    "Message": status[message_column],
                })
                break

    return {"E_Status": e_status, "I_Status": i_status}

def get_work_order_data(work_order_id):
    """Assemble the nested record describing a single work order.

    The first row of ``work_orders_df`` whose ``WO_Number`` equals
    ``work_order_id`` supplies the scalar fields. Operations, statuses, the
    functional location, the maintenance-plan locations and the work center
    are resolved through the sibling ``get_*`` helpers.

    Args:
        work_order_id: Value compared against ``work_orders_df["WO_Number"]``.

    Returns:
        dict: The work order fields plus the nested ``Operations``, ``Status``,
        ``Location``, ``Plan_Maintenance`` and ``Work_Center`` entries.

    Raises:
        LookupError: If no row matches ``work_order_id``. ``LookupError`` is a
            subclass of ``Exception``, so handlers catching ``Exception``
            keep working.
    """
    # Extract from the work order df the concerned wo
    wo_tmp_df = work_orders_df[work_orders_df["WO_Number"] == work_order_id]
    if len(wo_tmp_df) == 0:
        # Build a single message string: passing the id as a second positional
        # argument makes the exception render as a tuple instead of a sentence.
        raise LookupError(
            "Impossible to pursue, no work order found for this id {}".format(work_order_id)
        )

    # Only the first matching row is used
    wo_data = wo_tmp_df.iloc[0]

    return {
        "ID": wo_data["WO_Number"],
        "Priority": wo_data["WO_Priority"],
        "Planner_Group": wo_data["WO_Planner_Group"],
        "Revision": wo_data["WO_Revision"],
        "Activity_Type": wo_data["WO_Activity_Type"],
        "Order_Type": wo_data["WO_Order_Type"],
        "Scheduled_Start_Date": get_int_data(wo_data["WO_Scheduled_Start_Date"]),
        "Phase_Order_Created": get_int_data(wo_data["WO_Phase_Order_Created"]),
        "Phase_Order_Released": get_int_data(wo_data["WO_Phase_Order_Released"]),
        "Original_Deadline": get_int_data(wo_data["WO_Original_Deadline"]),
        "Header_Description": wo_data["WO_Header_Description"],
        "Operations_ID": get_int_data(wo_data["WO_Operation_ID"]),
        "Operations": get_operations_data(wo_data["WO_Operation_ID"]),
        "Status": get_wo_status_data(wo_data["WO_Status_ID"]),
        "Location": get_location_data(wo_data["WO_Functional_Location_Number"]),
        # Plan maintenance - secondary locations
        "Plan_Maintenance": get_plan_maintenance(wo_data["WO_Plan_Maintenance_Number"]),
        "Work_Center": get_wbs_data(wo_data["WO_WBS_ID"]),
        "Notification_Number": get_int_data(wo_data["WO_Notification_Number"]),
        "Notification_Created": get_int_data(wo_data["WO_Notification_Created"]),
        "Notification_Malfunction_Started": get_int_data(wo_data["WO_Notification_Malfunction_Started"]),
        "Notification": wo_data["WO_Notification"]
    }

# Demo: build the nested record for work order 2100027772 and pretty-print it
pprint.pprint(get_work_order_data(2100027772))

{'Activity_Type': 'DE',
 'Header_Description': 'AnoID 253 GOC-8-GP-099-0105-F',
 'ID': 2100027772,
 'Location': {'ID': 1374523, 'Name': 'GO /C /32/GOC-8-GP-099-0105-F-U'},
 'Notification': 'OTE: PLEASE USE LOWER CASE LETTERS"\n'
                 ': DETAILED FAULT DESCRIPTION(SPECIFICS, MEASSURABLES, '
                 'LOCATION)"\n'
                 ' Heavy corrosion observed on 4" GP line adjacent to '
                 'C-204A(Injection\n'
                 's compressor) located at north side of compressor.\n'
                 ': INVESTIGATION OR MITIGATION REQUIRED?"\n'
                 ' No\n'
                 ': SPECIAL REQUIREMENTS (MATERIALS, TOOLS OR TRADES)"\n'
                 'Painter  +  Paint QC\n'
                 ': RELEVANT INFORMATION (P + '
                 'ID/PIC/ESTIMATES/PARTLIST/ZMOG)"\n'
                 'omaly report attached to notification\n'
                 'is notification has been raised due to external corrosion; '
                 'to mitigate\n'
       