In [177]:
# importing libraries

import pandas as pd

import numpy as np

import re

In [178]:
# creating data dict

df_dict = {'tag' : ['P101A/B/C', 'P203A-D', 'P401A/G', 'T101B', 'B301'],
          'Service' : ['Oil', 'Gas', 'N2', 'FW', 'CW'],
          'WorkingTemp' : ['35', '41', '10', '40', '23'],
          'WorkingPressure' : ['2000', '3000', '2100', '1500', '2400']}

In [179]:
# turning dict into df

df = pd.DataFrame(df_dict)

df.head()

Unnamed: 0,tag,Service,WorkingTemp,WorkingPressure
0,P101A/B/C,Oil,35,2000
1,P203A-D,Gas,41,3000
2,P401A/G,N2,10,2100
3,T101B,FW,40,1500
4,B301,CW,23,2400


In [180]:
# splitting tag

# group 1: one capital letter followed by three digits (the main tag)
# group 2: whatever follows the main tag (the letter suffix); empty when nothing follows
tag_pattern = re.compile(r'([A-Z]\d{3})(.+)*')

parse = df['tag'].apply(lambda tag: tag_pattern.findall(tag))

In [181]:
# adding tuple as column

df['parse_tuple'] = parse

df.head()

Unnamed: 0,tag,Service,WorkingTemp,WorkingPressure,parse_tuple
0,P101A/B/C,Oil,35,2000,"[(P101, A/B/C)]"
1,P203A-D,Gas,41,3000,"[(P203, A-D)]"
2,P401A/G,N2,10,2100,"[(P401, A/G)]"
3,T101B,FW,40,1500,"[(T101, B)]"
4,B301,CW,23,2400,"[(B301, )]"


In [182]:
# splitting the parse tuple into separate columns

# first, turning the first matched tuple of every row into a plain list
# note: indexing [0] raises IndexError for a row whose match list is empty

df['parse_list'] = df['parse_tuple'].map(lambda matches: list(matches[0]))

In [183]:
df.head()

Unnamed: 0,tag,Service,WorkingTemp,WorkingPressure,parse_tuple,parse_list
0,P101A/B/C,Oil,35,2000,"[(P101, A/B/C)]","[P101, A/B/C]"
1,P203A-D,Gas,41,3000,"[(P203, A-D)]","[P203, A-D]"
2,P401A/G,N2,10,2100,"[(P401, A/G)]","[P401, A/G]"
3,T101B,FW,40,1500,"[(T101, B)]","[T101, B]"
4,B301,CW,23,2400,"[(B301, )]","[B301, ]"


In [184]:
df['parse_list'].apply(type) # parse_list is a list

0    <class 'list'>
1    <class 'list'>
2    <class 'list'>
3    <class 'list'>
4    <class 'list'>
Name: parse_list, dtype: object

In [185]:
# checking to make sure tuple element within list became strings

df['parse_list'].apply(lambda x: [type(i) for i in x])

0    [<class 'str'>, <class 'str'>]
1    [<class 'str'>, <class 'str'>]
2    [<class 'str'>, <class 'str'>]
3    [<class 'str'>, <class 'str'>]
4    [<class 'str'>, <class 'str'>]
Name: parse_list, dtype: object

In [186]:
# parsing letter tag from parse_list

def letter_tag(col):
    """Collect the second element of every two-item entry in ``col``.

    Parameters
    ----------
    col : iterable of two-item sequences
        Each entry is unpacked as ``(main, suffix)``.

    Returns
    -------
    list
        One value per entry: the suffix itself, or ``pd.NA`` when the suffix
        is ``None``. An empty string is not ``None`` and is returned as-is.
    """
    return [pd.NA if suffix is None else suffix for _, suffix in col]

In [187]:
# applying function

df['letter_tag'] = letter_tag(df['parse_list'])

In [188]:
df.head()

Unnamed: 0,tag,Service,WorkingTemp,WorkingPressure,parse_tuple,parse_list,letter_tag
0,P101A/B/C,Oil,35,2000,"[(P101, A/B/C)]","[P101, A/B/C]",A/B/C
1,P203A-D,Gas,41,3000,"[(P203, A-D)]","[P203, A-D]",A-D
2,P401A/G,N2,10,2100,"[(P401, A/G)]","[P401, A/G]",A/G
3,T101B,FW,40,1500,"[(T101, B)]","[T101, B]",B
4,B301,CW,23,2400,"[(B301, )]","[B301, ]",


### GK suggestion - looping over parse tuple to extract letter and main tags without converting it first to a list

In [189]:
df['parse_tuple'].apply(type)

0    <class 'list'>
1    <class 'list'>
2    <class 'list'>
3    <class 'list'>
4    <class 'list'>
Name: parse_tuple, dtype: object

In [190]:
df['parse_tuple'].apply(lambda x: [type(i) for i in x])

0    [<class 'tuple'>]
1    [<class 'tuple'>]
2    [<class 'tuple'>]
3    [<class 'tuple'>]
4    [<class 'tuple'>]
Name: parse_tuple, dtype: object

In [191]:
df_gk = df.copy()

In [192]:
# creating rows that hold multiple tuples each, to test

parse_tuple_test = [[('A', 'B'), (1, 2)],
                    [('C', 'D'), (3, 4)],
                    [('E', 'F'), (5, 6)],
                    [('G', 'H'), (7, 8)],
                    [('I', 'J'), (9, 10)]]

df_gk['parse_tuple_test'] = parse_tuple_test

df_gk.head()

Unnamed: 0,tag,Service,WorkingTemp,WorkingPressure,parse_tuple,parse_list,letter_tag,parse_tuple_test
0,P101A/B/C,Oil,35,2000,"[(P101, A/B/C)]","[P101, A/B/C]",A/B/C,"[(A, B), (1, 2)]"
1,P203A-D,Gas,41,3000,"[(P203, A-D)]","[P203, A-D]",A-D,"[(C, D), (3, 4)]"
2,P401A/G,N2,10,2100,"[(P401, A/G)]","[P401, A/G]",A/G,"[(E, F), (5, 6)]"
3,T101B,FW,40,1500,"[(T101, B)]","[T101, B]",B,"[(G, H), (7, 8)]"
4,B301,CW,23,2400,"[(B301, )]","[B301, ]",,"[(I, J), (9, 10)]"


In [193]:
df_gk['parse_tuple_test'].apply(type)

0    <class 'list'>
1    <class 'list'>
2    <class 'list'>
3    <class 'list'>
4    <class 'list'>
Name: parse_tuple_test, dtype: object

In [194]:
df_gk['parse_tuple_test'].apply(lambda x: [type(i) for i in x])

0    [<class 'tuple'>, <class 'tuple'>]
1    [<class 'tuple'>, <class 'tuple'>]
2    [<class 'tuple'>, <class 'tuple'>]
3    [<class 'tuple'>, <class 'tuple'>]
4    [<class 'tuple'>, <class 'tuple'>]
Name: parse_tuple_test, dtype: object

In [208]:
# test 1

def parse_loop_func(row):
    """Convert every item of ``row`` into a list and return them as a list.

    The converted result is also printed before it is returned.
    """
    parse_loop_list = [list(item) for item in row]

    print(parse_loop_list)

    return parse_loop_list

In [209]:
df_gk['parse_tuple_test'].apply(lambda x: pd.Series((parse_loop_func(x)))) # applying func using lambda with pd.Series to split into col

[['A', 'B'], [1, 2]]
[['C', 'D'], [3, 4]]
[['E', 'F'], [5, 6]]
[['G', 'H'], [7, 8]]
[['I', 'J'], [9, 10]]


Unnamed: 0,0,1
0,"[A, B]","[1, 2]"
1,"[C, D]","[3, 4]"
2,"[E, F]","[5, 6]"
3,"[G, H]","[7, 8]"
4,"[I, J]","[9, 10]"


In [210]:
test = [['A', 'B'], [1, 2]]

In [212]:
pd.DataFrame(pd.Series(test)) # pd.series vertical, as opposed to horizontal when used in the context of apply

Unnamed: 0,0
0,"[A, B]"
1,"[1, 2]"


### End GK Solution

In [56]:
# parsing main tag

def main_tag (col):
    """Collect the first element (main tag) of every two-item entry in ``col``.

    Parameters
    ----------
    col : iterable of two-item sequences
        Each entry is unpacked as ``(main, suffix)``.

    Returns
    -------
    list
        One value per entry, in order: the main tag, or ``pd.NA`` when the
        main tag is ``None``.
    """
    parse = []

    for main, _ in col:

        if main is None:
            # record the gap and keep going; returning the result of
            # list.append here would hand back None and drop all later rows
            parse.append(pd.NA)

        else:
            parse.append(main)

    return parse

In [57]:
# applying function

df['main_tag'] = main_tag(df['parse_list'])

In [58]:
df.head()

Unnamed: 0,tag,Service,WorkingTemp,WorkingPressure,parse_tuple,parse_list,letter_tag,main_tag
0,P101A/B/C,Oil,35,2000,"[(P101, A/B/C)]","[P101, A/B/C]",A/B/C,P101
1,P203A-D,Gas,41,3000,"[(P203, A-D)]","[P203, A-D]",A-D,P203
2,P401A/G,N2,10,2100,"[(P401, A/G)]","[P401, A/G]",A/G,P401
3,T101B,FW,40,1500,"[(T101, B)]","[T101, B]",B,T101
4,B301,CW,23,2400,"[(B301, )]","[B301, ]",,B301


In [59]:
# extracting first letter

# matches the leading character of the letter tag unless it is '/' or '-';
# re.findall returns a list, so every cell holds a list with zero or one strings
df['first_letter'] = df['letter_tag'].map(lambda suffix: re.findall(r'(^[^/-])', suffix))

In [60]:
df.head()

Unnamed: 0,tag,Service,WorkingTemp,WorkingPressure,parse_tuple,parse_list,letter_tag,main_tag,first_letter
0,P101A/B/C,Oil,35,2000,"[(P101, A/B/C)]","[P101, A/B/C]",A/B/C,P101,[A]
1,P203A-D,Gas,41,3000,"[(P203, A-D)]","[P203, A-D]",A-D,P203,[A]
2,P401A/G,N2,10,2100,"[(P401, A/G)]","[P401, A/G]",A/G,P401,[A]
3,T101B,FW,40,1500,"[(T101, B)]","[T101, B]",B,T101,[B]
4,B301,CW,23,2400,"[(B301, )]","[B301, ]",,B301,[]


In [61]:
df['first_letter'].apply(type)

0    <class 'list'>
1    <class 'list'>
2    <class 'list'>
3    <class 'list'>
4    <class 'list'>
Name: first_letter, dtype: object

In [64]:
df['first_letter'].apply(lambda x: [type(i) for i in x ])

0    [<class 'str'>]
1    [<class 'str'>]
2    [<class 'str'>]
3    [<class 'str'>]
4                 []
Name: first_letter, dtype: object

In [41]:
# creating function to unwrap each list into its first element (empty list -> None)

def list_conversion(col):
    """Replace every entry of ``col`` by its first element.

    Falsy entries (for example an empty list) become ``None``.

    Returns
    -------
    list
        One value per entry of ``col``, in the same order.
    """
    return [entry[0] if entry else None for entry in col]

In [42]:
# applying function

df['first_letter'] = list_conversion(df['first_letter'])

In [43]:
df.head()

Unnamed: 0,tag,Service,WorkingTemp,WorkingPressure,parse_tuple,parse_list,letter_tag,main_tag,first_letter,last_letter,first_letter_int,last_letter_int
0,P101A/B/C,Oil,35,2000,"[(P101, A/B/C)]","[P101, A/B/C]",A/B/C,P101,A,C,65.0,67.0
1,P203A-D,Gas,41,3000,"[(P203, A-D)]","[P203, A-D]",A-D,P203,A,D,65.0,68.0
2,P401A/G,N2,10,2100,"[(P401, A/G)]","[P401, A/G]",A/G,P401,A,G,65.0,71.0
3,T101B,FW,40,1500,"[(T101, B)]","[T101, B]",B,T101,B,B,66.0,66.0
4,B301,CW,23,2400,"[(B301, )]","[B301, ]",,B301,,,,


In [21]:
# extracting last letter

# matches the final character of the letter tag unless it is '/' or '-';
# re.findall returns a list, so every cell holds a list with zero or one strings
df['last_letter'] = df['letter_tag'].map(lambda suffix: re.findall(r'([^/-]$)', suffix))

In [22]:
df.head()

Unnamed: 0,tag,Service,WorkingTemp,WorkingPressure,parse_tuple,parse_list,letter_tag,main_tag,first_letter,last_letter
0,P101A/B/C,Oil,35,2000,"[(P101, A/B/C)]","[P101, A/B/C]",A/B/C,P101,A,[C]
1,P203A-D,Gas,41,3000,"[(P203, A-D)]","[P203, A-D]",A-D,P203,A,[D]
2,P401A/G,N2,10,2100,"[(P401, A/G)]","[P401, A/G]",A/G,P401,A,[G]
3,T101B,FW,40,1500,"[(T101, B)]","[T101, B]",B,T101,B,[B]
4,B301,CW,23,2400,"[(B301, )]","[B301, ]",,B301,,[]


In [23]:
# applying list conversion

df['last_letter'] = list_conversion(df['last_letter'])

In [24]:
df.head()

Unnamed: 0,tag,Service,WorkingTemp,WorkingPressure,parse_tuple,parse_list,letter_tag,main_tag,first_letter,last_letter
0,P101A/B/C,Oil,35,2000,"[(P101, A/B/C)]","[P101, A/B/C]",A/B/C,P101,A,C
1,P203A-D,Gas,41,3000,"[(P203, A-D)]","[P203, A-D]",A-D,P203,A,D
2,P401A/G,N2,10,2100,"[(P401, A/G)]","[P401, A/G]",A/G,P401,A,G
3,T101B,FW,40,1500,"[(T101, B)]","[T101, B]",B,T101,B,B
4,B301,CW,23,2400,"[(B301, )]","[B301, ]",,B301,,


In [25]:
# converting first and last letter to int

def _letter_code(letter):
    """Return ``ord(letter)``, or ``None`` when the letter is missing."""
    # pd.isna covers None as well as NaN / pd.NA; an `== None` comparison lets
    # NaN-style missing markers fall through to ord(), which raises TypeError
    return None if pd.isna(letter) else ord(letter)

df['first_letter_int'] = df['first_letter'].apply(_letter_code)

df['last_letter_int'] = df['last_letter'].apply(_letter_code)

In [26]:
df.head()

Unnamed: 0,tag,Service,WorkingTemp,WorkingPressure,parse_tuple,parse_list,letter_tag,main_tag,first_letter,last_letter,first_letter_int,last_letter_int
0,P101A/B/C,Oil,35,2000,"[(P101, A/B/C)]","[P101, A/B/C]",A/B/C,P101,A,C,65.0,67.0
1,P203A-D,Gas,41,3000,"[(P203, A-D)]","[P203, A-D]",A-D,P203,A,D,65.0,68.0
2,P401A/G,N2,10,2100,"[(P401, A/G)]","[P401, A/G]",A/G,P401,A,G,65.0,71.0
3,T101B,FW,40,1500,"[(T101, B)]","[T101, B]",B,T101,B,B,66.0,66.0
4,B301,CW,23,2400,"[(B301, )]","[B301, ]",,B301,,,,


In [27]:
# cleaning up columns

df_clean = df[['tag', 'Service', 'WorkingTemp', 'WorkingPressure', 'main_tag', 'first_letter_int', 'last_letter_int']]
    
df_clean.head()

Unnamed: 0,tag,Service,WorkingTemp,WorkingPressure,main_tag,first_letter_int,last_letter_int
0,P101A/B/C,Oil,35,2000,P101,65.0,67.0
1,P203A-D,Gas,41,3000,P203,65.0,68.0
2,P401A/G,N2,10,2100,P401,65.0,71.0
3,T101B,FW,40,1500,T101,66.0,66.0
4,B301,CW,23,2400,B301,,


In [28]:
df_clean

Unnamed: 0,tag,Service,WorkingTemp,WorkingPressure,main_tag,first_letter_int,last_letter_int
0,P101A/B/C,Oil,35,2000,P101,65.0,67.0
1,P203A-D,Gas,41,3000,P203,65.0,68.0
2,P401A/G,N2,10,2100,P401,65.0,71.0
3,T101B,FW,40,1500,T101,66.0,66.0
4,B301,CW,23,2400,B301,,


In [29]:
# scaffolding based on first and last letter
# every row is copied once per character code from first_letter_int up to and
# including last_letter_int; the code of each copy is stored under 'scaffold'

new_rows = []

for index, row in df_clean.iterrows():

    init, end = row['first_letter_int'], row['last_letter_int']

    # rows whose first_letter_int is missing are carried over once, without a scaffold value
    if pd.isna(init):
        new_row = row.copy()
        new_row['scaffold'] = None
        new_rows.append(new_row)
        continue

    # step through the codes one at a time, emitting one copy of the row per code
    while init <= end:
        new_row = row.copy()
        new_row['scaffold'] = init
        new_rows.append(new_row)
        init += 1

In [30]:
new_rows

[tag                 P101A/B/C
 Service                   Oil
 WorkingTemp                35
 WorkingPressure          2000
 main_tag                 P101
 first_letter_int         65.0
 last_letter_int          67.0
 scaffold                 65.0
 Name: 0, dtype: object,
 tag                 P101A/B/C
 Service                   Oil
 WorkingTemp                35
 WorkingPressure          2000
 main_tag                 P101
 first_letter_int         65.0
 last_letter_int          67.0
 scaffold                 66.0
 Name: 0, dtype: object,
 tag                 P101A/B/C
 Service                   Oil
 WorkingTemp                35
 WorkingPressure          2000
 main_tag                 P101
 first_letter_int         65.0
 last_letter_int          67.0
 scaffold                 67.0
 Name: 0, dtype: object,
 tag                 P203A-D
 Service                 Gas
 WorkingTemp              41
 WorkingPressure        3000
 main_tag               P203
 first_letter_int       65.0
 last_l

In [31]:
# converting scaffold into df

# reset_index moves the row labels carried by the collected rows into an
# 'index' column and gives the frame a fresh running index
df_scaffold = pd.DataFrame(new_rows).reset_index()

df_scaffold

Unnamed: 0,index,tag,Service,WorkingTemp,WorkingPressure,main_tag,first_letter_int,last_letter_int,scaffold
0,0,P101A/B/C,Oil,35,2000,P101,65.0,67.0,65.0
1,0,P101A/B/C,Oil,35,2000,P101,65.0,67.0,66.0
2,0,P101A/B/C,Oil,35,2000,P101,65.0,67.0,67.0
3,1,P203A-D,Gas,41,3000,P203,65.0,68.0,65.0
4,1,P203A-D,Gas,41,3000,P203,65.0,68.0,66.0
5,1,P203A-D,Gas,41,3000,P203,65.0,68.0,67.0
6,1,P203A-D,Gas,41,3000,P203,65.0,68.0,68.0
7,2,P401A/G,N2,10,2100,P401,65.0,71.0,65.0
8,2,P401A/G,N2,10,2100,P401,65.0,71.0,66.0
9,2,P401A/G,N2,10,2100,P401,65.0,71.0,67.0


In [32]:
# converting scaffolded to letters

# character codes become their letters again; missing codes stay None
df_scaffold['scaffold_letters'] = df_scaffold['scaffold'].apply(lambda code: None if pd.isna(code) else chr(int(code)))

df_scaffold.head()

Unnamed: 0,index,tag,Service,WorkingTemp,WorkingPressure,main_tag,first_letter_int,last_letter_int,scaffold,scaffold_letters
0,0,P101A/B/C,Oil,35,2000,P101,65.0,67.0,65.0,A
1,0,P101A/B/C,Oil,35,2000,P101,65.0,67.0,66.0,B
2,0,P101A/B/C,Oil,35,2000,P101,65.0,67.0,67.0,C
3,1,P203A-D,Gas,41,3000,P203,65.0,68.0,65.0,A
4,1,P203A-D,Gas,41,3000,P203,65.0,68.0,66.0,B


In [33]:
# creating new tag

def _build_new_tag(record):
    """Return main tag + scaffold letter, or the original tag when there is no letter."""
    letter = record['scaffold_letters']

    if pd.isna(letter):
        return record['tag']

    return record['main_tag'] + letter

df_scaffold['new_tag'] = df_scaffold.apply(_build_new_tag, axis = 1)

df_scaffold

Unnamed: 0,index,tag,Service,WorkingTemp,WorkingPressure,main_tag,first_letter_int,last_letter_int,scaffold,scaffold_letters,new_tag
0,0,P101A/B/C,Oil,35,2000,P101,65.0,67.0,65.0,A,P101A
1,0,P101A/B/C,Oil,35,2000,P101,65.0,67.0,66.0,B,P101B
2,0,P101A/B/C,Oil,35,2000,P101,65.0,67.0,67.0,C,P101C
3,1,P203A-D,Gas,41,3000,P203,65.0,68.0,65.0,A,P203A
4,1,P203A-D,Gas,41,3000,P203,65.0,68.0,66.0,B,P203B
5,1,P203A-D,Gas,41,3000,P203,65.0,68.0,67.0,C,P203C
6,1,P203A-D,Gas,41,3000,P203,65.0,68.0,68.0,D,P203D
7,2,P401A/G,N2,10,2100,P401,65.0,71.0,65.0,A,P401A
8,2,P401A/G,N2,10,2100,P401,65.0,71.0,66.0,B,P401B
9,2,P401A/G,N2,10,2100,P401,65.0,71.0,67.0,C,P401C
