# My Pipe: a small reversible preprocessing pipeline for pandas DataFrames

In [254]:
class Pipe:
  """Run a list of stages forwards (``fit``) or backwards (``inverse``).

  Each stage must expose ``fit(x)`` and ``inverse(x)``; whatever one stage
  returns is handed to the next one.
  """

  def __init__(self, stages):
    # Stage objects, applied in the order given.
    self.stages = stages
    self.components = {}

  def _chain(self, stages, method_name, x):
    # Feed x through `method_name` of every stage in turn.
    current = x
    for stage in stages:
      current = getattr(stage, method_name)(current)
    return current

  def fit(self, x):
    """Pass ``x`` through every stage's ``fit`` in order; return the last result."""
    return self._chain(self.stages, 'fit', x)

  def inverse(self, x):
    """Pass ``x`` through every stage's ``inverse``, last stage first."""
    return self._chain(self.stages[::-1], 'inverse', x)

In [318]:
class NullRemover:
  """Swap missing values in chosen columns for a per-column placeholder.

  The placeholder for column ``col`` is the string ``'Others_<col>'``.
  Both methods modify the frame they are given and return that same frame.
  """

  def __init__(self, cols):
    # Names of the columns whose missing values are replaced.
    self.cols = cols

  def fit(self, x):
    """Fill missing values in ``self.cols`` of ``x`` with the placeholder.

    Args:
      x: DataFrame containing every column in ``self.cols``.

    Returns:
      ``x`` itself, with the listed columns filled.
    """
    for col in self.cols:
      # Assign the result back instead of calling fillna(inplace=True) on
      # the selected column: that form is chained assignment, which pandas
      # deprecates and which does not update the frame under Copy-on-Write.
      x[col] = x[col].fillna('Others_{}'.format(col))
    return x

  def inverse(self, x):
    """Replace the placeholder in ``self.cols`` of ``x`` with the string 'None'.

    Note that the replacement is the literal text ``'None'``, not a missing
    value.

    Returns:
      ``x`` itself, with the placeholders replaced.
    """
    for col in self.cols:
      # Same chained-assignment concern as in fit: write the column back.
      x[col] = x[col].replace('Others_{}'.format(col), 'None')
    return x

In [319]:
class DummyConvertor:
  """One-hot encode categorical columns and turn the encoding back.

  ``fit`` expands every column in ``cols`` with ``pd.get_dummies`` and renames
  each dummy column to the bare category value. ``inverse`` rebuilds the
  original columns from those dummy columns.
  """

  def __init__(self, cols, sep='|'):
    """Store the configuration.

    Args:
      cols: names of the columns to encode.
      sep: separator ``get_dummies`` places between column name and category.
    """
    self.cols = cols
    self.sep = sep
    # Encoded column name -> category values found by the most recent fit.
    self.prefix_col = {}

  def fit(self, x):
    """Encode ``self.cols`` of ``x`` as dummy columns.

    Args:
      x: DataFrame containing every column in ``self.cols``.

    Returns:
      A new DataFrame: the untouched columns of ``x`` followed by one dummy
      column per category, named by the category value alone.
    """
    self.id_col = [i for i in x.columns if i not in self.cols]
    result = pd.get_dummies(x, columns=self.cols, prefix_sep=self.sep)

    # Start from scratch so calling fit again does not append duplicates.
    self.prefix_col = {}
    heads = [(c, '{}{}'.format(c, self.sep)) for c in self.cols]
    renamed = []
    for col in result.columns:
      new_name = col
      # Untouched input columns keep their name even if it contains sep.
      if col not in self.id_col:
        for c, head in heads:
          if col.startswith(head):
            # Strip only the known prefix, so a category value that itself
            # contains sep is kept whole.
            new_name = col[len(head):]
            self.prefix_col.setdefault(c, []).append(new_name)
            break
      renamed.append(new_name)

    result.columns = renamed
    return result

  def inverse(self, x):
    """Collapse the dummy columns of ``x`` back into ``self.cols``.

    Rows come back grouped by category (in dummy-column order) rather than in
    their original order, and the index is renumbered from 0. Rebuilt columns
    are appended on the right, holding the category names as strings.

    Args:
      x: DataFrame shaped like the result of ``fit``.

    Returns:
      A new DataFrame with the dummy columns replaced by ``self.cols``.

    Raises:
      KeyError: if a column in ``self.cols`` has no categories recorded
        (for example when ``fit`` has not been called).
    """
    result = x
    for c in self.cols:
      dummies = self.prefix_col[c]
      keep = [i for i in result.columns if i not in dummies]
      # var_name must be a scalar; melt puts the dummy column name there.
      melted = result.melt(id_vars=keep, value_vars=dummies, var_name=c)
      # Keep only the rows whose indicator is set, then drop the indicator.
      result = melted[melted['value'] != 0].drop(columns='value')
    return result.reset_index(drop=True)

In [268]:
# Toy frame: 'A' has one missing value, 'B' is text, 'C' is numeric.
df = pd.DataFrame(
  dict(
    A=['a', None, 'a'],
    B=['haha', 'lol', 'hehe'],
    C=[1, 2, 3],
  )
)

In [323]:
# Show df. The output below already has 'Others_A' in column A: this cell
# (In [323]) ran after pipe.fit (In [320]), and NullRemover.fit fills the
# frame it is given rather than a copy.
df

Unnamed: 0,A,B,C
0,a,haha,1
1,Others_A,lol,2
2,a,hehe,3


In [320]:
# Two-stage pipeline: fill missing 'A' values first, then one-hot encode
# 'A' and 'B'.
pipe = Pipe(stages=[NullRemover(cols=['A']), DummyConvertor(cols=['A', 'B'])])
df_final = pipe.fit(df)

In [321]:
# Show the encoded frame; dummy columns are named by category value only
# (DummyConvertor.fit strips the 'A|' / 'B|' prefixes).
df_final

Unnamed: 0,C,Others_A,a,haha,hehe,lol
0,1,0,1,1,0,0
1,2,1,0,0,0,1
2,3,0,1,0,1,0


In [322]:
# Undo the encoding. Per the output below, rows come back in a different
# order than in df (C = 1, 3, 2).
pipe.inverse(df_final)

Unnamed: 0,C,A,B
0,1,a,haha
1,3,a,hehe
2,2,,lol


In [75]:
# Scratch: long-format view of the dummy columns, keeping only the rows whose
# indicator is set.
# NOTE(review): expects df_final to have 'A' and 'C' columns plus prefixed
# dummies ('B|a', ...), as in the output below; that does not match the
# df_final built by the pipe cell above, so this presumably comes from an
# earlier session state.
x_1 = (
  df_final
  .melt(id_vars=['A', 'C'],
        value_vars=[col for col in df_final if col not in ['A', 'C']])
  .query('value != 0')
)

In [76]:
# Show the melted rows whose indicator value is non-zero.
x_1

Unnamed: 0,A,C,variable,value
1,other,2,B|a,1
3,a,1,B|b,1
8,a,3,B|c,1


In [63]:
# Scratch: keep only the 'A' and 'C' columns (rebinds x_1).
x_1 = df_final[['A','C']]

In [64]:
# Scratch: stack every column other than 'A' and 'C' into one long Series
# indexed by (row, column name).
x = df_final[[i for i in df_final if i not in ['A','C']]].stack()

In [57]:
# Show the stacked indicators (one entry per row / dummy-column pair).
x

0  B|a    0
   B|b    1
   B|c    0
1  B|a    1
   B|b    0
   B|c    0
2  B|a    0
   B|b    0
   B|c    1
dtype: uint8

In [66]:
# Keep the non-zero entries and read the second index level, i.e. the name
# of each dummy column that is set.
x[x!=0].index.get_level_values(1)

Index(['B|b', 'B|a', 'B|c'], dtype='object')