In [1]:
# utilities
from typing import Sequence, Mapping


def evaluation(y_test, y_pred):
  """Score predictions against the true labels and return the metrics as a dict.

  The returned dict has the keys ``accuracy``, ``recall``, ``precision`` and
  ``f1_score`` (in that order). Recall, precision and F1 are requested with
  ``average='weighted'`` and ``zero_division=0``.

  NOTE(review): ``accuracy_score``, ``recall_score``, ``precision_score`` and
  ``f1_score`` are not imported in this cell -- presumably they come from
  ``sklearn.metrics``; confirm the import exists elsewhere in the notebook.
  """
  # Options shared by the three averaged metrics.
  weighted = {'average': 'weighted', 'zero_division': 0}

  scores = {}
  scores['accuracy'] = accuracy_score(y_test, y_pred)
  scores['recall'] = recall_score(y_test, y_pred, **weighted)
  scores['precision'] = precision_score(y_test, y_pred, **weighted)
  scores['f1_score'] = f1_score(y_test, y_pred, **weighted)
  return scores


def pprint(data, start = '', end = '\r\n', output = False, tab = '\t'):
  """Render nested mappings/sequences as an indented ``key = value`` listing.

  Args:
    data: Value to render. Mappings are listed by key, non-string sequences by
      index, anything else is rendered with ``str``.
    start: Prefix written before every entry of the current level.
    end: Line terminator appended after the ``|`` header and after each entry.
    output: When False (default) the rendered text is also printed; when True
      it is only returned. Recursive calls always pass True.
    tab: Extra prefix added to ``start`` for each nesting level.

  Returns:
    The rendered text.
  """
  if isinstance(data, Mapping):
    entries = data.items()
  elif isinstance(data, Sequence) and not isinstance(data, str):
    # Strings are sequences too, but are rendered as plain leaves.
    entries = enumerate(data)
  else:
    if not output:
      print(data)
    return str(data)

  rendered = '|' + end
  for key, value in entries:
    # str(key): mapping keys are not necessarily strings.
    # tab=tab: forward the caller's indent unit so levels below the first
    # child keep using it instead of falling back to the default.
    rendered += start + str(key) + ' = ' + pprint(value, tab + start, end, output=True, tab=tab) + end

  if not output:
    print(rendered)

  return rendered

In [2]:
import joblib
from sklearn.pipeline import make_pipeline


# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files from a trusted source.
# Passing the path lets joblib open and close the file itself, so no handle
# stays open while the model is printed and the pipeline is built.
best_data_model = joblib.load('best_data_model.jbl')
pprint(best_data_model, tab='  ')

# A missing or empty 'preprocessing' entry yields a pipeline with only the model
# instead of failing on unpacking None.
preprocessing_steps = best_data_model.get('preprocessing') or ()
# NOTE(review): the stored preprocessing step is a sampler (ADASYN); sklearn's
# pipeline presumably rejects it at fit time because it has no transform --
# confirm whether the imbalanced-learn pipeline should be used instead.
pipe = make_pipeline(*preprocessing_steps, best_data_model.get('model'))
print(pipe)

|
name = RandomForestClassifier_x_adasyn
model = RandomForestClassifier(random_state=42)
use_smote = False
use_adasyn = True
preprocessing = |
  0 = ADASYN(random_state=42)

scores = |
  accuracy = 0.90625
  recall = 0.90625
  precision = 0.9054878982843138
  f1_score = 0.9048479074251077


Pipeline(steps=[('adasyn', ADASYN(random_state=42)),
                ('randomforestclassifier',
                 RandomForestClassifier(random_state=42))])


In [1]:
import pandas as pd
import numpy as np

In [6]:
# Small toy frame: 'a' numeric, 'b' categorical text, 'c' constant;
# the last three rows are identical.
df = pd.DataFrame(
    {
        'a': [1, 2, 3, 4, 4, 4],
        'b': ['a', 'b', 'a', 'b', 'b', 'b'],
        'c': [1, 1, 1, 1, 1, 1],
    }
)
df

Unnamed: 0,a,b,c
0,1,a,1
1,2,b,1
2,3,a,1
3,4,b,1
4,4,b,1
5,4,b,1


In [12]:
# Show df without the columns that hold a single distinct value (df itself is not modified).
df.drop(columns=df.columns[df.nunique() == 1])

Unnamed: 0,a,b
0,1,a
1,2,b
2,3,a
3,4,b
4,4,b
5,4,b


In [20]:
# Rows that repeat an earlier row (the first occurrence is not flagged).
df.loc[df.duplicated()]

Unnamed: 0,a,b,c
4,4,b,1
5,4,b,1


In [163]:
# NOTE(review): the output recorded below reports 4 rows and 2 columns ('a', 'b'),
# while the frame built in In [6] has 6 rows and 3 columns -- df was presumably
# reassigned (constant column and duplicate rows dropped) in cells that are not
# part of this notebook. TODO confirm and restore those cells.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   a       4 non-null      int64 
 1   b       4 non-null      object
dtypes: int64(1), object(1)
memory usage: 196.0+ bytes


In [164]:
# Column labels of the current df.
df.columns

Index(['a', 'b'], dtype='object')

In [165]:
# Underlying data of df as a NumPy array.
df.to_numpy()

array([[1, 'a'],
       [2, 'b'],
       [3, 'a'],
       [4, 'b']], dtype=object)

In [166]:
# True when column 'a' has one of the listed fixed-width integer/float dtypes.
any(
    df['a'].dtype == numeric_type
    for numeric_type in (
        np.int8, np.int16, np.int32, np.int64,
        np.uint8, np.uint16, np.uint32, np.uint64,
        np.float32, np.float64,
    )
)

True

In [167]:
# Label-encode every object-dtype column of df and remember the mapping used
# per column in mapping_data ({column: {original value: integer code}}).
mapping_data = {}
for column in df.columns:
    column_values = df[column]

    # Only object-dtype columns are encoded; everything else is left untouched.
    if column_values.dtype != np.dtype('O'):
        continue

    # Codes follow the order in which values first appear in the column.
    mapping_dict = {value: code for code, value in enumerate(column_values.unique())}

    # Assign the encoded column back explicitly: calling replace(inplace=True) on
    # the Series returned by df[column] is a chained in-place update that does not
    # reach df under pandas Copy-on-Write. map() also does a single lookup per
    # value, so a code that equals another key is never re-substituted.
    df[column] = column_values.map(mapping_dict)
    mapping_data[column] = mapping_dict

mapping_data

{'b': {'a': 0, 'b': 1}}

In [168]:
# Display df after the encoding cell In [167].
df

Unnamed: 0,a,b
0,1,0
1,2,1
2,3,0
3,4,1


In [65]:
import math

In [139]:
import re


def parse_decimal(text):
    """Parse the signed decimal number at the start of ``text``.

    The number may carry a leading ``+`` or ``-`` and uses ``.`` as the decimal
    point. Anything after the longest valid prefix is ignored.

    Returns:
        An ``int`` when the value has no fractional part (``'7'`` and ``'7.0'``
        both give ``7``), otherwise a ``float``. ``0`` is returned when the
        prefix holds no digits at all (empty text, or only a sign).
    """
    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    # Every part of the pattern is optional, so match() always succeeds.
    token = re.match(r'[+-]?(\d+)?([.]\d+)?', text).group(0)

    negative = token.startswith('-')
    digits = token[1:] if token[:1] in ('+', '-') else token

    # A lone sign leaves nothing to convert; int('') would raise ValueError.
    if not digits:
        return 0

    if '.' in digits:
        number = float(digits)
        # Collapse whole-valued floats to int.
        if number.is_integer():
            number = int(number)
    else:
        number = int(digits)

    return -number if negative else number


val = '+123.0012'
result = parse_decimal(val)

# convert
str(result)

'123.0012'

In [140]:
def parse_grouped_decimal(text):
    """Parse a signed number written with ``.`` thousands groups and a ``,`` decimal mark.

    Only the longest prefix of ``text`` matching that layout is used (up to
    three leading digits, then any number of ``.ddd`` groups, then an optional
    ``,`` fraction); the rest of the text is ignored.

    Returns:
        An ``int`` when the value has no fractional part, otherwise a
        ``float``. ``0`` is returned when the prefix holds no digits at all
        (empty text, or only a sign).
    """
    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    # Every part of the pattern is optional, so match() always succeeds.
    token = re.match(r'[+-]?\d{,3}(([.]\d{3})+)?([,]\d+)?', text).group(0)

    # Drop the group separators first, then turn the decimal comma into a point.
    token = token.replace('.', '').replace(',', '.')

    negative = token.startswith('-')
    digits = token[1:] if token[:1] in ('+', '-') else token

    # A lone sign leaves nothing to convert; int('') would raise ValueError.
    if not digits:
        return 0

    if '.' in digits:
        number = float(digits)
        # Collapse whole-valued floats to int.
        if number.is_integer():
            number = int(number)
    else:
        number = int(digits)

    return -number if negative else number


val = '-19.999.999,99'
result = parse_grouped_decimal(val)

result

-19999999.99

In [141]:
def format_grouped_decimal(value):
    """Format a number with ``.`` thousands groups and a ``,`` decimal mark.

    The fractional digits of ``str(float(value))`` are kept as they are and
    right-padded with zeros to at least two places, e.g. ``-19999999.99``
    becomes ``'-19.999.999,99'`` and ``5`` becomes ``'5,00'``.

    Raises:
        ValueError: if ``value`` is NaN or infinite.
    """
    from decimal import Decimal

    magnitude = abs(float(value))
    if not math.isfinite(magnitude):
        raise ValueError('cannot format a non-finite number: %r' % (value,))

    # Going through Decimal expands str(float)'s scientific notation
    # (e.g. '1e+16') into plain digits; the 'f' format never uses an exponent.
    whole, _, fraction = format(Decimal(str(magnitude)), 'f').partition('.')

    # Group the integer digits from the right instead of deriving the group
    # count from a floating-point logarithm, which fails for 0 and can be off
    # by one at exact powers of 1000.
    grouped = format(int(whole), ',').replace(',', '.')

    text = grouped + ',' + fraction.ljust(2, '0')
    return '-' + text if value < 0 else text


# convert
# result is left untouched so the cell can be re-run with the same outcome.
price = format_grouped_decimal(result)
price

'-19.999.999,99'