Contains data preprocessing and model trials for personality dataset: https://huggingface.co/datasets/kl08/myers-briggs-type-indicator 
Methods / Experiments:
1. Multinomial 
2. Logistic Regression 

### Setting up a playground for me to play

In [None]:
import numpy as np 
import pandas as pd 
from tqdm.autonotebook import tqdm, trange
from sentence_transformers import SentenceTransformer 

import joblib 

model_card = "all-mpnet-base-v2"

class Basement(SentenceTransformer):
    """Basement to encode speech."""
    def __init__(self, model: str = model_card):
        super().__init__(model)
        self.expression = joblib.load('big5model.pkl')

    def predict(self, query: str):
        embedding = self.encode([query])
        logits = self.expression.predict_log_proba(embedding)
        return {
            'big5': logits, 
            'personality': np.zeros(3)
        }
        

base = Basement()

  from tqdm.autonotebook import tqdm, trange


In [None]:
# Loading the dataset
import pandas as pd

df = pd.read_csv("hf://datasets/kl08/myers-briggs-type-indicator/mbti_1.csv")
mbti_tags = {label: i for i, label in enumerate(df['type'].unique())}
mbti_tags

{'INFJ': 0,
 'ENTP': 1,
 'INTP': 2,
 'INTJ': 3,
 'ENTJ': 4,
 'ENFJ': 5,
 'INFP': 6,
 'ENFP': 7,
 'ISFP': 8,
 'ISTP': 9,
 'ISFJ': 10,
 'ISTJ': 11,
 'ESTP': 12,
 'ESFP': 13,
 'ESTJ': 14,
 'ESFJ': 15}

In [None]:
df['mbti'] = df['type'].map(mbti_tags)
df

Unnamed: 0,type,posts,mbti
0,INFJ,'http://www.youtube.com/watch?v=qsXHcwe3krw|||...,0
1,ENTP,'I'm finding the lack of me in these posts ver...,1
2,INTP,'Good one _____ https://www.youtube.com/wat...,2
3,INTJ,"'Dear INTP, I enjoyed our conversation the o...",3
4,ENTJ,'You're fired.|||That's another silly misconce...,4
...,...,...,...
8670,ISFP,'https://www.youtube.com/watch?v=t8edHB_h908||...,8
8671,ENFP,'So...if this thread already exists someplace ...,7
8672,INTP,'So many questions when i do these things. I ...,2
8673,INFP,'I am very conflicted right now when it comes ...,6


In [None]:
class_weights = df['type'].value_counts(normalize=True)
class_weights

type
INFP    0.211182
INFJ    0.169452
INTP    0.150317
INTJ    0.125764
ENTP    0.078963
ENFP    0.077810
ISTP    0.038847
ISFP    0.031239
ENTJ    0.026628
ISTJ    0.023631
ENFJ    0.021902
ISFJ    0.019135
ESTP    0.010259
ESFP    0.005533
ESFJ    0.004841
ESTJ    0.004496
Name: proportion, dtype: float64

### Experiment 0: Warm up

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

model = LogisticRegression(multi_class='multinomial', max_iter=500)
X = base.encode(df['posts'].values)
y = df['mbti'].values 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

In [None]:
model.fit(X_train, y_train)



In [None]:
model.score(X_train, y_train)

0.4744729907773386

In [None]:
y_pred = model.predict(X_test)

In [None]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.39      0.44      0.41       441
           1       0.42      0.33      0.37       206
           2       0.38      0.52      0.44       391
           3       0.33      0.32      0.33       327
           4       0.10      0.01      0.03        69
           5       0.33      0.02      0.03        57
           6       0.44      0.69      0.54       550
           7       0.52      0.40      0.46       203
           8       0.45      0.06      0.11        81
           9       0.36      0.15      0.21       101
          10       0.75      0.06      0.11        50
          11       0.00      0.00      0.00        61
          12       0.00      0.00      0.00        27
          13       0.00      0.00      0.00        14
          14       0.00      0.00      0.00        12
          15       0.00      0.00      0.00        13

    accuracy                           0.41      2603
   macro avg       0.28   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Experiment 1: Multinomial with predefined class weights to accomodate imbalanced dataset

In [None]:
dict(df['type'].value_counts(normalize=True))

{'INFP': 0.21118155619596543,
 'INFJ': 0.16945244956772335,
 'INTP': 0.15031700288184438,
 'INTJ': 0.12576368876080693,
 'ENTP': 0.07896253602305475,
 'ENFP': 0.07780979827089338,
 'ISTP': 0.03884726224783862,
 'ISFP': 0.031239193083573487,
 'ENTJ': 0.026628242074927953,
 'ISTJ': 0.02363112391930836,
 'ENFJ': 0.02190201729106628,
 'ISFJ': 0.01913544668587896,
 'ESTP': 0.01025936599423631,
 'ESFP': 0.00553314121037464,
 'ESFJ': 0.00484149855907781,
 'ESTJ': 0.004495677233429395}

In [None]:
df['type_weights'] = df['type'].map(dict(df['type'].value_counts(normalize=True)))
df['type_weights'].values

array([0.16945245, 0.07896254, 0.150317  , ..., 0.150317  , 0.21118156,
       0.21118156])

In [None]:
df[['mbti', 'type_weights']].values

array([[0.        , 0.16945245],
       [1.        , 0.07896254],
       [2.        , 0.150317  ],
       ...,
       [2.        , 0.150317  ],
       [6.        , 0.21118156],
       [6.        , 0.21118156]])

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, df[['mbti', 'type_weights']].values, test_size=0.3, random_state=42, stratify=y)


In [None]:
y_train[:, 1]

array([0.01913545, 0.150317  , 0.150317  , ..., 0.07896254, 0.0778098 ,
       0.150317  ])

In [None]:
y_train[:, 0]

array([10.,  2.,  2., ...,  1.,  7.,  2.])

In [None]:
model.fit(X_train, y_train[:, 0], y_train[:, 1])



In [None]:
model.score(X_train, y_train[:, 0])

0.32954545454545453

In [None]:
y_pred = model.predict(X_test)
y_pred

array([0., 2., 6., ..., 0., 6., 6.])

In [None]:
print(classification_report(y_test[:, 0], y_pred))

              precision    recall  f1-score   support

         0.0       0.34      0.36      0.35       441
         1.0       0.00      0.00      0.00       206
         2.0       0.32      0.43      0.37       391
         3.0       0.36      0.09      0.14       327
         4.0       0.00      0.00      0.00        69
         5.0       0.00      0.00      0.00        57
         6.0       0.31      0.86      0.45       550
         7.0       0.00      0.00      0.00       203
         8.0       0.00      0.00      0.00        81
         9.0       0.00      0.00      0.00       101
        10.0       0.00      0.00      0.00        50
        11.0       0.00      0.00      0.00        61
        12.0       0.00      0.00      0.00        27
        13.0       0.00      0.00      0.00        14
        14.0       0.00      0.00      0.00        12
        15.0       0.00      0.00      0.00        13

    accuracy                           0.32      2603
   macro avg       0.08   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, df[['mbti', 'type_weights']].values, test_size=0.3, random_state=42, shuffle=True)
model.fit(X_train, y_train[:, 0], y_train[:, 1])




In [None]:
model.score(X_train, y_train[:, 0])

0.33415678524374176

In [None]:
y_pred = model.predict(X_test)

In [None]:
df

Unnamed: 0,type,posts,mbti,type_weights
0,INFJ,'http://www.youtube.com/watch?v=qsXHcwe3krw|||...,0,0.169452
1,ENTP,'I'm finding the lack of me in these posts ver...,1,0.078963
2,INTP,'Good one _____ https://www.youtube.com/wat...,2,0.150317
3,INTJ,"'Dear INTP, I enjoyed our conversation the o...",3,0.125764
4,ENTJ,'You're fired.|||That's another silly misconce...,4,0.026628
...,...,...,...,...
8670,ISFP,'https://www.youtube.com/watch?v=t8edHB_h908||...,8,0.031239
8671,ENFP,'So...if this thread already exists someplace ...,7,0.077810
8672,INTP,'So many questions when i do these things. I ...,2,0.150317
8673,INFP,'I am very conflicted right now when it comes ...,6,0.211182


### Experiment 2: Chain predictions by label instead of the 4 label combination

#### Preprocessing

In [None]:
x0_label = {i: list(i)[0] for i in df['type'].unique()}
x1_label = {i: list(i)[1] for i in df['type'].unique()}
x2_label = {i: list(i)[2] for i in df['type'].unique()}
x3_label = {i: list(i)[3] for i in df['type'].unique()}

df['x0'] = df['type'].map(x0_label)
df['x1'] = df['type'].map(x1_label)
df['x2'] = df['type'].map(x2_label)
df['x3'] = df['type'].map(x3_label)

df

Unnamed: 0,type,posts,mbti,type_weights,x0,x1,x2,x3
0,INFJ,'http://www.youtube.com/watch?v=qsXHcwe3krw|||...,0,0.169452,I,N,F,J
1,ENTP,'I'm finding the lack of me in these posts ver...,1,0.078963,E,N,T,P
2,INTP,'Good one _____ https://www.youtube.com/wat...,2,0.150317,I,N,T,P
3,INTJ,"'Dear INTP, I enjoyed our conversation the o...",3,0.125764,I,N,T,J
4,ENTJ,'You're fired.|||That's another silly misconce...,4,0.026628,E,N,T,J
...,...,...,...,...,...,...,...,...
8670,ISFP,'https://www.youtube.com/watch?v=t8edHB_h908||...,8,0.031239,I,S,F,P
8671,ENFP,'So...if this thread already exists someplace ...,7,0.077810,E,N,F,P
8672,INTP,'So many questions when i do these things. I ...,2,0.150317,I,N,T,P
8673,INFP,'I am very conflicted right now when it comes ...,6,0.211182,I,N,F,P


In [None]:
y0 = df['x0'].map({'I': 0, 'E': 1}).values
y1 = df['x1'].map({'N': 0, 'S': 1}).values
y2 = df['x2'].map({'T': 0, 'F': 1}).values
y3 = df['x3'].map({'J': 0, 'P': 1}).values

In [None]:
dict(df['x0'].value_counts(normalize=True))

{'I': 0.7695677233429394, 'E': 0.2304322766570605}

In [None]:
def add_class_weights(df: pd.DataFrame, label: str):
    df[f'{label}_weights'] = df[label].map(dict(df[label].value_counts(normalize=True)))
    return df 

for i in range(4):
    add_class_weights(df, f'x{i}')

In [None]:
df

Unnamed: 0,type,posts,mbti,type_weights,x0,x1,x2,x3,x0_weights,x1_weights,x2_weights,x3_weights
0,INFJ,'http://www.youtube.com/watch?v=qsXHcwe3krw|||...,0,0.169452,I,N,F,J,0.769568,0.862017,0.541095,0.39585
1,ENTP,'I'm finding the lack of me in these posts ver...,1,0.078963,E,N,T,P,0.230432,0.862017,0.458905,0.60415
2,INTP,'Good one _____ https://www.youtube.com/wat...,2,0.150317,I,N,T,P,0.769568,0.862017,0.458905,0.60415
3,INTJ,"'Dear INTP, I enjoyed our conversation the o...",3,0.125764,I,N,T,J,0.769568,0.862017,0.458905,0.39585
4,ENTJ,'You're fired.|||That's another silly misconce...,4,0.026628,E,N,T,J,0.230432,0.862017,0.458905,0.39585
...,...,...,...,...,...,...,...,...,...,...,...,...
8670,ISFP,'https://www.youtube.com/watch?v=t8edHB_h908||...,8,0.031239,I,S,F,P,0.769568,0.137983,0.541095,0.60415
8671,ENFP,'So...if this thread already exists someplace ...,7,0.077810,E,N,F,P,0.230432,0.862017,0.541095,0.60415
8672,INTP,'So many questions when i do these things. I ...,2,0.150317,I,N,T,P,0.769568,0.862017,0.458905,0.60415
8673,INFP,'I am very conflicted right now when it comes ...,6,0.211182,I,N,F,P,0.769568,0.862017,0.541095,0.60415


#### Experiment 2.2: Custom Object class of Logistic regression models

In [None]:
class ChainMBTI:
    def __init__(self):
        self.m0 = LogisticRegression(max_iter=500)
        self.m1 = LogisticRegression(max_iter=500)
        self.m2 = LogisticRegression(max_iter=500)
        self.m3 = LogisticRegression(max_iter=500)

    def train(self, model: LogisticRegression, X_train: np.ndarray, y_train: np.ndarray, class_weights: np.ndarray, X_test: np.ndarray, y_test: np.ndarray):
        model.fit(X_train, y_train, class_weights)
        y_pred = model.predict(X_test)
        print(classification_report(y_test, y_pred))
        
    def update(self, X: np.ndarray, *ys: tuple):
        for model_num, y in enumerate(ys):
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
            self.train(getattr(self, f'm{model_num}'), X_train, y_train[:, 0], y_train[:, 1], X_test, y_test[:, 0])
            
mbti_model = ChainMBTI()

In [None]:
mbti_model.update(X, df[['x0', 'x0_weights']].values, df[['x1', 'x1_weights']].values, df[['x2', 'x2_weights']].values, df[['x3', 'x3_weights']].values)

              precision    recall  f1-score   support

           E       0.84      0.03      0.05       600
           I       0.77      1.00      0.87      2003

    accuracy                           0.77      2603
   macro avg       0.81      0.51      0.46      2603
weighted avg       0.79      0.77      0.68      2603



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           N       0.86      1.00      0.93      2244
           S       0.00      0.00      0.00       359

    accuracy                           0.86      2603
   macro avg       0.43      0.50      0.46      2603
weighted avg       0.74      0.86      0.80      2603

              precision    recall  f1-score   support

           F       0.74      0.84      0.79      1408
           T       0.77      0.66      0.71      1195

    accuracy                           0.75      2603
   macro avg       0.76      0.75      0.75      2603
weighted avg       0.76      0.75      0.75      2603

              precision    recall  f1-score   support

           J       0.75      0.22      0.34      1030
           P       0.65      0.95      0.77      1573

    accuracy                           0.66      2603
   macro avg       0.70      0.59      0.56      2603
weighted avg       0.69      0.66      0.60      2603



#### Quick test

In [None]:
sample = base.encode(['I prefer to spend my weekends at home cuddled up with my dog. I prefer to stay home and read books and study by myself.'])

In [None]:
sample.shape

(1, 768)

In [None]:
mbti_model.m0.predict(sample)

array(['I'], dtype=object)

In [None]:
mbti_model.m0.predict_proba(sample)

array([[0.07669628, 0.92330372]])

In [None]:
mbti_model.m1.predict_proba(sample)

array([[0.96441426, 0.03558574]])

In [None]:
mbti_model.m1.predict(sample)

array(['N'], dtype=object)

In [None]:
def predict(self, x: np.ndarray):
    mbti_label = []
    diff_score = []
    prediction = None
    for model_num in range(4):
        model = getattr(self, f'm{model_num}')
        logits = model.predict_proba(x).flatten()
    
        if prediction is None:
            prediction = logits 
        else:
            prediction = np.vstack([prediction, logits])

        diff_score += [abs(np.diff(logits))[0]]
        mbti_label += [model.predict(x)[0]]
    return mbti_label, diff_score, prediction 

In [None]:
predict(mbti_model, sample)

(['I', 'N', 'T', 'P'],
 [0.8466074496516511,
  0.9288285261849469,
  0.2267328705395908,
  0.4191881268469968],
 array([[0.07669628, 0.92330372],
        [0.96441426, 0.03558574],
        [0.38663356, 0.61336644],
        [0.29040594, 0.70959406]]))

In [None]:
mbti_label, diff_score, logits = predict(mbti_model, sample)

#### Unsure with what I was trying to do here

In [None]:
np.mean(diff_score)

0.6053392433057965

In [None]:
np.random.dirichlet(alpha=np.ones(4))

array([0.07634267, 0.22388623, 0.4259738 , 0.2737973 ])

In [None]:
np.full((4,), 0.9)

array([0.9, 0.9, 0.9, 0.9])

In [None]:
np.full((4,), 0.8)

array([0.8, 0.8, 0.8, 0.8])

In [None]:
fake = np.full((4,), 0.8)

In [None]:
diff_score

[0.8466074496516511,
 0.9288285261849469,
 0.2267328705395908,
 0.4191881268469968]

In [None]:
np.array(diff_score)

array([0.84660745, 0.92882853, 0.22673287, 0.41918813])

In [None]:
np.vstack([np.array(diff_score), fake, np.ones(fake.shape[0])])

array([[0.84660745, 0.92882853, 0.22673287, 0.41918813],
       [0.8       , 0.8       , 0.8       , 0.8       ],
       [1.        , 1.        , 1.        , 1.        ]])

In [None]:
np.vstack([np.array(diff_score), fake, np.ones(fake.shape[0])]).T

array([[0.84660745, 0.8       , 1.        ],
       [0.92882853, 0.8       , 1.        ],
       [0.22673287, 0.8       , 1.        ],
       [0.41918813, 0.8       , 1.        ]])

In [None]:
A = np.vstack([np.array(diff_score), fake, np.ones(fake.shape[0])])

In [None]:
np.linalg.lstsq(A, np.ones((3,)), rcond=None)

(array([ 0.51194411,  0.59289939, -0.09838746,  0.09110494]),
 array([], dtype=float64),
 2,
 array([2.84432982e+00, 5.25719458e-01, 1.10225117e-16]))

In [None]:
np.linalg.lstsq(A, np.ones((3,)), rcond=None)[0]

array([ 0.51194411,  0.59289939, -0.09838746,  0.09110494])

In [None]:
np.linalg.lstsq(A, np.ones((3,)), rcond=None)[0] @ np.array(diff_score)

0.9999999999999998

In [None]:
np.linalg.lstsq(A, np.ones((3,)), rcond=None)[0] @ np.random.dirichlet(alpha=np.ones((4,)))

0.514211478525303

In [None]:
np.linalg.lstsq(A, np.ones((3,)), rcond=None)[0] @ np.random.dirichlet(alpha=np.ones((4,)))

0.13713949866913058

In [None]:
np.linalg.lstsq(A, np.ones((3,)), rcond=None)[0] @ np.array([0.9, 0.4, 0.1, 0.1])

0.6971812031708431

In [None]:
from scipy.special import gammaln, psi

def kl_divergence_dirichlet(alpha1, alpha2):
    sum_alpha1 = np.sum(alpha1)
    sum_alpha2 = np.sum(alpha2)
    kl = gammaln(sum_alpha2) - gammaln(sum_alpha1) + np.sum(gammaln(alpha1) - gammaln(alpha2))
    kl += np.sum((alpha1 - alpha2) * (psi(alpha1) - psi(sum_alpha1)))
    return kl

In [None]:
kl_divergence_dirichlet(alpha1=np.array(diff_score), alpha2=np.array([0.9, 0.4, 0.1, 0.1]))

-5.727473672324946

In [None]:
kl_divergence_dirichlet(alpha1=np.array(diff_score), alpha2=np.random.dirichlet(alpha=np.ones((4,))))

-8.34048808291522

In [None]:
kl_divergence_dirichlet(alpha1=np.array(diff_score), alpha2=np.random.dirichlet(alpha=np.ones((4,))))

-6.230302155657039

In [None]:
np.log(np.array(diff_score) / np.random.dirichlet(alpha=np.ones((4,))))

array([0.25033317, 1.77251946, 1.1139163 , 1.35028691])

In [None]:
np.linalg.lstsq(A, np.ones((3,)), rcond=None)[0] @ np.log(np.array(diff_score) / np.random.dirichlet(alpha=np.ones((4,))))

1.3302291735473333

In [None]:
np.linalg.lstsq(A, np.ones((3,)), rcond=None)[0] @ np.log(np.array(diff_score) / np.ones((4,))/4)

-1.583769584196391

In [None]:
np.log(np.linalg.lstsq(A, np.ones((3,)), rcond=None)[0] @ np.array(diff_score) / np.random.dirichlet(alpha=np.ones((4,))))

array([2.99600615, 2.88790846, 0.25886622, 2.1005031 ])

### Concluding Model

In [5]:
# Import necessary libraries
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
class KittenBreeder:
    def __init__(self):
        self.m0 = LogisticRegression(max_iter=500)
        self.m1 = LogisticRegression(max_iter=500)
        self.m2 = LogisticRegression(max_iter=500)
        self.m3 = LogisticRegression(max_iter=500)
        
    def train(self, model: LogisticRegression, X_train: np.ndarray, y_train: np.ndarray, class_weights: np.ndarray, X_test: np.ndarray, y_test: np.ndarray):
        model.fit(X_train, y_train, class_weights)
        y_pred = model.predict(X_test)
        print(classification_report(y_test, y_pred))
        
    def update(self, X: np.ndarray, *ys: tuple):
        for model_num, y in enumerate(ys):
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
            self.train(getattr(self, f'm{model_num}'), X_train, y_train[:, 0], y_train[:, 1], X_test, y_test[:, 0])

    def predict(self, x: np.ndarray):
        mbti_label = []
        diff_score = []
        prediction = None
        
        for model_num in range(4):
            model = getattr(self, f'm{model_num}')
            logits = model.predict_proba(x).flatten()
        
            if prediction is None:
                prediction = logits 
            else:
                prediction = np.vstack([prediction, logits])

            diff_score += [abs(np.diff(logits))[0]]
            mbti_label += [model.predict(x)[0]]

        return mbti_label, diff_score, prediction 


In [None]:
breed = KittenBreeder()

In [None]:
breed.update(X, df[['x0', 'x0_weights']].values, df[['x1', 'x1_weights']].values, df[['x2', 'x2_weights']].values, df[['x3', 'x3_weights']].values)

              precision    recall  f1-score   support

           E       0.84      0.03      0.05       600
           I       0.77      1.00      0.87      2003

    accuracy                           0.77      2603
   macro avg       0.81      0.51      0.46      2603
weighted avg       0.79      0.77      0.68      2603



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           N       0.86      1.00      0.93      2244
           S       0.00      0.00      0.00       359

    accuracy                           0.86      2603
   macro avg       0.43      0.50      0.46      2603
weighted avg       0.74      0.86      0.80      2603

              precision    recall  f1-score   support

           F       0.74      0.84      0.79      1408
           T       0.77      0.66      0.71      1195

    accuracy                           0.75      2603
   macro avg       0.76      0.75      0.75      2603
weighted avg       0.76      0.75      0.75      2603

              precision    recall  f1-score   support

           J       0.75      0.22      0.34      1030
           P       0.65      0.95      0.77      1573

    accuracy                           0.66      2603
   macro avg       0.70      0.59      0.56      2603
weighted avg       0.69      0.66      0.60      2603



#### Saving model

In [None]:
import joblib 

In [None]:
joblib.dump(breed, 'mbtiModel.pkl')

['mbtiModel.pkl']

In [None]:
breed.predict(sample)

(['I', 'N', 'T', 'P'],
 [0.8466074496516511,
  0.9288285261849469,
  0.2267328705395908,
  0.4191881268469968],
 array([[0.07669628, 0.92330372],
        [0.96441426, 0.03558574],
        [0.38663356, 0.61336644],
        [0.29040594, 0.70959406]]))

In [None]:
1 / (1 + np.array(diff_score))

array([0.54153361, 0.5184494 , 0.8151734 , 0.70462822])

In [3]:
import os 
os.getcwd()

'/Users/mimiphan/Library/Mobile Documents/com~apple~CloudDocs/Documents/MBA Documents/MyPlayground/tomemotions/dataUtils'

In [6]:
import joblib

breed = joblib.load('../src/mbtiModel.pkl')

In [7]:
breed

<__main__.KittenBreeder at 0x1687114c0>

In [8]:
breed.m0.coef_

array([[-8.74776586e-02,  4.19864408e-01,  9.66286919e-02,
        -1.49661359e-01, -4.99225588e-01, -2.44154123e-01,
         5.08316302e-01,  6.08051911e-01,  5.72583175e-01,
         5.17950658e-01, -4.30810659e-01, -3.57538458e-01,
         1.57955266e-01, -1.46721723e-01, -6.23456310e-01,
        -6.58874368e-01,  2.13883568e-01, -9.39740672e-02,
        -9.37602333e-02,  6.12485337e-02, -2.29343366e-01,
         3.51997195e-01,  9.08273215e-02, -3.22085099e-01,
         2.07765955e-01, -9.52325401e-01,  3.72439590e-01,
        -1.88002978e-01, -2.51365501e-01, -4.79689153e-02,
        -3.84031801e-01, -2.57277386e-01, -4.00516020e-01,
         4.37564280e-01,  3.12728795e-06,  3.15524215e-01,
         5.36217641e-01,  8.64524639e-02,  3.54271448e-01,
        -3.18985478e-01,  3.74561105e-01, -1.12023062e-01,
         4.63463360e-01, -1.35463415e-01, -4.77969204e-01,
        -4.20267024e-01,  3.06348260e-02, -1.57645021e-02,
        -3.38088374e-01, -6.63133885e-02, -6.95599937e-0