# Load trained models

If the models have not yet been trained and saved to the `models/` directory, run `train.ipynb` first.

In [3]:
import pickle
import os

def load_classifier(name):
    """Unpickle and return the object stored in ``models/<name>.pickle``.

    The ``models`` directory is resolved relative to the current working
    directory.
    """
    pickle_path = os.path.join('models', name + '.pickle')
    # NOTE: unpickling can execute arbitrary code, so only load files you trust.
    with open(pickle_path, 'rb') as model_file:
        classifier = pickle.load(model_file)
    return classifier
         
# Load every pickled classifier used by the prediction cells below.
# The files are read in the order listed; each name matches its pickle file.
(
    IntroExtro,
    IntuitionSensing,
    ThinkingFeeling,
    JudgingPercieiving,
    MainFunction,
    DomFunction,
    AuxFunction,
) = [
    load_classifier(model_name)
    for model_name in (
        "IntroExtro",
        "IntuitionSensing",
        "ThinkingFeeling",
        "JudgingPercieiving",
        "MainFunction",
        "DomFunction",
        "AuxFunction",
    )
]

# Testing the models to predict my traits by feeding them a few of my Quora writings

Define a function that takes a piece of writing, tokenizes it, and then predicts the output using the classifiers loaded above.

In [11]:
import nltk
import string

# English stopwords plus every single punctuation character; tokens found here
# are dropped from the feature dict.
useless_words = nltk.corpus.stopwords.words("english") + list(string.punctuation)
# Hash-based copy of ``useless_words``. Membership is tested once per token, so
# a set lookup avoids rescanning the whole list for every word of every text.
_useless_word_set = frozenset(useless_words)

def build_bag_of_words_features_filtered(words):
    """Tokenize ``words`` and return a bag-of-words feature dict.

    Parameters
    ----------
    words : str
        Raw text; it is split into tokens with ``nltk.word_tokenize``.

    Returns
    -------
    dict
        Maps each token that is not listed in ``useless_words`` to ``1``.
        Repeated tokens collapse into a single key.
    """
    return {
        token: 1
        for token in nltk.word_tokenize(words)
        if token not in _useless_word_set
    }

def MBTI(input):
    """Build a one-line personality summary for a single piece of text.

    The text is turned into bag-of-words features and passed to each of the
    seven loaded classifiers. The four dichotomy predictions are translated to
    letters; a prediction that matches neither expected label contributes no
    letter at all.

    Returns
    -------
    str
        ``"<letters> (Main:<main>) (Dom: <dom>, Aux: <aux>)"``.
    """
    features = build_bag_of_words_features_filtered(input)

    # Classifiers are queried in a fixed order on the same feature dict.
    energy = IntroExtro.classify(features)
    perception = IntuitionSensing.classify(features)
    decision = ThinkingFeeling.classify(features)
    lifestyle = JudgingPercieiving.classify(features)
    main_function = MainFunction.classify(features)
    dominant_function = DomFunction.classify(features)
    auxiliary_function = AuxFunction.classify(features)

    # (predicted label, ((expected label, letter), ...)) per dichotomy.
    # Label spellings are compared literally -- presumably they mirror the
    # labels used when the classifiers were trained; verify before changing.
    letter_table = (
        (energy, (('introvert', 'I'), ('extrovert', 'E'))),
        (perception, (('Intuition', 'N'), ('Sensing', 'S'))),
        (decision, (('Thinking', 'T'), ('Feeling', 'F'))),
        (lifestyle, (('Judging', 'J'), ('Percieving', 'P'))),
    )
    type_code = ''.join(
        letter
        for predicted, candidates in letter_table
        for expected, letter in candidates
        if predicted == expected
    )
    return f"{type_code} (Main:{main_function}) (Dom: {dominant_function}, Aux: {auxiliary_function})"
    

### Building another function that takes all of my posts as input, plots the percentage of posts showing each trait, and displays the resulting personality type as the graph title

**Note:** The input should be an array of your posts

In [12]:
import pandas as pd

def tellmemyMBTI(input, name, traasits=None):
    """Classify every post, chart the letter percentages and report the overall type.

    Each post is passed to ``MBTI``. For each of the eight letters, the share of
    posts whose predicted type code contains that letter is computed. The
    overall type is built pair by pair (I/E, N/S, T/F, J/P) from the letter with
    the higher share; when both shares are equal, both letters are kept. The
    chart is saved to ``name + '.png'`` and shown.

    Parameters
    ----------
    input : iterable of str
        The posts to classify.
    name : str
        File stem of the saved chart.
    traasits : list, optional
        List that the overall type is appended to (in place). A new list is
        created when it is omitted.

    Returns
    -------
    list
        ``traasits`` with the overall type appended. With no posts at all the
        appended type is the empty string.
    """
    # Imported locally because no visible cell of this notebook imports numpy.
    import numpy as np
    # NOTE(review): `plt` (matplotlib.pyplot) is still expected to exist in the
    # notebook namespace; no visible cell imports it -- confirm before running.

    # A fresh list per call: a shared ``[]`` default would keep growing
    # from one call to the next.
    if traasits is None:
        traasits = []

    first_letters = ['I', 'N', 'T', 'J']
    second_letters = ['E', 'S', 'F', 'P']

    # MBTI() returns "<letters> (Main:...) (Dom: ..., Aux: ...)". Only the
    # leading letters are tallied; searching the whole string would also match
    # capital letters that occur inside the Main/Dom/Aux labels.
    codes = [MBTI(post).split(' ', 1)[0] for post in input]
    total = len(codes)

    def percent_row(letters):
        """One-row frame (index 'count') with the percentage of posts per letter."""
        row = [
            (sum(letter in code for code in codes) * 100 / total) if total else 0.0
            for letter in letters
        ]
        return pd.DataFrame([row], index=['count'], columns=letters)

    trait1 = percent_row(first_letters)
    trait2 = percent_row(second_letters)

    # Finding the personality
    YourTrait = ''
    if total:
        for first, second in zip(first_letters, second_letters):
            # Label-based access; positional ``series[0]`` on a labelled
            # Series is deprecated in pandas.
            first_pct = trait1.loc['count', first]
            second_pct = trait2.loc['count', second]
            if first_pct >= second_pct:
                YourTrait += first
            if second_pct >= first_pct:
                YourTrait += second
    traasits.append(YourTrait)

    # Plotting
    # Tick labels come from the letter pairs themselves; the previous code read
    # them from a `results` frame that this notebook never defines.
    labels = [f"{first} / {second}" for first, second in zip(first_letters, second_letters)]
    ind = np.arange(len(first_letters))
    width = 0.4

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.bar(ind, trait1.loc['count'], width, color='royalblue')
    ax.bar(ind + width, trait2.loc['count'], width, color='seagreen')
    fig.set_size_inches(10, 7)

    ax.set_xlabel('Finding the MBTI Trait', size = 18)
    ax.set_ylabel('Trait Percent (%)', size = 18)
    ax.set_xticks(ind + width / 2)
    ax.set_xticklabels(labels)
    ax.set_yticks(np.arange(0, 105, step=10))
    ax.set_title('Your Personality is '+YourTrait, size = 20)
    plt.grid(True)

    fig.savefig(name+'.png', dpi=200)

    plt.show()
    return traasits
        

# Importing my quora answers from a text file

I copied all my answers from the link I provided before (I broke the paragraphs down into separate posts).

In [21]:
import os
from pathlib import Path

# Base dataset path: place the directories that contain the texts to be
# classified inside this folder.
dataset_path = f"{Path.home()}/dataset/"

def collect_entries_for(directory_name: str):
    """Return the path of every file found under ``dataset_path + directory_name``.

    The directory tree is walked recursively with ``os.walk``; each result is
    the containing directory joined with the file name. A directory that
    yields nothing produces an empty list.
    """
    entries = []
    root = dataset_path + directory_name
    for folder, _subfolders, file_names in os.walk(root):
        for file_name in file_names:
            entries.append(os.path.join(folder, file_name))
    return entries


journals_texts = collect_entries_for('journals')

stanbar_texts = collect_entries_for('stanbar')

# Derek Sivers's texts; he has written a lot and is known as INTJ
sivers_texts = collect_entries_for('sivers')

# Tim Ferriss's texts; he has written a lot and is known as INTJ
ferriss_texts = collect_entries_for('ferriss')

# Will Smith's texts; he has written a lot and is known as ENFJ
will_texts = collect_entries_for('will')

# Gary's texts; he has written a lot and is known as ENFJ
gary_texts = collect_entries_for('gary')

# Branson's texts; he has written a lot and is known as ENFJ
branson_texts = collect_entries_for('branson')

# Oprah's texts; she has written a lot and is known as ENFJ
oprah_texts = collect_entries_for('oprah')

# Jacko's texts; he has written a lot and is known as ESFP
jacko_texts = collect_entries_for('jacko')

# Using the classifier to predict my personality type

In [14]:
def predict_for_texts(texts, verbose = False):
    """Print the overall predicted type for each text file.

    Every non-blank line of a file (after stripping whitespace) is classified
    with ``MBTI``. For each letter pair (I/E, N/S, T/F, J/P) the letter that
    appears in more of the per-line type codes is kept; when both counts are
    equal, both letters are kept. One line, ``"<type> <path>"``, is printed per
    file. A file without any non-blank line prints an empty type.

    Parameters
    ----------
    texts : iterable of str
        Paths of the text files to classify.
    verbose : bool, optional
        When true, also print the full ``MBTI`` result of every line.

    Returns
    -------
    None
    """
    letter_pairs = (('I', 'E'), ('N', 'S'), ('T', 'F'), ('J', 'P'))

    for text in texts:
        # The context manager closes the file once its lines have been read.
        with open(text) as handle:
            lines = [stripped for stripped in (raw.strip() for raw in handle) if stripped]

        codes = []
        for line in lines:
            guess_for_line = MBTI(line)
            if verbose:
                print(f"guess: {guess_for_line}")
            # MBTI() returns "<letters> (Main:...) (Dom: ..., Aux: ...)". Only
            # the leading letters are tallied; searching the whole string would
            # also match capital letters inside the Main/Dom/Aux labels.
            codes.append(guess_for_line.split(' ', 1)[0])

        # Finding the personality. All lines share one denominator, so
        # comparing raw counts is the same as comparing percentages.
        YourTrait = ''
        if codes:
            for first, second in letter_pairs:
                first_hits = sum(first in code for code in codes)
                second_hits = sum(second in code for code in codes)
                if first_hits >= second_hits:
                    YourTrait += first
                if second_hits >= first_hits:
                    YourTrait += second

        print(YourTrait, text)

    
# trait = tellmemyMBTI(beginner_expert, "Stach")

In [22]:
# Each entry pairs the heading to print with the list of files to classify.
# Commented-out entries are runs that are currently switched off.
for heading, paths in (
    # ("\n\nMy journals:", journals_texts),
    # ("\n\nstan.bar posts:", stanbar_texts),
    # ("\n\nDerek Sivers posts:", sivers_texts),
    # ("\n\n ferriss_texts books:", ferriss_texts),
    # ("\n\n will books:", will_texts),
    ("\n\n Gary books:", gary_texts),
    ("\n\n Oprah books:", oprah_texts),
    ("\n\n Branson books:", branson_texts),
    ("\n\n Jacko books:", jacko_texts),
):
    print(heading)
    predict_for_texts(paths)



 Gary books:
INTJ /Users/stanbar/dataset/gary/The Thank You Economy.txt
INTJ /Users/stanbar/dataset/gary/Crushing It.txt
INTJ /Users/stanbar/dataset/gary/Jab, Jab, Jab, Right Hook How to Tell Your Story in a Noisy Social World.txt


 Oprah books:
INFJ /Users/stanbar/dataset/oprah/What Happened to You Conversations on Trauma, Resilience, and Healing.txt
INFJ /Users/stanbar/dataset/oprah/The Wisdom of Sundays.txt
INFJ /Users/stanbar/dataset/oprah/The Path Made Clear Discovering Your Life’s Direction and Purpose.txt
INFJ /Users/stanbar/dataset/oprah/What I Know For Sure.txt


 Branson books:
INTJ /Users/stanbar/dataset/branson/Finding My Virginity The New Autobiography (Richard Branson) (z-lib.org).txt
INTJ /Users/stanbar/dataset/branson/Like a Virgin (Richard Branson) (z-lib.org).txt
INTJ /Users/stanbar/dataset/branson/Screw It, Lets Do It Lessons in Life and Business (Richard Branson) (z-lib.org).txt
INTP /Users/stanbar/dataset/branson/Losing My Virginity How I Survived, Had Fun, an

# Concluding note

My profile according to https://www.16personalities.com/ is INTJ.

I am pretty happy that such a basic model came pretty close to my real profile, with only one letter different. Even that difference was a close call, within about 10% inaccuracy, which is pretty good.

However, I am not sure how the classifier will perform on test cases in general, especially since very little data was available for some profiles.

In [None]:
# Calls show_most_informative_features() on one classifier at a time.
# Uncomment a different line to inspect another model.
# IntroExtro.show_most_informative_features()
# IntuitionSensing.show_most_informative_features()
# ThinkingFeeling.show_most_informative_features()
# JudgingPercieiving.show_most_informative_features()
MainFunction.show_most_informative_features()
# DomFunction.show_most_informative_features()
# AuxFunction.show_most_informative_features()