In [2]:
from factor_analyzer import FactorAnalyzer
import pandas as pd

import plotly.express as px
import numpy as np
from datasets import LABELS, personality_data
from plotly.offline import init_notebook_mode
# Initialise plotly's offline notebook mode so the figures below can render inside the notebook.
# NOTE(review): connected=True presumably loads plotly.js from the CDN instead of embedding it — confirm against the plotly docs.
init_notebook_mode(connected=True)
# Print floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

In [3]:
# Load the questionnaire item matrix via the project-local `datasets.personality_data` helper.
# NOTE(review): the preview in the next cell suggests the item scores are already standardised — confirm in `datasets`.
train_X = personality_data()

In [4]:
# Peek at the first three rows to sanity-check the item columns and the value scale.
train_X.head(3)

Unnamed: 0,EXT1,EXT2,EXT3,EXT4,EXT5,EXT6,EXT7,EXT8,EXT9,EXT10,...,OPN1,OPN2,OPN3,OPN4,OPN5,OPN6,OPN7,OPN8,OPN9,OPN10
0,-1.30137,0.172303,-1.060083,1.501992,-1.00009,2.119765,-1.265998,-1.112142,0.769334,1.107542,...,-0.567012,0.833287,-0.001519,0.91924,-0.802284,1.907975,0.022656,-0.147631,-1.076614,-0.928417
1,1.069992,-1.337456,1.408993,-0.920664,1.347528,-1.141234,0.876902,-1.112142,0.769334,-1.957826,...,1.160477,-0.066307,-0.914146,0.001835,0.206784,0.096609,1.013476,0.649624,0.841243,0.03921
2,-1.30137,1.682062,0.585967,1.501992,0.564989,1.304515,1.591201,0.460581,-1.458029,0.3412,...,0.296732,-0.066307,0.911109,1.836645,0.206784,-0.809074,1.013476,0.649624,-0.117685,1.006837


In [5]:
def component_table(model, df, component, threshold=0.05, labels=None):
    """Tabulate the items that load positively on one factor of a fitted model.

    Parameters
    ----------
    model : object
        Fitted factor model exposing a 2-D ``loadings_`` array with one row
        per column of ``df`` and one column per factor.
    df : pandas.DataFrame
        Frame the model was fitted on; only its column names are used.
    component : int
        Zero-based index of the factor (column of ``model.loadings_``).
    threshold : float, default 0.05
        Only items whose loading is strictly greater than this are kept.
    labels : mapping, optional
        Maps item codes to long descriptions. Defaults to the module-level
        ``LABELS``. Codes missing from the mapping keep the code itself as
        their description.

    Returns
    -------
    pandas.DataFrame
        Columns ``component`` (loading), ``short`` (item code) and ``long``
        (description), sorted by loading in descending order.
    """
    if labels is None:
        labels = LABELS

    # Read the loadings from the model that was passed in, not from whatever
    # global happens to be bound at call time.
    loadings = model.loadings_[:, component]
    mask = loadings > threshold

    df_components = pd.DataFrame({
        'component': loadings[mask],
        'short': df.columns[mask],
    })
    # Build the description column from a lookup instead of mutating a column
    # selection in place, which is unreliable under pandas copy-on-write.
    df_components['long'] = df_components['short'].map(
        lambda code: labels.get(code, code)
    )

    return df_components.sort_values('component', ascending=False)

In [6]:
# Display the item-code -> questionnaire-statement mapping used to annotate the tables below.
LABELS

{'EXT1': 'I am the life of the party.',
 'EXT2': "I don't talk a lot.",
 'EXT3': 'I feel comfortable around people.',
 'EXT4': 'I keep in the background.',
 'EXT5': 'I start conversations.',
 'EXT6': 'I have little to say.',
 'EXT7': 'I talk to a lot of different people at parties.',
 'EXT8': "I don't like to draw attention to myself.",
 'EXT9': "I don't mind being the center of attention.",
 'EXT10': 'I am quiet around strangers.',
 'EST1': 'I get stressed out easily.',
 'EST2': 'I am relaxed most of the time.',
 'EST3': 'I worry about things.',
 'EST4': 'I seldom feel blue.',
 'EST5': 'I am easily disturbed.',
 'EST6': 'I get upset easily.',
 'EST7': 'I change my mood a lot.',
 'EST8': 'I have frequent mood swings.',
 'EST9': 'I get irritated easily.',
 'EST10': 'I often feel blue.',
 'AGR1': 'I feel little concern for others.',
 'AGR2': 'I am interested in people.',
 'AGR3': 'I insult people.',
 'AGR4': "I sympathize with others' feelings.",
 'AGR5': "I am not interested in other pe

## Plot (3 factors)

In [7]:
# Fit a 3-factor model with a varimax rotation on the full item matrix.
# The fitted model is bound to `fa`, which the cells in this section read.
fa = FactorAnalyzer(rotation='varimax', n_factors=3)
fa.fit(train_X)

FactorAnalyzer(rotation='varimax', rotation_kwargs={})

In [11]:
# Sanity check: expect one row of loadings per item column and one column per factor.
fa.loadings_.shape

(50, 3)

In [12]:
# Shape of the input matrix; its column count should match the row count of fa.loadings_.
train_X.shape

(500000, 50)

In [6]:
# Possibly "introvertness": list the items that load on the first factor (index 0).
component_table(fa, train_X, 0)

Unnamed: 0,component,short,long
1,0.677416,EXT4,I keep in the background.
0,0.669473,EXT2,I don't talk a lot.
4,0.63318,EXT10,I am quiet around strangers.
2,0.554505,EXT6,I have little to say.
3,0.55246,EXT8,I don't like to draw attention to myself.
15,0.412873,AGR7,I am not really interested in others.
14,0.264118,AGR5,I am not interested in other people's problems.
12,0.250768,EST10,I often feel blue.
19,0.202056,CSN7,I like order.
6,0.18355,EST3,I worry about things.


In [7]:
# Possibly "instability": list the items that load on the second factor (index 1).
component_table(fa, train_X, 1)

Unnamed: 0,component,short,long
9,0.715791,EST8,I have frequent mood swings.
8,0.695974,EST7,I change my mood a lot.
7,0.664234,EST6,I get upset easily.
11,0.626999,EST10,I often feel blue.
4,0.619715,EST1,I get stressed out easily.
10,0.602925,EST9,I get irritated easily.
18,0.579607,CSN4,I make a mess of things.
5,0.549148,EST3,I worry about things.
6,0.492551,EST5,I am easily disturbed.
19,0.438541,CSN6,I often forget to put things back in their pro...


In [8]:
# Possibly "empathy": list the items that load on the third factor (index 2).
component_table(fa, train_X, 2)

Unnamed: 0,component,short,long
11,0.571873,AGR4,I sympathize with others' feelings.
14,0.56909,AGR9,I feel others' emotions.
13,0.491601,AGR8,I take time out for others.
12,0.44794,AGR6,I have a soft heart.
17,0.442584,CSN3,I pay attention to details.
21,0.435257,CSN10,I am exacting in my work.
20,0.43176,CSN9,I follow a schedule.
10,0.427829,AGR2,I am interested in people.
15,0.424471,AGR10,I make people feel at ease.
19,0.409149,CSN7,I like order.


In [34]:
# One row per questionnaire item, one column per (tentatively named) factor.
loadings = pd.DataFrame(fa.loadings_, columns=['introvertness', 'instability', 'empathy'])
loadings['feature'] = train_X.columns
# Look each description up by item code instead of assigning LABELS.values()
# positionally: that only lines up when the dict's insertion order and length
# match train_X.columns exactly. Unknown codes fall back to the code itself.
loadings['description'] = [LABELS.get(code, code) for code in train_X.columns]

In [35]:
# Interactive 3-D scatter of the item loadings; hovering a point shows the
# item's code and its questionnaire statement.
fig = px.scatter_3d(
    loadings,
    x='introvertness',
    y='instability',
    z='empathy',
    hover_data=['feature', 'description'],
)
fig.update_layout(height=800, width=1300)
fig.show()

In [9]:
# Compute factor scores for every row of train_X with the currently fitted `fa`.
# The next cell labels the result with three factor names, so `fa` must be the 3-factor model here.
transformed_X = fa.transform(train_X)

In [10]:
# Label the factor scores and keep a 1/30 subsample for plotting (presumably to
# keep the 3-D scatter responsive). A fixed random_state makes the subsample,
# and therefore the figure, reproducible when the notebook is re-run.
transformed_df = (
    pd.DataFrame(transformed_X, columns=['introvertness', 'instability', 'empathy'])
    .sample(n=len(transformed_X) // 30, random_state=0)
    .assign(size=1)  # constant marker size; the scalar is broadcast to every row
)
fig = px.scatter_3d(
    transformed_df,
    x='introvertness',
    y='instability',
    z='empathy',
    size='size',
    size_max=5,
)
fig.update_layout(width=1300, height=800)
fig.show()

## 4 factors - Non-orthogonal rotation

In [11]:
# Fit a 4-factor model with a promax rotation on the same item matrix.
# NOTE: this rebinds `fa`, replacing the 3-factor model fitted earlier; re-run
# that fit before re-executing any of the 3-factor cells.
fa = FactorAnalyzer(rotation='promax', n_factors=4)
fa.fit(train_X)

FactorAnalyzer(n_factors=4, rotation_kwargs={})

### Phlegmatic
Phlegmatic individuals tend to be relaxed, peaceful, quiet, and easy-going. They are sympathetic and care about others, yet they try to hide their emotions. Phlegmatic individuals are also good at generalising ideas or problems to the world and making compromises.

In [13]:
# Items that load on factor 0 of the 4-factor model.
component_table(fa, train_X, 0)

Unnamed: 0,component,short,long
1,0.684042,EXT4,I keep in the background.
0,0.682577,EXT2,I don't talk a lot.
4,0.637124,EXT10,I am quiet around strangers.
3,0.54771,EXT8,I don't like to draw attention to myself.
2,0.534169,EXT6,I have little to say.
13,0.460916,AGR7,I am not really interested in others.
12,0.318309,AGR5,I am not interested in other people's problems.
10,0.237456,EST10,I often feel blue.
17,0.198337,CSN7,I like order.
11,0.156981,AGR1,I feel little concern for others.


### Choleric
Choleric individuals tend to be more extroverted. They are described as independent, decisive, goal-oriented, and ambitious. These traits, combined with their dominant, result-oriented outlook, make them natural leaders. In Greek, Medieval, and Renaissance thought, they were also considered violent, vengeful, and short-tempered.

In [14]:
# Items that load on factor 1 of the 4-factor model.
component_table(fa, train_X, 1)

Unnamed: 0,component,short,long
10,0.706433,EST8,I have frequent mood swings.
9,0.69009,EST7,I change my mood a lot.
8,0.683599,EST6,I get upset easily.
5,0.663708,EST1,I get stressed out easily.
12,0.62981,EST10,I often feel blue.
6,0.616219,EST3,I worry about things.
11,0.596263,EST9,I get irritated easily.
20,0.506722,CSN4,I make a mess of things.
7,0.501378,EST5,I am easily disturbed.
21,0.361002,CSN6,I often forget to put things back in their pro...


### Melancholic
Melancholic individuals tend to be analytical and detail-oriented, and they are deep thinkers and feelers. They are introverted and try to avoid being singled out in a crowd. A melancholic personality leads to self-reliant individuals who are thoughtful, reserved, and often anxious. They often strive for perfection within themselves and their surroundings, which leads to tidy and detail-oriented behavior.

In [15]:
# Items that load on factor 2 of the 4-factor model.
component_table(fa, train_X, 2)

Unnamed: 0,component,short,long
10,0.57074,AGR4,I sympathize with others' feelings.
13,0.534186,AGR9,I feel others' emotions.
19,0.506737,CSN9,I follow a schedule.
12,0.467699,AGR8,I take time out for others.
11,0.467151,AGR6,I have a soft heart.
17,0.458355,CSN5,I get chores done right away.
15,0.436219,CSN1,I am always prepared.
18,0.433664,CSN7,I like order.
16,0.363152,CSN3,I pay attention to details.
20,0.353908,CSN10,I am exacting in my work.


### Sanguine
The sanguine personality type is described primarily as being highly talkative, enthusiastic, active, and social. Sanguines tend to be more extroverted and enjoy being part of a crowd; they find that being social, outgoing, and charismatic is easy to accomplish. Individuals with this personality have a hard time doing nothing and engage in more risk-seeking behavior.

In [16]:
# Items that load on factor 3 of the 4-factor model.
component_table(fa, train_X, 3)

Unnamed: 0,component,short,long
23,0.673981,OPN10,I am full of ideas.
19,0.605122,OPN5,I have excellent ideas.
17,0.593316,OPN1,I have a rich vocabulary.
21,0.580987,OPN8,I use difficult words.
18,0.531017,OPN3,I have a vivid imagination.
20,0.515996,OPN7,I am quick to understand things.
22,0.398822,OPN9,I spend time reflecting on things.
16,0.292218,CSN10,I am exacting in my work.
13,0.28525,CSN3,I pay attention to details.
2,0.165786,EXT9,I don't mind being the center of attention.
