In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt # graph plotting
from sklearn.cluster import MiniBatchKMeans # kmeans


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(folder, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Use the pandas library to read the data from the .csv file and store it in a DataFrame named `df`.

In [None]:
# Load the tab-separated survey responses into the DataFrame `df`.
df = pd.read_csv(
    '/kaggle/input/big-five-personality-test/IPIP-FFM-data-8Nov2018/data-final.csv',
    sep='\t',
)

# Leave the frame as the last expression so the notebook renders it.
df

In [None]:
columns = df.columns

# Print every column name of the dataset, one per line.
for column_name in columns:
    print(column_name)

Here, columns 1 to 50 represent the test questions.
* Questions 1 to 10 try to find out whether the person is an extrovert or not.
* Questions 11 to 20 try to find out whether the person is neurotic or not.
* Questions 21 to 30 try to find out whether the person is agreeable or not.
* Questions 31 to 40 try to find out whether the person is conscientious or not.
* Questions 41 to 50 try to find out whether the person is open to new experiences or not.

The remaining 10 columns contain metadata, such as the width and height of the user's screen when they were taking the test.

In [None]:
# Keep only the first 50 columns of df (the test questions) in X.
question_columns = df.columns[:50]
X = df[question_columns]

In [None]:
# Remove the column-display limit so wide DataFrames are shown with all their columns.
pd.options.display.max_columns = None

In [None]:
# Show the question-only frame X as this cell's output.
X

In [None]:
# Replace every missing answer in X with 0 so the clustering step receives no NaN values.
X = X.fillna(value=0)

Now, we will run the k-means algorithm, passing the number of clusters, batch size, random state and maximum number of iterations as parameters.
* Here, the number of clusters is 10. We can change it to any number.
* Our batch size is 100, i.e. the number of samples processed in each mini-batch.

In [None]:
# Configure mini-batch k-means: 10 clusters, mini-batches of 100 samples,
# at most 100 iterations, and a fixed random_state for repeatable runs.
clusterer = MiniBatchKMeans(
    n_clusters=10,
    random_state=0,
    batch_size=100,
    max_iter=100,
)

# fit() returns the fitted estimator itself, which is kept under the name `kmeans`.
kmeans = clusterer.fit(X)

In [None]:
# Number of cluster centers found by k-means (one row of cluster_centers_ per cluster).
kmeans.cluster_centers_.shape[0]

In [None]:
# Give each of the first ten cluster centers its own name:
# `one` is personality type 1, `two` is personality type 2, and so on.
(
    one,
    two,
    three,
    four,
    five,
    six,
    seven,
    eight,
    nine,
    ten,
) = kmeans.cluster_centers_[:10]

In [None]:
# cluster one: display its center, i.e. the first row of kmeans.cluster_centers_

one

Now, we will calculate the scores of cluster one.
Here, each question's value is added or subtracted depending on the personality trait being scored.

In [None]:
# Score cluster one on each of the five personality traits.
#
# The cluster center has one coordinate per question, in question order:
#   0-9 extroversion, 10-19 neuroticism, 20-29 agreeableness,
#   30-39 conscientiousness, 40-49 openness.
# Each trait therefore has to be scored from ITS OWN block of ten coordinates;
# scoring every trait from coordinates 0-9 would only ever use the
# extroversion questions.
#
# Each entry is (index of the trait's first question, sign of each of its ten
# questions): +1 adds the answer, -1 subtracts it.
# NOTE(review): the agreeableness signs at positions 6-8 (-1, +1, -1) are kept
# exactly as this notebook had them; the IPIP scoring key appears to use
# (+1, -1, +1) there - confirm against the dataset codebook.
trait_keys = {
    'extroversion_score':      (0,  (+1, -1, +1, -1, +1, -1, +1, -1, +1, -1)),
    'neuroticism_score':       (10, (+1, -1, +1, -1, +1, +1, +1, +1, +1, +1)),
    'agreeableness_score':     (20, (-1, +1, -1, +1, -1, -1, +1, -1, +1, +1)),
    'conscientiousness_score': (30, (+1, -1, +1, -1, +1, -1, +1, -1, +1, +1)),
    'openness_score':          (40, (+1, -1, +1, -1, +1, -1, +1, +1, +1, +1)),
}

one_scores= {}

for trait_name, (first_question, signs) in trait_keys.items():
    # Signed sum of the trait's ten answers; np.dot keeps the result a NumPy scalar.
    trait_answers = one[first_question:first_question + 10]
    one_scores[trait_name]= np.dot(signs, trait_answers)

In [None]:
# Display the five trait scores computed for cluster one.
one_scores

In [None]:
# calculating scores for all 10 clusters

all_types= {'one':one, 'two':two, 'three':three, 'four':four, 'five':five, 'six':six, 'seven':seven, 'eight':eight,
            'nine':nine, 'ten':ten}

# Scoring key: (index of the trait's first question, sign of each of its ten questions).
# A cluster center has one coordinate per question, in question order
# (0-9 extroversion, 10-19 neuroticism, 20-29 agreeableness,
# 30-39 conscientiousness, 40-49 openness), so each trait must be scored from
# its own block of ten coordinates rather than from coordinates 0-9 every time.
# +1 adds the answer, -1 subtracts it.
# NOTE(review): the agreeableness signs at positions 6-8 (-1, +1, -1) are kept
# exactly as this notebook had them; the IPIP scoring key appears to use
# (+1, -1, +1) there - confirm against the dataset codebook.
trait_keys = {
    'extroversion_score':      (0,  (+1, -1, +1, -1, +1, -1, +1, -1, +1, -1)),
    'neuroticism_score':       (10, (+1, -1, +1, -1, +1, +1, +1, +1, +1, +1)),
    'agreeableness_score':     (20, (-1, +1, -1, +1, -1, -1, +1, -1, +1, +1)),
    'conscientiousness_score': (30, (+1, -1, +1, -1, +1, -1, +1, -1, +1, +1)),
    'openness_score':          (40, (+1, -1, +1, -1, +1, -1, +1, +1, +1, +1)),
}

all_types_scores= {}

for name, personality_type in all_types.items():
    # One dict of trait -> signed sum per cluster; np.dot keeps each score a
    # NumPy scalar, which the later normalization arithmetic relies on.
    all_types_scores[name]= {
        trait_name: np.dot(signs, personality_type[first_question:first_question + 10])
        for trait_name, (first_question, signs) in trait_keys.items()
    }

In [None]:
# Display the trait scores of every cluster, keyed by cluster name.
all_types_scores

The magnitudes of these scores are not directly comparable with each other. To compare the types, we will normalize the data.

In [None]:
# Collect, for each trait, the score of every cluster into its own list.
# All five lists follow the iteration order of all_types_scores.
cluster_trait_scores = list(all_types_scores.values())

all_extroversion = [scores['extroversion_score'] for scores in cluster_trait_scores]
all_neuroticism = [scores['neuroticism_score'] for scores in cluster_trait_scores]
all_agreeableness = [scores['agreeableness_score'] for scores in cluster_trait_scores]
all_conscientiousness = [scores['conscientiousness_score'] for scores in cluster_trait_scores]
all_openness = [scores['openness_score'] for scores in cluster_trait_scores]

Now, we will use the min-max normalization equation, i.e. $\text{norm}(x_i) = \dfrac{x_i - \min(x)}{\max(x) - \min(x)}$

In [None]:
def _min_max_normalize(scores):
    """Rescale scores linearly so that the smallest becomes 0 and the largest becomes 1.

    Args:
        scores: sequence of numeric scores (here, one score per cluster).

    Returns:
        A NumPy float array with one normalized value per input score.
        An empty input gives an empty array. When all scores are equal the
        range is zero, so an array of zeros is returned instead of dividing
        by zero and producing NaN.
    """
    # Convert explicitly: subtracting a scalar from a plain Python list only
    # works when the scalar happens to be a NumPy type.
    values = np.asarray(scores, dtype=float)
    if values.size == 0:
        return values

    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        return np.zeros_like(values)
    return (values - lowest) / span


all_extroversion_normalized= _min_max_normalize(all_extroversion)
all_neuroticism_normalized= _min_max_normalize(all_neuroticism)
all_agreeableness_normalized= _min_max_normalize(all_agreeableness)
all_conscientiousness_normalized= _min_max_normalize(all_conscientiousness)
all_openness_normalized= _min_max_normalize(all_openness)

In [None]:
# normalized extroversion scores: one entry per cluster, in the order of all_types_scores

all_extroversion_normalized

In [None]:
# Rebuild the per-cluster score dictionaries from the normalized arrays.
# The arrays were filled in the iteration order of all_types_scores, so the
# position of a cluster name in that dict is also its index in every array.
normalized_all_types_scores = {}

for position, cluster_name in enumerate(all_types_scores):
    normalized_all_types_scores[cluster_name] = {
        'extroversion_score': all_extroversion_normalized[position],
        'neuroticism_score': all_neuroticism_normalized[position],
        'agreeableness_score': all_agreeableness_normalized[position],
        'conscientiousness_score': all_conscientiousness_normalized[position],
        'openness_score': all_openness_normalized[position],
    }

In [None]:
# Display the normalized trait scores of every cluster, keyed by cluster name.
normalized_all_types_scores

Now, we plot a bar graph of the normalized personality trait scores for each of the clusters.

In [None]:
# Bar chart of the normalized trait scores of cluster one (y-axis fixed to 0..1).
cluster_scores = normalized_all_types_scores['one']

fig, ax = plt.subplots(figsize=(15, 5))
ax.set_ylim(0, 1)
ax.bar(
    list(cluster_scores.keys()),
    list(cluster_scores.values()),
    color=['darkred', 'olive', 'darkorange', 'turquoise', 'indigo'],
)
plt.show()

In [None]:
# Bar chart of the normalized trait scores of cluster two (y-axis fixed to 0..1).
cluster_scores = normalized_all_types_scores['two']

fig, ax = plt.subplots(figsize=(15, 5))
ax.set_ylim(0, 1)
ax.bar(
    list(cluster_scores.keys()),
    list(cluster_scores.values()),
    color=['darkred', 'olive', 'darkorange', 'turquoise', 'indigo'],
)
plt.show()

In [None]:
# Bar chart of the normalized trait scores of cluster three (y-axis fixed to 0..1).
cluster_scores = normalized_all_types_scores['three']

fig, ax = plt.subplots(figsize=(15, 5))
ax.set_ylim(0, 1)
ax.bar(
    list(cluster_scores.keys()),
    list(cluster_scores.values()),
    color=['darkred', 'olive', 'darkorange', 'turquoise', 'indigo'],
)
plt.show()

In [None]:
# Bar chart of the normalized trait scores of cluster four (y-axis fixed to 0..1).
cluster_scores = normalized_all_types_scores['four']

fig, ax = plt.subplots(figsize=(15, 5))
ax.set_ylim(0, 1)
ax.bar(
    list(cluster_scores.keys()),
    list(cluster_scores.values()),
    color=['darkred', 'olive', 'darkorange', 'turquoise', 'indigo'],
)
plt.show()

In [None]:
# Bar chart of the normalized trait scores of cluster five (y-axis fixed to 0..1).
cluster_scores = normalized_all_types_scores['five']

fig, ax = plt.subplots(figsize=(15, 5))
ax.set_ylim(0, 1)
ax.bar(
    list(cluster_scores.keys()),
    list(cluster_scores.values()),
    color=['darkred', 'olive', 'darkorange', 'turquoise', 'indigo'],
)
plt.show()

In [None]:
# Bar chart of the normalized trait scores of cluster six (y-axis fixed to 0..1).
cluster_scores = normalized_all_types_scores['six']

fig, ax = plt.subplots(figsize=(15, 5))
ax.set_ylim(0, 1)
ax.bar(
    list(cluster_scores.keys()),
    list(cluster_scores.values()),
    color=['darkred', 'olive', 'darkorange', 'turquoise', 'indigo'],
)
plt.show()

In [None]:
# Bar chart of the normalized trait scores of cluster seven (y-axis fixed to 0..1).
cluster_scores = normalized_all_types_scores['seven']

fig, ax = plt.subplots(figsize=(15, 5))
ax.set_ylim(0, 1)
ax.bar(
    list(cluster_scores.keys()),
    list(cluster_scores.values()),
    color=['darkred', 'olive', 'darkorange', 'turquoise', 'indigo'],
)
plt.show()

In [None]:
# Bar chart of the normalized trait scores of cluster eight (y-axis fixed to 0..1).
cluster_scores = normalized_all_types_scores['eight']

fig, ax = plt.subplots(figsize=(15, 5))
ax.set_ylim(0, 1)
ax.bar(
    list(cluster_scores.keys()),
    list(cluster_scores.values()),
    color=['darkred', 'olive', 'darkorange', 'turquoise', 'indigo'],
)
plt.show()

In [None]:
# Bar chart of the normalized trait scores of cluster nine (y-axis fixed to 0..1).
cluster_scores = normalized_all_types_scores['nine']

fig, ax = plt.subplots(figsize=(15, 5))
ax.set_ylim(0, 1)
ax.bar(
    list(cluster_scores.keys()),
    list(cluster_scores.values()),
    color=['darkred', 'olive', 'darkorange', 'turquoise', 'indigo'],
)
plt.show()

In [None]:
# Bar chart of the normalized trait scores of cluster ten (y-axis fixed to 0..1).
cluster_scores = normalized_all_types_scores['ten']

fig, ax = plt.subplots(figsize=(15, 5))
ax.set_ylim(0, 1)
ax.bar(
    list(cluster_scores.keys()),
    list(cluster_scores.values()),
    color=['darkred', 'olive', 'darkorange', 'turquoise', 'indigo'],
)
plt.show()

**Thank You...**