In [None]:
##############################################################################
                            #Importing Packages
##############################################################################

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import KMeans

In [None]:
##############################################################################
                            #User Defined Functions
##############################################################################

########################################
# inertia
########################################
def inertia_plot(data, max_clust = 50, random_state = None):
    """
    Plot k-means inertia against the number of clusters (an "elbow" plot).

    One KMeans model is fitted for every k in range(1, max_clust) and its
    inertia_ is collected. The curve is drawn on a new 12x8 figure and shown
    with plt.show(). Nothing is returned.

    PARAMETERS
    ----------
    data         : DataFrame, data from which to build clusters. Dataset should be scaled
    max_clust    : int, exclusive upper bound of the cluster counts to check
                   (k runs from 1 to max_clust - 1), default 50
    random_state : int or None, forwarded to KMeans; the default None keeps the
                   original unseeded behaviour, an int makes the curve repeatable
    """

    # KMeans cannot build more clusters than there are observations, so the
    # range is capped at the number of rows instead of raising part-way through
    upper_bound = min(max_clust, len(data) + 1)
    ks = range(1, upper_bound)

    # fitting one model per k and keeping only its inertia
    inertias = [
        KMeans(n_clusters = k, random_state = random_state).fit(data).inertia_
        for k in ks
    ]

    # plotting ks vs inertias
    fig, ax = plt.subplots(figsize = (12, 8))
    ax.plot(ks, inertias, '-o')

    # labeling and displaying the plot
    ax.set_xlabel('number of clusters, k')
    ax.set_ylabel('inertia')
    ax.set_xticks(ks)
    plt.show()


########################################
# scree_plot
########################################
def scree_plot(pca_object, export = False):
    """
    Draw a scree plot for a fitted PCA object.

    The explained_variance_ratio_ of each component is plotted against its
    zero-based component number on a new 10x8 figure, which is then shown
    with plt.show(). Nothing is returned.

    PARAMETERS
    ----------
    pca_object : fitted PCA object; its n_components_ and
                 explained_variance_ratio_ attributes are read
    export     : bool, when truthy the figure is also saved as
                 'top_customers_correlation_scree_plot.png', default False
    """

    # setting plot size
    fig, ax = plt.subplots(figsize=(10, 8))
    features = range(pca_object.n_components_)

    # developing a scree plot
    ax.plot(features,
            pca_object.explained_variance_ratio_,
            linewidth = 2,
            marker = 'o',
            markersize = 10,
            markeredgecolor = 'black',
            markerfacecolor = 'grey')

    # setting more plot options
    ax.set_title('Scree Plot')
    ax.set_xlabel('PCA feature')
    ax.set_ylabel('Explained Variance')
    ax.set_xticks(features)

    # exporting the plot (must happen before plt.show())
    if export:
        fig.savefig('top_customers_correlation_scree_plot.png')

    # displaying the plot
    plt.show()

In [None]:
##############################################################################
                                #Reading File
##############################################################################

# loading the raw survey responses from the Excel workbook
survey_df = pd.read_excel('survey_data.xlsx')

# widening the pandas display limits so wide/long tables print in full
display_settings = {
    'display.max_rows'     : 500,
    'display.max_columns'  : 500,
    'display.width'        : 1000,
    'display.max_colwidth' : 100,
}

for option_name, option_value in display_settings.items():
    pd.set_option(option_name, option_value)


In [None]:
##############################################################################
                        #Deleting Repeated Columns
##############################################################################

# questions that appear twice in the workbook; the '.1' suffix presumably comes
# from pandas de-duplicating the repeated headers on read - TODO confirm
repeated_questions = [
    'Respond effectively to multiple priorities.1',
    "Take initiative even when circumstances, objectives, or rules aren't clear.1",
    'Encourage direct and open discussions.1',
]

# keeping only the first copy of each repeated question
survey_df = survey_df.drop(columns = repeated_questions)

In [None]:
##############################################################################
                                #Renaming Columns
##############################################################################

# The short names are assigned purely by position, so the order below must
# match the column order of the workbook.

# columns 1-50: psychometric items (five per row, so the first name in each
# row is item 1, 6, 11, ... - the positions used later by the Big 5 formula)
psychometric_items = [
    'life_party', 'little_concern_others', 'always_prepared', 'stressed_easily', 'rich_vocabulary',
    'dont_talk', 'interested_people', 'leave_belongings', 'relaxes_most_time', 'difficulty_understanding',
    'comfortable_people', 'insult_people', 'attention_details', 'worry_things', 'vivid_imagination',
    'keep_background', 'sympathize_others', 'make_mess', 'seldom_feel_blue', 'not_interest_abstract',
    'start_conversations', 'not_interest_people', 'chores_done', 'easily_disturbed', 'excellent_ideas',
    'little_say', 'soft_heart', 'forget_things', 'upset_easily', 'not_good_imagination',
    'talk_people', 'not_interested_others', 'like_order', 'change_mood', 'quick_understand',
    'draw_attention', 'take_time', 'shirt_duties', 'mood_swings', 'difficult_words',
    'center_of_attention', 'feel_others', 'follow_schedule', 'irritated_easily', 'time_reflecting',
    'quite_strangers', 'people_at_ease', 'exacting_word', 'often_feel_blue', 'full_ideas',
]

# columns 51-68: Hult DNA items
hult_dna_items = [
    'underlying_patterns', 'no_new_ideas', 'awareness',
    'growth_mindset', 'multiple_priorities', 'take_initiative',
    'open_discussions', 'listen_carefully', 'dont_sell_idea',
    'cooperative_relationships', 'work_diverse', 'effectively_negotiate',
    'cant_rally_people', 'plans_organized', 'resolve_conflicts',
    'seek_feedback', 'coach_teammates', 'drive_results',
]

# columns 69-75: demographic answers
demographic_items = [
    'current_laptop', 'next_laptop', 'program', 'age',
    'gender', 'nationality', 'ethnicity',
]

# column 0 is the respondent identifier
survey_df.columns = (['surveyID']
                     + psychometric_items
                     + hult_dna_items
                     + demographic_items)


In [None]:
##############################################################################
                        #Checking for missing values
##############################################################################

# info() prints the non-null count and dtype of every column, which is where
# missing values would show up
survey_df.info()

# summary statistics (count, mean, std, quartiles) of the numeric columns
survey_df.describe()

In [None]:
##############################################################################
                        #Demographic Analysis
##############################################################################

#CONVERSION RATES
# respondent counts for every (current laptop, next laptop) combination

pd.pivot_table(survey_df,index=["current_laptop",
                                   "next_laptop"],
               values=["surveyID"],aggfunc='count')



#CONVERSION RATES
# NOTE: the percentages in this cell are hard-coded notes, presumably derived
# from the counts in the pivot tables - re-check them if the data changes
#1.35% of Mac users would choose Chromebook as their next laptop of choice
#6.76% of Mac users would choose Windows as their next laptop of choice
#91.81% of Mac users would stick to Mac


#4.11% of Windows users would choose Chromebook as their next laptop of choice
#20.55% of Windows users would choose Mac as their next laptop of choice
#75.34% of Windows users would stick to Windows

#FEMALE - MALE Laptop of choice
# respondent counts for every (gender, current laptop) combination
pd.pivot_table(survey_df,index=["gender",
                                   "current_laptop"],
               values=["surveyID"],aggfunc='count')

# 58.73% of Female users use Mac
# 41.27% of Female users use Windows

# 44.05% of Male users use Mac
# 55.95% of Male users use Windows

# respondent counts for every (program, current laptop) combination
pd.pivot_table(survey_df,index=["program",
                                   "current_laptop"],
               values=["surveyID"],aggfunc='count')

#side note: only one respondent from DD (MBA & Disruptive innovation), which might be a typo

#DD (MBA & Business Analytics)
# 34.21% of DD (MBA & Business Analytics) use Mac
# 65.78% of DD (MBA & Business Analytics) use Windows

#DD (MBA & Disruptive innovation)
# 100% of DD (MBA & Disruptive innovation) use Mac (1 person)

#DD (MIB & Business Analytics)
# 63.77% of DD (MIB & Business Analytics) use Mac
# 36.23% of DD (MIB & Business Analytics) use Windows

#One year Business Analytics
# 41.03% of One year Business Analytics use Mac
# 58.97% of One year Business Analytics use Windows

# respondent counts for every (ethnicity, current laptop) combination
pd.pivot_table(survey_df,index=["ethnicity",
                                   "current_laptop"],
               values=["surveyID"],aggfunc='count')


#Laptop users by Ethnicity

#African American 9 people
# 55.56% use Mac
# 44.44% use Windows

#Far east Asian 31 people
# 58.06% use Mac
# 41.93% use Windows

#Hispanic / Latino 26 people
# 26.92% use Mac
# 73.08% use Windows

#Middle Eastern 3 people
# 33.33% use Mac
# 66.67% use Windows

#Native American 1 person
# 100% use Mac

#Prefer not to answer 11 people
# 54.55% use Mac
# 45.45% use Windows

#West Asian / Indian 26 people
# 38.46% use Mac
# 61.54% use Windows

#White / Caucasian 40 people
# 65.00% use Mac
# 35.00% use Windows

In [None]:
##############################################################################
                    #Applying 5 Personality Traits Formula
##############################################################################

# Scoring key for the five traits: (starting constant, item numbers that are
# added, item numbers that are subtracted). An item number is the positional
# column index in survey_df (column 0 is surveyID, items are columns 1-50).
big5_scoring_key = {
    'extroversion'      : (20, [1, 11, 21, 31, 41],
                               [6, 16, 26, 36, 46]),
    'agreeableness'     : (14, [7, 17, 27, 37, 42, 47],
                               [2, 12, 22, 32]),
    'conscientiousness' : (14, [3, 13, 23, 33, 43, 48],
                               [8, 18, 28, 38]),
    'neuroticism'       : (38, [9, 19],
                               [4, 14, 24, 29, 34, 39, 44, 49]),
    'openness'          : (8,  [5, 15, 25, 35, 40, 45, 50],
                               [10, 20, 30]),
}


def _big5_trait_scores(responses, constant, added_items, subtracted_items):
    """
    Return one trait score per respondent as a list, in row order.

    score = constant + sum(added items) - sum(subtracted items), computed for
    all rows at once. Columns are selected by position, so the result does not
    depend on the DataFrame's index labels. skipna=False keeps a missing
    answer propagating to a missing score, as plain arithmetic would.
    """
    added = responses.iloc[:, added_items].sum(axis = 1, skipna = False)
    subtracted = responses.iloc[:, subtracted_items].sum(axis = 1, skipna = False)

    return (constant + added - subtracted).tolist()


# one list of scores per trait, aligned with the rows of survey_df
extroversion      = _big5_trait_scores(survey_df, *big5_scoring_key['extroversion'])
agreeableness     = _big5_trait_scores(survey_df, *big5_scoring_key['agreeableness'])
conscientiousness = _big5_trait_scores(survey_df, *big5_scoring_key['conscientiousness'])
neuroticism       = _big5_trait_scores(survey_df, *big5_scoring_key['neuroticism'])
openness          = _big5_trait_scores(survey_df, *big5_scoring_key['openness'])

In [None]:
##############################################################################
                    #Concatenating New Columns to DF
##############################################################################

# wrapping each list of trait scores in a one-column DataFrame named after the trait
trait_frames = []

for trait_name, trait_scores in [('extroversion', extroversion),
                                 ('agreeableness', agreeableness),
                                 ('conscientiousness', conscientiousness),
                                 ('neuroticism', neuroticism),
                                 ('openness', openness)]:
    trait_frame = pd.DataFrame(trait_scores)
    trait_frame.columns = [trait_name]
    trait_frames.append(trait_frame)

# the trait names now refer to the one-column DataFrames
(extroversion,
 agreeableness,
 conscientiousness,
 neuroticism,
 openness) = trait_frames

# appending the five trait columns to the right of the survey data
survey_df_processed = pd.concat([survey_df, *trait_frames], axis = 1)

In [None]:
##############################################################################
                        #Deleting Psychometrics
##############################################################################
# The 50 psychometric item columns (positions 1-50, right after surveyID) are
# removed because the Big 5 trait scores computed from them replace them.

# collecting the labels of the columns at positions 1 through 50
deleted_columns = list(survey_df_processed.columns[1:51])

survey_df_processed = survey_df_processed.drop(columns = deleted_columns)

In [None]:
##############################################################################
                            #Separating Columns
##############################################################################

# Column layout of survey_df_processed at this point (by position):
#   0      surveyID
#   1-18   Hult DNA items
#   19-25  demographic answers
#   26+    Big 5 trait scores

# demographic data: the respondent id plus positions 19 through 25
demographic_positions = [0] + list(range(19, 26))
survey_demographic = survey_df_processed.iloc[:, demographic_positions]

# psychometric data: the Big 5 trait scores
survey_psychometric = survey_df_processed.iloc[:, 26:]

# Hult DNA data
survey_hult = survey_df_processed.iloc[:, 1:19]

survey_demographic

In [None]:
##############################################################################
                     #Standardizing Data for Psychometrics
##############################################################################

# scaling the Big 5 trait scores so every trait has mean 0 and variance 1
scaler = StandardScaler()

big5_scaled = scaler.fit_transform(survey_psychometric)

# the scaler returns a plain array, so the trait names are put back
big5_scaled_df = pd.DataFrame(big5_scaled)
big5_scaled_df.columns = survey_psychometric.columns

# checking pre- and post-scaling variance
# (DataFrame.var replaces the removed pd.np alias; ddof = 0 is the population
#  variance, which is what np.var computed)
print(survey_psychometric.var(ddof = 0), '\n\n')
print(big5_scaled_df.var(ddof = 0))

In [None]:
##############################################################################
                    #Creating PCA Model for Psychometrics
##############################################################################

# building a PCA that keeps every principal component
# (n_components is left at its default of None, i.e. no limit)
pca = PCA(random_state = 802)

# fitting on the scaled trait scores and projecting them in one step
customer_big5 = pca.fit_transform(big5_scaled_df)

# scree plot of the variance explained by each component
scree_plot(pca)

Note:
    We will keep 3 principal components for the PCA model because together they explain most of the variance.

In [None]:
##############################################################################
                #PCA Model with 3 Components for Psychometrics
##############################################################################

# building a second PCA limited to the first three principal components
new_pca = PCA(n_components = 3, random_state = 802)

# fitting on the scaled trait scores and projecting them in one step
customer_new_big5 = new_pca.fit_transform(big5_scaled_df)

# scree plot of the three retained components
scree_plot(new_pca)

In [None]:
##############################################################################
                #Factor Loadings Analysis for Psychometrics
##############################################################################

####################
### Max PC Model ###
####################
# components_ is (components x features); transposing gives one row per
# original feature and one column per component (pc = MAX)
# (.T replaces the removed pd.np.transpose alias)
factor_loadings = pd.DataFrame(pca.components_.T)

# naming rows as original features
factor_loadings = factor_loadings.set_index(big5_scaled_df.columns)


####################
### New PC Model ###
####################
# transposing pca components (pc = 3)
factor_loadings_new = pd.DataFrame(new_pca.components_.T)

# naming rows as original features
factor_loadings_new = factor_loadings_new.set_index(big5_scaled_df.columns)


# checking the results
print(f"""
MAX Components Factor Loadings
------------------------------
{factor_loadings.round(2)}

3 Components Factor Loadings
------------------------------
{factor_loadings_new.round(2)}
""")

# checking the result
factor_loadings_new

# analyzing factor strengths per respondent
X_pca_reduced = new_pca.transform(big5_scaled_df)


# converting to a DataFrame
X_pca_df = pd.DataFrame(X_pca_reduced)


# checking the results
X_pca_df

# variance of each retained component (population variance, as np.var computed)
X_pca_df.var(ddof = 0)


Factor 0: It is low on everything, especially agreeableness and conscientiousness, so we will call this group Shy <br>
Factor 1: It is high on extroversion but also on neuroticism, which suggests these respondents have little control over their emotions. We will call this group Uproared <br>
Factor 2: It is very agreeable and open, with good control of emotions (low neuroticism). We will call this group Balanced <br>

In [None]:
##############################################################################
                    #Naming Components for Psychometrics
##############################################################################

# labelling the three retained components, in component order (0, 1, 2), with
# the persona names chosen from the loadings; assigning to .columns renames
# the existing DataFrame in place
factor_loadings_new.columns = ['Shy',
                               'Uproared',
                               'Balanced']

# displaying the renamed loadings table
factor_loadings_new

In [None]:
##############################################################################
                    #Converting Psychometrics to Factors
##############################################################################

# projecting every respondent onto the three retained psychometric components
survey_big5 = new_pca.transform(big5_scaled_df)

# one column per component, labelled with the names given to the loadings
survey_big5_df = pd.DataFrame(survey_big5,
                              columns = factor_loadings_new.columns)

survey_big5_df

In [None]:
##############################################################################
                        #Arranging Data for Hult DNA
##############################################################################

# Aligning the negatively worded items with the rest of the Hult DNA items

# the three negatively worded columns
negative_items = ['no_new_ideas', 'dont_sell_idea', 'cant_rally_people']

# inverting the answers: each value v becomes 6 - v
# (presumably a 1-5 scale, so 1 <-> 5 and 2 <-> 4 - TODO confirm)
hult_inverse = 6 - survey_hult[negative_items]

# renaming the inverted columns to their positive meaning
hult_inverse.columns = ['new_ideas', 'sell_ideas', 'rally_people']

# swapping the original negative columns for the inverted ones, which are
# appended at the right-hand side
survey_hult_processed = pd.concat([survey_hult.drop(columns = negative_items),
                                   hult_inverse],
                                  axis = 1)



In [None]:
##############################################################################
                        #Standardizing Data for Hult DNA
##############################################################################
# scaling the Hult DNA items so every item has mean 0 and variance 1
scaler_hult = StandardScaler()

hult_scaled = scaler_hult.fit_transform(survey_hult_processed)

# the scaler returns a plain array, so the item names are put back
hult_scaled_df = pd.DataFrame(hult_scaled)
hult_scaled_df.columns = survey_hult_processed.columns

# checking pre- and post-scaling variance of the same frame that was scaled
# (DataFrame.var replaces the removed pd.np alias; ddof = 0 is the population
#  variance, which is what np.var computed)
print(survey_hult_processed.var(ddof = 0), '\n\n')
print(hult_scaled_df.var(ddof = 0))

In [None]:
##############################################################################
                    #Creating PCA Model for Hult DNA
##############################################################################

# building a PCA that keeps every principal component
# (n_components is left at its default of None, i.e. no limit)
pca_hult = PCA(random_state = 802)

# fitting on the scaled Hult DNA items and projecting them in one step
customer_hult = pca_hult.fit_transform(hult_scaled_df)

# scree plot of the variance explained by each component
scree_plot(pca_hult)


We will use 3 components because together they explain most of the variance in the data.

In [None]:
##############################################################################
                #PCA Model with 3 Components for Hult DNA
##############################################################################

# building a second PCA limited to the first three principal components
new_pca_hult = PCA(n_components = 3, random_state = 802)

# fitting on the scaled Hult DNA items and projecting them in one step
customer_new_hult = new_pca_hult.fit_transform(hult_scaled_df)

# scree plot of the three retained components
scree_plot(new_pca_hult)


In [None]:
##############################################################################
                #Factor Loadings Analysis for Hult DNA
##############################################################################

####################
### Max PC Model ###
####################
# components_ is (components x features); transposing gives one row per
# original feature and one column per component (pc = MAX)
# (.T replaces the removed pd.np.transpose alias)
# NOTE: this cell reuses the names factor_loadings, factor_loadings_new,
# X_pca_reduced and X_pca_df, overwriting the psychometric versions
factor_loadings = pd.DataFrame(pca_hult.components_.T)

# naming rows as original features
factor_loadings = factor_loadings.set_index(hult_scaled_df.columns)


####################
### New PC Model ###
####################
# transposing pca components (pc = 3)
factor_loadings_new = pd.DataFrame(new_pca_hult.components_.T)

# naming rows as original features
factor_loadings_new = factor_loadings_new.set_index(hult_scaled_df.columns)


# checking the results
print(f"""
MAX Components Factor Loadings
------------------------------
{factor_loadings.round(2)}

3 Components Factor Loadings
------------------------------
{factor_loadings_new.round(2)}
""")

# checking the result
factor_loadings_new

# analyzing factor strengths per respondent
X_pca_reduced = new_pca_hult.transform(hult_scaled_df)


# converting to a DataFrame
X_pca_df = pd.DataFrame(X_pca_reduced)


# checking the results
X_pca_df

# variance of each retained component (population variance, as np.var computed)
X_pca_df.var(ddof = 0)


Factor 0: The first component loads negatively on all elements of the Hult DNA, so we will name this group Beginners <br>
Factor 1: The second component is high on team-building skills, so we will name this group TeamPlayers <br>
Factor 2: The third component is high on the awareness skill, so we will name this group GrowthMindset <br>

In [None]:
##############################################################################
                    #Naming Components for Hult DNA
##############################################################################

# labelling the three retained Hult DNA components, in component order
# (0, 1, 2), with the names chosen from the loadings; assigning to .columns
# renames the existing DataFrame in place
factor_loadings_new.columns = ['Beginners',
                               'TeamPlayers',
                               'GrowthMindset']

# displaying the renamed loadings table
factor_loadings_new

In [None]:
##############################################################################
                    #Converting Hult DNA Data to Factors
##############################################################################

# projecting every respondent onto the three retained Hult DNA components
survey_hult_dna = new_pca_hult.transform(hult_scaled_df)

# one column per component, labelled with the names given to the loadings
survey_hult_dna_df = pd.DataFrame(survey_hult_dna,
                                  columns = factor_loadings_new.columns)

survey_hult_dna_df

In [None]:
##############################################################################
                #Concatenating Data to Standardize and Cluster
##############################################################################

# placing the psychometric factors and the Hult DNA factors side by side,
# one row per respondent
factorized_survey = pd.concat([survey_big5_df,
                               survey_hult_dna_df],
                              axis = 'columns')

In [None]:
##############################################################################
                    #Standardizing Factorized Data
##############################################################################

# scaling the six factor columns so every factor has mean 0 and variance 1
scaler_factorized = StandardScaler()

factorized_scaled = scaler_factorized.fit_transform(factorized_survey)

# the scaler returns a plain array, so the factor names are put back
factorized_scaled_df = pd.DataFrame(factorized_scaled)
factorized_scaled_df.columns = factorized_survey.columns

# checking pre- and post-scaling variance
# (DataFrame.var replaces the removed pd.np alias; ddof = 0 is the population
#  variance, which is what np.var computed)
print(factorized_survey.var(ddof = 0), '\n\n')
print(factorized_scaled_df.var(ddof = 0))

In [None]:
##############################################################################
                    #Dendrogram and Inertia Plot
##############################################################################

# hierarchical merge tree of the scaled factors, using Ward linkage
standard_mergings_ward = linkage(factorized_scaled_df, method = 'ward')

# the dendrogram is drawn on the current axes, so the figure is created first
fig, ax = plt.subplots(figsize = (12, 12))

dendrogram(standard_mergings_ward,
           leaf_rotation  = 90,
           leaf_font_size = 6)

# saving must happen before show()
plt.savefig('standard_hierarchical_clust_ward.png')
plt.show()

# running the inertia plot on the same scaled factors
inertia_plot(factorized_scaled_df)


Based on the dendrogram and the inertia plot, we believe the optimal number of clusters is 3.

In [None]:
##############################################################################
                        #K-Means Model
##############################################################################

# building a k-Means object with three clusters and a fixed seed
customers_k_pca = KMeans(n_clusters = 3, random_state = 802)

# fitting the object to the scaled factors
customers_k_pca.fit(factorized_scaled_df)

# one cluster label per respondent, as a single-column DataFrame
customers_kmeans_pca = pd.DataFrame({'Cluster': customers_k_pca.labels_})

# number of respondents in each cluster
print(customers_kmeans_pca['Cluster'].value_counts())

# cluster centers as a DataFrame (clusters = rows, factors = columns)
centroids_pca = customers_k_pca.cluster_centers_
centroids_pca_df = pd.DataFrame(centroids_pca,
                                columns = factorized_scaled_df.columns)

print(centroids_pca_df.round(2))

# concatenating cluster memberships with the scaled factors
clst_pca_df = pd.concat([customers_kmeans_pca, factorized_scaled_df],
                        axis = 1)
clst_pca_df

In [None]:
##############################################################################
                        #Adding Demographic Data
##############################################################################

# Lowercasing the text demographics so spelling variants that differ only in
# capitalisation fall into the same category.

text_demographics = ['current_laptop',
                     'next_laptop',
                     'program',
                     'gender',
                     'nationality',
                     'ethnicity']

# .str.lower() works on whole columns and leaves missing answers as missing
# instead of failing on them; the index is reset so the rows line up by
# position with the cluster labels, as the original list-built frames did
lowered_demographics = (survey_demographic[text_demographics]
                        .apply(lambda column: column.str.lower())
                        .reset_index(drop = True))

# one single-column DataFrame per demographic (names kept from the original)
lower_laptop      = lowered_demographics[['current_laptop']]
lower_next        = lowered_demographics[['next_laptop']]
lower_program     = lowered_demographics[['program']]
lower_gender      = lowered_demographics[['gender']]
lower_nationality = lowered_demographics[['nationality']]
lower_ethnicity   = lowered_demographics[['ethnicity']]


# concatenating demographic information with pca-clusters
# (age is numeric, so it is taken as-is)
final_pca_clust_df = pd.concat([clst_pca_df,
                                lower_laptop,
                                lower_next,
                                lower_program,
                                lower_gender,
                                lower_nationality,
                                lower_ethnicity,
                                survey_demographic['age']
                               ],
                               axis = 1)

final_pca_clust_df.head(n = 5)

In [None]:
# saving the clustered respondents to an Excel workbook in the working
# directory; index is not disabled, so the row index is written as well
final_pca_clust_df.to_excel('clustered_data.xlsx')

In [None]:
##############################################################################
                        #Graphical Analysis
##############################################################################

########################
# Count plots: clusters vs demographics
########################

# (x-axis column, hue column, whether a 12x8 figure is created first)
# the two gender plots use the default figure size
count_plot_specs = [
    ('gender',      'Cluster',        False),
    ('Cluster',     'gender',         False),
    ('program',     'Cluster',        True),
    ('Cluster',     'program',        True),
    ('ethnicity',   'Cluster',        True),
    ('next_laptop', 'Cluster',        True),
    ('Cluster',     'next_laptop',    True),
    ('Cluster',     'current_laptop', True),
]

for x_column, hue_column, use_large_figure in count_plot_specs:

    if use_large_figure:
        fig, ax = plt.subplots(figsize = (12, 8))

    # number of respondents per x category, split by hue
    sns.countplot(x = x_column,
                  hue = hue_column,
                  data = final_pca_clust_df)

    plt.tight_layout()
    plt.show()

In [None]:
##############################################################################
                        #Graphical Analysis
##############################################################################

########################
# Box plots: factor scores by current laptop and cluster
########################

# the six factor columns to plot, one figure each
var = ['Shy', 'Uproared', 'Balanced',
       'Beginners', 'TeamPlayers', 'GrowthMindset']

# the bare string below is not used by the code; it lists the demographic
# columns available in final_pca_clust_df
"""current_laptop
next_laptop
program
gender
nationality
ethnicity
age"""

for factor_name in var:
    fig, ax = plt.subplots(figsize = (12, 8))

    # distribution of the factor per current laptop, split by cluster
    sns.boxplot(x = 'current_laptop',
                y = factor_name,
                hue = 'Cluster',
                data = final_pca_clust_df,
                ax = ax)

    # same y-range on every figure so the factors can be compared
    ax.set_ylim(-10, 19)
    plt.tight_layout()
    plt.show()
