## Import Libraries

In [None]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

## Helper Functions

### Column Namer

In [None]:


def columnNamer(cols, isBigFive, isHultDna):
    '''
    This function cleans survey column names and prefixes them depending on
    whether they are from the big 5 or from Hult DNA.

    Every name is lower-cased, spaces become underscores and apostrophes are
    removed. The cleaned names are then formatted as:

        Big Five: B5__<name_of_column>
        Hult DNA: DNA_Q<n>__<name_of_column>   (<n> is the 1-based position in cols)

    ----------------------
    Params
    ----------------------

    cols: Column names as a pandas Index or Series (anything exposing `.str`).
    isBigFive: Boolean. If True, function will assume that all the columns are from the big five.
    isHultDna: Boolean. If True, function will assume that all the columns are from the hult dna.
               Only consulted when isBigFive is falsy.

    ----------------------
    Returns
    ----------------------

    List of renamed columns, in the same order as cols.

    ----------------------
    Raises
    ----------------------

    ValueError: if neither isBigFive nor isHultDna is set.

    '''
    # regex=False keeps the replacements literal regardless of the pandas
    # version's default for the `regex` argument.
    cols = cols.str.lower()
    cols = cols.str.replace(' ', '_', regex=False)  # Fill spaces with underscore
    cols = cols.str.replace("'", '', regex=False)  # Remove quotes from questions

    # Iterate over the values rather than indexing with cols[i]: on a Series,
    # cols[i] is a label lookup and fails for a non-default index.
    if isBigFive:
        return [f'B5__{name}' for name in cols]

    if isHultDna:
        return [f'DNA_Q{pos}__{name}' for pos, name in enumerate(cols, start=1)]

    # Previously this branch printed a message and returned None, which only
    # surfaced later as a confusing error when the result was assigned.
    raise ValueError('columnNamer needs either isBigFive or isHultDna to be True')

## Domain Knowledge Research


### Nevid & Pastva (2013)

 - Big Five personality traits did not differentiate between Mac and PC owners. Students overall rated Macs higher on various product attributes (attractive style, cool, youthful, and exciting) and PCs higher on reasonable price and good for gaming.
 
 - PC owners placed greater importance on cost as a determinant of brand choice, whereas Mac owners placed greater emphasis on style. 
 
 - Personality traits may have more nuanced effects on brand choices, as shown by relationships between Neuroticism and greater importance placed on cost and lesser importance placed on ease of use. 
     - **Personality Traits are more important in the brand choice!**
     - More neuroticism, more importance in cost and less importance in ease of use
     
- Openness to Experience was associated with greater importance placed on reliability and lesser importance placed on style.
    - **More openness to experience, more importance in reliability and less importance in style**
    
    
https://www.researchgate.net/publication/259540094_I'm_a_Mac_versus_I'm_a_PC_Personality_Differences_between_Mac_and_PC_Users_in_a_College_Sample

<br> 

___________

<br>

### PC World

<br>


- People who purchase Macs fall into what the branding company calls the "Openness 5" personality category -- which means they are more liberal, less modest and more assured of their own superiority than the population at large.
-  People from Openness 5 seek rich, varied and novel experiences, according to the company, and believe that imagination and intellectual curiosity are as important to life as more rational or pragmatic endeavors.
    - Hypothesis: Mac users will show more openness to new adventures.

## Data Exploration

### General Data Set Exploration

- 1 missing value in ethnicity
- 1 participant said MAC instead of Macbook

In [None]:
# Load the raw survey export into a DataFrame
original_df = pd.read_excel('Survey_Data_Final_Exam.xlsx')

# Count how often each answer to the laptop question occurs
# (last expression of the cell, so the notebook displays it)
original_df['What laptop do you currently have?'].value_counts()

### Dividing Columns Big5 vs Hult DNA

In [None]:

# Positional split of the survey columns: 1-50 are the big five-related
# columns and 52-71 the Hult DNA ones (columns 0 and 51 are in neither subset).
# .copy() detaches each subset from original_df, so renaming or editing it
# later cannot touch the raw data or raise SettingWithCopyWarning.
big_five = original_df.iloc[:, 1:51].copy()  # Subset big five-related columns
huld_dna = original_df.iloc[:, 52:72].copy()  # Subset Hult DNA

# Change Column Names
big_five.columns = columnNamer(big_five.columns,
                               isBigFive=True,
                               isHultDna=False)

huld_dna.columns = columnNamer(huld_dna.columns,
                               isBigFive=False,
                               isHultDna=True)

### Creating factors big5

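According to this version of the Big Five questionnaire, the items listed below are negatively keyed, so their scores are reversed before the factors are built.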

In [None]:
# Negatively keyed questionnaire items, one list per factor.

# Factor I
fac1_neg = [
    "Don't talk a lot",
    "Keep in the background",
    "Have little to say",
    "Don't like to draw attention to myself",
    "Am quiet around strangers",
]

# Factor II
fac2_neg = [
    "Am not really interested in others",
    "Insult people",
    "Am not interested in other people's problems",
    "Feel little concern for others",
]

# Factor III
fac3_neg = [
    "Leave my belongings around",
    "Make a mess of things",
    "Often forget to put things back in their proper place",
    "Shirk my duties",
]

# Factor IV
fac4_neg = [
    "Get stressed out easily",
    "Worry about things",
    "Am easily disturbed",
    "Get upset easily",
    "Change my mood a lot",
    "Have frequent mood swings",
    "Get irritated easily",
    "Often feel blue",
]

# Factor V
fac5_neg = [
    "Have difficulty understanding abstract ideas",
    "Am not interested in abstract ideas",
    "Do not have a good imagination",
]

fac_all_neg = fac1_neg + fac2_neg + fac3_neg + fac4_neg + fac5_neg

# Translate the question texts into the B5__ column names used by big_five
fac_all_neg_cols = columnNamer(pd.Series(fac_all_neg), isBigFive=True, isHultDna=False)

# big_five.mean()

# Reverse-score the negatively keyed items.
# The previous `big_five.loc[:, cols].replace(..., inplace=True)` modified a
# temporary copy returned by .loc, so big_five itself was never changed.
# A single dict is applied in one pass, so 5 -> 1 is not turned back into 5.
# NOTE(review): the original only mapped 5 -> 1 and 4 -> 2; the mirror half
# (1 -> 5, 2 -> 4) is added on the assumption that answers are on a 1-5 scale
# -- confirm against the survey.
# NOTE: running this cell twice reverses the scores back again.
big_five = big_five.copy()  # make sure we edit our own frame, not a slice of original_df
big_five[fac_all_neg_cols] = big_five[fac_all_neg_cols].replace({5: 1, 4: 2, 2: 4, 1: 5})

### Creating Factors based on Original Study

In [None]:
# Questionnaire items per factor, split into positively (_pos) and
# negatively (_neg) keyed statements.

# ---- Factor I (Surgency or Extraversion) ----
fac1_pos = [
    "am the life of the party",
    "Feel comfortable around people",
    "Start conversations",
    "Talk to a lot of different people at parties",
    "Don't mind being the center of attention",
]
fac1_neg = [
    "Don't talk a lot",
    "Keep in the background",
    "Have little to say",
    "Don't like to draw attention to myself",
    "Am quiet around strangers",
]

# ---- Factor II (Agreeableness) ----
fac2_pos = [
    "Am interested in people",
    "Sympathize with others' feelings",
    "Have a soft heart",
    "Take time out for others",
    "Feel others' emotions",
    "Make people feel at ease",
]
fac2_neg = [
    "Am not really interested in others",
    "Insult people",
    "Am not interested in other people's problems",
    "Feel little concern for others",
]

# ---- Factor III (Conscientiousness) ----
fac3_pos = [
    "Am always prepared",
    "Pay attention to details",
    "Get chores done right away",
    "Like order",
    "Follow a schedule",
    "Am exacting in my work",
]
fac3_neg = [
    "Leave my belongings around",
    "Make a mess of things",
    "Often forget to put things back in their proper place",
    "Shirk my duties",
]

# ---- Factor IV (Emotional Stability) ----
fac4_pos = [
    "Am relaxed most of the time",
    "Seldom feel blue",
]
fac4_neg = [
    "Get stressed out easily",
    "Worry about things",
    "Am easily disturbed",
    "Get upset easily",
    "Change my mood a lot",
    "Have frequent mood swings",
    "Get irritated easily",
    "Often feel blue",
]

# ---- Factor V (Intellect or Imagination) ----
fac5_pos = [
    "Have a rich vocabulary",
    "Have a vivid imagination",
    "Have excellent ideas",
    "Am quick to understand things",
    "Use difficult words",
    "Spend time reflecting on things",
    "Am full of ideas",
]
fac5_neg = [
    "Have difficulty understanding abstract ideas",
    "Am not interested in abstract ideas",
    "Do not have a good imagination",
]




# All items of each factor: its own positively and negatively keyed statements.
# (Previously fac1_all picked up fac2_neg and fac5_all picked up fac4_pos, so
# extraversion and intellect were summed over the wrong questions.)
fac1_all = fac1_pos + fac1_neg
fac2_all = fac2_pos + fac2_neg
fac3_all = fac3_pos + fac3_neg
fac4_all = fac4_pos + fac4_neg
fac5_all = fac5_pos + fac5_neg

# Output column name -> question texts that make up the factor
factor_items = {
    'extraversion': fac1_all,
    'agreeableness': fac2_all,
    'conscientiousness': fac3_all,
    'emotional_stability': fac4_all,
    'intellect': fac5_all,
}

# One row per respondent, one column per factor: the row-wise sum of the
# answers to that factor's items. columnNamer maps the question texts to the
# B5__ column names carried by big_five.
big_five_final = pd.DataFrame({
    factor: big_five[
        columnNamer(pd.Series(items), isBigFive=True, isHultDna=False)
    ].sum(axis=1)
    for factor, items in factor_items.items()
})

## Modelling


### PCA

In [None]:
# Standardise the factor scores before running PCA on them
st_scaler = StandardScaler()
big_five_scaled = st_scaler.fit_transform(big_five_final)

# Keep five components and project the respondents onto them
pca = PCA(n_components=5)
pca_fit = pca.fit_transform(big_five_scaled)

# pca.components_ has one row per component; transposing gives one row per
# input factor and one column per component. The ndarray's own .T is used
# because numpy is never imported as `np` in this notebook, so the previous
# np.transpose(...) call raised NameError.
factor_loadings = pd.DataFrame(pca.components_.T, index=big_five_final.columns)

# factor_loadings.to_excel('')
# factor_loadings.abs().sort_values(0, ascending = False)

# pd.DataFrame(pca_fit, columns = [0,1,2,3,4])
