In [None]:
# Personality Strength: Study 1 Converter
# This will convert the .csv file from Qualtrics into a more usable format with exploratory analyses and reverse scoring.
# Created by Tom Tibbett on Nov. 20th, 2015

In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.sandbox.stats.multicomp import TukeyHSDResults
# Notebook display settings: show up to 300 rows and 300 columns of a frame,
# and render floats with thousands separators and two decimals.
pd.options.display.max_rows = pd.options.display.max_columns = 300
pd.options.display.float_format = "{:,.2f}".format

In [None]:
# Input: the raw export to convert.  Make sure it contains no PII (e.g. names)
# and that it sits in the same folder as this program.
file = 'PersonalityStr1_n17.csv'

# Output: name of the converted file; it is written to the same folder as the
# Python script.
csvout = 'PersonalityStr.csv'

In [None]:
# Load the export: the first line of the file is skipped and the line after it
# supplies the column labels.
df = pd.read_csv(file, skiprows=1, header=0)

In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# Show the raw column labels as an array.
df.columns.values

In [None]:
# Drop the survey bookkeeping / identifying columns (response ids, name, e-mail,
# IP address, dates, location), the "-sum" totals, the demographics instruction
# text column and the trailing unnamed column.
# NOTE(review): '\xc2\xa0' is the UTF-8 byte pair for a non-breaking space, so the
# long "Directions" label presumably only matches under Python 2 byte strings --
# confirm before running this on Python 3.
df = df.drop(
    [
        'ResponseID', 'ResponseSet', 'Name', 'ExternalDataReference',
        'EmailAddress', 'IPAddress', 'Status', 'StartDate', 'EndDate', 'Finished',
        'PSSTotal-sum', 'PSS1-sum', 'PSS2-sum', 'PSS3-sum', 'PSS4-sum', 'PSS5-sum',
        'O-sum', 'C-sum', 'E-sum', 'A-sum', 'N-sum',
        'AttCons-sum', 'Identity-sum', 'DP-sum', 'Authenticity-sum', 'LifeSat-sum',
        'Flexibility-sum', 'Rigidity-sum', 'EmotionalInt-sum', 'MeaninginLife-sum',
        'PresenceMIL-sum', 'SearchingMIL-sum',
        'Directions:\xc2\xa0Finally, we would like to know a little about you. \xc2\xa0Can you tell us a little about yo...',
        'LocationLatitude', 'LocationLongitude', 'LocationAccuracy',
        'Unnamed: 223',
    ],
    axis=1,
)

In [None]:
# Display the frame after the column drop.
# NOTE(review): this renders the whole frame (up to the display limits set at the
# top of the notebook); df.head() would be enough for a sanity check.
df

In [None]:
# Replace every column label, by position, with a short analysis-friendly name.
# The list must contain exactly one name per remaining column, in column order.
df.columns = [
    # Scale-level summary columns (...Mean / ...SD pairs)
    'PSSTotalMean', 'PSSTotalSD',
    'PSS1Mean', 'PSS1SD', 'PSS2Mean', 'PSS2SD', 'PSS3Mean', 'PSS3SD',
    'PSS4Mean', 'PSS4SD', 'PSS5Mean', 'PSS5SD',
    'O', 'O_SD', 'C', 'C_SD', 'E', 'E_SD', 'A', 'A_SD', 'N', 'N_SD',
    'AttConsMean', 'AttConsSD', 'IdentityMean', 'IdentitySD', 'DPMean', 'DPSD',
    'AuthMean', 'AuthSD', 'LifeSatMean', 'LifeSatSD', 'FlexMean', 'FlexSD',
    'RigidityMean', 'RigiditySD', 'EmotIntMean', 'EmotIntSD', 'MILMean', 'MILSD',
    'PresenceMean', 'PresenceSD', 'SearchingMean', 'SearchingSD',
    # Participant identifier
    'id',
    # Personality Strength Scale items, subscales 1-5
    'PSS1_1', 'PSS1_2', 'PSS1_3', 'PSS1_4', 'PSS1_5', 'PSS1_6',
    'PSS2_1', 'PSS2_2', 'PSS2_3', 'PSS2_4', 'PSS2_5', 'PSS2_6',
    'PSS3_1', 'PSS3_2', 'PSS3_3', 'PSS3_4', 'PSS3_5', 'PSS3_6', 'PSS3_7',
    'PSS4_1', 'PSS4_2', 'PSS4_3', 'PSS4_4', 'PSS4_5',
    'PSS4_6', 'PSS4_7', 'PSS4_8', 'PSS4_9', 'PSS4_10',
    'PSS5_1', 'PSS5_2', 'PSS5_3', 'PSS5_4', 'PSS5_5',
    'PSS5_6', 'PSS5_7', 'PSS5_8', 'PSS5_9', 'PSS5_10',
    'PSS5_11', 'PSS5_12', 'PSS5_13', 'PSS5_14', 'PSS5_15',
    # Mini-IPIP items (interleaved E, A, C, N, O)
    'E1', 'A1', 'C1', 'N1', 'O1',
    'E2', 'A2', 'C2', 'N2', 'O2',
    'E3', 'A3', 'C3', 'N3', 'O3',
    'E4', 'A4', 'C4', 'N4', 'O4',
    # Attitude Consistency items
    'AttCons1', 'AttCons2', 'AttCons3', 'AttCons4',
    'AttCons5', 'AttCons6', 'AttCons7', 'AttCons8',
    # Identity items
    'Identity1', 'Identity2', 'Identity3', 'Identity4', 'Identity5',
    # MDMQ - Indecision items
    'DP1', 'DP2', 'DP3', 'DP4', 'DP5',
    # Authenticity Scale items
    'Auth1', 'Auth2', 'Auth3', 'Auth4', 'Auth5', 'Auth6',
    'Auth7', 'Auth8', 'Auth9', 'Auth10', 'Auth11', 'Auth12',
    # Satisfaction with Life items
    'LifeSat1', 'LifeSat2', 'LifeSat3', 'LifeSat4', 'LifeSat5',
    # HEXACO Flexibility items
    'Flex1', 'Flex2', 'Flex3', 'Flex4', 'Flex5',
    'Flex6', 'Flex7', 'Flex8', 'Flex9', 'Flex10',
    # CAT PD - Rigidity items
    'Rigid1', 'Rigid2', 'Rigid3', 'Rigid4', 'Rigid5',
    'Rigid6', 'Rigid7', 'Rigid8', 'Rigid9', 'Rigid10',
    # Emotional Intelligence items
    'EmoInt1', 'EmoInt2', 'EmoInt3', 'EmoInt4', 'EmoInt5', 'EmoInt6', 'EmoInt7',
    # Meaning in Life items
    'MIL1', 'MIL2', 'MIL3', 'MIL4', 'MIL5',
    'MIL6', 'MIL7', 'MIL8', 'MIL9', 'MIL10',
    # Demographics
    'Gender', 'GenderO', 'Class', 'Pol', 'PolO', 'Age',
]

In [None]:
# Display the frame with its new column names.
# NOTE(review): this renders the whole frame (up to the display limits set at the
# top of the notebook); df.head() would be enough for a sanity check.
df

In [None]:
# Frequency table for the Gender item.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print('Frequencies - Gender')

# Count the non-missing responses for each gender code.
Gender = df['Gender'].groupby(df['Gender']).count()
Gender = pd.DataFrame(Gender)
# NOTE(review): the labels are assigned by position, so this assumes exactly two
# gender codes occur in the data and that the lower code means Male -- confirm
# against the codebook.  Any other number of codes raises a length-mismatch error.
Gender.index = ['Male', 'Female']
Gender.columns = ['Count']
Gender

In [None]:
# Frequency table for the political-affiliation item.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print('Frequencies - Political Affiliation')

# Count the non-missing responses for each affiliation code.
Pol = df['Pol'].groupby(df['Pol']).count()
Pol = pd.DataFrame(Pol)
Pol.columns = ['Count']

# Legend for the numeric codes; it is printed next to the table because the
# table index keeps the raw codes.
Pold = {
    1: "Independent",
    2: "Democrat",
    3: "Republican",
    4: "Libertarian",
    5: "Tea Party",
    6: "Other",
}
print(Pold)
Pol

In [None]:
# Frequency table for the academic-classification item.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print('Frequencies - Classification')

# Count the non-missing responses for each classification code.
Class = df['Class'].groupby(df['Class']).count()
Class = pd.DataFrame(Class)
Class.columns = ['Count']

# Legend for the numeric codes; it is printed next to the table because the
# table index keeps the raw codes.
Classd = {
    1: "Freshman",
    2: "Sophomore",
    3: "Junior",
    4: "Senior",
    5: "Graduate Student",
    6: "Other",
}
print(Classd)
Class

In [None]:
# Descriptive statistics for age and the scale-level score columns.
df.loc[:, [
    'Age',
    'O', 'C', 'E', 'A', 'N',
    'PSS1Mean', 'PSS2Mean', 'PSS3Mean', 'PSS4Mean',
    'AttConsMean', 'AuthMean', 'DPMean', 'LifeSatMean', 'FlexMean', 'RigidityMean',
    'EmotIntMean', 'MILMean', 'PresenceMean', 'SearchingMean',
]].describe()

In [None]:
# Reverse scoring.  See the codebook for details.
# Each reverse-keyed item is flipped as REVERSE_CONSTANT - score and stored in a
# new column named <item>R; the original item columns are left untouched.
# NOTE(review): one shared constant of 8 presumably means every scale below uses a
# 1-7 response format -- confirm against the codebook.
REVERSE_CONSTANT = 8

# Items to flip, in the order their "R" columns are appended to df.
REVERSE_KEYED_ITEMS = [
    # Personality Strength Scale
    'PSS1_2',
    'PSS2_1', 'PSS2_3', 'PSS2_5',
    'PSS3_1', 'PSS3_2', 'PSS3_3', 'PSS3_4', 'PSS3_5', 'PSS3_7',
    'PSS5_8', 'PSS5_12',
    # Attitude Consistency
    'AttCons1', 'AttCons2', 'AttCons3', 'AttCons6', 'AttCons7',
    # Identity
    'Identity3',
    # MDMQ - Indecision needs no reverse coding
    # Authenticity Scale
    'Auth2', 'Auth3', 'Auth4', 'Auth5', 'Auth6', 'Auth7', 'Auth10', 'Auth12',
    # Mini-IPIP
    'E2', 'E4', 'C2', 'C4', 'N2', 'N4', 'A2', 'A4', 'O2', 'O3', 'O4',
    # Satisfaction with Life scale needs no reverse coding.
    # HEXACO Flexibility
    'Flex3', 'Flex4', 'Flex5', 'Flex6', 'Flex7', 'Flex8', 'Flex9', 'Flex10',
    # CAT PD - Rigidity needs no reverse coding
    # Emotional Intelligence
    'EmoInt7',
    # Meaning in Life
    'MIL9',
]

for item in REVERSE_KEYED_ITEMS:
    df[item + 'R'] = REVERSE_CONSTANT - df[item]

In [None]:
# Calculating Cronbach's alpha
 
def CronbachAlpha(itemscores):
    """Return Cronbach's alpha for a set of item scores.

    Parameters
    ----------
    itemscores : array-like, shape (n_items, n_respondents)
        One row per item (for example a list with one pandas Series per item)
        and one column per respondent.

    Returns
    -------
    float
        k / (k - 1) * (1 - sum(item variances) / variance(total scores)),
        where k is the number of items and all variances use ddof=1.

    Notes
    -----
    Respondents with a missing (NaN) score on any item are excluded before the
    computation (listwise deletion); otherwise a single missing answer would
    turn the whole coefficient into NaN.  Complete data gives the same result
    as the plain formula.
    """
    itemscores = np.asarray(itemscores, dtype=float)

    # Keep only the respondents (columns) that answered every item.
    complete = ~np.isnan(itemscores).any(axis=0)
    itemscores = itemscores[:, complete]

    itemvars = itemscores.var(axis=1, ddof=1)   # variance of each item
    tscores = itemscores.sum(axis=0)            # total score per respondent
    nitems = len(itemscores)

    return nitems / (nitems-1.) * (1 - itemvars.sum() / tscores.var(ddof=1))

# Defining the measures to be tested: one list of item columns per scale, using
# the reverse-scored "...R" column wherever an item is reverse-keyed.
PSS1 = [df.PSS1_1, df.PSS1_2R, df.PSS1_3, df.PSS1_4, df.PSS1_5, df.PSS1_6]
PSS2 = [df.PSS2_1R, df.PSS2_2, df.PSS2_3R, df.PSS2_4, df.PSS2_5R, df.PSS2_6]
PSS3 = [df.PSS3_1R, df.PSS3_2R, df.PSS3_3R, df.PSS3_4R, df.PSS3_5R, df.PSS3_6, df.PSS3_7R]
PSS4 = [df.PSS4_1, df.PSS4_2, df.PSS4_3, df.PSS4_4, df.PSS4_5,
        df.PSS4_6, df.PSS4_7, df.PSS4_8, df.PSS4_9, df.PSS4_10]
Indecision = [df.DP1, df.DP2, df.DP3, df.DP4, df.DP5]
Flex = [df.Flex1, df.Flex2, df.Flex3R, df.Flex4R, df.Flex5R,
        df.Flex6R, df.Flex7R, df.Flex8R, df.Flex9R, df.Flex10R]
Rigidity = [df.Rigid1, df.Rigid2, df.Rigid3, df.Rigid4, df.Rigid5,
            df.Rigid6, df.Rigid7, df.Rigid8, df.Rigid9, df.Rigid10]
AttCon = [df.AttCons1R, df.AttCons2R, df.AttCons3R, df.AttCons4, df.AttCons5,
          df.AttCons6R, df.AttCons7R]
O = [df.O1, df.O2R, df.O3R, df.O4R]
C = [df.C1, df.C2R, df.C3, df.C4R]
E = [df.E1, df.E2R, df.E3, df.E4R]
A = [df.A1, df.A2R, df.A3, df.A4R]
N = [df.N1, df.N2R, df.N3, df.N4R]
LifeSat = [df.LifeSat1, df.LifeSat2, df.LifeSat3, df.LifeSat4, df.LifeSat5]
Auth = [df.Auth1, df.Auth2R, df.Auth3R, df.Auth4R, df.Auth5R, df.Auth6R,
        df.Auth7R, df.Auth8, df.Auth9, df.Auth10R, df.Auth11, df.Auth12R]
EmoInt = [df.EmoInt1, df.EmoInt2, df.EmoInt3, df.EmoInt4, df.EmoInt5, df.EmoInt6, df.EmoInt7R]

# Meaning in Life subscales follow the published questionnaire key:
# Presence = items 1, 4, 5, 6 and 9 (reverse-keyed); Search = items 2, 3, 7, 8, 10.
# NOTE(review): this assumes MIL1-MIL10 are in the questionnaire's standard item
# order -- confirm against the codebook.
Presence = [df.MIL1, df.MIL4, df.MIL5, df.MIL6, df.MIL9R]
Searching = [df.MIL2, df.MIL3, df.MIL7, df.MIL8, df.MIL10]

In [None]:
# Internal-consistency table: one row per scale, with the coefficient returned by
# CronbachAlpha for that scale's item list.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print("Cronbach's Alphas - Measures of Internal Consistency Reliability")
print('There is a missing value in Presence.  Working on figuring a way to circumvent this in Python.')
Alpha = pd.DataFrame(
    [
        ["PSS - Across Time", CronbachAlpha(PSS1)],
        ["PSS - Within Context", CronbachAlpha(PSS2)],
        ["PSS - Between Contexts", CronbachAlpha(PSS3)],
        ["PSS - Specific Situations", CronbachAlpha(PSS4)],
        ["MDMQ - Indecision", CronbachAlpha(Indecision)],
        ["HEXACO - Flexibility", CronbachAlpha(Flex)],
        ["CAT PD - Rigidity", CronbachAlpha(Rigidity)],
        ["Attitude Consistency", CronbachAlpha(AttCon)],
        ["Mini-IPIP - Intelligence", CronbachAlpha(O)],
        ["Mini-IPIP - Conscientiousness", CronbachAlpha(C)],
        ["Mini-IPIP - Extraversion", CronbachAlpha(E)],
        ["Mini-IPIP - Agreeableness", CronbachAlpha(A)],
        ["Mini-IPIP - Neuroticism", CronbachAlpha(N)],
        ["Satisfaction with Life Scale", CronbachAlpha(LifeSat)],
        ["Authenticity Scale", CronbachAlpha(Auth)],
        ["VIA - Emotional Intelligence", CronbachAlpha(EmoInt)],
        ["Meaning in Life - Presence", CronbachAlpha(Presence)],
        ["Meaning in Life - Searching", CronbachAlpha(Searching)],
    ],
    columns=['Psychological Scale', 'Alpha'],
)
Alpha

In [None]:
# Multiple regression: PSS1Mean on the five personality scores.
# The formula refers to the variables assigned here (Y, Con, Agr, ...), not to
# df column labels, so these aliases must exist before the model is fitted.
Con = df.C
Agr = df.A
Neu = df.N
Int = df.O
Ext = df.E

Y = df.PSS1Mean
formula = 'Y ~ Con+Agr+Neu+Int+Ext'
lm = ols(formula, df).fit()
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print('\nRegression Results - Consistency Across Time Subscale and Personality\n')
print(lm.summary())

In [None]:
# Multiple regression: PSS2Mean on the five personality scores.
# Con, Agr, Neu, Int and Ext are the predictor aliases assigned in the first
# regression cell; only the dependent variable Y is replaced here.
Y = df.PSS2Mean
formula = 'Y ~ Con+Agr+Neu+Int+Ext'
lm = ols(formula, df).fit()
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print('\nRegression Results - Consistency Within Context Subscale and Personality\n')
print(lm.summary())

In [None]:
# Multiple regression: PSS3Mean on the five personality scores.
# Con, Agr, Neu, Int and Ext are the predictor aliases assigned in the first
# regression cell; only the dependent variable Y is replaced here.
Y = df.PSS3Mean
formula = 'Y ~ Con+Agr+Neu+Int+Ext'
lm = ols(formula, df).fit()
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print('\nRegression Results - Consistency Between Contexts Subscale and Personality\n')
print(lm.summary())

In [None]:
# Simple regression: PSS3Mean on AuthMean.
# The formula refers to the variables B and Y assigned here, not to df columns.
B = df.AuthMean
Y = df.PSS3Mean
formula = 'Y ~ B'
lm = ols(formula, df).fit()
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print('\nRegression Results - Consistency Between Contexts Subscale and Authenticity\n')
print(lm.summary())

In [None]:
# Simple regression: PSS4Mean on SearchingMean.
# The formula refers to the variables B and Y assigned here, not to df columns.
B = df.SearchingMean
Y = df.PSS4Mean
formula = 'Y ~ B'
lm = ols(formula, df).fit()
# The heading names the subscale that is actually modelled (PSS4, labelled
# "Specific Situations" in the alpha table).
# NOTE(review): if the Between Contexts subscale was the intended outcome, set
# Y = df.PSS3Mean instead and restore the heading accordingly.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print('\nRegression Results - Consistency in Specific Situations Subscale and Searching for Meaning in Life\n')
print(lm.summary())

In [None]:
# Simple regression: PSS1Mean on EmotIntMean.
# The formula refers to the variables B and Y assigned here, not to df columns.
B = df.EmotIntMean
Y = df.PSS1Mean
formula = 'Y ~ B'
lm = ols(formula, df).fit()
# The heading names the subscale that is actually modelled (PSS1, labelled
# "Across Time" in the alpha table).
# NOTE(review): if the Between Contexts subscale was the intended outcome, set
# Y = df.PSS3Mean instead and restore the heading accordingly.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print('\nRegression Results - Consistency Across Time Subscale and Emotional Intelligence\n')
print(lm.summary())

In [None]:
# Write the converted frame (including the added reverse-scored columns) to the
# output file as comma-separated text, without the row index.
df.to_csv(csvout, sep=',', index=False)