In [369]:
%matplotlib inline

import pandas as pd
import numpy as np
from enum import Enum

from sklearn import linear_model
import scipy as sp

In [192]:
# ENUMS:
class AgeRange(Enum):
    """Integer codes that replace the text labels of the survey's "Age" column."""
    # NOTE(review): despite its name, MINUS18 is the code assigned to the
    # "20 ve altı" (20 and under) label during cleaning.
    MINUS18 = 0
    B2130 = 1  # label "21-30"
    B3140 = 2  # label "31-40"
    B4150 = 3  # label "41-50"
    PLUS50 = 4  # label "50 ve yukarısı" (50 and over)

class Education(Enum):
    """Integer codes that replace the text labels of the survey's "Education" column."""
    HSCHOOL = 0  # label "Lise"
    JCOLLEGE = 1  # label "Yüksek Okul (2 yıl)"
    UNIVERSITY = 2  # label "Üniversite"
    MASTERS = 3  # label "Yüksek Lisans"
    PHD = 4  # label "Doktora"
    # NOTE(review): OTHER is the code assigned to "Lise öncesi" (before high
    # school), so the numeric codes are not ordered by education level.
    OTHER = 5
    
class Gender(Enum):
    """Integer codes that replace the text labels of the survey's "Gender" column."""
    MALE = 0  # label "Erkek"
    FEMALE = 1  # label "Kadın"

class SCORETYPE(Enum):
    """The eight score types a single questionnaire answer can count towards.

    The members form four opposing pairs (ICEDONUK/DISADONUK, DUYUSAL/SEZGISEL,
    DUSUNSEL/DUYGUSAL, YARGISAL/ALGISAL); each question in ``answerSheet``
    offers the two members of one pair.
    """
    # English glosses below are translations of the Turkish member names.
    ICEDONUK = 0  # introverted
    DISADONUK = 1  # extroverted
    DUYUSAL = 2  # sensing
    SEZGISEL = 3  # intuitive
    DUSUNSEL = 4  # thinking
    DUYGUSAL = 5  # feeling
    YARGISAL = 6  # judging
    ALGISAL = 7  # perceiving

In [193]:
# Scoring key for the questionnaire, keyed by 1-based question number.
# Each value is a two-element list of score types: element 0 is credited when
# the subject answered "a", element 1 for any other answer.
answerSheet = {
    1 : [ SCORETYPE.SEZGISEL, SCORETYPE.DUYUSAL],
    2 : [ SCORETYPE.ICEDONUK, SCORETYPE.DISADONUK],
    3 : [ SCORETYPE.DUSUNSEL, SCORETYPE.DUYGUSAL],
    4 : [ SCORETYPE.YARGISAL, SCORETYPE.ALGISAL],
    5 : [ SCORETYPE.DUSUNSEL, SCORETYPE.DUYGUSAL],
    6 : [ SCORETYPE.ICEDONUK, SCORETYPE.DISADONUK],
    7 : [ SCORETYPE.YARGISAL, SCORETYPE.ALGISAL],
    8 : [ SCORETYPE.ALGISAL, SCORETYPE.YARGISAL],
    9 : [ SCORETYPE.YARGISAL, SCORETYPE.ALGISAL],
    10 : [ SCORETYPE.SEZGISEL, SCORETYPE.DUYUSAL],
    11 : [ SCORETYPE.ICEDONUK, SCORETYPE.DISADONUK],
    12 : [ SCORETYPE.DUSUNSEL, SCORETYPE.DUYGUSAL],
    13 : [ SCORETYPE.DUYUSAL, SCORETYPE.SEZGISEL],
    14 : [ SCORETYPE.DUYGUSAL, SCORETYPE.DUSUNSEL],
    15 : [ SCORETYPE.DISADONUK, SCORETYPE.ICEDONUK],
    16 : [ SCORETYPE.DUYUSAL, SCORETYPE.SEZGISEL],
    17 : [ SCORETYPE.DUYUSAL, SCORETYPE.SEZGISEL],
    18 : [ SCORETYPE.ALGISAL, SCORETYPE.YARGISAL],
    19 : [ SCORETYPE.DISADONUK, SCORETYPE.ICEDONUK],
    20 : [ SCORETYPE.DUSUNSEL, SCORETYPE.DUYGUSAL],
    21 : [ SCORETYPE.DUYUSAL, SCORETYPE.SEZGISEL],
    22 : [ SCORETYPE.ICEDONUK, SCORETYPE.DISADONUK],
    23 : [ SCORETYPE.ALGISAL, SCORETYPE.YARGISAL],
    24 : [ SCORETYPE.DUYGUSAL, SCORETYPE.DUSUNSEL],
    25 : [ SCORETYPE.DUSUNSEL, SCORETYPE.DUYGUSAL],
    26 : [ SCORETYPE.YARGISAL, SCORETYPE.ALGISAL],
    27 : [ SCORETYPE.DISADONUK, SCORETYPE.ICEDONUK],
    28 : [ SCORETYPE.SEZGISEL, SCORETYPE.DUYUSAL],
    29 : [ SCORETYPE.DUYGUSAL, SCORETYPE.DUSUNSEL],
    30 : [ SCORETYPE.SEZGISEL, SCORETYPE.DUYUSAL],
    31 : [ SCORETYPE.YARGISAL, SCORETYPE.ALGISAL],
    32 : [ SCORETYPE.DISADONUK, SCORETYPE.ICEDONUK]
}

In [335]:
# Load the raw survey export. Column names are supplied through `names`, so
# every line of the file is read as a data row.
data = pd.read_csv("kpa-data.csv", sep=";", names=["PType", "Age", "Gender", "Education", "Answer"])

# Remove the first 3 rows and renumber the remaining rows from 0.
# NOTE(review): presumably these are header/metadata lines of the export --
# confirm against the file.
data = data.drop(index=[0, 1, 2]).reset_index(drop=True)

dataRowCount = data.shape[0]

# Remove commas from the answer column.
# Whole-column assignment is used everywhere in this cell: element-wise
# `data["col"][row] = value` is chained indexing, which pandas does not
# guarantee to write back to the frame (and never does under Copy-on-Write).
data["Answer"] = data["Answer"].str.replace(",", "", regex=False)

# Convert string labels to the integer values of the matching enums.
labelCodes = {
    "Age": {
        "21-30": AgeRange.B2130.value,
        "31-40": AgeRange.B3140.value,
        "41-50": AgeRange.B4150.value,
        "20 ve altı": AgeRange.MINUS18.value,
        "50 ve yukarısı": AgeRange.PLUS50.value,
    },
    "Education": {
        "Lise": Education.HSCHOOL.value,
        "Yüksek Okul (2 yıl)": Education.JCOLLEGE.value,
        "Üniversite": Education.UNIVERSITY.value,
        "Yüksek Lisans": Education.MASTERS.value,
        "Doktora": Education.PHD.value,
        "Lise öncesi": Education.OTHER.value,
    },
    "Gender": {
        "Erkek": Gender.MALE.value,
        "Kadın": Gender.FEMALE.value,
    },
}
for column, codes in labelCodes.items():
    # A label without a code is kept unchanged rather than turned into NaN.
    # `codes=codes` binds the current mapping to the lambda.
    data[column] = data[column].map(lambda label, codes=codes: codes.get(label, label))

In [336]:
def _toScoreTypes(answers):
    """Translate one subject's answer string into a list of SCORETYPE members.

    The character at position ``i`` (0-based) is looked up under question
    ``i + 1`` of ``answerSheet``: "a" selects the first score type of the pair,
    any other character selects the second.

    Args:
        answers: The subject's answers as a string with one character per
            question, or a list that has already been converted.

    Returns:
        A list with one SCORETYPE member per answered question.

    Raises:
        KeyError: If the string has more characters than ``answerSheet`` has
            questions.
    """
    if isinstance(answers, list):
        # Already converted, i.e. this cell was run before. Decoding again
        # would compare enum members with "a" and silently turn every entry
        # into the second option, so the value is returned untouched.
        return answers
    return [
        answerSheet[question][0 if letter == "a" else 1]
        for question, letter in enumerate(answers, start=1)
    ]


dataRowCount = data.shape[0]

# Each row is decoded over its own length instead of the length of row 0.
newAnswerTotal = [_toScoreTypes(answers) for answers in data["Answer"]]

# Replace the column in one assignment; element-wise `data["Answer"][row] = ...`
# is chained indexing and is not guaranteed to write back to the frame.
data["Answer"] = pd.Series(newAnswerTotal, index=data.index, dtype=object)

In [337]:
# Returns the personality type of the subject at the given row number.
def getPersonalityType(rowNumber):
    """Build the four-letter personality type of the subject stored at ``rowNumber``.

    Reads the subject's entry of the global ``data["Answer"]`` column, counts
    how often each ``SCORETYPE`` member occurs in it, and picks one letter per
    opposing pair:

    * "Ç" if ICEDONUK outnumbers DISADONUK, otherwise "Ş"
    * "S" if SEZGISEL outnumbers DUYUSAL, otherwise "D"
    * "U" if DUYGUSAL outnumbers DUSUNSEL, otherwise "Ü"
    * "A" if ALGISAL outnumbers YARGISAL, otherwise "Y"

    The comparisons are strict, so a tie yields the second letter of the pair.

    Args:
        rowNumber: Label of the subject's row in ``data``.

    Returns:
        The four chosen letters concatenated in the order listed above.
    """
    picks = data["Answer"][rowNumber]

    def occurrences(scoreType):
        # How many of the subject's entries equal the given score type.
        return sum(1 for pick in picks if pick == scoreType)

    # (score type that must win, its opposite, letter if it wins, letter otherwise)
    letterRules = (
        (SCORETYPE.ICEDONUK, SCORETYPE.DISADONUK, "Ç", "Ş"),
        (SCORETYPE.SEZGISEL, SCORETYPE.DUYUSAL, "S", "D"),
        (SCORETYPE.DUYGUSAL, SCORETYPE.DUSUNSEL, "U", "Ü"),
        (SCORETYPE.ALGISAL, SCORETYPE.YARGISAL, "A", "Y"),
    )

    return "".join(
        winLetter if occurrences(contender) > occurrences(opposite) else fallbackLetter
        for contender, opposite, winLetter, fallbackLetter in letterRules
    )

In [371]:
dataRowCount = data.shape[0]

# Binary encoding of the Duyusal-vs-Sezgisel dimension for every subject:
# 0 stands for DUYUSAL / letter "D", 1 stands for SEZGISEL / letter "S".
predictors = []  # derived from the score type credited by the answer to question 1
predicted = []   # derived from the letter found in the subject's "PType" value

for rowNumber in range(dataRowCount):
    firstAnswer = data["Answer"][rowNumber][0]
    personalityType = data["PType"][rowNumber]

    if firstAnswer == SCORETYPE.DUYUSAL:
        predictorCode = 0
    elif firstAnswer == SCORETYPE.SEZGISEL:
        predictorCode = 1
    else:
        # Neither side can be determined: skip the subject entirely.
        continue

    if "D" in personalityType:
        predictedCode = 0
    elif "S" in personalityType:
        predictedCode = 1
    else:
        continue

    # Both values are appended together so that position i of the two lists
    # always belongs to the same subject; filling the lists in separate loops
    # would shift them against each other as soon as one row is skipped.
    predictors.append(predictorCode)
    predicted.append(predictedCode)

# Descriptive aliases for this dimension's pair of lists.
predictorsAsDuyusalVSSezgisel = predictors
predictedAsDuyusalVSSezgisel = predicted



In [373]:
# Pearson correlation between the question-1 encoding and the encoding taken
# from the recorded personality type; element [0] of the result is the
# correlation coefficient. Uses the `predictors` / `predicted` lists built in
# the previous cell, so the notebook runs from a fresh kernel.
sp.stats.pearsonr(predictors, predicted)[0]

0.4054489460834503