In [5]:
import pandas as pd
import glob
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.backends.backend_pdf
from matplotlib.backends.backend_pdf import PdfPages

%matplotlib inline 


# Seaborn visualization library
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.datasets.samples_generator import make_blobs
from sklearn.cluster import KMeans

from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression


from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LassoCV
from sklearn.ensemble import RandomForestClassifier

import scipy

In [2]:
# Widen pandas' display limits so large frames and long survey labels are
# rendered in full instead of being truncated.
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000

pd.options.display.width = 1000
pd.options.display.max_colwidth = 1000

# A value of 0 turns off matplotlib's "too many open figures" warning.
plt.rcParams['figure.max_open_warning'] = 0


In [3]:
import random

# Seed Python's built-in generator.
random.seed(0)

# random.seed() does not touch NumPy's global generator, which is what NumPy
# and scikit-learn estimators (when no random_state is passed) draw from.
# Seed it as well so stochastic steps repeat across Restart & Run All.
np.random.seed(0)

# Load in Data

In [4]:
# Input folder, relative to the notebook's working directory.
root = r"../data/input/07 Samsung UX Index - Web App Implementation/"

# Survey data export (CSV).
fname_data = root + r"Samsung UX Index Survey_Data.csv"
df_data = pd.read_csv(fname_data)

# Data-map workbook: the first sheet is loaded into df_varmap and the second
# into df_valmap (presumably the variable map and the value map -- confirm).
# header=1 takes the column names from the second row of each sheet.
# Both sheets are requested in a single call so the workbook is opened and
# parsed once; with a list for sheet_name, read_excel returns a dict keyed by
# the requested sheet positions.
fname_vaxmap = root + r"Samsung UX Index Survey_Datamap.xlsx"
_datamap_sheets = pd.read_excel(fname_vaxmap, header=1, sheet_name=[0, 1])
df_varmap = _datamap_sheets[0]
df_valmap = _datamap_sheets[1]

  interactivity=interactivity, compiler=compiler, result=result)


# Examine ATTRIBUTE Importance (Zclass)

In [13]:
# Folder that holds one sub-folder per analysis run.
# NOTE(review): this is an absolute, machine-specific path -- consider deriving
# it from a configurable data directory so the notebook runs elsewhere.
path = r'/Users/lubagloukhov/Documents/Consulting/Samsung/UXi/data/output'

# There is no separator between `path` and the first wildcard, so the pattern
# matches "<path>*/<run folder>/Seg1_KNN3_zclust0.05.csv".
all_files = glob.glob(path + "*/*/Seg1_KNN3_zclust0.05.csv")

# Fail with a clear message instead of pandas' "No objects to concatenate".
if not all_files:
    raise FileNotFoundError(
        "No Seg1_KNN3_zclust0.05.csv files found under " + path
    )

li = []

for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    # Tag every row with the name of the run folder the file came from. The
    # parent-folder component is taken relative to the end of the path so the
    # lookup does not depend on how deep `path` sits on this machine.
    df['analysisloc'] = filename.split('/')[-2]
    li.append(df)

# Stack the per-run tables into one frame with a fresh 0..n-1 index.
zclust_frame = pd.concat(li, axis=0, ignore_index=True)

print(zclust_frame.shape)
zclust_frame.head()

(2484, 8)


Unnamed: 0,Variable,Label,interp,clusterA,clusterB,stat,pvalue,analysisloc
0,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,,0,0,0.0,1.0,20191214_144414_activitiesxsatisfactionxdemog
1,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Accept H0: cluster 0 <= cluster 1,0,1,2.230173,0.025819,20191214_144414_activitiesxsatisfactionxdemog
2,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Reject H0 in favor of Ha: cluster 0 < cluster 2,0,2,-3.704824,0.000224,20191214_144414_activitiesxsatisfactionxdemog
3,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Accept H0: cluster 1 >= cluster 0,1,0,-2.230173,0.025819,20191214_144414_activitiesxsatisfactionxdemog
4,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,,1,1,0.0,1.0,20191214_144414_activitiesxsatisfactionxdemog


In [23]:
# Keep only the comparisons whose interpretation text contains "Reject".
# Rows with a missing `interp` are turned into empty strings first so they
# simply do not match.
# The source frame is `zclust_frame`, built in the loading cell above; the
# previous name `frame` is not defined anywhere in the visible notebook and
# would fail on a fresh kernel.
# NOTE(review): despite "Accept" in its name, this frame holds the rows where
# H0 was rejected; the name is kept because later cells refer to it.
frame_Accept005 = zclust_frame[zclust_frame.interp.fillna(value='').str.contains('Reject')]

print(frame_Accept005.shape)
# Share of all comparisons that were rejected, computed against the actual row
# count rather than a hard-coded total.
print(frame_Accept005.shape[0] / len(zclust_frame))
frame_Accept005.head()

(370, 8)
0.14895330112721417


Unnamed: 0,Variable,Label,interp,clusterA,clusterB,stat,pvalue,analysisloc
2,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Reject H0 in favor of Ha: cluster 0 < cluster 2,0,2,-3.704824,0.0002242572,20191214_144414_activitiesxsatisfactionxdemog
5,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Reject H0 in favor of Ha: cluster 1 < cluster 2,1,2,-5.217989,2.237562e-07,20191214_144414_activitiesxsatisfactionxdemog
6,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Reject H0 in favor of Ha: cluster 2 > cluster 0,2,0,3.704824,0.0002242572,20191214_144414_activitiesxsatisfactionxdemog
7,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Reject H0 in favor of Ha: cluster 2 > cluster 1,2,1,5.217989,2.237562e-07,20191214_144414_activitiesxsatisfactionxdemog
28,d3_4,D3. Not currently employed or in school - Which of the following best describes your current employment status?,Reject H0 in favor of Ha: cluster 0 < cluster 1,0,1,-3.375084,0.0007492897,20191214_144414_activitiesxsatisfactionxdemog


In [21]:
# Number of retained comparisons per survey variable, smallest count first.
frame_Accept005.groupby('Variable')['Label'].count().sort_values()

Variable
d4_3      2
d7_4      2
d3_2      2
d4_7      2
d7_97     2
d6        4
d4_4      4
d1_3      6
d4_5      6
d3_3     12
d7_2     16
d7_99    20
d4_2     24
d4_6     30
d4_1     30
d7_1     32
d3_1     42
d1_1     42
d1_2     44
d3_4     48
Name: Label, dtype: int64

In [36]:
# Narrow the retained comparisons to those with a p-value below 1e-7.
# NOTE(review): the variable name suggests a 0.001 cut-off, but the threshold
# actually applied is 1e-7 -- confirm which one is intended.
frame_Accept001 = frame_Accept005[frame_Accept005.pvalue < 1e-7]

print(frame_Accept001.shape)
# Share of all comparisons that pass the cut-off, computed against the actual
# row count of the full results frame rather than a hard-coded total.
print(frame_Accept001.shape[0] / len(zclust_frame))
frame_Accept001.head()

(42, 8)
0.016908212560386472


Unnamed: 0,Variable,Label,interp,clusterA,clusterB,stat,pvalue,analysisloc
416,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Reject H0 in favor of Ha: cluster 0 < cluster 2,0,2,-5.852438,5.889341e-09,20191214_143445_usagemetricsxdemog
420,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Reject H0 in favor of Ha: cluster 2 > cluster 0,2,0,5.852438,5.889341e-09,20191214_143445_usagemetricsxdemog
461,d7_1,D7. White - What is your race?,Reject H0 in favor of Ha: cluster 0 > cluster 2,0,2,6.007209,2.361054e-09,20191214_143445_usagemetricsxdemog
465,d7_1,D7. White - What is your race?,Reject H0 in favor of Ha: cluster 2 < cluster 0,2,0,-6.007209,2.361054e-09,20191214_143445_usagemetricsxdemog
595,d1_1,D1. Are you? Male,Reject H0 in favor of Ha: cluster 0 < cluster 1,0,1,-8.38066,1.232077e-16,20191214_143445_usagemetricsxdemog


In [54]:
# How many analysis runs still have at least one comparison after the cut-off...
print(frame_Accept001.groupby('analysisloc')['Variable'].count().size)
# ...and how many comparisons each of those runs contributes.
frame_Accept001.groupby('analysisloc')['Variable'].count()


5


analysisloc
20191214_143315_loyaltymetricsxdemog            8
20191214_143350_overallqualityxdemog            2
20191214_143445_usagemetricsxdemog             12
20191214_143836_activitiesximportancexdemog    12
20191214_144107_ activitiesxrecencyxdemog??     8
Name: Variable, dtype: int64

In [78]:
# Drill into a single analysis run.
# NOTE(review): the variable is named "..._usage" but the filter selects the
# "overallqualityxdemog" run -- confirm which run is meant.
frame_Accept001_usage = frame_Accept001.loc[
    frame_Accept001['analysisloc'] == '20191214_143350_overallqualityxdemog'
]
# Count of retained comparisons per (variable, label) pair within that run.
print(frame_Accept001_usage.groupby(['Variable', 'Label'])['interp'].count())
frame_Accept001_usage[['Label', 'interp']]


Variable  Label                                                                                                  
d3_1      D3. Student (part-or full-time) - Which of the following best describes your current employment status?    2
Name: interp, dtype: int64


Unnamed: 0,Label,interp
2279,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Reject H0 in favor of Ha: cluster 0 < cluster 2
2283,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Reject H0 in favor of Ha: cluster 2 > cluster 0


# Examine ATTITUDE Importance (Zclass)