In [5]:
import pandas as pd
import glob
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.backends.backend_pdf
from matplotlib.backends.backend_pdf import PdfPages

%matplotlib inline 


# Seaborn visualization library
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.datasets.samples_generator import make_blobs
from sklearn.cluster import KMeans

from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression


from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LassoCV
from sklearn.ensemble import RandomForestClassifier

import scipy

In [2]:
pd.set_option('display.max_columns', 1000)
pd.options.display.max_rows=1000

pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 1000)

plt.rcParams.update({'figure.max_open_warning': 0})


In [3]:
import random
random.seed( 0 )

# Load in Data

In [4]:
root = r"../data/input/07 Samsung UX Index - Web App Implementation/"

fname_data = root + r"Samsung UX Index Survey_Data.csv"
df_data = pd.read_csv(fname_data)

fname_vaxmap = root + r"Samsung UX Index Survey_Datamap.xlsx"
df_varmap = pd.read_excel(fname_vaxmap, header=1, sheet_name=0)
df_valmap = pd.read_excel(fname_vaxmap, header=1, sheet_name=1)

  interactivity=interactivity, compiler=compiler, result=result)


# Examine ATTRIBUTE Importance (Zclass)

In [13]:
path = r'/Users/lubagloukhov/Documents/Consulting/Samsung/UXi/data/output'
all_files = glob.glob(path + "*/*/Seg1_KNN3_zclust0.05.csv")

li = []

for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    df['analysisloc'] = filename.split('/')[9]
    li.append(df)

zclust_frame = pd.concat(li, axis=0, ignore_index=True)

print(zclust_frame.shape)
zclust_frame.head()

(2484, 8)


Unnamed: 0,Variable,Label,interp,clusterA,clusterB,stat,pvalue,analysisloc
0,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,,0,0,0.0,1.0,20191214_144414_activitiesxsatisfactionxdemog
1,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Accept H0: cluster 0 <= cluster 1,0,1,2.230173,0.025819,20191214_144414_activitiesxsatisfactionxdemog
2,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Reject H0 in favor of Ha: cluster 0 < cluster 2,0,2,-3.704824,0.000224,20191214_144414_activitiesxsatisfactionxdemog
3,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Accept H0: cluster 1 >= cluster 0,1,0,-2.230173,0.025819,20191214_144414_activitiesxsatisfactionxdemog
4,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,,1,1,0.0,1.0,20191214_144414_activitiesxsatisfactionxdemog


In [23]:
frame_Accept005 = frame[frame.interp.fillna(value='').str.contains('Reject')]

print(frame_Accept005.shape)
print(float(frame_Accept005.shape[0]/2484))
frame_Accept005.head()

(370, 8)
0.14895330112721417


Unnamed: 0,Variable,Label,interp,clusterA,clusterB,stat,pvalue,analysisloc
2,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Reject H0 in favor of Ha: cluster 0 < cluster 2,0,2,-3.704824,0.0002242572,20191214_144414_activitiesxsatisfactionxdemog
5,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Reject H0 in favor of Ha: cluster 1 < cluster 2,1,2,-5.217989,2.237562e-07,20191214_144414_activitiesxsatisfactionxdemog
6,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Reject H0 in favor of Ha: cluster 2 > cluster 0,2,0,3.704824,0.0002242572,20191214_144414_activitiesxsatisfactionxdemog
7,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Reject H0 in favor of Ha: cluster 2 > cluster 1,2,1,5.217989,2.237562e-07,20191214_144414_activitiesxsatisfactionxdemog
28,d3_4,D3. Not currently employed or in school - Which of the following best describes your current employment status?,Reject H0 in favor of Ha: cluster 0 < cluster 1,0,1,-3.375084,0.0007492897,20191214_144414_activitiesxsatisfactionxdemog


In [21]:
frame_Accept005.groupby('Variable').Label.count().sort_values()

Variable
d4_3      2
d7_4      2
d3_2      2
d4_7      2
d7_97     2
d6        4
d4_4      4
d1_3      6
d4_5      6
d3_3     12
d7_2     16
d7_99    20
d4_2     24
d4_6     30
d4_1     30
d7_1     32
d3_1     42
d1_1     42
d1_2     44
d3_4     48
Name: Label, dtype: int64

In [36]:
frame_Accept001 = frame_Accept005[frame_Accept005.pvalue<.0000001]
# frame[frame.interp.fillna(value='').str.contains('Reject')]

print(frame_Accept001.shape)
print(float(frame_Accept001.shape[0]/2484))
frame_Accept001.head()

(42, 8)
0.016908212560386472


Unnamed: 0,Variable,Label,interp,clusterA,clusterB,stat,pvalue,analysisloc
416,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Reject H0 in favor of Ha: cluster 0 < cluster 2,0,2,-5.852438,5.889341e-09,20191214_143445_usagemetricsxdemog
420,d3_1,D3. Student (part-or full-time) - Which of the following best describes your current employment status?,Reject H0 in favor of Ha: cluster 2 > cluster 0,2,0,5.852438,5.889341e-09,20191214_143445_usagemetricsxdemog
461,d7_1,D7. White - What is your race?,Reject H0 in favor of Ha: cluster 0 > cluster 2,0,2,6.007209,2.361054e-09,20191214_143445_usagemetricsxdemog
465,d7_1,D7. White - What is your race?,Reject H0 in favor of Ha: cluster 2 < cluster 0,2,0,-6.007209,2.361054e-09,20191214_143445_usagemetricsxdemog
595,d1_1,D1. Are you? Male,Reject H0 in favor of Ha: cluster 0 < cluster 1,0,1,-8.38066,1.232077e-16,20191214_143445_usagemetricsxdemog


In [54]:
print(len(frame_Accept001.groupby('analysisloc').Variable.count()))
frame_Accept001.groupby('analysisloc').Variable.count()


5


analysisloc
20191214_143315_loyaltymetricsxdemog            8
20191214_143350_overallqualityxdemog            2
20191214_143445_usagemetricsxdemog             12
20191214_143836_activitiesximportancexdemog    12
20191214_144107_ activitiesxrecencyxdemog??     8
Name: Variable, dtype: int64

In [92]:
frame_Accept001_usage = frame_Accept001[frame_Accept001.analysisloc==
                                        '20191214_143836_activitiesximportancexdemog']
print(frame_Accept001_usage.groupby(['Variable','Label']).interp.count())
frame_Accept001_usage[['Label','interp']]#.interp


Variable  Label                                             
d1_1      D1. Are you? Male                                     4
d1_2      D1. Are you? Female                                   4
d7_1      D7. White - What is your race?                        2
d7_2      D7. Black or African American - What is your race?    2
Name: interp, dtype: int64


Unnamed: 0,Label,interp
2120,D7. White - What is your race?,Reject H0 in favor of Ha: cluster 1 > cluster 2
2122,D7. White - What is your race?,Reject H0 in favor of Ha: cluster 2 < cluster 1
2129,D7. Black or African American - What is your race?,Reject H0 in favor of Ha: cluster 1 < cluster 2
2131,D7. Black or African American - What is your race?,Reject H0 in favor of Ha: cluster 2 > cluster 1
2251,D1. Are you? Male,Reject H0 in favor of Ha: cluster 0 > cluster 1
2253,D1. Are you? Male,Reject H0 in favor of Ha: cluster 1 < cluster 0
2255,D1. Are you? Male,Reject H0 in favor of Ha: cluster 1 < cluster 2
2257,D1. Are you? Male,Reject H0 in favor of Ha: cluster 2 > cluster 1
2260,D1. Are you? Female,Reject H0 in favor of Ha: cluster 0 < cluster 1
2262,D1. Are you? Female,Reject H0 in favor of Ha: cluster 1 > cluster 0


# Examine ATTRIBUTE Importance (varimp)

In [84]:
path = r'/Users/lubagloukhov/Documents/Consulting/Samsung/UXi/data/output'
all_files = glob.glob(path + "*/*/*vimi*.csv")


li = []

for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    df['analysisloc'] = filename.split('/')[9]
    df['analysisclust'] = int(filename.split('/')[10].split('_')[-1].replace('.csv','').replace('vimi',''))
    li.append(df)

vimi_frame = pd.concat(li, axis=0, ignore_index=True)

print(vimi_frame.shape)
vimi_frame.head()

(3882, 8)


Unnamed: 0,Variable,Label,imp,coeff,imp_rank,coeff_rank,analysisloc,analysisclust
0,qxactivitiesxsatisfaction_15,Q_Activities_Satisfaction. Taking a picture with the rear-facing camera - How satisfied are you with your [MODEL]’s performance on each of the activities below?,0.16415,-1.243294,1.0,1.0,20191214_144414_activitiesxsatisfactionxdemog,2
1,qxactivitiesxsatisfaction_16,Q_Activities_Satisfaction. Taking a picture (selfie) with the front-facing camera - How satisfied are you with your [MODEL]’s performance on each of the activities below?,0.150465,-1.04702,2.0,2.0,20191214_144414_activitiesxsatisfactionxdemog,2
2,qxactivitiesxsatisfaction_5,"Q_Activities_Satisfaction. Viewing screen (e.g., watching video, viewing pictures, playing games) - How satisfied are you with your [MODEL]’s performance on each of the activities below?",0.070108,-0.888198,3.0,5.0,20191214_144414_activitiesxsatisfactionxdemog,2
3,qxactivitiesxsatisfaction_17,Q_Activities_Satisfaction. Recording a video - How satisfied are you with your [MODEL]’s performance on each of the activities below?,0.066426,-1.019242,4.0,4.0,20191214_144414_activitiesxsatisfactionxdemog,2
4,qxactivitiesxsatisfaction_14,Q_Activities_Satisfaction. Sending and receiving a text message - How satisfied are you with your [MODEL]’s performance on each of the activities below?,0.062579,-0.877119,5.0,6.0,20191214_144414_activitiesxsatisfactionxdemog,2


# The most important usage questions determining whether a user will be in cluster 0 versus cluster 1 or 2 is

In [88]:
vimi_frame[(vimi_frame.analysisloc.str.contains('usage'))&(vimi_frame.analysisclust==0)]

Unnamed: 0,Variable,Label,imp,coeff,imp_rank,coeff_rank,analysisloc,analysisclust
389,qxcurrentxos,"Q_Current_OS. Which version of [SHOW IF BRAND=APPLE: iOS; SHOW IF BRAND = SAMSUNG, LG, MOTOROLA, GOOGLE: Android] are you currently using on your smartphone? If you are not sure, you can check this in your settings.",0.865176,4.959314,1.0,1.0,20191214_143445_usagemetricsxdemog,0
390,qxcurrentxstorage,"Q_Current_Storage. What is your [MODEL]’s storage capacity? Please specify the internal storage only, do not include the external storage on SD card.",0.091052,0.186615,2.0,2.0,20191214_143445_usagemetricsxdemog,0
391,qxunlocking,Q_Unlocking. Which method do you currently use to unlock your [MODEL]?,0.015388,0.03407,3.0,6.0,20191214_143445_usagemetricsxdemog,0
392,qxtransition_1,"QxTransition_1. How easy was it to transfer settings, contacts and content (pictures, music, videos, etc.) from your old smartphone to the new one?",0.010522,-0.059047,4.0,5.0,20191214_143445_usagemetricsxdemog,0
393,qxtime,Q_Time. How long have you been using your [MODEL]?,0.007024,0.005963,5.0,7.0,20191214_143445_usagemetricsxdemog,0
394,qxcurrentxcarrier,Q_Current_Carrier. Which mobile carrier do you use on your [MODEL]?,0.006177,0.067933,6.0,3.0,20191214_143445_usagemetricsxdemog,0
395,qxpreviousxbrand,Q_Previous_Brand. Which smartphone brand did you use before [MODEL]?,0.004661,0.059531,7.0,4.0,20191214_143445_usagemetricsxdemog,0


# The most important activities importance questions determining whether a user will be in cluster 1 versus cluster 0 or 2 is

In [91]:
vimi_frame[(vimi_frame.analysisloc.str.contains('activitiesximportance'))&(vimi_frame.analysisclust==1)]

Unnamed: 0,Variable,Label,imp,coeff,imp_rank,coeff_rank,analysisloc,analysisclust
3820,qxactivitiesximportance_25,"Q_Activities_Importance. Using an AR (Augmented Reality) shopping assistant (e.g., take picture of an item and get a price comparison or add virtual furniture and appli - How important is it for you that a smartphone performs well on the activities below?",0.121277,-0.491632,1.0,1.0,20191214_143836_activitiesximportancexdemog,1
3821,qxactivitiesximportance_6,Q_Activities_Importance. Setting up multiple windows / split screen - How important is it for you that a smartphone performs well on the activities below?,0.070377,-0.428473,2.0,2.0,20191214_143836_activitiesximportancexdemog,1
3822,qxactivitiesximportance_24,Q_Activities_Importance. Asking digital assistant a question - How important is it for you that a smartphone performs well on the activities below?,0.065527,-0.308414,3.0,5.0,20191214_143836_activitiesximportancexdemog,1
3823,qxactivitiesximportance_22,Q_Activities_Importance. Creating and editing a note - How important is it for you that a smartphone performs well on the activities below?,0.061392,-0.205284,4.0,7.0,20191214_143836_activitiesximportancexdemog,1
3824,qxactivitiesximportance_15,Q_Activities_Importance. Taking a picture with the rear-facing camera - How important is it for you that a smartphone performs well on the activities below?,0.048027,0.347287,5.0,3.0,20191214_143836_activitiesximportancexdemog,1
3825,qxactivitiesximportance_21,"Q_Activities_Importance. Tracking your wellbeing (steps, exercise, sleep, etc.) - How important is it for you that a smartphone performs well on the activities below?",0.042955,-0.26524,6.0,6.0,20191214_143836_activitiesximportancexdemog,1
3826,qxactivitiesximportance_14,Q_Activities_Importance. Sending and receiving a text message - How important is it for you that a smartphone performs well on the activities below?,0.040587,0.327466,7.0,4.0,20191214_143836_activitiesximportancexdemog,1
3827,qxactivitiesximportance_23,"Q_Activities_Importance. Making a mobile payment with your smartphone at a store terminal (e.g., Samsung Pay / Apple Pay / Google Pay) - How important is it for you that a smartphone performs well on the activities below?",0.040335,-0.156243,8.0,9.0,20191214_143836_activitiesximportancexdemog,1
3828,qxactivitiesximportance_3,Q_Activities_Importance. Transferring files from smartphone to a computer - How important is it for you that a smartphone performs well on the activities below?,0.036899,-0.135855,9.0,11.0,20191214_143836_activitiesximportancexdemog,1
3829,qxactivitiesximportance_19,Q_Activities_Importance. Sharing a photo or file - How important is it for you that a smartphone performs well on the activities below?,0.035788,0.094626,10.0,13.0,20191214_143836_activitiesximportancexdemog,1


# The most important loyalty metrics questions determining whether a user will be in cluster 0 versus cluster 1 or 2 is

In [93]:
vimi_frame[(vimi_frame.analysisloc.str.contains('loyalty'))&(vimi_frame.analysisclust==0)]

Unnamed: 0,Variable,Label,imp,coeff,imp_rank,coeff_rank,analysisloc,analysisclust
1940,qxadvocacy01_1,"Q_Advocacy01. How likely are you to recommend your [MODEL] or another [BRAND] smartphone to a family member, friend, or colleague looking to purchase a new smartphone?",0.441661,5.326333,1.0,2.0,20191214_143315_loyaltymetricsxdemog,0
1941,qxadvocacy02_1,Q_Advocacy02. How likely are you to leave a positive online review for your [MODEL]?,0.236124,5.957574,2.0,1.0,20191214_143315_loyaltymetricsxdemog,0
1942,qxenrichment_1,"Q_Enrichment. Based on your experience with your [MODEL], how likely are you to consider [BRAND] products if you decide to buy a new tablet, a laptop, or any other electronic product produced by this brand?",0.213951,5.035969,3.0,3.0,20191214_143315_loyaltymetricsxdemog,0
1943,qxretention_1,Q_Retention. How likely are you to select [BRAND] the next time you decide to upgrade/purchase a new smartphone?,0.108264,4.9648,4.0,4.0,20191214_143315_loyaltymetricsxdemog,0


# The most important overall quality questions determining whether a user will be in cluster 0 versus cluster 1 or 2 is

In [94]:
vimi_frame[(vimi_frame.analysisloc.str.contains('quality'))&(vimi_frame.analysisclust==0)]

Unnamed: 0,Variable,Label,imp,coeff,imp_rank,coeff_rank,analysisloc,analysisclust
275,qxactivitiesxqualityxindicators_1_8,"Q_Activities_Quality_Indicators. Reinforces my trust in [BRAND] - Using your quick settings (e.g., toggle WiFi, Airplane mode, brightness, volume, flashlight) - How much do you agree or disagree with the following about different activities you do with [MO",0.036483,-0.177055,1.0,6.0,20191214_144234_activitiesxqualityxindicatorsxdemog,0
276,qxactivitiesxqualityxindicators_1_15,"Q_Activities_Quality_Indicators. Reinforces my trust in [BRAND] - Taking a picture with the rear-facing camera - How much do you agree or disagree with the following about different activities you do with [MODEL]? Again, please think of your experience wit",0.029249,-0.055918,2.0,68.0,20191214_144234_activitiesxqualityxindicatorsxdemog,0
277,qxactivitiesxqualityxindicators_2_12,"Q_Activities_Quality_Indicators. Is enjoyable - Scheduling a calendar event / setting reminders - How much do you agree or disagree with the following about different activities you do with [MODEL]? Again, please think of your experience with the preinstal",0.025901,-0.067462,3.0,59.0,20191214_144234_activitiesxqualityxindicatorsxdemog,0
278,qxactivitiesxqualityxindicators_3_9,"Q_Activities_Quality_Indicators. Meets my needs / provides the desirable result - Customizing settings (e.g., changing wallpaper, changing security/privacy requirements, etc.) - How much do you agree or disagree with the following about different activitie",0.025089,-0.067302,4.0,61.0,20191214_144234_activitiesxqualityxindicatorsxdemog,0
279,qxactivitiesxqualityxindicators_1_19,"Q_Activities_Quality_Indicators. Reinforces my trust in [BRAND] - Sharing a photo or file - How much do you agree or disagree with the following about different activities you do with [MODEL]? Again, please think of your experience with the preinstalled ma",0.023911,-0.07629,5.0,51.0,20191214_144234_activitiesxqualityxindicatorsxdemog,0
280,qxactivitiesxqualityxindicators_1_17,"Q_Activities_Quality_Indicators. Reinforces my trust in [BRAND] - Recording a video - How much do you agree or disagree with the following about different activities you do with [MODEL]? Again, please think of your experience with the preinstalled manufact",0.02151,0.08528,6.0,44.0,20191214_144234_activitiesxqualityxindicatorsxdemog,0
281,qxactivitiesxqualityxindicators_4_20,"Q_Activities_Quality_Indicators. Is simple and easy - Using GPS or location services (for example, maps) - How much do you agree or disagree with the following about different activities you do with [MODEL]? Again, please think of your experience with the",0.02112,0.023344,7.0,87.0,20191214_144234_activitiesxqualityxindicatorsxdemog,0
282,qxactivitiesxqualityxindicators_4_17,"Q_Activities_Quality_Indicators. Is simple and easy - Recording a video - How much do you agree or disagree with the following about different activities you do with [MODEL]? Again, please think of your experience with the preinstalled manufacturer apps on",0.02078,0.043284,8.0,75.0,20191214_144234_activitiesxqualityxindicatorsxdemog,0
283,qxactivitiesxqualityxindicators_4_15,"Q_Activities_Quality_Indicators. Is simple and easy - Taking a picture with the rear-facing camera - How much do you agree or disagree with the following about different activities you do with [MODEL]? Again, please think of your experience with the preins",0.019273,0.147804,9.0,12.0,20191214_144234_activitiesxqualityxindicatorsxdemog,0
284,qxactivitiesxqualityxindicators_3_15,"Q_Activities_Quality_Indicators. Meets my needs / provides the desirable result - Taking a picture with the rear-facing camera - How much do you agree or disagree with the following about different activities you do with [MODEL]? Again, please think of you",0.018946,-0.008371,10.0,98.0,20191214_144234_activitiesxqualityxindicatorsxdemog,0
