# Facebook & Insta Chi Square

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
pd.set_option("display.max_columns", 300)
pd.set_option("display.max_rows", 200)

In [3]:
fpath = os.path.join('data')
df_sns_clean = pd.read_csv(os.path.join(fpath,'df_sns_cleaned.csv'),encoding = 'utf-8')


### 3Model

In [4]:
from scipy import stats
import researchpy as rp

In [5]:
def t_test_sns(df,dv):
    data = df.copy()
    
    facebook = data[data['X1SNS'] == 1]
    insta = data[data['X1SNS'] == 2]
    
    facebook_dv = facebook[dv]
    insta_dv = insta[dv]
    
    result = rp.ttest(facebook_dv,insta_dv,group1_name = 'Facebook',group2_name = 'Instagram')
    
    return result

In [6]:
def chi_sns(df,dv_idx,cat_dict = {1:'Y',0:'N'}):
    data = df.copy()
    
    sns_dict = {1:'Facebook',2:'instagram'}
    for k,v in sns_dict.items():
        
        data.loc[data['X1SNS']==k,['X1SNS']] = v
        
    
    yn_dict = cat_dict
    for k,v in yn_dict.items():
        
        data.loc[data[dv_idx]==k,[dv_idx]] = v
    
    iv = data['X1SNS']
    dv = data[dv_idx]
    
    #facebook['1SNS'] = 'facebook'
    #insta['1SNS'] = 'insta'
    
    
    result = rp.crosstab(iv, dv, test = 'chi-square', margins= True, correction = None, exact = False, expected_freqs= False)
    
    return result

In [7]:
def chi_iter_sns(df,dv_idx,cat_dict = None):
    data = df.copy()
    
    
    
    if cat_dict:
        
        test_dict = cat_dict
    
    
        for k,v in test_dict.items():
            data.loc[data[dv_idx]== k,dv_idx] = v

    
    sns_dict = {1:'Facebook',2:'instagram'}
    for k,v in sns_dict.items():
        
        data.loc[data['X1SNS']==k,['X1SNS']] = v
        
    iv = data['X1SNS']
    
        
    dv_dummies = pd.get_dummies(data[dv_idx])
    
    #facebook['1SNS'] = 'facebook'
    #insta['1SNS'] = 'insta'
    
    
    for dv_idx in dv_dummies:
        dv = dv_dummies[dv_idx]
        result = rp.crosstab(iv, dv, test = 'chi-square', margins= True, correction = None, exact = False, expected_freqs= False)
        
        
        
        p = result[1].iloc[1,1]
        score = importance(p)
        
        print('[%s]'%dv_idx)
        print(result[0],'\n')
        print(result[1],'\n')
        print('Importance: %s\n'%score)


 






In [8]:
def importance(p):
    score = 'X'
    if p <= 0.001:
        score = '***'
    elif p <= 0.01:
        score = '**'
    elif p <= 0.05:
        score = '*'
    
    return score
   # result[1].iloc[1,1]

In [9]:
col = 'X3model'

result = chi_sns(df_sns_clean,col)
p = result[1].iloc[1,1]
score = importance(p)

print('[%s]'%col)
print(result[0],'\n')
print(result[1],'\n')
print('Importance: %s\n'%score)


df_model = df_sns_clean[df_sns_clean['X3model'] == 1]
chi_col = ['X32face', 'X33body']
for col in chi_col:
    print('[%s]'%col)
    result = chi_sns(df_model,col)
    p = result[1].iloc[1,1]
    print(result[0],'\n')
    print(result[1],'\n')
    score = importance(p)
    print('Importance: %s\n'%score)
    
    
#result[1].iloc[1,1]

[X3model]
          X3model            
                N     Y   All
X1SNS                        
Facebook      312   688  1000
instagram     337   660   997
All           649  1348  1997 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    1.5401
1                    p-value =    0.2146
2               Cramer's phi =    0.0278 

Importance: X

[X32face]
          X32face            
                N     Y   All
X1SNS                        
Facebook      140   548   688
instagram     150   510   660
All           290  1058  1348 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    1.1286
1                    p-value =    0.2881
2               Cramer's phi =    0.0289 

Importance: X

[X33body]
          X33body           
              2.0    Y   All
X1SNS                       
Facebook      395  293   688
instagram     358  301   659
All           753  594  1347 

                Chi-square test  results
0  Pearson Chi-square ( 1.0

In [10]:
result = t_test_sns(df_model,'X31number')
p = result[1].iloc[3,1]
score = importance(p)

print('[X31number]')
print(result[0],'\n')
print(result[1],'\n')
print('Importance: %s\n'%score)

[X31number]
    Variable       N      Mean        SD        SE  95% Conf.  Interval
0   Facebook   688.0  1.861919  2.848908  0.108614   1.648664  2.075173
1  Instagram   660.0  2.022727  3.016057  0.117400   1.792204  2.253250
2   combined  1348.0  1.940653  2.931949  0.079857   1.783996  2.097310 

                                Independent t-test    results
0             Difference (Facebook - Instagram) =     -0.1608
1                            Degrees of freedom =   1346.0000
2                                             t =     -1.0066
3                         Two side test p value =      0.3143
4  Mean of Facebook > mean of Instagram p value =      0.1571
5  Mean of Facebook < mean of Instagram p value =      0.8429
6                                     Cohen's d =     -0.0548
7                                     Hedge's g =     -0.0548
8                                 Glass's delta =     -0.0564
9                                             r =      0.0274 

Importance: X


In [11]:
position_col_name = 'X34modelposition_'
df_position = df_model

print("[[%s]]\n\n"%(position_col_name.replace("_","")))
position_col = [position_col_name in s for s  in df_position.columns]
position_columns = df_model.loc[:,position_col].columns

for col in position_columns:
    print('[%s]'%col)
    result = chi_sns(df_model,col)
    print(result[0],'\n')
    print(result[1],'\n')
    
    p = result[1].iloc[1,1]
    score = importance(p)
    print('Importance: %s\n'%score)

[[X34modelposition]]


[X34modelposition_1]
          X34modelposition_1           
                           N    Y   All
X1SNS                                  
Facebook                 306  382   688
instagram                288  372   660
All                      594  754  1348 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    0.0965
1                    p-value =    0.7560
2               Cramer's phi =    0.0085 

Importance: X

[X34modelposition_2]
          X34modelposition_2            
                           N     Y   All
X1SNS                                   
Facebook                  78   610   688
instagram                 59   601   660
All                      137  1211  1348 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    2.1212
1                    p-value =    0.1453
2               Cramer's phi =    0.0397 

Importance: X

[X34modelposition_3]
          X34modelposition_3           
                      

### 4background

In [12]:
chi_iter_sns(df_sns_clean,'X4background',{1:'wall',2:'indoor',3:'outdoor',4:'others'})

[indoor]
          indoor           
               0    1   All
X1SNS                      
Facebook     824  176  1000
instagram    880  117   997
All         1704  293  1997 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =   13.7164
1                    p-value =    0.0002
2               Cramer's phi =    0.0829 

Importance: ***

[others]
          others          
               0   1   All
X1SNS                     
Facebook     944  56  1000
instagram    956  41   997
All         1900  97  1997 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    2.3909
1                    p-value =    0.1220
2               Cramer's phi =    0.0346 

Importance: X

[outdoor]
          outdoor           
                0    1   All
X1SNS                       
Facebook      675  325  1000
instagram     765  232   997
All          1440  557  1997 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =   21.1484
1               

### 5Text

In [13]:
col = 'X5text'

result = chi_sns(df_sns_clean,col)
p = result[1].iloc[1,1]
score = importance(p)

print('[%s]'%col)
print(result[0],'\n')
print(result[1],'\n')
print('Importance: %s\n'%score)

[X5text]
          X5text           
               N    Y   All
X1SNS                      
Facebook     839  161  1000
instagram    884  112   996
All         1723  273  1996 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    9.9622
1                    p-value =    0.0016
2               Cramer's phi =    0.0706 

Importance: **



In [14]:
df_text = df_sns_clean[df_sns_clean['X5text'] == 1]

In [15]:

#result = t_test_sns(df_test,'31number')


ttest_col = ['X51words']
for col in ttest_col:
    print('[%s]'%col)
    result = t_test_sns(df_text,col)
    p = result[1].iloc[3,1]
    print(result[0],'\n')
    print(result[1],'\n')
    score = importance(p)
    print('Importance: %s\n'%score)
    
    

[X51words]
    Variable      N      Mean         SD        SE  95% Conf.  Interval
0   Facebook  161.0  6.416149   8.595463  0.677417   5.078316  7.753982
1  Instagram  112.0  6.160714  13.061568  1.234202   3.715060  8.606368
2   combined  273.0  6.311355  10.634733  0.643643   5.044199  7.578511 

                                Independent t-test   results
0             Difference (Facebook - Instagram) =     0.2554
1                            Degrees of freedom =   271.0000
2                                             t =     0.1949
3                         Two side test p value =     0.8456
4  Mean of Facebook > mean of Instagram p value =     0.5772
5  Mean of Facebook < mean of Instagram p value =     0.4228
6                                     Cohen's d =     0.0240
7                                     Hedge's g =     0.0239
8                                 Glass's delta =     0.0297
9                                             r =     0.0118 

Importance: X



In [16]:
col = 'X54promotingmessa'

result = chi_sns(df_text,col)
p = result[1].iloc[1,1]
score = importance(p)

print('[%s]'%col)
print(result[0],'\n')
print(result[1],'\n')
print('Importance: %s\n'%score)

[X54promotingmessa]
          X54promotingmessa         
                          N   Y  All
X1SNS                               
Facebook                141  20  161
instagram               103   9  112
All                     244  29  273 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    1.3387
1                    p-value =    0.2473
2               Cramer's phi =    0.0700 

Importance: X



In [17]:
position_col_name = 'X53textposition_'
df_position = df_text

print("[[%s]]\n\n"%(position_col_name.replace("_","")))
position_col = [position_col_name in s for s  in df_position.columns]
position_columns = df_model.loc[:,position_col].columns

for col in position_columns:
    print('[%s]'%col)
    result = chi_sns(df_model,col)
    print(result[0],'\n')
    print(result[1],'\n')
    p = result[1].iloc[1,1]
    score = importance(p)
    print('Importance: %s\n'%score)

[[X53textposition]]


[X53textposition_1]
          X53textposition_1         
                          N   Y  All
X1SNS                               
Facebook                 76  17   93
instagram                40  11   51
All                     116  28  144 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    0.2275
1                    p-value =    0.6334
2               Cramer's phi =    0.0397 

Importance: X

[X53textposition_2]
          X53textposition_2         
                          N   Y  All
X1SNS                               
Facebook                 72  21   93
instagram                32  19   51
All                     104  40  144 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    3.5354
1                    p-value =    0.0601
2               Cramer's phi =    0.1567 

Importance: X

[X53textposition_3]
          X53textposition_3         
                          N   Y  All
X1SNS                             

### 6Brandname

In [18]:
col = 'X6brandname'

result = chi_sns(df_sns_clean,col)
p = result[1].iloc[1,1]
score = importance(p)

print('[%s]'%col)
print(result[0],'\n')
print(result[1],'\n')
print('Importance: %s\n'%score)

[X6brandname]
          X6brandname           
                    N    Y   All
X1SNS                           
Facebook          812  188  1000
instagram         811  186   997
All              1623  374  1997 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    0.0068
1                    p-value =    0.9343
2               Cramer's phi =    0.0018 

Importance: X



In [19]:
df_brand = df_sns_clean[(df_sns_clean['X6brandname'] == 1)]

cols = ['X61brandexpose','X65partofprod']
for col in cols: 
    result = chi_sns(df_brand,col)
    p = result[1].iloc[1,1]
    score = importance(p)

    print('[%s]'%col)
    print(result[0],'\n')
    print(result[1],'\n')
    print('Importance: %s\n'%score)
    
col = 'X64brandvisib'
result = chi_sns(df_brand,col,{0:'low',1:'high'})
p = result[1].iloc[1,1]
score = importance(p)

print('[%s]'%col)
print(result[0],'\n')
print(result[1],'\n')

print('Importance: %s\n'%score)

[X61brandexpose]
          X61brandexpose          
                       N    Y  All
X1SNS                             
Facebook              11  177  188
instagram             25  161  186
All                   36  338  374 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    6.1913
1                    p-value =    0.0128
2               Cramer's phi =    0.1287 

Importance: *

[X65partofprod]
          X65partofprod          
                      N    Y  All
X1SNS                            
Facebook             91   97  188
instagram            34  152  186
All                 125  249  374 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =   38.1310
1                    p-value =    0.0000
2               Cramer's phi =    0.3193 

Importance: ***

[X64brandvisib]
          X64brandvisib          
                   high  low  All
X1SNS                            
Facebook             94   94  188
instagram            95   91  186

In [20]:

position_col_name = 'X63brandposition_'
df_position = df_brand

print("[[%s]]\n\n"%(position_col_name.replace("_","")))
position_col = [position_col_name in s for s  in df_position.columns]
position_columns = df_model.loc[:,position_col].columns

for col in position_columns:
    print('[%s]'%col)
    result = chi_sns(df_model,col)
    p = result[1].iloc[1,1]
    print(result[0],'\n')
    print(result[1],'\n')
    score = importance(p)
    print('Importance: %s\n'%score)

[[X63brandposition]]


[X63brandposition_1]
          X63brandposition_1         
                           N   Y  All
X1SNS                                
Facebook                  70   8   78
instagram                 58  11   69
All                      128  19  147 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    1.0516
1                    p-value =    0.3051
2               Cramer's phi =    0.0846 

Importance: X

[X63brandposition_2]
          X63brandposition_2         
                           N   Y  All
X1SNS                                
Facebook                  67  11   78
instagram                 55  14   69
All                      122  25  147 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    0.9930
1                    p-value =    0.3190
2               Cramer's phi =    0.0822 

Importance: X

[X63brandposition_3]
          X63brandposition_3         
                           N   Y  All
X1SNS           

In [21]:

position_col_name = 'X66whproduct_'
df_position = df_brand

print("[[%s]]\n\n"%(position_col_name.replace("_","")))
position_col = [position_col_name in s for s  in df_position.columns]
position_columns = df_model.loc[:,position_col].columns

for col in position_columns:
    print('[%s]'%col)
    result = chi_sns(df_model,col)
    print(result[0],'\n')
    print(result[1],'\n')
    p = result[1].iloc[1,1]
    score = importance(p)
    print('Importance: %s\n'%score)

[[X66whproduct]]


[X66whproduct_accessories]
          X66whproduct_accessories       
                                 N  Y All
X1SNS                                    
Facebook                        43  1  44
instagram                       53  0  53
All                             96  1  97 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    1.2171
1                    p-value =    0.2699
2               Cramer's phi =    0.1120 

Importance: X

[X66whproduct_bag]
          X66whproduct_bag       
                         N  Y All
X1SNS                            
Facebook                39  5  44
instagram               51  2  53
All                     90  7  97 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    2.0685
1                    p-value =    0.1504
2               Cramer's phi =    0.1460 

Importance: X

[X66whproduct_clothes_bottom]
          X66whproduct_clothes_bottom       
                                    N 

### 7Brandlogo

In [22]:
col = 'X7logo'

result = chi_sns(df_sns_clean,col)
p = result[1].iloc[1,1]
score = importance(p)

print('[%s]'%col)
print(result[0],'\n')
print(result[1],'\n')
print('Importance: %s\n'%score)

[X7logo]
          X7logo           
               N    Y   All
X1SNS                      
Facebook     623  377  1000
instagram    591  406   997
All         1214  783  1997 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    1.9131
1                    p-value =    0.1666
2               Cramer's phi =    0.0310 

Importance: X



In [23]:
df_logo = df_sns_clean[(df_sns_clean['X7logo'] == 1)]

##
col = 'X71logoexposure'
result = chi_sns(df_logo,col,{1:'whole',0:'part'})
p = result[1].iloc[1,1]
score = importance(p)

print('[%s]'%col)
print(result[0],'\n')
print(result[1],'\n')
print('Importance: %s\n'%score)

##
col = 'X75logovisib'
result = chi_sns(df_logo,col,{0:'low',1:'high'})
p = result[1].iloc[1,1]
score = importance(p)

print('[%s]'%col)
print(result[0],'\n')
print(result[1],'\n')
print('Importance: %s\n'%score)

##
col = 'X76logopartofprod'
result = chi_sns(df_logo,col)
p = result[1].iloc[1,1]
score = importance(p)

print('[%s]'%col)
print(result[0],'\n')
print(result[1],'\n')
print('Importance: %s\n'%score)

[X71logoexposure]
          X71logoexposure           
                     part whole  All
X1SNS                               
Facebook               14   362  376
instagram              30   376  406
All                    44   738  782 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    4.9401
1                    p-value =    0.0262
2               Cramer's phi =    0.0795 

Importance: *

[X75logovisib]
          X75logovisib          
                  high  low  All
X1SNS                           
Facebook           192  185  377
instagram          247  159  406
All                439  344  783 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    7.7924
1                    p-value =    0.0052
2               Cramer's phi =    0.0998 

Importance: **

[X76logopartofprod]
          X76logopartofprod          
                          N    Y  All
X1SNS                                
Facebook                107  270  377
instagra

In [24]:
chi_iter_sns(df_logo,'X74logotype',{1:'Image',2:'Text',3:'Both'})

[Both]
          Both          
             0    1  All
X1SNS                   
Facebook   272  105  377
instagram  273  133  406
All        545  238  783 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    2.2249
1                    p-value =    0.1358
2               Cramer's phi =    0.0533 

Importance: X

[Image]
          Image          
              0    1  All
X1SNS                    
Facebook    167  210  377
instagram   173  233  406
All         340  443  783 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    0.2262
1                    p-value =    0.6343
2               Cramer's phi =    0.0170 

Importance: X

[Text]
          Text          
             0    1  All
X1SNS                   
Facebook   315   62  377
instagram  366   40  406
All        681  102  783 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    7.5007
1                    p-value =    0.0062
2               Cramer's phi = 

In [25]:
position_col_name = 'X73logoposition_'
df_position = df_logo

print("[[%s]]\n\n"%(position_col_name.replace("_","")))
position_col = [position_col_name in s for s  in df_position.columns]
position_columns = df_model.loc[:,position_col].columns

for col in position_columns:
    print('[%s]'%col)
    result = chi_sns(df_model,col)
    print(result[0],'\n')
    print(result[1],'\n')
    p = result[1].iloc[1,1]
    score = importance(p)
    print('Importance: %s\n'%score)

[[X73logoposition]]


[X73logoposition_1]
          X73logoposition_1         
                          N   Y  All
X1SNS                               
Facebook                191  29  220
instagram               194  20  214
All                     385  49  434 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    1.5938
1                    p-value =    0.2068
2               Cramer's phi =    0.0606 

Importance: X

[X73logoposition_2]
          X73logoposition_2         
                          N   Y  All
X1SNS                               
Facebook                173  47  220
instagram               167  47  214
All                     340  94  434 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    0.0229
1                    p-value =    0.8796
2               Cramer's phi =    0.0073 

Importance: X

[X73logoposition_3]
          X73logoposition_3         
                          N   Y  All
X1SNS                             

### 8Product Exposure

In [26]:
pd.DataFrame(df_sns_clean[(df_sns_clean['X8product'] == 1)].isna().sum()).T

Unnamed: 0,BrandID,BrandCategory,PhotoID,PostURL,PhotoURL,PostDate,PostTime,DownloadDate,Type,type_video,Like,LikeAlbum,Comments,Comments_album,NumHashtags,Content,ContentWithoutLink,X1SNS,X2brand,X3model,X31number,X32face,X33body,X34modelposition,X4background,X5text,X51words,X52,X53textposition,X54promotingmessa,X6brandname,X61brandexpose,X62,X63brandposition,X64brandvisib,X65partofprod,X66whproduct,X7logo,X71logoexposure,X72,X73logoposition,X74logotype,X75logovisib,X76logopartofprod,X8product,X81productexpose,X82,X83productposition,X9,X10angle,X11,X12hashtag,X12_1number,X12_2_1content,X12_2_2,X12_2_3,X12_2_4,X12_2_5,X12_2_6,X13text,X13_1,X13_2promo,X13_2promo_wo_kor,X13_3link,XBrandCategory_SPA,XBrandCategory_casual,XBrandCategory_luxuries,XBrandCategory_sport,X34modelposition_1,X34modelposition_2,X34modelposition_3,X34modelposition_4,X34modelposition_5,X34modelposition_6,X34modelposition_7,X34modelposition_8,X34modelposition_9,X4background_indoor,X4background_others,X4background_outdoor,X4background_wall,X53textposition_1,X53textposition_2,X53textposition_3,X53textposition_4,X53textposition_5,X53textposition_6,X53textposition_7,X53textposition_8,X53textposition_9,X63brandposition_1,X63brandposition_2,X63brandposition_3,X63brandposition_4,X63brandposition_5,X63brandposition_6,X63brandposition_7,X63brandposition_8,X63brandposition_9,X66whproduct_accessories,X66whproduct_bag,X66whproduct_clothes_bottom,X66whproduct_clothes_top,X66whproduct_cosmetics,X66whproduct_hat,X66whproduct_others,X66whproduct_shoes,X73logoposition_1,X73logoposition_2,X73logoposition_3,X73logoposition_4,X73logoposition_5,X73logoposition_6,X73logoposition_7,X73logoposition_8,X73logoposition_9,X74logotype_both,X74logotype_image,X74logotype_text,X83productposition_1,X83productposition_2,X83productposition_3,X83productposition_4,X83productposition_5,X83productposition_6,X83productposition_7,X83productposition_8,X83productposition_9,X10angle_extreme_high,X10angle_eye,X10angle_high,X10angle_low,X10angle_other
0,0,0,0,0,2,0,0,0,0,0,4,1610,933,1614,3,34,37,0,0,0,452,452,453,452,0,1,1571,1761,1571,1571,0,1441,1761,1441,1441,1441,1511,0,1058,1760,1057,1057,1057,1056,0,0,1761,0,1761,0,1761,0,0,0,0,0,0,0,0,0,0,39,66,37,0,0,0,0,452,452,452,452,452,452,452,452,452,0,0,0,0,1571,1571,1571,1571,1571,1571,1571,1571,1571,1441,1441,1441,1441,1441,1441,1441,1441,1441,1511,1511,1511,1511,1511,1511,1511,1511,1057,1057,1057,1057,1057,1057,1057,1057,1057,1057,1057,1057,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [27]:
col = 'X8product'

result = chi_sns(df_sns_clean,col)
p = result[1].iloc[1,1]
score = importance(p)

print('[%s]'%col)
print(result[0],'\n')
print(result[1],'\n')
print('Importance: %s\n'%score)

df_product = df_sns_clean[(df_sns_clean['X8product'] == 1)]

col = 'X81productexpose'
result = chi_sns(df_product,col,{1:'Whole',0:'Part'})
p = result[1].iloc[1,1]
score = importance(p)

print('[%s]'%col)
print(result[0],'\n')
print(result[1],'\n')
print('Importance: %s\n'%score)


[X8product]
          X8product            
                  N     Y   All
X1SNS                          
Facebook        152   848  1000
instagram        84   913   997
All             236  1761  1997 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =   21.9880
1                    p-value =    0.0000
2               Cramer's phi =    0.1049 

Importance: ***

[X81productexpose]
          X81productexpose            
                      Part Whole   All
X1SNS                                 
Facebook               154   694   848
instagram              207   706   913
All                    361  1400  1761 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    5.4923
1                    p-value =    0.0191
2               Cramer's phi =    0.0558 

Importance: *



In [28]:
position_col_name = 'X83productposition_'
df_position = df_product

print("[[%s]]\n\n"%(position_col_name.replace("_","")))
position_col = [position_col_name in s for s  in df_position.columns]
position_columns = df_model.loc[:,position_col].columns

for col in position_columns:
    print('[%s]'%col)
    result = chi_sns(df_model,col)
    print(result[0],'\n')
    print(result[1],'\n')
    p = result[1].iloc[1,1]
    score = importance(p)
    print('Importance: %s\n'%score)


[[X83productposition]]


[X83productposition_1]
          X83productposition_1           
                             N    Y   All
X1SNS                                    
Facebook                   371  288   659
instagram                  399  250   649
All                        770  538  1308 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    3.6260
1                    p-value =    0.0569
2               Cramer's phi =    0.0527 

Importance: X

[X83productposition_2]
          X83productposition_2           
                             N    Y   All
X1SNS                                    
Facebook                   200  459   659
instagram                  222  427   649
All                        422  886  1308 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    2.2264
1                    p-value =    0.1357
2               Cramer's phi =    0.0413 

Importance: X

[X83productposition_3]
          X83productposition_3      

### Camera Angle

In [29]:
angle = {1:'extreme high',2:'high',3:'eye',4:'low',5:'dutch',6:'other'}
chi_iter_sns(df_sns_clean,'X10angle',angle)

[dutch]
          dutch         
              0  1   All
X1SNS                   
Facebook   1000  0  1000
instagram   996  1   997
All        1996  1  1997 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    1.0035
1                    p-value =    0.3165
2               Cramer's phi =    0.0224 

Importance: X

[extreme high]
          extreme high          
                     0   1   All
X1SNS                           
Facebook           982  18  1000
instagram          973  24   997
All               1955  42  1997 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    0.8941
1                    p-value =    0.3444
2               Cramer's phi =    0.0212 

Importance: X

[eye]
           eye            
             0     1   All
X1SNS                     
Facebook   259   741  1000
instagram  250   747   997
All        509  1488  1997 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =    0.1788
1          

### Hash Tag

In [31]:
col = 'X12hashtag'

result = chi_sns(df_sns_clean,col)
p = result[1].iloc[1,1]
score = importance(p)

print('[%s]'%col)
print(result[0],'\n')
print(result[1],'\n')
print('Importance: %s\n'%score)

[X12hashtag]
          X12hashtag            
                   N     Y   All
X1SNS                           
Facebook         753   247  1000
instagram        239   758   997
All              992  1005  1997 

                Chi-square test   results
0  Pearson Chi-square ( 1.0) =   526.1452
1                    p-value =     0.0000
2               Cramer's phi =     0.5133 

Importance: ***



In [32]:
df_sns_clean['X12_2_5'].describe()

count    1997.0
mean        0.0
std         0.0
min         0.0
25%         0.0
50%         0.0
75%         0.0
max         0.0
Name: X12_2_5, dtype: float64

In [33]:
df_hashtag = df_sns_clean[df_sns_clean['X12hashtag']==1]

In [34]:
print('[[Total]]\n')

ttest_col = ['X12_1number','X12_2_1content', 'X12_2_2', 'X12_2_3', 'X12_2_4', 'X12_2_6']


for col in ttest_col:
    print('[%s]'%col)
    result = t_test_sns(df_sns_clean,col)
    p = result[1].iloc[3,1]
    print(result[0],'\n')
    print(result[1],'\n')
    score = importance(p)
    print('Importance: %s\n'%score)
    
print('[[When Hashtag]]\n')

ttest_col = ['X12_1number','X12_2_1content', 'X12_2_2', 'X12_2_3', 'X12_2_4', 'X12_2_6']


for col in ttest_col:
    print('[%s]'%col)
    result = t_test_sns(df_hashtag,col)
    p = result[1].iloc[3,1]
    print(result[0],'\n')
    print(result[1],'\n')
    score = importance(p)
    print('Importance: %s\n'%score)
    

[[Total]]

[X12_1number]
    Variable       N      Mean        SD        SE  95% Conf.  Interval
0   Facebook  1000.0  0.292000  0.590834  0.018684   0.255336  0.328664
1  Instagram   997.0  1.348044  1.396434  0.044226   1.261258  1.434830
2   combined  1997.0  0.819229  1.194456  0.026729   0.766809  0.871648 

                                Independent t-test    results
0             Difference (Facebook - Instagram) =     -1.0560
1                            Degrees of freedom =   1995.0000
2                                             t =    -22.0193
3                         Two side test p value =      0.0000
4  Mean of Facebook > mean of Instagram p value =      0.0000
5  Mean of Facebook < mean of Instagram p value =      1.0000
6                                     Cohen's d =     -0.9855
7                                     Hedge's g =     -0.9851
8                                 Glass's delta =     -1.7874
9                                             r =      0.4422 

I

## Text

In [35]:
pd.DataFrame(df_sns_clean.isna().sum()).T

Unnamed: 0,BrandID,BrandCategory,PhotoID,PostURL,PhotoURL,PostDate,PostTime,DownloadDate,Type,type_video,Like,LikeAlbum,Comments,Comments_album,NumHashtags,Content,ContentWithoutLink,X1SNS,X2brand,X3model,X31number,X32face,X33body,X34modelposition,X4background,X5text,X51words,X52,X53textposition,X54promotingmessa,X6brandname,X61brandexpose,X62,X63brandposition,X64brandvisib,X65partofprod,X66whproduct,X7logo,X71logoexposure,X72,X73logoposition,X74logotype,X75logovisib,X76logopartofprod,X8product,X81productexpose,X82,X83productposition,X9,X10angle,X11,X12hashtag,X12_1number,X12_2_1content,X12_2_2,X12_2_3,X12_2_4,X12_2_5,X12_2_6,X13text,X13_1,X13_2promo,X13_2promo_wo_kor,X13_3link,XBrandCategory_SPA,XBrandCategory_casual,XBrandCategory_luxuries,XBrandCategory_sport,X34modelposition_1,X34modelposition_2,X34modelposition_3,X34modelposition_4,X34modelposition_5,X34modelposition_6,X34modelposition_7,X34modelposition_8,X34modelposition_9,X4background_indoor,X4background_others,X4background_outdoor,X4background_wall,X53textposition_1,X53textposition_2,X53textposition_3,X53textposition_4,X53textposition_5,X53textposition_6,X53textposition_7,X53textposition_8,X53textposition_9,X63brandposition_1,X63brandposition_2,X63brandposition_3,X63brandposition_4,X63brandposition_5,X63brandposition_6,X63brandposition_7,X63brandposition_8,X63brandposition_9,X66whproduct_accessories,X66whproduct_bag,X66whproduct_clothes_bottom,X66whproduct_clothes_top,X66whproduct_cosmetics,X66whproduct_hat,X66whproduct_others,X66whproduct_shoes,X73logoposition_1,X73logoposition_2,X73logoposition_3,X73logoposition_4,X73logoposition_5,X73logoposition_6,X73logoposition_7,X73logoposition_8,X73logoposition_9,X74logotype_both,X74logotype_image,X74logotype_text,X83productposition_1,X83productposition_2,X83productposition_3,X83productposition_4,X83productposition_5,X83productposition_6,X83productposition_7,X83productposition_8,X83productposition_9,X10angle_extreme_high,X10angle_eye,X10angle_high,X10angle_low,X10angle_other
0,0,0,0,0,3,0,0,0,0,0,5,1836,1019,1841,5,37,40,0,0,0,648,648,649,648,0,1,1722,1997,1722,1722,0,1623,1997,1623,1623,1623,1747,0,1215,1996,1214,1214,1214,1213,0,236,1997,236,1997,0,1997,0,0,0,0,0,0,0,0,0,0,42,70,40,0,0,0,0,648,648,648,648,648,648,648,648,648,0,0,0,0,1722,1722,1722,1722,1722,1722,1722,1722,1722,1623,1623,1623,1623,1623,1623,1623,1623,1623,1747,1747,1747,1747,1747,1747,1747,1747,1214,1214,1214,1214,1214,1214,1214,1214,1214,1214,1214,1214,236,236,236,236,236,236,236,236,236,0,0,0,0,0


In [36]:
len(df_sns_clean['ContentWithoutLink'][0])

158

In [37]:
col = 'X13text'

result = chi_sns(df_sns_clean,col)
p = result[1].iloc[1,1]
score = importance(p)

print('[%s]'%col)
print(result[0],'\n')
print(result[1],'\n')
print('Importance: %s\n'%score)

[X13text]
          X13text            
                N     Y   All
X1SNS                        
Facebook       42   958  1000
instagram       0   997   997
All            42  1955  1997 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =   42.7736
1                    p-value =    0.0000
2               Cramer's phi =    0.1464 

Importance: ***



In [38]:
ttest_col = ['X13_1']


for col in ttest_col:
    print('[%s]'%col)
    result = t_test_sns(df_sns_clean,col)
    p = result[1].iloc[3,1]
    print(result[0],'\n')
    print(result[1],'\n')
    score = importance(p)
    print('Importance: %s\n'%score)

[X13_1]
    Variable       N       Mean         SD        SE  95% Conf.   Interval
0   Facebook  1000.0  21.880000  19.088792  0.603641  20.695451  23.064549
1  Instagram   997.0  22.413240  15.554405  0.492613  21.446561  23.379918
2   combined  1997.0  22.146219  17.411835  0.389633  21.382090  22.910349 

                                Independent t-test    results
0             Difference (Facebook - Instagram) =     -0.5332
1                            Degrees of freedom =   1995.0000
2                                             t =     -0.6842
3                         Two side test p value =      0.4939
4  Mean of Facebook > mean of Instagram p value =      0.2470
5  Mean of Facebook < mean of Instagram p value =      0.7530
6                                     Cohen's d =     -0.0306
7                                     Hedge's g =     -0.0306
8                                 Glass's delta =     -0.0279
9                                             r =      0.0153 

Import

In [39]:
df_posting = df_sns_clean[(df_sns_clean['X13text']==1)&(df_sns_clean['X13_2promo']!=3)]
result = chi_sns(df_posting,'X13_2promo')
p = result[1].iloc[1,1]
score = importance(p)

print('[%s]'%col)
print(result[0],'\n')
print(result[1],'\n')
print('Importance: %s\n'%score)

[X13_1]
          X13_2promo           
                 2.0    Y   All
X1SNS                          
Facebook         532  398   930
instagram        422  575   997
All              954  973  1927 

                Chi-square test  results
0  Pearson Chi-square ( 1.0) =   42.6038
1                    p-value =    0.0000
2               Cramer's phi =    0.1487 

Importance: ***

