In [31]:
import pandas as pd
from scipy.stats import ttest_ind

In [32]:
main_data = pd.read_csv('주거래.csv')  
sub_data = pd.read_csv('부거래.csv')

In [33]:
sub_data.columns

Index(['고객ID', '월평균 거래건수', '최다가맹점업종명', '업종다양성지수', '거래 밀도 지수', '거래활동기간',
       '거래 연속성 비율', '총거래건수'],
      dtype='object')

In [34]:
variables = ['월평균 거래건수', '최다가맹점업종명', '업종다양성지수', '거래 밀도 지수', '거래활동기간',
       '거래 연속성 비율', '총거래건수']

In [35]:
sub_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56 entries, 0 to 55
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   고객ID       56 non-null     int64  
 1   월평균 거래건수   56 non-null     int64  
 2   최다가맹점업종명   56 non-null     object 
 3   업종다양성지수    56 non-null     int64  
 4   거래 밀도 지수   56 non-null     float64
 5   거래활동기간     56 non-null     int64  
 6   거래 연속성 비율  56 non-null     int64  
 7   총거래건수      56 non-null     int64  
dtypes: float64(1), int64(6), object(1)
memory usage: 3.6+ KB


In [39]:
for col in main_data.select_dtypes(include=['object']).columns:
    main_data[col] = pd.to_numeric(main_data[col], errors='coerce').astype('Int64')

In [40]:
for col in sub_data.select_dtypes(include=['object']).columns:
    sub_data[col] = pd.to_numeric(sub_data[col], errors='coerce').astype('Int64')

In [41]:
results = {}
for var in variables:
    t_stat, p_value = ttest_ind(main_data[var], sub_data[var], equal_var=False) 
    results[var] = {'t-statistic': t_stat, 'p-value': p_value}

In [42]:
for var, stats in results.items():
    print(f"Variable: {var}")
    print(f"  T-statistic: {stats['t-statistic']:.4f}")
    print(f"  P-value: {stats['p-value']:.4f}")
    if stats['p-value'] < 0.05:
        print(f"  Result: Statistically significant difference between groups (p < 0.05)")
    else:
        print(f"  Result: No statistically significant difference between groups (p ≥ 0.05)")

Variable: 월평균 거래건수
  T-statistic: 29.5688
  P-value: 0.0000
  Result: Statistically significant difference between groups (p < 0.05)
Variable: 최다가맹점업종명
  T-statistic: nan
  P-value: nan
  Result: No statistically significant difference between groups (p ≥ 0.05)
Variable: 업종다양성지수
  T-statistic: 588.3957
  P-value: 0.0000
  Result: Statistically significant difference between groups (p < 0.05)
Variable: 거래 밀도 지수
  T-statistic: 29.5876
  P-value: 0.0000
  Result: Statistically significant difference between groups (p < 0.05)
Variable: 거래활동기간
  T-statistic: 3.6978
  P-value: 0.0005
  Result: Statistically significant difference between groups (p < 0.05)
Variable: 거래 연속성 비율
  T-statistic: 1.8767
  P-value: 0.0659
  Result: No statistically significant difference between groups (p ≥ 0.05)
Variable: 총거래건수
  T-statistic: 31.6488
  P-value: 0.0000
  Result: Statistically significant difference between groups (p < 0.05)
