In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
from scipy import stats

In [3]:
# 1. Read the measurement table from "sample_data.csv" (indexed by its "time"
#    column) and the machine table from "machine_file.csv".
df = pd.read_csv('sample_data.csv', index_col='time')
machine_df = pd.read_csv('machine_file.csv')

# Add a "type" column to df with a left join on "ID". "time" is moved out of
# the index for the merge and put back afterwards, because a column-on-column
# merge does not keep the original index.
df = (
    df.reset_index()
    .merge(machine_df[['ID', 'type']], on='ID', how='left')
    .set_index('time')
)

# 2. Per-ID summary of "value": mean, variance and sample size.
result = df.groupby('ID')['value'].agg(['mean', 'var', 'count'])

In [4]:
# Preview the first five rows of the merged frame.
df.head(n=5)

Unnamed: 0_level_0,ID,TF,value,type
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-02-27 23:08:15,1BB040,False,0.1816,B
2023-03-04 13:29:52,1AD020,False,0.1117,C
2023-03-05 06:51:47,1AD010,True,0.0951,B
2023-02-28 21:54:54,1BB050,True,0.7507,A
2023-02-27 09:35:43,3HD100,True,0.6839,B


In [30]:
def variance_test(data):
    """Run Levene's test for equal variances of ``value`` across IDs.

    Null hypothesis: the population variance of ``value`` does not differ
    between IDs.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with an ``ID`` column (one group per distinct ID) and a
        ``value`` column holding the samples. The notebook passes frames
        indexed by ``time``; the index is not used here.

    Returns
    -------
    None
        The outcome is printed, not returned. When fewer than two distinct
        IDs are present, an error message is printed instead.
    """
    # One sample vector per distinct ID, in order of first appearance.
    samples = []
    for machine_id in data['ID'].unique():
        samples.append(data.loc[data['ID'] == machine_id, 'value'])

    # Levene's test needs at least two groups to compare.
    if len(samples) <= 1:
        print('Error: Must enter at least two input sample vectors.')
        return

    outcome = stats.levene(*samples)
    print(f'LeveneResult(statistic={outcome.statistic}, pvalue={outcome.pvalue})')

In [None]:
def variance_bartlett_test(data):
    """Run Bartlett's test for equal variances of ``value`` across IDs.

    Null hypothesis: the population variance of ``value`` does not differ
    between IDs.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with an ``ID`` column (one group per distinct ID) and a
        ``value`` column holding the samples. The notebook passes frames
        indexed by ``time``; the index is not used here.

    Returns
    -------
    None
        The outcome is printed, not returned. When fewer than two distinct
        IDs are present, an error message is printed instead.
    """
    # One sample vector per distinct ID, in order of first appearance.
    groups = [data.loc[data['ID'] == machine_id, 'value']
              for machine_id in data['ID'].unique()]

    # Bartlett's test needs at least two groups to compare.
    if len(groups) < 2:
        print('Error: Must enter at least two input sample vectors.')
        return

    result = stats.bartlett(*groups)
    # Label the output with the test that was actually run; the previous
    # "LeveneResult" label made Bartlett output indistinguishable from
    # variance_test() output.
    print(f'BartlettResult(statistic={result.statistic}, pvalue={result.pvalue})')

In [31]:
# Levene test restricted to the rows whose machine type is 'D'.
variance_test(df.loc[df['type'] == 'D'])

Error: Must enter at least two input sample vectors.


In [32]:
# Run the Levene test separately for every machine type found in df.
types = df['type'].unique()
for machine_type in types:
    rows_of_type = df.loc[df['type'] == machine_type]
    variance_test(rows_of_type)

LeveneResult(statistic=1.3941460847169003, pvalue=0.2545138083829031)
LeveneResult(statistic=1.1260607917486987, pvalue=0.30950801834299213)
LeveneResult(statistic=0.4288114635893607, pvalue=0.5218762128473581)
Error: Must enter at least two input sample vectors.


In [12]:
# All recorded values for machine '1BB040', selected in a single .loc call.
df.loc[df['ID'] == '1BB040', 'value']

time
2023-02-27 23:08:15    0.1816
2023-03-05 20:09:10    0.5422
2023-03-01 15:52:03    0.7726
2023-02-27 23:17:18    0.7492
2023-03-04 12:17:03    0.4131
2023-03-02 17:58:02    0.3703
2023-02-27 18:42:48    0.0268
2023-03-04 03:36:59    0.4109
2023-03-02 10:54:10    0.2427
2023-02-28 17:28:04    0.6154
2023-03-01 20:49:16    0.8293
2023-03-02 15:03:51    0.2074
2023-03-02 11:23:14    0.4104
2023-03-03 17:14:24    0.0766
2023-03-05 19:18:51    0.0468
2023-03-03 18:27:43    0.8854
2023-02-28 12:55:54    0.8723
Name: value, dtype: float64