In [1]:
from statistics import NormalDist
import numpy as np
from GageRnR import GageRnR
from GageRnR import GageRnR, Component, Result
from pathlib import Path
import pandas as pd
from backend.research.make_csv import  MeasurementsData, EvaluateMetrics, EPorosity, IoU_pores, F1_binary

In [2]:
class MSA:
    r: int # number replications or trials
    t: int # number technicians
    p: int # number parts
    def __init__(self): ...
    def from_csv(self, path_to_csv: Path, drop_columns: list = None):
        if isinstance(path_to_csv, str):   path_to_csv = Path(path_to_csv)
        if '.csv' in path_to_csv.name:      df = pd.read_csv(path_to_csv)
        elif '.xlsx' in path_to_csv.name:   df = pd.read_excel(path_to_csv)
        else:   raise Exception('Unknown file format')
        data = []
        for operator in df['Operator'].unique():
            part_data = []
            for part in df['Parts'].unique():
                _data = df[(df['Operator'] == operator) & (df['Parts'] == part)].drop(columns=['Operator', 'Parts']).values[0]
                part_data.append(_data)
            data.append(part_data)
        self.data = np.array(data)
        self.calculate_grnr()
        self.calculate_sgm()
        self.calculate_metrics()
        return self
    
    def from_array(self, data: np.ndarray):
        self.data = data
        self.calculate_grnr()
        self.calculate_sgm()
        self.calculate_metrics()
        return self
    
    def calculate_grnr(self):
        self.g = GageRnR(self.data)
        self.result = self.g.calculate()
        self.RWI_df = self.result[Result.DF][Component.OPERATOR_BY_PART] + self.result[Result.DF][Component.MEASUREMENT]
        self.RWI_SS = self.result[Result.SS][Component.OPERATOR_BY_PART] + self.result[Result.SS][Component.MEASUREMENT]
        self.RWI_MS = self.RWI_SS / self.RWI_df
        self.p_interaction = self.result[Result.P][Component.OPERATOR_BY_PART]
        self.p_big = self.p_interaction > 0.05
        
    @property
    def t(self):   
        """Number of operators""" 
        return self.g.operators
    @property
    def p(self):
        """number of parts"""
        return self.g.parts
    @property
    def r(self):    
        """Number of measurements or replications"""
        return self.g.measurements
    
    def calculate_sgm(self):
        if self.p_big:
            self.sgm2_rpt = self.RWI_MS
        else:
            self.sgm2_rpt = self.result[Result.MS][Component.MEASUREMENT]

        self.sgm2_txp = (self.result[Result.MS][Component.OPERATOR_BY_PART] - self.sgm2_rpt) / self.r
        
        if self.p_big:
            self.sgm2_part = (
                self.result[Result.MS][Component.PART] - self.RWI_MS
            ) / (self.r * self.t)
        else:
            self.sgm2_part = (
                self.result[Result.MS][Component.PART] - self.result[Result.MS][Component.OPERATOR_BY_PART]
            ) / (self.r * self.t)
            
        if self.p_big:
            self.sgm2_tech = (
                self.result[Result.MS][Component.OPERATOR] - self.RWI_MS
            ) / (self.r * self.p)
            
        else: 
            self.sgm2_tech = (
                self.result[Result.MS][Component.OPERATOR] - self.result[Result.MS][Component.OPERATOR_BY_PART]
            ) / (self.r * self.p)
            
        self.sgm2_rpt  = max(0, self.sgm2_rpt)
        self.sgm2_txp  = max(0, self.sgm2_txp)
        self.sgm2_part = max(0, self.sgm2_part)
        self.sgm2_tech = max(0, self.sgm2_tech)
    
    def calculate_metrics(self):
        self._GageRnR = self.sgm2_rpt + self.sgm2_tech
        self._EV = self.sgm2_rpt
        self._TechVar = self.sgm2_tech + self.sgm2_txp
        self._P2P = self.sgm2_part
        self._Total_Var = self.sgm2_rpt + self.sgm2_part + self.sgm2_tech + self.sgm2_txp
        
    def GageRnR(self, prcntg: bool = True):
        if prcntg:      return self._GageRnR / self._Total_Var * 100
        else:           return self._GageRnR
    def EV(self, prcntg: bool = True):
        if prcntg:      return self._EV / self._Total_Var * 100
        else:           return self._EV
    def TechVar(self, prcntg: bool = True):
        if prcntg:      return self._TechVar / self._Total_Var * 100
        else:           return self._TechVar
    def P2P(self, prcntg: bool = True):
        if prcntg:      return self._P2P / self._Total_Var * 100
        else:           return self._P2P
    def Total_Var(self, prcntg: bool = True):
        if prcntg:      return self._Total_Var / self._Total_Var * 100
        else:           return self._Total_Var
        
    def summary(self):
        print(self.g.summary())
        
    def get_result(self, prcntg: bool = True):
        print(f"""
    GageRnR: {round(self.GageRnR(prcntg), 2)}{'%' if prcntg else ''}
    EV: {round(self.EV(prcntg), 2)}{'%' if prcntg else ''}
    TechVar: {round(self.TechVar(prcntg), 2)}{'%' if prcntg else ''}
    P2P: {round(self.P2P(prcntg), 2)}{'%' if prcntg else ''}
    Total_Var: {round(self.Total_Var(prcntg), 2)} {'%' if prcntg else ''}
              """)
        
    def ANOVA(self):
        data = [
            [self.GageRnR(False), self.GageRnR()],
            [self.EV(False), self.EV()],
            [self.TechVar(False), self.TechVar()],
            [self.P2P(False), self.P2P()],
            [self.Total_Var(False), self.Total_Var()]
        ]
        df = pd.DataFrame(data, columns=['      Variance', 'Variance / Total Variance, %'])
        df.index = ['GageRnR', 'EV', 'TechVar', 'P2P', 'Total_Var']
        return df

In [11]:
main_folder = Path('..')
users_folder1 = Path('../msa/exp/1')

csv_path1 = Path('backend/research/experiments.csv')
csv_path2 = Path('backend/research/data/exp2.csv')
users_folder2 = Path('../msa/exp/2')

metrics = EvaluateMetrics(metrics=[EPorosity, IoU_pores], border_metrics=[F1_binary, IoU_pores], n_diameter=7)
data = MeasurementsData(
    users_folder1, csv_path1, main_folder, metrics, part_col='Image'
)
# data.delete_attempt_from_parts_names(part_col='Parts')
#'Porosity relative difference',    'IoU_pores', 'F1 binary_7', 'IoU_pores_7']
df = data.convert_to_csv(Path('table.csv'), 'Annotation_accuracy')
df.head()
# data.df

ERROR:root:Users data is not loaded!


Unnamed: 0,Operator,Parts,attempt_1,attempt_2,attempt_3
0,A,ex0_300,0.277412,0.293515,0.30192
1,A,ex3_300,0.146538,0.099781,0.125287
2,A,ex1_300,0.359065,0.414201,0.377572
3,A,ex2_300,0.132265,0.107068,0.081827
4,B,ex0_300,0.336635,0.344507,0.322394


In [12]:
msa = MSA().from_csv(path_to_csv='table.csv')
prcntg = True
msa.ANOVA()
# msa.get_result(prcntg=prcntg)
#msa.GageRnR(prcntg=prcntg), msa.EV(prcntg=prcntg), msa.TechVar(prcntg=prcntg), msa.P2P(prcntg=prcntg), msa.Total_Var(prcntg=prcntg)

Unnamed: 0,Variance,"Variance / Total Variance, %"
GageRnR,0.01012,47.36896
EV,0.00086,4.023396
TechVar,0.01251,58.559921
P2P,0.007994,37.416683
Total_Var,0.021364,100.0


In [18]:
msa.summary()

╒═══════════════════════╤══════╤═════════╤════════╤═══════╤═══════╤═══════════╤═══════════╕
│ Sources of Variance   │   DF │      SS │     MS │   Var │   Std │ F-value   │ P-value   │
╞═══════════════════════╪══════╪═════════╪════════╪═══════╪═══════╪═══════════╪═══════════╡
│ Operator              │    3 │ 121.296 │ 40.432 │ 2.938 │ 1.714 │ 7.810     │ 0.007     │
├───────────────────────┼──────┼─────────┼────────┼───────┼───────┼───────────┼───────────┤
│ Part                  │    3 │  46.962 │ 15.654 │ 0.873 │ 0.934 │ 3.024     │ 0.086     │
├───────────────────────┼──────┼─────────┼────────┼───────┼───────┼───────────┼───────────┤
│ Operator by Part      │    9 │  46.594 │  5.177 │ 0.407 │ 0.638 │ 1.459     │ 0.205     │
├───────────────────────┼──────┼─────────┼────────┼───────┼───────┼───────────┼───────────┤
│ Measurement           │   32 │ 113.52  │  3.547 │ 3.547 │ 1.883 │           │           │
├───────────────────────┼──────┼─────────┼────────┼───────┼───────┼───────────┼─

In [15]:
print(msa.g.summary())

╒═══════════════════════╤══════╤═════════╤════════╤═══════╤═══════╤═══════════╤═══════════╕
│ Sources of Variance   │   DF │      SS │     MS │   Var │   Std │ F-value   │ P-value   │
╞═══════════════════════╪══════╪═════════╪════════╪═══════╪═══════╪═══════════╪═══════════╡
│ Operator              │    2 │   0.1   │  0.05  │ 0.001 │ 0.029 │ 2.013     │ 0.163     │
├───────────────────────┼──────┼─────────┼────────┼───────┼───────┼───────────┼───────────┤
│ Part                  │    9 │ 116.529 │ 12.948 │ 1.436 │ 1.198 │ 521.610   │ 0.000     │
├───────────────────────┼──────┼─────────┼────────┼───────┼───────┼───────────┼───────────┤
│ Operator by Part      │   18 │   0.447 │  0.025 │ 0     │ 0     │ 0.413     │ 0.980     │
├───────────────────────┼──────┼─────────┼────────┼───────┼───────┼───────────┼───────────┤
│ Measurement           │   60 │   3.606 │  0.06  │ 0.06  │ 0.245 │           │           │
├───────────────────────┼──────┼─────────┼────────┼───────┼───────┼───────────┼─

In [17]:
msa.result[Result.DF][Component.MEASUREMENT]

60

In [16]:
msa.sgm2_rpt

0.06009666666666666

In [61]:
a = NormalDist(mu=100, sigma=0.1)
b = NormalDist(mu=100, sigma=0.3)
c = NormalDist(mu=30, sigma=0.2)
data = np.array([[detail.samples(3)
                 for detail in [a, b, c]]
                 for __ in range(4)])

In [62]:
msa = MSA().from_array(data=data)
prcntg = True
msa.GageRnR(prcntg=prcntg), msa.EV(prcntg=prcntg), msa.TechVar(prcntg=prcntg), msa.P2P(prcntg=prcntg), msa.Total_Var(prcntg=prcntg)

(0.003068732266719593,
 0.003068732266719593,
 0.0005258433260691697,
 99.99640542440721,
 100.0)

In [56]:
print(msa.g.summary())

╒═══════════════════════╤══════╤═══════╤═══════╤═══════╤═══════╤═══════════╤═══════════╕
│ Sources of Variance   │   DF │    SS │    MS │   Var │   Std │ F-value   │ P-value   │
╞═══════════════════════╪══════╪═══════╪═══════╪═══════╪═══════╪═══════════╪═══════════╡
│ Operator              │    3 │ 0.019 │ 0.006 │ 0     │ 0     │ 0.320     │ 0.811     │
├───────────────────────┼──────┼───────┼───────┼───────┼───────┼───────────┼───────────┤
│ Part                  │    3 │ 0.051 │ 0.017 │ 0     │ 0     │ 0.842     │ 0.505     │
├───────────────────────┼──────┼───────┼───────┼───────┼───────┼───────────┼───────────┤
│ Operator by Part      │    9 │ 0.18  │ 0.02  │ 0.003 │ 0.052 │ 2.226     │ 0.047     │
├───────────────────────┼──────┼───────┼───────┼───────┼───────┼───────────┼───────────┤
│ Measurement           │   32 │ 0.288 │ 0.009 │ 0.009 │ 0.095 │           │           │
├───────────────────────┼──────┼───────┼───────┼───────┼───────┼───────────┼───────────┤
│ Total              

In [49]:
msa.GageRnR(), msa.EV(), msa.TechVar(), msa.P2P(), msa.Total_Var()

(100.0, 100.0, 0.0, 0.0, 100.0)

In [44]:
data.shape

(4, 4, 3)

In [73]:
g = GageRnR(data)
g.calculate()
print(g.summary())

╒═══════════════════════╤══════╤════════╤═══════╤═══════╤═══════╤═══════════╤═══════════╕
│ Sources of Variance   │   DF │     SS │    MS │   Var │   Std │ F-value   │ P-value   │
╞═══════════════════════╪══════╪════════╪═══════╪═══════╪═══════╪═══════════╪═══════════╡
│ Operator              │    3 │  0.501 │ 0.167 │ 0     │ 0     │ 0.155     │ 0.924     │
├───────────────────────┼──────┼────────┼───────┼───────┼───────┼───────────┼───────────┤
│ Part                  │    3 │ 10.488 │ 3.496 │ 0.201 │ 0.449 │ 3.236     │ 0.075     │
├───────────────────────┼──────┼────────┼───────┼───────┼───────┼───────────┼───────────┤
│ Operator by Part      │    9 │  9.722 │ 1.08  │ 0.024 │ 0.154 │ 1.097     │ 0.393     │
├───────────────────────┼──────┼────────┼───────┼───────┼───────┼───────────┼───────────┤
│ Measurement           │   32 │ 31.525 │ 0.985 │ 0.985 │ 0.993 │           │           │
├───────────────────────┼──────┼────────┼───────┼───────┼───────┼───────────┼───────────┤
│ Total   

In [46]:
result = g.calculate()

In [51]:
g.measurements

3

In [47]:
result[Result.MS][Component.OPERATOR_BY_PART]

0.8749324015178365