In [1]:
import pandas as pd
import numpy as np

## Initial DF Setup

In [15]:
### Setting the size of the gage r&r
num_parts = 10 #input(How many parts ran?)
num_trials = 3 #input(How many trials per part?)
num_operators = 3 #input(How many operators?)

In [17]:
### Reading the data in
df_raw = pd.read_csv('RawData.csv') # creating the variable df to present the data frame
df_raw.head(3)

Unnamed: 0,DateTime,SerialNum,CharName,X Pos,Y Pos,Z Pos,TP Measure,Diameter,Length,PassTP,PassDiameter,PassLength,PassTot
0,06324_131250,EXT1C0070008018000277700000063332611A,C2_3,-48.127,95.77,0.0,0.283,6.989,10.883,True,True,True,True
1,06324_131250,EXT1C0070008018000277700000063332611A,C4A,-99.079,-12.134,0.0,0.079,7.896,0.0,True,False,True,False
2,06324_131250,EXT1C0070008018000277700000063332611A,C4B,-99.059,-108.026,0.0,0.097,7.906,0.0,True,True,True,True


In [21]:
### Simplify the serial number

# With a new column, extracting the simple serial number 
df_raw['SN'] = df_raw['SerialNum'].str[27:30]  # Indexing starts at 0, so the 28th character is at index 27

df_raw.head(1)


Unnamed: 0,DateTime,SerialNum,CharName,X Pos,Y Pos,Z Pos,TP Measure,Diameter,Length,PassTP,PassDiameter,PassLength,PassTot,SN
0,06324_131250,EXT1C0070008018000277700000063332611A,C2_3,-48.127,95.77,0.0,0.283,6.989,10.883,True,True,True,True,63


In [34]:
### Verify Data is Complete

# Verify unique serial numbers matches the number of parts required
if df_raw['SN'].unique().size == num_parts:
    print(f'Serial numbers match the number of parts required')
else:
    print(f'ERROR: numbers DO NOT match the number of parts required')

# Verify all measurments are present
#if df_raw['TP Measure'] == "UTM":
#    print(f'UTM Code: Not all measurements present, see index')



Serial numbers match the number of parts required


## Translating True Position

### Working with one char before iterating

In [41]:
### Working with one characteristic before iterating

# New data frame that only contains DateTime, CharName, and TP Measure
df_Char_TP = df_raw[['DateTime', 'CharName', 'TP Measure']]
    # Keep the Datetime to esnure the order of measurements

# Filtering on a single characteristic, in this case C2_3
df_single_char = df_Char_TP[df_Char_TP['CharName'] == 'C2_3']

len(df_single_char)

# Verify the number of measurements matches the number of parts, operators, trials required
if len(df_single_char) == num_parts*num_trials*num_operators: # len() returns the number of rows
    print('Number of measurements matches the gage r&r size')
else:
    print('ERROR: Number of measurements DOES NOT matches the gage r&r size')

df_single_char.head(3)


Number of measurements matches the gage r&r size


Unnamed: 0,DateTime,CharName,TP Measure
0,06324_131250,C2_3,0.283
31,06324_131539,C2_3,0.152
62,06324_131716,C2_3,0.165


In [53]:
### Translating Dataframe

# Renaming the TP Measure column
df_single_char_trans = df_single_char.rename(columns={'TP Measure': 'Char_23_TP'})
    # Pandas warning about using parameter inplace, need to set the DF equal to itself

# Dropping CharName
df_single_char_trans.drop(columns='CharName', inplace=True)

df_single_char_trans.head(3)


Unnamed: 0,DateTime,Char_23_TP
0,06324_131250,0.283
31,06324_131539,0.152
62,06324_131716,0.165


In [None]:
### Creating the column for the operator


In [5]:
### Building upon the above with iterating through the whole data frame

# Pull the different characteristics from CharName, without duplicates
char_names = df_Char_TP['CharName'].unique()
#print(char_names) # test
#print(type(char_names)) # test

# Iterate through by characteristics and generate separate data frames based CharName/TP
    # Have to index through char_names to select the specific character from df_Char_TP
print(char_names[0]) # so use the standard index syntax
    # Select character based on the index
df_single_char = df_Char_TP[df_Char_TP['CharName'] == char_names[0]] # filtering based on the index of char_names
# df_single_char # test
char_range = df_single_char['TP Measure'].max() - df_single_char['TP Measure'].min()
print(f'The range of {char_names[0]} is {char_range}')
    # Iterate
for cname in char_names:
    df_single_char = df_Char_TP[df_Char_TP['CharName'] == cname]
    char_range = df_single_char['TP Measure'].max() - df_single_char['TP Measure'].min()
    print(f'The range of {cname} is {char_range}')



C2_3
The range of C2_3 is 0.28300000000000003
The range of C2_3 is 0.28300000000000003
The range of C4A is 0.548
The range of C4B is 0.40399999999999997
The range of C4C is 0.351
The range of C4D is 0.296
The range of C4E is 0.269
The range of C4F is 0.29100000000000004
The range of C4G is 0.338
The range of C4H is 0.349
The range of C11 is 0.195
The range of C13A is 0.45899999999999996
The range of C13B is 0.6619999999999999
The range of C17A is 0.771
The range of C17B is 0.938
The range of C18A is 0.314
The range of C18B is 1.257
The range of C26A is 1.2730000000000001
The range of C26B is 1.031
The range of C26C is 0.594
The range of C26D is 1.321
The range of C27A is 1.119
The range of C27B is 0.986
The range of C27C is 0.7939999999999999
The range of C27D is 0.8240000000000001
The range of C31A is 0.0
The range of C37A is 0.0
The range of C50A is 0.0
The range of C31B is 0.0
The range of C37B is 0.0
The range of C50B is 0.0
The range of C52 is 0.0


In [None]:
### Cleaning up the above into clean code

# New data frame that only contains CharName and TP Measure
df_Char_TP = df[['CharName', 'TP Measure']]

# Pull the different characteristics from CharName, without duplicates
char_names = df_Char_TP['CharName'].unique() # pulls unique values

# Iterate through by characteristics and generate the ranges
for cname in char_names:
    df_single_char = df_Char_TP[df_Char_TP['CharName'] == cname] # filters based on character
    char_range = df_single_char['TP Measure'].max() - df_single_char['TP Measure'].min() # Calculates the range
    print(f'The range of {cname} is {char_range}') # prints the range


The range of C2_3 is 0.28300000000000003
The range of C4A is 0.548
The range of C4B is 0.40399999999999997
The range of C4C is 0.351
The range of C4D is 0.296
The range of C4E is 0.269
The range of C4F is 0.29100000000000004
The range of C4G is 0.338
The range of C4H is 0.349
The range of C11 is 0.195
The range of C13A is 0.45899999999999996
The range of C13B is 0.6619999999999999
The range of C17A is 0.771
The range of C17B is 0.938
The range of C18A is 0.314
The range of C18B is 1.257
The range of C26A is 1.2730000000000001
The range of C26B is 1.031
The range of C26C is 0.594
The range of C26D is 1.321
The range of C27A is 1.119
The range of C27B is 0.986
The range of C27C is 0.7939999999999999
The range of C27D is 0.8240000000000001
The range of C31A is 0.0
The range of C37A is 0.0
The range of C50A is 0.0
The range of C31B is 0.0
The range of C37B is 0.0
The range of C50B is 0.0
The range of C52 is 0.0


To Do:
-test to see if the ranges are accurate
-generate a new csv file or some kind of visualization
-conduct for feature size
-add addition statistics that would be for a gage rr
-in final code create functions to perform the various statistics
-control the number of decimal places on the output

Full program
-enter the gage rr trial.. is it a 1x1x10 or a 3x3x10 ect
    -test the data to confirm selection
-enter the tolerance limits of each characteristic
    -user input or a csv input?
        -maybe after determining the characteristics from the data iterate and ask for each tolerance
            -the idea is to keep the program universal to any part
-spit out results for each characteristic, how? a tab each in excel? In a notebook?

-take the math and create a library, tinyTAB
    - .cpk(data, char, ect, ect)



In [None]:
### Same as above but for standard deviation

# New data frame that only contains CharName and TP Measure
df_Char_TP = df[['CharName', 'TP Measure']]

# Pull the different characteristics from CharName, without duplicates
char_names = df_Char_TP['CharName'].unique() # pulls unique values

# Iterate through by characteristics and generate the ranges
for cname in char_names:
    df_single_char = df_Char_TP[df_Char_TP['CharName'] == cname] # filters based on character
    char_range = df_single_char['TP Measure'].std() # calculate the standard deviation
    print(f'The standard deviation of {cname} is {char_range}') # prints the range

The standard deviation of C2_3 is 0.06942778876418394
The standard deviation of C4A is 0.12484620451388617
The standard deviation of C4B is 0.10353614807183555
The standard deviation of C4C is 0.08667579972044405
The standard deviation of C4D is 0.08285895616485335
The standard deviation of C4E is 0.07422157080683289
The standard deviation of C4F is 0.07428240222891877
The standard deviation of C4G is 0.09016542987237355
The standard deviation of C4H is 0.08024973582433251
The standard deviation of C11 is 0.048589119120219716
The standard deviation of C13A is 0.11724667169038067
The standard deviation of C13B is 0.1569422932028053
The standard deviation of C17A is 0.09073559563152017
The standard deviation of C17B is 0.17415064474806657
The standard deviation of C18A is 0.07425321003834616
The standard deviation of C18B is 0.21990242870943835
The standard deviation of C26A is 0.3575121640457834
The standard deviation of C26B is 0.27308737654999965
The standard deviation of C26C is 0.17

Resources for Cpk
-https://www.linkedin.com/pulse/calculating-cp-cpk-uma-maheswari-manchala-2c/
-https://www.geeksforgeeks.org/process-capability-index-cpk-formula/



In [None]:
# Creating a Pandas series
data = [1, 2, 3, 4, 5]
s = pd.Series(data)

# Computing the mean of a Pandas series
mean = s.mean()
print(mean)



## Add operators to the data

In [2]:
# Need to add operator
# For this dataframe the first 10 instances is operator one and the next ten is operator 2..

# li = []
# i = 0
# li.append(1)
# print(li)

li = []
i = 0
while i <= 90:
    if i <= 30:
        li.append(1)
        i+=1
    elif i <= 60:
        li.append(2)
        i+=1
    elif i <= 90:
        li.append(3)
        i+=1

print(li)


[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]
