In [3]:
from importlib import reload

import pandas as pd
import numpy as np
from scipy import stats
import statsmodels.api as sm

import matplotlib.pyplot as plt
from matplotlib import rc
import matplotlib.cm as cm

import re

import pickle
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

  data_klasses = (pandas.Series, pandas.DataFrame, pandas.Panel)


In [4]:
import probscale
import seaborn as sns

In [5]:
import sys
sys.path.append('C:/dstools')
import sig_test

In [6]:
from pyhive import presto
req_kw = {
  'verify': 'C:/presto/Presto_JDBC_Driver/WDC_CA_bundle.pem'
}

# ETL Script

In [7]:
def get_ghl2_table (table, searchCond, sel, whereAdded = None, fName=None):
    
    con = presto.connect(
      host='bdp-e2e-presto.wdc.com',
      port=8446,
      protocol='https',
      catalog='hive',
      username='gaku.kiuchi@wdc.com',
      requests_kwargs=req_kw
    )

    batchsize = 100

    df = pd.DataFrame({})
    df_sn = pd.read_csv(searchCond[0])
    for strt in range(0, df_sn['sn'].shape[0], batchsize):
        sql = "select %s " % sel
        sql += "from ghl2."+table + " "
        sql += "where product='pdq' "
        sql += "and testcode in (%s) "  % re.sub('[\[\]]', '', '%s' % searchCond[1])
        sql += "and enddt between '%s' and '%s' " % (searchCond[2][0], searchCond[2][1]) 
        #sql += "and hddtrial in (%s) "  % re.sub('[\[\]]', '', '%s' % searchCond[3])    
        sql += "and hddsn in (%s) "     % re.sub('[\[\]]', '', '%s' % list(df_sn['sn'].values)[strt:strt+batchsize])
        sql += "and procid in (%s) "    % re.sub('[\[\]]', '', '%s' % searchCond[4])
        sql += "and qualifier in (%s) " % re.sub('[\[\]]', '', '%s' % searchCond[5])
        sql += "and pfcode not in ('9999', '99V6', '99V7') "
        if whereAdded != None:
            sql += "and %s " % whereAdded
        print ('current sql is %s' % sql)
        df = df.append( pd.read_sql(sql, con) )
        print(df.shape)

    con.close()
    
    if fName == None:
        df.to_pickle('df_%s.pkl'%table)
        df.to_csv('df_%s.csv'%table, index=False)
    else:
        df.to_pickle('%s.pkl'%fName)
        df.to_csv('%s.csv'%fName, index=False)
    return(df)

In [8]:
def get_vqaa_table (table, searchCond, sel, whereAdded = None):
    
    con = presto.connect(
      host='bdp-e2e-presto.wdc.com',
      port=8446,
      protocol='https',
      catalog='hive',
      username='gaku.kiuchi@wdc.com',
      requests_kwargs=req_kw
    )

    batchsize = 100

    df = pd.DataFrame({})
    df_sn = pd.read_csv(searchCond[0])
    for strt in range(0, df_sn['sn'].shape[0], batchsize):
        sql = "select %s " % sel
        sql += "from vqaa."+table + " "
        sql += "where product='pdq' "
        sql += "and testpgmver in (%s) "  % re.sub('[\[\]]', '', '%s' % searchCond[1])
        sql += "and enddt between '%s' and '%s' " % (searchCond[2][0], searchCond[2][1]) 
        sql += "and hddtrial in (%s) "  % re.sub('[\[\]]', '', '%s' % searchCond[3])    
        sql += "and hddsn in (%s) "     % re.sub('[\[\]]', '', '%s' % list(df_sn['sn'].values)[strt:strt+batchsize])
        sql += "and procid in (%s) "    % re.sub('[\[\]]', '', '%s' % searchCond[4])
        #sql += "and qualifier in (%s) " % re.sub('[\[\]]', '', '%s' % searchCond[5])
        #sql += "and pfcode not in ('9999', '99V6', '99V7') "
        if whereAdded != None:
            sql += "and %s " % whereAdded
        print ('current sql is %s' % sql)
        df = df.append( pd.read_sql(sql, con) )
        print(df.shape)

    con.close()
        
    df.to_pickle('df_%s.pkl'%table)
    df.to_csv('df_%s.csv'%table, index=False)
    return(df)

In [9]:
def get_sdet_table (table, searchCond, sel, whereAdded = None):
    
    con = presto.connect(
      host='bdp-e2e-presto.wdc.com',
      port=8446,
      protocol='https',
      catalog='hive',
      username='gaku.kiuchi@wdc.com',
      requests_kwargs=req_kw
    )

    batchsize = 100

    df = pd.DataFrame({})
    df_sn = pd.read_csv(searchCond[0])
    for strt in range(0, df_sn['slidersn'].shape[0], batchsize):
        sql = "select %s " % sel
        sql += "from hive."+table + " "
        sql += "where "
        sql += "storeday between '%s' and '%s' " % (searchCond[1][0], searchCond[1][1])
        sql += "and slidersn in (%s) " % re.sub('[\[\]]', '', '%s' % list(df_sn['slidersn'].values)[strt:strt+batchsize])
        if whereAdded != None:
            sql += "and %s " % whereAdded
        print ('current sql is %s' % sql)
        df = df.append( pd.read_sql(sql, con) )
        print(df.shape)

    con.close()
        
    df.to_pickle('df_%s.pkl'%table)
    df.to_csv('df_%s.csv'%table, index=False)
    return(df)

# ETL

In [10]:
#parameters
stpi      = 580.0
dtpi_cmr  = 500.0
dtpi_smr  = 602.0
tgtLbaCmr = 273906721.0
tgtLbaSmr = 333662709.0

In [11]:
#peak current

def IwPeak4G4(iw, ka, kd):
    return((-0.013954886*ka + 1.128161219) * iw + (1.973010793 * ka -1.407847328))

def IwPeak4G4i(iw, ka, kd):
    return((-0.015470889*ka + 1.684161818) * iw + (1.335027916 * ka +0.387637386))


In [12]:
def get_nth_char (strList, index=0):
    a = []
    for q in list(strList):
        a.append(q[index])
    return(a)

In [13]:
snlist_csv   = 'sn_a2a.csv'
listTestcode = ['PDQX083M']
listEnddt    = ['20201223', '20210101']
listHddTrial = ['Z07E', 'NPIF']

# Head Map Check

In [12]:
#Head Map
inputParamGhl2Rmr = [snlist_csv, 
                     listTestcode,
                     listEnddt, 
                     listHddTrial,
                     ['6400'],
                     ['10N0']
                    ]
dfHeadMap = get_ghl2_table('ccb_ci_rmr', inputParamGhl2Rmr, 
                           'hddsn, testcode, mfgid, mfgid_5, hddtrial, lhd, phd, \
                            row_number() over ( partition by hddsn, testcode, lhd order by enddate desc) as row_num')
dfHeadMap.to_pickle('dfHeadMap.pkl')
dfHeadMap = pd.read_pickle('dfHeadMap.pkl').query("row_num==1")
dfHeadMap

current sql is select hddsn, testcode, mfgid, mfgid_5, hddtrial, lhd, phd,                             row_number() over ( partition by hddsn, testcode, lhd order by enddate desc) as row_num from ghl2.ccb_ci_rmr where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6400') and qualifier in ('10N0') and pfcode not in ('9999', '99V6', '99V7') 
(28, 8)


Unnamed: 0,hddsn,testcode,mfgid,mfgid_5,hddtrial,lhd,phd,row_num
0,2FA02DNA,PDQX083M,KJ1C03,3,Z07E,0,9,1
1,2FA06G5A,PDQX083M,KJ1C03,3,NPIF,1,10,1
2,2FA00WRA,PDQX083M,KJ1C03,3,Z07E,0,9,1
3,2FA06H1A,PDQX083M,KJ1C03,3,NPIF,1,10,1
4,2FA02E8A,PDQX083M,KJ1C03,3,Z07E,0,9,1
5,2FA00WRA,PDQX083M,KJ1C03,3,Z07E,1,10,1
6,2FA06H1A,PDQX083M,KJ1C03,3,NPIF,0,9,1
7,2FA01EHA,PDQX083M,KJ1C03,3,Z07E,1,10,1
8,2FA06GNA,PDQX083M,KJ1C03,3,NPIF,0,9,1
9,2FA06G3A,PDQX083M,KJ1C03,3,NPIF,1,10,1


In [13]:
pd.pivot_table(dfHeadMap.query("lhd==0"), index=['hddsn', 'mfgid_5'], columns=['testcode'], values='hddtrial', aggfunc='count')

Unnamed: 0_level_0,testcode,PDQX083M
hddsn,mfgid_5,Unnamed: 2_level_1
2FA00WRA,3,1
2FA01E4A,3,1
2FA01EHA,3,1
2FA02DKA,3,1
2FA02DNA,3,1
2FA02E8A,3,1
2FA02EAA,3,1
2FA02EUA,3,1
2FA06EXA,3,1
2FA06G3A,3,1


# SER, OW, Pbo

In [34]:
## SER ##
inputParamGhl2Ser = [snlist_csv, 
                     listTestcode,
                     listEnddt, 
                     listHddTrial,
                     ['6400', '6600', '6800'],
                     ['1000', '1010', 
                      '2000', '2010',
                      '30N0', '3050', '3060', 
                      '40N0', '4050', '4060', 
                      '9070', '9050', '9060', 
                      'K070', 'K050', 'K060',
                      'B0N0', 'B010', 'B020', 
                      'M0N0', 'M010', 'M020',
                     ]
                     #['1000', '1010', '1020', 
                     # '10A0',
                     # '30N0', '3050', '3060']
                    ]
dfSer = get_ghl2_table('ccb_ci_ser', inputParamGhl2Ser, 
                       'hddsn, testcode, qualifier, mfgid, hddtrial, lhd, phd, band, ser, offset, \
                        row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num ',
                       'band < 64'
                      )
dfSer

current sql is select hddsn, testcode, qualifier, mfgid, hddtrial, lhd, phd, band, ser, offset,                         row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num  from ghl2.ccb_ci_ser where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6400', '6600', '6800') and qualifier in ('1000', '1010', '2000', '2010', '30N0', '3050', '3060', '40N0', '4050', '4060', '9070', '9050', '9060', 'K070', 'K050', 'K060', 'B0N0', 'B010', 'B020', 'M0N0', 'M010', 'M020') and pfcode not in ('9999', '99V6', '99V7') and band < 64 
(37120, 11)


Unnamed: 0,hddsn,testcode,qualifier,mfgid,hddtrial,lhd,phd,band,ser,offset,row_num
0,2FA00WRA,PDQX083M,3050,KJ1C03,Z07E,1,10,18,-2.13,-129,1
1,2FA00WRA,PDQX083M,3060,KJ1C03,Z07E,0,9,18,-1.95,-63,1
2,2FA00WRA,PDQX083M,4060,KJ1C03,Z07E,0,9,55,-2.09,13,1
3,2FA00WRA,PDQX083M,9060,KJ1C03,Z07E,0,9,53,-2.09,75,1
4,2FA00WRA,PDQX083M,B010,KJ1C03,Z07E,0,9,34,-1.91,52,1
...,...,...,...,...,...,...,...,...,...,...,...
37115,2FA06H1A,PDQX083M,9070,KJ1C03,NPIF,0,9,0,-1.97,-81,1
37116,2FA06H1A,PDQX083M,K060,KJ1C03,NPIF,0,9,28,-1.53,-70,1
37117,2FA06H1A,PDQX083M,K070,KJ1C03,NPIF,0,9,40,-2.00,-11,1
37118,2FA06H4A,PDQX083M,9060,KJ1C03,NPIF,1,10,12,-1.60,-58,1


In [15]:
## Roller SER ##
inputParamGhl2SerRoller = [snlist_csv, 
                           listTestcode,
                           listEnddt, 
                           listHddTrial,
                           ['6600', '6800'],
                           ['9040', 'B000', 'K040', 'M000']
                          ]
dfSerRoller = get_ghl2_table('ccb_ci_ser', inputParamGhl2SerRoller, 
                       'hddsn, testcode, qualifier, mfgid, hddtrial, lhd, phd, band, offset, \
                        aveperband, maxperband, minperband, rawmodulationstats0_average as aveperband0, rawmodulationstats0_max as maxperband0, rawmodulationstats0_min as minperband0, \
                        row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num ',
                       'band < 64',
                       fName='df_ser_roller'
                      )
dfSerRoller

current sql is select hddsn, testcode, qualifier, mfgid, hddtrial, lhd, phd, band, offset,                         aveperband, maxperband, minperband, rawmodulationstats0_average as aveperband0, rawmodulationstats0_max as maxperband0, rawmodulationstats0_min as minperband0,                         row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num  from ghl2.ccb_ci_ser where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6600', '6800') and qualifier in ('9040', 'B000', 'K040', 'M000') and pfcode not in ('9999', '99V6', '99V7') and band < 64 
(104, 16)


Unnamed: 0,hddsn,testcode,qualifier,mfgid,hddtrial,lhd,phd,band,offset,aveperband,maxperband,minperband,aveperband0,maxperband0,minperband0,row_num
0,2FA02EUA,PDQX083M,M000,KJ1C03,Z07E,1,10,0,-107,-0.953717,-0.897335,-1.010669,-0.95,-0.89,-1.01,1
1,2FA02EUA,PDQX083M,K040,KJ1C03,Z07E,0,9,0,-158,-0.922196,-0.860727,-0.971677,-0.92,-0.86,-0.97,1
2,2FA00WRA,PDQX083M,K040,KJ1C03,Z07E,0,9,0,13,-0.758964,-0.687223,-0.817021,-0.75,-0.68,-0.81,1
3,2FA01EHA,PDQX083M,9040,KJ1C03,Z07E,0,9,0,-196,-1.045797,-0.983576,-1.202662,-1.04,-0.98,-1.20,1
4,2FA06EXA,PDQX083M,K040,KJ1C03,NPIF,1,10,0,321,-0.907694,-0.841458,-1.019577,-0.90,-0.84,-1.01,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,2FA06H4A,PDQX083M,B000,KJ1C03,NPIF,0,9,0,-159,-0.894293,-0.815097,-1.097663,-0.89,-0.81,-1.09,1
100,2FA02EAA,PDQX083M,9040,KJ1C03,Z07E,0,9,0,-197,-1.041509,-0.973302,-1.172008,-1.04,-0.97,-1.17,1
101,2FA06H4A,PDQX083M,M000,KJ1C03,NPIF,0,9,0,-103,-0.880439,-0.814313,-0.954642,-0.88,-0.81,-0.95,1
102,2FA02E8A,PDQX083M,M000,KJ1C03,Z07E,0,9,0,-40,-0.911112,-0.862313,-0.975347,-0.91,-0.86,-0.97,1


In [16]:
## OW ##
inputParamGhl2Ow = [snlist_csv, 
                    listTestcode,
                    listEnddt, 
                    listHddTrial,
                    ['6400', '6600', '6800'],
                    ['11N0', '21N0', '31N0', '41N0', '91H0', 'K1H0', 'B0N0', 'M0N0']
                   ]
dfOw = get_ghl2_table('ccb_ci_ow', inputParamGhl2Ow, 
                      'hddsn, enddt, pfcode, mfgid, hddtrial, testcode, qualifier, lhd, phd, band, owvalperp, owvalmaxperp, owvalconv, owvalmaxconv, \
                       row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num',
                      'band < 3'
                     )
dfOw = pd.read_pickle('df_ccb_ci_ow.pkl').query("band < 3 and row_num==1")


current sql is select hddsn, enddt, pfcode, mfgid, hddtrial, testcode, qualifier, lhd, phd, band, owvalperp, owvalmaxperp, owvalconv, owvalmaxconv,                        row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num from ghl2.ccb_ci_ow where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6400', '6600', '6800') and qualifier in ('11N0', '21N0', '31N0', '41N0', '91H0', 'K1H0', 'B0N0', 'M0N0') and pfcode not in ('9999', '99V6', '99V7') and band < 3 
(636, 15)


In [17]:
## PBO ##
inputParamGhl2Ow = [snlist_csv, 
                    listTestcode,
                    listEnddt, 
                    listHddTrial,
                    ['6400'],
                    ['3070', '4070']
                   ]
dfPbo = get_ghl2_table('ccb_mi_pbo', inputParamGhl2Ow, 
                      'hddsn, enddt, pfcode, mfgid, hddtrial, testcode, qualifier, lhd, phd, band, mRwSerPerRWNmSlope, mRwSerPerRWDacOffset, \
                       row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num',
                      'band = 0 '
                     )
dfPbo

current sql is select hddsn, enddt, pfcode, mfgid, hddtrial, testcode, qualifier, lhd, phd, band, mRwSerPerRWNmSlope, mRwSerPerRWDacOffset,                        row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num from ghl2.ccb_mi_pbo where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6400') and qualifier in ('3070', '4070') and pfcode not in ('9999', '99V6', '99V7') and band = 0  
(52, 13)


Unnamed: 0,hddsn,enddt,pfcode,mfgid,hddtrial,testcode,qualifier,lhd,phd,band,mRwSerPerRWNmSlope,mRwSerPerRWDacOffset,row_num
0,2FA02E8A,20201223,0,KJ1C03,Z07E,PDQX083M,4070,0,9,0,120.58131,-218.32483,1
1,2FA00WRA,20201223,0,KJ1C03,Z07E,PDQX083M,4070,0,9,0,273.20517,-205.10011,1
2,2FA01E4A,20201223,0,KJ1C03,Z07E,PDQX083M,3070,0,9,0,338.6328,-236.56519,1
3,2FA06EXA,20201229,0,KJ1C03,NPIF,PDQX083M,4070,0,9,0,366.99023,-218.17802,1
4,2FA02EUA,20201223,0,KJ1C03,Z07E,PDQX083M,3070,1,10,0,312.5297,-239.75557,1
5,2FA02EAA,20201223,0,KJ1C03,Z07E,PDQX083M,3070,0,9,0,291.38586,-216.12596,1
6,2FA01EHA,20201223,0,KJ1C03,Z07E,PDQX083M,4070,0,9,0,112.130325,-220.79355,1
7,2FA02E8A,20201223,0,KJ1C03,Z07E,PDQX083M,3070,0,9,0,293.7554,-219.79845,1
8,2FA06EXA,20201229,0,KJ1C03,NPIF,PDQX083M,3070,0,9,0,407.94778,-227.1621,1
9,2FA00WRA,20201223,0,KJ1C03,Z07E,PDQX083M,3070,0,9,0,308.58115,-213.58914,1


In [18]:
#TSW2 spec
# 0.0457 * OWC_Worst + 904_ser_roller_max + (0.16+0.194)*u407_rwser_rw_slp_nm_od*1e-3 > -1.5537
#TSWH spec
# 0.0065 * OWC_Worst + k04_ser_roller_max + (0.16+0.194)*u407_rwser_rw_slp_nm_od*1e-3 > -0.66903
coef = [0.0457, 0.0065];      #CMR, SMR
thresh = [-1.5537, -1.6286, -0.66903, -0.68404]; #CMR for SRST, CMR for Final, SMR for SRST, SMR for Final
dfh4S  = 0.16+0.194;     #nm, for SRST
dfh4F  = 0.324;          #nm, for Final

#OW
dfOw = pd.read_pickle('df_ccb_ci_ow.pkl').query("row_num==1 and band==0")
dfOw.loc[:,'firstQual'] = get_nth_char(list(dfOw.qualifier), index=0)
#SER
dfSer = pd.read_pickle('df_ser_roller.pkl').query("row_num==1 and band==0")
dfSer.loc[:,'firstQual'] = get_nth_char(list(dfSer.qualifier), index=0)
#Pbo
dfPbo = pd.read_pickle('df_ccb_mi_pbo.pkl').query("row_num==1 and band==0")

dfTsw = dfOw.query("band==0 and qualifier in ['91H0', 'K1H0', 'B0N0', 'M0N0']").merge(dfSer.query("qualifier in ['9040', 'K040', 'B000', 'M000']"), 
                                                                   left_on =['hddsn', 'mfgid','hddtrial', 'testcode', 'firstQual', 'lhd', 'phd', 'band', 'row_num'],
                                                                   right_on=['hddsn', 'mfgid','hddtrial', 'testcode', 'firstQual', 'lhd', 'phd', 'band', 'row_num'],
                                                                   suffixes=['_ow', '_ser'],
                                                                   how='inner'
                                                                  )
dfTsw=dfTsw.merge(dfPbo,
                  left_on =['hddsn', 'mfgid','hddtrial', 'testcode', 'lhd', 'phd', 'band', 'row_num'],
                  right_on=['hddsn', 'mfgid','hddtrial', 'testcode', 'lhd', 'phd', 'band', 'row_num'],
                  suffixes=['', '_pbo'],
                  how='left'
                 )

#ser with dfh
#SRST
dfTsw.loc[dfTsw.qualifier_ser.str.startswith('9') | dfTsw.qualifier_ser.str.startswith('K'), 'dfh'] = dfh4S
#Final
dfTsw.loc[dfTsw.qualifier_ser.str.startswith('B') | dfTsw.qualifier_ser.str.startswith('M'), 'dfh'] = dfh4F
dfTsw.loc[:,'ser_for_tsw_metric'] = dfTsw.loc[:,'maxperband'].values + dfTsw.loc[:,'mRwSerPerRWNmSlope'].values*dfTsw.loc[:,'dfh'].values*1e-3

#coef
#CMR
dfTsw.loc[dfTsw.qualifier_ser.str.startswith('9') | dfTsw.qualifier_ser.str.startswith('B'),'coef'] = coef[0]
#SMR
dfTsw.loc[dfTsw.qualifier_ser.str.startswith('K') | dfTsw.qualifier_ser.str.startswith('M'),'coef'] = coef[1]

#spec threshold
dfTsw.loc[dfTsw.qualifier_ser.str.startswith('9'), 'thresh'] = thresh[0]
dfTsw.loc[dfTsw.qualifier_ser.str.startswith('B'), 'thresh'] = thresh[1]
dfTsw.loc[dfTsw.qualifier_ser.str.startswith('K'), 'thresh'] = thresh[2]
dfTsw.loc[dfTsw.qualifier_ser.str.startswith('M'), 'thresh'] = thresh[3]

#TSW Metric
dfTsw.loc[:,'tsw_metric'] = dfTsw.loc[:,'owvalmaxconv'].values * dfTsw.loc[:,'coef'].values + dfTsw.loc[:,'ser_for_tsw_metric'].values

dfTsw.to_pickle('dfTsw.pkl')
dfTsw.to_csv('dfTsw.csv')

dfTsw

Unnamed: 0,hddsn,enddt,pfcode,mfgid,hddtrial,testcode,qualifier_ow,lhd,phd,band,...,enddt_pbo,pfcode_pbo,qualifier,mRwSerPerRWNmSlope,mRwSerPerRWDacOffset,dfh,ser_for_tsw_metric,coef,thresh,tsw_metric
0,2FA06G3A,20201231,0000,KJ1C03,NPIF,PDQX083M,91H0,0,9,0,...,20201229,0000,4070,325.009700,-225.72420,0.354,-0.657730,0.0457,-1.55370,-1.903969
1,2FA06G3A,20201231,0000,KJ1C03,NPIF,PDQX083M,91H0,0,9,0,...,20201229,0000,3070,481.195530,-226.42197,0.354,-0.602440,0.0457,-1.55370,-1.848679
2,2FA01E4A,20201226,0000,KJ1C03,Z07E,PDQX083M,M0N0,1,10,0,...,20201223,0000,3070,271.092860,-214.22388,0.324,-0.868376,0.0065,-0.68404,-1.084046
3,2FA01E4A,20201226,0000,KJ1C03,Z07E,PDQX083M,M0N0,1,10,0,...,20201223,0000,4070,221.659900,-227.25575,0.324,-0.884393,0.0065,-0.68404,-1.100063
4,2FA01EHA,20201226,0000,KJ1C03,Z07E,PDQX083M,K1H0,0,9,0,...,20201223,0000,4070,112.130325,-220.79355,0.354,-0.744240,0.0065,-0.66903,-0.956530
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,2FA06G3A,20210101,0000,KJ1C03,NPIF,PDQX083M,B0N0,0,9,0,...,20201229,0000,3070,481.195530,-226.42197,0.324,-0.663533,0.0457,-1.62860,-1.896519
204,2FA02E8A,20201226,0000,KJ1C03,Z07E,PDQX083M,B0N0,1,10,0,...,20201223,0000,4070,147.887180,-227.82300,0.324,-1.015378,0.0457,-1.62860,-2.605281
205,2FA02E8A,20201226,0000,KJ1C03,Z07E,PDQX083M,B0N0,1,10,0,...,20201223,0000,3070,302.915340,-229.67107,0.324,-0.965149,0.0457,-1.62860,-2.555052
206,2FA02DNA,20201226,0000,KJ1C03,Z07E,PDQX083M,B0N0,1,10,0,...,20201223,0000,4070,194.259890,-221.19695,0.324,-0.914470,0.0457,-1.62860,-2.450904


# RSBN

In [19]:
## RSBN ##

inputParamGhl2Rsbn = [snlist_csv, 
                      listTestcode,
                      listEnddt, 
                      listHddTrial,
                      ['6400'],
                      ['1000', '2010']
                     ]
dfRsbn = get_ghl2_table('ccb_ci_crsbn_datareader', inputParamGhl2Rsbn, 
                        'hddsn, pfcode, testcode, mfgid, hddtrial, qualifier, phd, lhd, readerindex, datardrs, pridatardr, \
                         row_number() over ( partition by hddsn, testcode, qualifier, lhd, readerindex order by enddate desc) as row_num',
                        'readerindex < 64'
                       )
dfRsbn = pd.read_pickle('df_ccb_ci_crsbn_datareader.pkl')
dfRsbn.loc[:,'datardrs']   = dfRsbn.loc[:,'datardrs'].values.astype(np.int64)
dfRsbn.loc[:,'pridatardr'] = dfRsbn.loc[:,'pridatardr'].values.astype(np.int64)
dfRsbnFlat = pd.pivot_table(dfRsbn.query("readerindex<64 and row_num==1"), 
                            index=['hddsn', 'mfgid', 'testcode', 'lhd', 'phd'], 
                            columns=['qualifier', 'readerindex'], 
                            values=['datardrs', 'pridatardr'], 
                            aggfunc=np.sum).reset_index(drop=False)
cols = [x[0] for x in list(dfRsbnFlat.columns)[:5]]+[str(x[0])+'_'+str(x[1])+'_'+str(x[2]) for x in list(dfRsbnFlat.columns)[5:]]
dfRsbnFlat.columns = cols
dfRsbnFlat.to_pickle('dfRsbnFlat.pkl')
dfRsbnFlat

current sql is select hddsn, pfcode, testcode, mfgid, hddtrial, qualifier, phd, lhd, readerindex, datardrs, pridatardr,                          row_number() over ( partition by hddsn, testcode, qualifier, lhd, readerindex order by enddate desc) as row_num from ghl2.ccb_ci_crsbn_datareader where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6400') and qualifier in ('1000', '2010') and pfcode not in ('9999', '99V6', '99V7') and readerindex < 64 
(3584, 12)


Unnamed: 0,hddsn,mfgid,testcode,lhd,phd,datardrs_1000_0,datardrs_1000_1,datardrs_1000_2,datardrs_1000_3,datardrs_1000_4,...,pridatardr_2010_54,pridatardr_2010_55,pridatardr_2010_56,pridatardr_2010_57,pridatardr_2010_58,pridatardr_2010_59,pridatardr_2010_60,pridatardr_2010_61,pridatardr_2010_62,pridatardr_2010_63
0,2FA00WRA,KJ1C03,PDQX083M,0,9,3,3,3,3,3,...,0,0,0,0,0,0,0,0,0,0
1,2FA00WRA,KJ1C03,PDQX083M,1,10,3,3,3,3,3,...,0,0,0,0,0,0,0,0,0,0
2,2FA01E4A,KJ1C03,PDQX083M,0,9,3,3,3,3,3,...,0,0,0,0,0,0,0,0,0,0
3,2FA01E4A,KJ1C03,PDQX083M,1,10,3,3,3,3,3,...,0,0,0,0,0,0,0,0,0,0
4,2FA01EHA,KJ1C03,PDQX083M,0,9,3,3,3,3,3,...,0,0,0,0,0,0,0,0,0,0
5,2FA01EHA,KJ1C03,PDQX083M,1,10,3,3,3,3,3,...,0,0,0,0,0,0,0,0,0,0
6,2FA02DKA,KJ1C03,PDQX083M,0,9,3,3,3,3,3,...,0,0,0,0,0,0,0,0,0,0
7,2FA02DKA,KJ1C03,PDQX083M,1,10,3,3,3,3,3,...,0,0,0,0,0,0,0,0,0,0
8,2FA02DNA,KJ1C03,PDQX083M,0,9,3,3,3,3,3,...,0,0,0,0,0,0,0,0,0,0
9,2FA02DNA,KJ1C03,PDQX083M,1,10,3,3,3,3,3,...,0,0,0,0,0,0,0,0,0,0


In [20]:
## MCW ##
inputParamGhl2Mcw = [snlist_csv, 
                      listTestcode,
                      listEnddt, 
                      listHddTrial,
                      ['6400'],
                      ['10N0', '2300']
                     ]
dfMcw = get_ghl2_table('ccb_ci_mcw', inputParamGhl2Mcw, 
                        'hddsn, pfcode, testcode, mfgid, hddtrial, qualifier, band, phd, lhd, mcw, mcw_smr, \
                         row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num')
dfMcw = pd.read_pickle('df_ccb_ci_mcw.pkl').query("row_num==1")
dfMcw.loc[:,'mcw_nm']     = dfMcw.loc[:,'mcw'].values     * (1/100*25.4/dtpi_cmr*1e3)
dfMcw.loc[:,'mcw_smr_nm'] = dfMcw.loc[:,'mcw_smr'].values * (1/4096*25.4/dtpi_smr*1e3)
dfMcw.to_pickle('dfMcw.pkl')
dfMcw

current sql is select hddsn, pfcode, testcode, mfgid, hddtrial, qualifier, band, phd, lhd, mcw, mcw_smr,                          row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num from ghl2.ccb_ci_mcw where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6400') and qualifier in ('10N0', '2300') and pfcode not in ('9999', '99V6', '99V7') 
(280, 12)


Unnamed: 0,hddsn,pfcode,testcode,mfgid,hddtrial,qualifier,band,phd,lhd,mcw,mcw_smr,row_num,mcw_nm,mcw_smr_nm
0,2FA06G3A,0000,PDQX083M,KJ1C03,NPIF,2300,6,10,1,0.000000,5698,1,0.000000,58.694813
1,2FA06H1A,0000,PDQX083M,KJ1C03,NPIF,2300,0,9,0,0.000000,5643,1,0.000000,58.128261
2,2FA02EAA,0000,PDQX083M,KJ1C03,Z07E,2300,4,9,0,0.000000,6951,1,0.000000,71.601903
3,2FA02DNA,0000,PDQX083M,KJ1C03,Z07E,2300,3,9,0,0.000000,6549,1,0.000000,67.460921
4,2FA06H1A,0000,PDQX083M,KJ1C03,NPIF,10N0,2,9,0,112.646484,0,1,57.224414,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
275,2FA02EAA,0000,PDQX083M,KJ1C03,Z07E,2300,4,10,1,0.000000,6347,1,0.000000,65.380129
276,2FA02DNA,0000,PDQX083M,KJ1C03,Z07E,10N0,2,10,1,142.919922,0,1,72.603320,0.000000
277,2FA01E4A,0000,PDQX083M,KJ1C03,Z07E,2300,3,10,1,0.000000,6859,1,0.000000,70.654216
278,2FA02EAA,0000,PDQX083M,KJ1C03,Z07E,2300,5,9,0,0.000000,6865,1,0.000000,70.716021


In [21]:
## MRW/MWW ##
inputParamGhl2Mrw = [snlist_csv, 
                      listTestcode,
                      listEnddt, 
                      listHddTrial,
                      ['6400'],
                      ['1000', '1010', '2000', '2010']
                     ]
dfMrw = get_ghl2_table('ccb_ci_mrw', inputParamGhl2Mrw, 
                       'hddsn, pfcode, testcode, mfgid, hddtrial, qualifier, band, phd, lhd, mrw, mww, \
                        row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num',
                       'band < 3'
                      )
dfMrw = pd.read_pickle('df_ccb_ci_mrw.pkl').query("row_num==1")
dfMrw.loc[dfMrw.qualifier.str.startswith('1'), 'scale'] = 1/100*25.4/dtpi_cmr*1e3
dfMrw.loc[dfMrw.qualifier.str.startswith('2'), 'scale'] = 1/100*25.4/dtpi_smr*1e3
dfMrw.loc[:,'mrw_nm'] = dfMrw.loc[:,'mrw'].values * dfMrw.loc[:, 'scale'].values
dfMrw.loc[:,'mww_nm'] = dfMrw.loc[:,'mww'].values * dfMrw.loc[:, 'scale'].values
dfMrw.to_pickle('dfMrw.pkl')
dfMrw

current sql is select hddsn, pfcode, testcode, mfgid, hddtrial, qualifier, band, phd, lhd, mrw, mww,                         row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num from ghl2.ccb_ci_mrw where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6400') and qualifier in ('1000', '1010', '2000', '2010') and pfcode not in ('9999', '99V6', '99V7') and band < 3 
(336, 12)


Unnamed: 0,hddsn,pfcode,testcode,mfgid,hddtrial,qualifier,band,phd,lhd,mrw,mww,row_num,scale,mrw_nm,mww_nm
0,2FA06H1A,0000,PDQX083M,KJ1C03,NPIF,1010,0,10,1,55.307804,92.681020,1,0.508000,28.096364,47.081958
1,2FA06G3A,0000,PDQX083M,KJ1C03,NPIF,2000,1,10,1,61.486740,111.264400,1,0.421927,25.942910,46.945445
2,2FA02EAA,0000,PDQX083M,KJ1C03,Z07E,1000,1,10,1,54.350300,104.640390,1,0.508000,27.609952,53.157318
3,2FA06EXA,0000,PDQX083M,KJ1C03,NPIF,1010,0,10,1,56.898594,91.303220,1,0.508000,28.904486,46.382036
4,2FA06EXA,0000,PDQX083M,KJ1C03,NPIF,1010,1,10,1,54.601130,86.232880,1,0.508000,27.737374,43.806303
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
331,2FA02DNA,0000,PDQX083M,KJ1C03,Z07E,1000,1,10,1,50.104477,114.481094,1,0.508000,25.453074,58.156396
332,2FA02EUA,0000,PDQX083M,KJ1C03,Z07E,1010,1,9,0,56.639183,107.259010,1,0.508000,28.772705,54.487577
333,2FA06G5A,0000,PDQX083M,KJ1C03,NPIF,2000,1,9,0,61.759230,109.994650,1,0.421927,26.057881,46.409703
334,2FA06GNA,0000,PDQX083M,KJ1C03,NPIF,1010,2,9,0,50.711740,93.048300,1,0.508000,25.761564,47.268536


In [22]:
## AMP ##
inputParamGhl2Amp = [snlist_csv, 
                      listTestcode,
                      listEnddt, 
                      listHddTrial,
                      ['6400', '6600', '6800'],
                      ['1000', '1010', '2000', '2010', 
                       '3000', '3010', '4000', '4010', 
                       '9000', '9010', 'K000', 'K010',
                       'B000', 'B010', 'M000', 'M010' 
                      ]
                     ]
dfAmp = get_ghl2_table('ccb_ci_amp', 
                       inputParamGhl2Amp, 
                       #'hddsn, pfcode, testcode, mfgid, hddtrial, qualifier, band, phd, lhd, MRResistance, InputImpedance, SelectReaderGain, CalcedAeGain, AeGainCompensation_ResolutionAVG, AeGainCompensation_Amplitude1AVG, AeGainCompensation_Amplitude2AVG ')
                       'hddsn, pfcode, testcode, mfgid, hddtrial, qualifier, band, phd, lhd, AeGainCompensation_ResolutionAVG as res, AeGainCompensation_Amplitude1AVG as amp2t, AeGainCompensation_Amplitude2AVG as amp10t, \
                        row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num',
                       'band < 3'
                      )
dfAmp = pd.read_pickle('df_ccb_ci_amp.pkl')
dfAmpFlat = pd.pivot_table(dfAmp.query("band<3 and row_num==1"), 
                           index=['hddsn', 'mfgid', 'testcode', 'lhd', 'phd'], 
                           columns=['qualifier', 'band'], 
                           values=['res', 'amp2t', 'amp10t'], 
                           aggfunc=np.sum).reset_index(drop=False)
cols = [x[0] for x in list(dfAmpFlat.columns)[:5]]+[str(x[0])+'_'+str(x[1])+'_'+str(x[2]) for x in list(dfAmpFlat.columns)[5:]]
dfAmpFlat.columns = cols
dfAmpFlat

current sql is select hddsn, pfcode, testcode, mfgid, hddtrial, qualifier, band, phd, lhd, AeGainCompensation_ResolutionAVG as res, AeGainCompensation_Amplitude1AVG as amp2t, AeGainCompensation_Amplitude2AVG as amp10t,                         row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num from ghl2.ccb_ci_amp where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6400', '6600', '6800') and qualifier in ('1000', '1010', '2000', '2010', '3000', '3010', '4000', '4010', '9000', '9010', 'K000', 'K010', 'B000', 'B010', 'M000', 'M010') and pfcode not in ('9999', '99V6', '99V7') and band < 3 
(1272, 13)


Unnamed: 0,hddsn,mfgid,testcode,lhd,phd,amp10t_1000_0,amp10t_1000_1,amp10t_1000_2,amp10t_1010_0,amp10t_1010_1,...,res_K000_2,res_K010_0,res_K010_1,res_K010_2,res_M000_0,res_M000_1,res_M000_2,res_M010_0,res_M010_1,res_M010_2
0,2FA00WRA,KJ1C03,PDQX083M,0,9,20.01129,20.740272,20.950719,30.569435,31.429739,...,46.121704,44.726101,37.911039,43.326862,46.834468,39.718455,47.502428,46.06668,38.779093,43.408444
1,2FA00WRA,KJ1C03,PDQX083M,1,10,25.20009,25.857309,24.655482,35.816331,36.955762,...,43.724431,46.132272,39.188333,42.238581,47.785332,41.285601,45.270483,45.923709,40.395768,43.473313
2,2FA01E4A,KJ1C03,PDQX083M,0,9,39.044141,41.29422,40.308202,37.001082,37.648768,...,44.648675,57.536523,43.010221,45.599423,57.844004,42.990452,44.857229,58.901323,43.790418,45.202265
3,2FA01E4A,KJ1C03,PDQX083M,1,10,23.577393,23.896757,24.37766,29.754934,30.291816,...,40.599038,46.728598,43.781748,43.575476,50.704032,41.373377,43.264144,47.656077,41.337828,42.039125
4,2FA01EHA,KJ1C03,PDQX083M,0,9,35.122501,35.713549,36.125898,14.590731,15.467241,...,47.052758,43.430076,37.95903,41.732016,48.762997,43.076186,47.295016,44.173342,38.817748,41.974056
5,2FA01EHA,KJ1C03,PDQX083M,1,10,40.824861,41.798014,40.372285,34.962966,34.383597,...,43.265971,48.85602,42.190053,43.511823,47.279141,41.558542,41.188629,48.445649,42.121875,40.87027
6,2FA02DKA,KJ1C03,PDQX083M,0,9,31.624529,32.3802,30.705867,20.717694,22.349174,...,,,,,,,,,,
7,2FA02DKA,KJ1C03,PDQX083M,1,10,41.285485,41.909585,42.105861,26.134887,26.275317,...,,,,,,,,,,
8,2FA02DNA,KJ1C03,PDQX083M,0,9,33.896105,34.857945,35.386386,30.010589,30.590788,...,46.895321,45.968132,40.422952,45.775785,50.817482,43.474568,46.329027,46.69001,40.11255,45.089957
9,2FA02DNA,KJ1C03,PDQX083M,1,10,40.301497,40.983026,39.426756,21.976225,21.6401,...,46.797104,44.28676,40.531783,42.118894,52.932821,48.234023,46.158332,45.040527,42.011879,41.272557


In [23]:
## Asym ##

inputParamGhl2Asym = [snlist_csv, 
                      listTestcode,
                      listEnddt, 
                      listHddTrial,
                      ['6400', '6600', '6800'],
                      ['1000', '1010', '2000', '2010',
                       '3000', '3010', '4000', '4010', 
                       '9000', '9010', 'K000', 'K010',
                       'B000', 'B010', 'M000', 'M010'
                      ]
                     ]
dfAsym = get_ghl2_table('ccb_ci_asm', 
                       inputParamGhl2Asym, 
                       'hddsn, pfcode, testcode, mfgid, hddtrial, qualifier, band, phd, lhd, asymmetry, \
                        row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num',
                       'band < 3'
                       )
"""
dfAsym = pd.read_pickle('df_ccb_ci_asm.pkl')
dfAsymFlat = pd.pivot_table(dfAsym.query("band<3 and row_num==1"), 
                           index=['hddsn', 'mfgid', 'testcode', 'lhd', 'phd'], 
                           columns=['qualifier', 'band'], 
                           values=['asymmetry'], 
                           aggfunc=np.sum).reset_index(drop=False)
cols = [x[0] for x in list(dfAsymFlat.columns)[:5]]+[str(x[0])+'_'+str(x[1])+'_'+str(x[2]) for x in list(dfAsymFlat.columns)[5:]]
dfAsymFlat.columns = cols
dfAsymFlat
"""

current sql is select hddsn, pfcode, testcode, mfgid, hddtrial, qualifier, band, phd, lhd, asymmetry,                         row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num from ghl2.ccb_ci_asm where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6400', '6600', '6800') and qualifier in ('1000', '1010', '2000', '2010', '3000', '3010', '4000', '4010', '9000', '9010', 'K000', 'K010', 'B000', 'B010', 'M000', 'M010') and pfcode not in ('9999', '99V6', '99V7') and band < 3 
(1272, 11)


'\ndfAsym = pd.read_pickle(\'df_ccb_ci_asm.pkl\')\ndfAsymFlat = pd.pivot_table(dfAsym.query("band<3 and row_num==1"), \n                           index=[\'hddsn\', \'mfgid\', \'testcode\', \'lhd\', \'phd\'], \n                           columns=[\'qualifier\', \'band\'], \n                           values=[\'asymmetry\'], \n                           aggfunc=np.sum).reset_index(drop=False)\ncols = [x[0] for x in list(dfAsymFlat.columns)[:5]]+[str(x[0])+\'_\'+str(x[1])+\'_\'+str(x[2]) for x in list(dfAsymFlat.columns)[5:]]\ndfAsymFlat.columns = cols\ndfAsymFlat\n'

In [24]:
## RWIP ##

inputParamGhl2Rwip = [snlist_csv, 
                      listTestcode,
                      listEnddt, 
                      listHddTrial,
                      ['6400', '6600', '6800'],
                      ['1000', '1010', '2000', '2010', 
                       '3000', '3010', '4000', '4010', 
                       '9000', '9010', 'K000', 'K010',
                       'B000', 'B010', 'M000', 'M010'
                      ]
                     ]
dfRwip = get_ghl2_table('ccb_ci_rwip', 
                        inputParamGhl2Rwip, 
                       'hddsn, pfcode, testcode, mfgid, hddtrial, qualifier, band, phd, lhd, snrtotal, snrsystem, snrmedia, \
                        row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num',
                        'band < 3'
                       )
"""
dfRwip = pd.read_pickle('df_ccb_ci_rwip.pkl').astype({'snrtotal': 'float64', 'snrsystem': 'float64', 'snrmedia': 'float64'})
dfRwipFlat = pd.pivot_table(dfRwip.query("band<3 and row_num==1"), 
                           index=['hddsn', 'mfgid', 'testcode', 'lhd', 'phd'], 
                           columns=['qualifier', 'band'], 
                           values=['snrtotal', 'snrsystem', 'snrmedia'], 
                           aggfunc=np.sum).reset_index(drop=False)
cols = [x[0] for x in list(dfRwipFlat.columns)[:5]]+[str(x[0])+'_'+str(x[1])+'_'+str(x[2]) for x in list(dfRwipFlat.columns)[5:]]
dfRwipFlat.columns = cols
dfRwipFlat
"""

current sql is select hddsn, pfcode, testcode, mfgid, hddtrial, qualifier, band, phd, lhd, snrtotal, snrsystem, snrmedia,                         row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num from ghl2.ccb_ci_rwip where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6400', '6600', '6800') and qualifier in ('1000', '1010', '2000', '2010', '3000', '3010', '4000', '4010', '9000', '9010', 'K000', 'K010', 'B000', 'B010', 'M000', 'M010') and pfcode not in ('9999', '99V6', '99V7') and band < 3 
(1272, 13)


'\ndfRwip = pd.read_pickle(\'df_ccb_ci_rwip.pkl\').astype({\'snrtotal\': \'float64\', \'snrsystem\': \'float64\', \'snrmedia\': \'float64\'})\ndfRwipFlat = pd.pivot_table(dfRwip.query("band<3 and row_num==1"), \n                           index=[\'hddsn\', \'mfgid\', \'testcode\', \'lhd\', \'phd\'], \n                           columns=[\'qualifier\', \'band\'], \n                           values=[\'snrtotal\', \'snrsystem\', \'snrmedia\'], \n                           aggfunc=np.sum).reset_index(drop=False)\ncols = [x[0] for x in list(dfRwipFlat.columns)[:5]]+[str(x[0])+\'_\'+str(x[1])+\'_\'+str(x[2]) for x in list(dfRwipFlat.columns)[5:]]\ndfRwipFlat.columns = cols\ndfRwipFlat\n'

# AIw

In [1]:
#AIw
inputParamGhl2AIw = [snlist_csv, 
                      listTestcode,
                      listEnddt, 
                      listHddTrial,
                      ['6400'],
                      ['10N0', '11N0', '30N0', '20N0', '40N0']
                     ]
dfAIw = get_ghl2_table('ccb_ci_aiw_summary', 
                        inputParamGhl2AIw, 
                       'hddsn, enddate, pfcode, testcode, mfgid, mfgid_5, hddtrial, qualifier, lhd, phd, readerselection, \
                        cmdexecutiontimeinmilliseconds as time, band, cylinder, meastemp, \
                        writecurrent as iw, kickamplitude as ka, kickduration as kd, bestser, readoffset, enddt, \
                        row_number() over ( partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num'
                       )
dfAIw.loc[:,'mfgid_5'] = dfAIw['mfgid_5'].values.astype('int')
#Gen4
dfAIw.loc[dfAIw['mfgid_5'] < 4,'coef_a'] = -0.013954886
dfAIw.loc[dfAIw['mfgid_5'] < 4,'coef_b'] = 1.128161219
dfAIw.loc[dfAIw['mfgid_5'] < 4,'coef_c'] = 1.973010793
dfAIw.loc[dfAIw['mfgid_5'] < 4,'coef_d'] = -1.407847328
#Gen4i A1
dfAIw.loc[dfAIw['mfgid_5'] >= 4,'coef_a'] = -0.015470889
dfAIw.loc[dfAIw['mfgid_5'] >= 4,'coef_b'] = 1.684161818
dfAIw.loc[dfAIw['mfgid_5'] >= 4,'coef_c'] = 1.335027916
dfAIw.loc[dfAIw['mfgid_5'] >= 4,'coef_d'] = 0.387637386

dfAIw.loc[:,'ipk'] = ((dfAIw.loc[:,'coef_a'].values * dfAIw.loc[:,'ka'].values + dfAIw.loc[:,'coef_b'].values) * dfAIw.loc[:,'iw'].values +
                      (dfAIw.loc[:,'coef_c'].values * dfAIw.loc[:,'ka'].values + dfAIw.loc[:,'coef_d'].values)
                     )
dfAIw.to_pickle('dfAIw.pkl')
dfAIw

NameError: name 'snlist_csv' is not defined

# ATI

In [26]:
#DATI Nmax
inputParamGhl2Dati = [snlist_csv, 
                     listTestcode,
                     listEnddt, 
                     listHddTrial,
                     ['6400', '6600'],
                     ['3000', '9000', '4360', 'L360']
                    ]
dfNmaxTemp = get_ghl2_table('ccb_ci_ati_prim_bybandwrnum', 
                            inputParamGhl2Dati, 
                            'hddsn, mfgid, hddtrial, testcode, qualifier, procid, lhd, phd, band, nmax50byband, \
                             row_number() over (partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num')
dfNmaxTemp = pd.read_pickle('df_ccb_ci_ati_prim_bybandwrnum.pkl')
dfNmaxTemp.loc[:,'nmax50byband'] = dfNmaxTemp.loc[:,'nmax50byband'].values.astype(np.int64)
dfNmaxTemp.to_pickle('dfNmaxTemp.pkl')
dfNmaxTemp

current sql is select hddsn, mfgid, hddtrial, testcode, qualifier, procid, lhd, phd, band, nmax50byband,                              row_number() over (partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num from ghl2.ccb_ci_ati_prim_bybandwrnum where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6400', '6600') and qualifier in ('3000', '9000', '4360', 'L360') and pfcode not in ('9999', '99V6', '99V7') 
(3328, 11)


Unnamed: 0,hddsn,mfgid,hddtrial,testcode,qualifier,procid,lhd,phd,band,nmax50byband,row_num
0,2FA00WRA,KJ1C03,Z07E,PDQX083M,3000,6400,0,9,56,304,1
1,2FA00WRA,KJ1C03,Z07E,PDQX083M,9000,6600,0,9,1,161,1
2,2FA02DNA,KJ1C03,Z07E,PDQX083M,3000,6400,0,9,32,377,1
3,2FA06GNA,KJ1C03,NPIF,PDQX083M,3000,6400,0,9,44,453,1
4,2FA06H1A,KJ1C03,NPIF,PDQX083M,3000,6400,0,9,38,407,1
...,...,...,...,...,...,...,...,...,...,...,...
3323,2FA06H4A,KJ1C03,NPIF,PDQX083M,9000,6600,0,9,37,373,1
3324,2FA00WRA,KJ1C03,Z07E,PDQX083M,3000,6400,0,9,28,481,1
3325,2FA00WRA,KJ1C03,Z07E,PDQX083M,9000,6600,1,10,43,247,1
3326,2FA01E4A,KJ1C03,Z07E,PDQX083M,9000,6600,0,9,41,203,1


In [27]:
#DATI RV
inputParamGhl2AtiRv = [snlist_csv, 
                       listTestcode,
                       listEnddt, 
                       listHddTrial,
                       ['6400', '6600'],
                       ['35N0', '3000', '95H0', '9000', '4360', 'L360']
                      ]

dfAtiRv = get_ghl2_table('ccb_ci_dati_theta', 
                         inputParamGhl2AtiRv, 
                         'hddsn, enddate, pfcode, testcode, testcodec, mfgid, hddtrial, cmdname, qualifier, lhd, phd, subqualifier, readerselection, tempzone, zone, \
                          maxrvsumheadalltemp, avgrvsumheadalltemp, avgrvsumdrivealltemp, avgrvsumzonealltemp, rvsum, spaidentifier, rvsumminushalf4smr, rvsumplushalf4smr, \
                          worstavgrvsumzonewithmaxnlxmulti, worstzonewithmaxnlxmulti, worstavgrvsumzonewithminnlxmulti, worstzonewithminnlxmulti, worstavgrvsumzonewithavenlxmulti, \
                          worstzonewithavenlxmulti, product, procid, enddt, \
                          row_number() over ( partition by hddsn, testcode, qualifier, lhd, tempzone, zone order by enddate desc) as row_num')
#dfAtiRv = pd.read_pickle('df_ccb_ci_dati_theta.pkl')
dfAtiRv

current sql is select hddsn, enddate, pfcode, testcode, testcodec, mfgid, hddtrial, cmdname, qualifier, lhd, phd, subqualifier, readerselection, tempzone, zone,                           maxrvsumheadalltemp, avgrvsumheadalltemp, avgrvsumdrivealltemp, avgrvsumzonealltemp, rvsum, spaidentifier, rvsumminushalf4smr, rvsumplushalf4smr,                           worstavgrvsumzonewithmaxnlxmulti, worstzonewithmaxnlxmulti, worstavgrvsumzonewithminnlxmulti, worstzonewithminnlxmulti, worstavgrvsumzonewithavenlxmulti,                           worstzonewithavenlxmulti, product, procid, enddt,                           row_number() over ( partition by hddsn, testcode, qualifier, lhd, tempzone, zone order by enddate desc) as row_num from ghl2.ccb_ci_dati_theta where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2F

Unnamed: 0,hddsn,enddate,pfcode,testcode,testcodec,mfgid,hddtrial,cmdname,qualifier,lhd,...,worstavgrvsumzonewithmaxnlxmulti,worstzonewithmaxnlxmulti,worstavgrvsumzonewithminnlxmulti,worstzonewithminnlxmulti,worstavgrvsumzonewithavenlxmulti,worstzonewithavenlxmulti,product,procid,enddt,row_num
0,2FA01EHA,20201223225255,0000,PDQX083M,PDQX083,KJ1C03,Z07E,CAti,3000,0,...,0,0,0,0,0,0,pdq,6400,20201223,1
1,2FA02DNA,20201223224858,0000,PDQX083M,PDQX083,KJ1C03,Z07E,CAti,35N0,0,...,0,255,0,255,0,255,pdq,6400,20201223,1
2,2FA02EUA,20201223231019,0000,PDQX083M,PDQX083,KJ1C03,Z07E,CAti,3000,0,...,0,0,0,0,0,0,pdq,6400,20201223,1
3,2FA06G3A,20201229143527,0000,PDQX083M,PDQX083,KJ1C03,NPIF,CAti,3000,1,...,0,0,0,0,0,0,pdq,6400,20201229,1
4,2FA06G3A,20201231175539,0000,PDQX083M,PDQX083,KJ1C03,NPIF,CAti,9000,1,...,0,0,0,0,0,0,pdq,6600,20201231,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14347,2FA06G3A,20201229143527,0000,PDQX083M,PDQX083,KJ1C03,NPIF,CAti,3000,1,...,0,0,0,0,0,0,pdq,6400,20201229,1
14348,2FA06G3A,20201229143527,0000,PDQX083M,PDQX083,KJ1C03,NPIF,CAti,35N0,1,...,0,255,180,0,0,255,pdq,6400,20201229,1
14349,2FA06G5A,20201231175333,0000,PDQX083M,PDQX083,KJ1C03,NPIF,CAti,9000,0,...,0,0,0,0,0,0,pdq,6600,20201231,1
14350,2FA06G5A,20201231175333,0000,PDQX083M,PDQX083,KJ1C03,NPIF,CAti,9000,0,...,0,0,0,0,0,0,pdq,6600,20201231,1


In [14]:
#JINX RV
inputParamGhl2AtiRv = [snlist_csv, 
                       listTestcode,
                       listEnddt, 
                       listHddTrial,
                       ['6600'],
                       ['9050']
                      ]

dfJinxRv = get_ghl2_table('ccb_ci_ati_mcsb_ns', 
                         inputParamGhl2AtiRv, 
                         'hddsn, enddate, pfcode, testcode, testcodec, mfgid, hddtrial, cmdname, qualifier, lhd, phd, maxrvcheck4nonlinearxcor_0, maxrvcheck4nonlinearxcor_1, \
                          row_number() over ( partition by hddsn, testcode, qualifier, lhd order by enddate desc) as row_num')
dfJinxRv

current sql is select hddsn, enddate, pfcode, testcode, testcodec, mfgid, hddtrial, cmdname, qualifier, lhd, phd, maxrvcheck4nonlinearxcor_0, maxrvcheck4nonlinearxcor_1,                           row_number() over ( partition by hddsn, testcode, qualifier, lhd order by enddate desc) as row_num from ghl2.ccb_ci_ati_mcsb_ns where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6600') and qualifier in ('9050') and pfcode not in ('9999', '99V6', '99V7') 
(26, 14)


Unnamed: 0,hddsn,enddate,pfcode,testcode,testcodec,mfgid,hddtrial,cmdname,qualifier,lhd,phd,maxrvcheck4nonlinearxcor_0,maxrvcheck4nonlinearxcor_1,row_num
0,2FA06H1A,20201231173842,0,PDQX083M,PDQX083,KJ1C03,NPIF,CAti,9050,1,10,2078,15000,1
1,2FA02DNA,20201226005006,0,PDQX083M,PDQX083,KJ1C03,Z07E,CAti,9050,0,9,1724,15000,1
2,2FA06H1A,20201231173842,0,PDQX083M,PDQX083,KJ1C03,NPIF,CAti,9050,0,9,1658,15000,1
3,2FA06G5A,20201231175333,0,PDQX083M,PDQX083,KJ1C03,NPIF,CAti,9050,0,9,2144,15000,1
4,2FA06G3A,20201231175539,0,PDQX083M,PDQX083,KJ1C03,NPIF,CAti,9050,1,10,1652,15000,1
5,2FA06H4A,20201231161928,0,PDQX083M,PDQX083,KJ1C03,NPIF,CAti,9050,1,10,1272,15000,1
6,2FA06GNA,20201231170633,0,PDQX083M,PDQX083,KJ1C03,NPIF,CAti,9050,0,9,1719,15000,1
7,2FA01E4A,20201226001819,0,PDQX083M,PDQX083,KJ1C03,Z07E,CAti,9050,1,10,1674,15000,1
8,2FA02DNA,20201226005006,0,PDQX083M,PDQX083,KJ1C03,Z07E,CAti,9050,1,10,2740,15000,1
9,2FA01E4A,20201226001819,0,PDQX083M,PDQX083,KJ1C03,Z07E,CAti,9050,0,9,2388,15000,1


# ASB (SMR)

In [28]:
#CASB
inputParamGhl2Asb = [snlist_csv, 
                     listTestcode,
                     listEnddt, 
                     listHddTrial,
                     ['6400'],
                     ['2300']
                    ]
dfAsb = get_ghl2_table('ccb_ci_casb', inputParamGhl2Asb, 
                       'hddsn, lhd, phd, mfgid, testcode, maxbandod2id, \
                        row_number() over (partition by hddsn, testcode, lhd, band order by enddate desc) as row_num', 
                       'band=0')
dfAsb = pd.read_pickle('df_ccb_ci_casb.pkl').query("row_num==1")
dfAsb

current sql is select hddsn, lhd, phd, mfgid, testcode, maxbandod2id,                         row_number() over (partition by hddsn, testcode, lhd, band order by enddate desc) as row_num from ghl2.ccb_ci_casb where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6400') and qualifier in ('2300') and pfcode not in ('9999', '99V6', '99V7') and band=0 
(28, 7)


Unnamed: 0,hddsn,lhd,phd,mfgid,testcode,maxbandod2id,row_num
0,2FA02E8A,1,10,KJ1C03,PDQX083M,23,1
1,2FA06GNA,1,10,KJ1C03,PDQX083M,31,1
2,2FA02EAA,1,10,KJ1C03,PDQX083M,31,1
3,2FA06G5A,1,10,KJ1C03,PDQX083M,27,1
4,2FA01E4A,1,10,KJ1C03,PDQX083M,34,1
5,2FA06G3A,0,9,KJ1C03,PDQX083M,47,1
6,2FA00WRA,0,9,KJ1C03,PDQX083M,32,1
7,2FA02EUA,0,9,KJ1C03,PDQX083M,37,1
8,2FA02DNA,0,9,KJ1C03,PDQX083M,51,1
9,2FA02DNA,1,10,KJ1C03,PDQX083M,21,1


# UFO

In [29]:
# UFO
inputParamGhl2Ufo = [snlist_csv, 
                     listTestcode,
                     listEnddt, 
                     listHddTrial,
                     ['6400'],
                     ['1000', '2000']
                    ]
dfUfo = get_ghl2_table('ccb_ci_ufo', inputParamGhl2Ufo, 
                       'hddsnenddate, hddsn, enddate, drivemodel, pfcode, testcode, mfgid, hddtrial, aetype, hdctype, cmdname, \
                       qualifier, readerselection, cmdexecutiontimeinmilliseconds, mcsbarraymaxnumheads, mcsbarraymaxnumbands, \
                       mcsbarrayodmdidbands, logicalheadtestcompletemap, physicalheadfailcriteriamap, lastlogicalheadtested, lastbandtested, \
                       lasttracktested, lastsubcmdtested, finalusedservotrack, finalstrokepctx100, finalstrokepercent, finaldefectupliftpctx100, \
                       finalmediacacheupliftpctx100, finaltrackskewadjustment, finaltpirelaxationpercentx100, finaltpirelaxationpercent, \
                       finalwaterfallselection, finalblocksizemap, finalfitmetricvalue, finalmaxlogicalheadnum, finalmodelnumber, finalphysicalheadmap, \
                       finalsustainedmbytepersec, totaldrivelbarequired, finaldrivetotallba, calcsteps, accdrivetotallba, accdrivetotallbanoati, \
                       maxchanfreqmhz, accdrivetotallbaformat, finalseqperfvarpctx100, accdefaultupliftcalcreqcap, product, procid, enddt, \
                       row_number() over (partition by hddsn, testcode, qualifier order by enddate desc) as row_num')
#dfUfo.loc[dfUfo.qualifier.str.startswith('1'), 'targetLba'] = tgtLbaCmr
#dfUfo.loc[dfUfo.qualifier.str.startswith('2'), 'targetLba'] = tgtLbaSmr
#dfUfo.loc[:,'iacc_pct'] = dfUfo.loc[:,'iacc_pct'].values / tgtLbaCmr
dfUfo = pd.read_pickle('df_ccb_ci_ufo.pkl')

current sql is select hddsnenddate, hddsn, enddate, drivemodel, pfcode, testcode, mfgid, hddtrial, aetype, hdctype, cmdname,                        qualifier, readerselection, cmdexecutiontimeinmilliseconds, mcsbarraymaxnumheads, mcsbarraymaxnumbands,                        mcsbarrayodmdidbands, logicalheadtestcompletemap, physicalheadfailcriteriamap, lastlogicalheadtested, lastbandtested,                        lasttracktested, lastsubcmdtested, finalusedservotrack, finalstrokepctx100, finalstrokepercent, finaldefectupliftpctx100,                        finalmediacacheupliftpctx100, finaltrackskewadjustment, finaltpirelaxationpercentx100, finaltpirelaxationpercent,                        finalwaterfallselection, finalblocksizemap, finalfitmetricvalue, finalmaxlogicalheadnum, finalmodelnumber, finalphysicalheadmap,                        finalsustainedmbytepersec, totaldrivelbarequired, finaldrivetotallba, calcsteps, accdrivetotallba, accdrivetotallbanoati,                        maxch

In [30]:
# UFO hd array
inputParamGhl2UfoHdArry = [snlist_csv, 
                           listTestcode,
                           listEnddt, 
                           listHddTrial,
                           ['6400'],
                           ['1000', '2000']
                    ]
dfUfoHdArry = get_ghl2_table('ccb_ci_ufo_hd_array', inputParamGhl2UfoHdArry, 
                             'hddsn, enddate, pfcode, testcode, mfgid, hddtrial, qualifier, lhd, readerselection, \
                             finalheadtotallba, accheadtotallba, accmetricvalue, accheadtotallbanoati, \
                             accheadtotallbaformat, \
                             row_number() over (partition by hddsn, lhd, testcode, qualifier order by enddate desc) as row_num',
                            )
dfUfoHdArry.loc[dfUfoHdArry.qualifier.str.startswith('1'), 'targetLba'] = tgtLbaCmr
dfUfoHdArry.loc[dfUfoHdArry.qualifier.str.startswith('2'), 'targetLba'] = tgtLbaSmr
dfUfoHdArry.loc[:,'iacc_pct'] = 100*dfUfoHdArry.loc[:,'accheadtotallbanoati'].values  / dfUfoHdArry.loc[:, 'targetLba'].values
dfUfoHdArry.loc[:,'pacc_pct'] = 100*dfUfoHdArry.loc[:,'accheadtotallbaformat'].values / dfUfoHdArry.loc[:, 'targetLba'].values
dfUfoHdArry.to_pickle('dfUfoHdArry.pkl')
dfUfoHdArry

current sql is select hddsn, enddate, pfcode, testcode, mfgid, hddtrial, qualifier, lhd, readerselection,                              finalheadtotallba, accheadtotallba, accmetricvalue, accheadtotallbanoati,                              accheadtotallbaformat,                              row_number() over (partition by hddsn, lhd, testcode, qualifier order by enddate desc) as row_num from ghl2.ccb_ci_ufo_hd_array where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6400') and qualifier in ('1000', '2000') and pfcode not in ('9999', '99V6', '99V7') 
(56, 15)


Unnamed: 0,hddsn,enddate,pfcode,testcode,mfgid,hddtrial,qualifier,lhd,readerselection,finalheadtotallba,accheadtotallba,accmetricvalue,accheadtotallbanoati,accheadtotallbaformat,row_num,targetLba,iacc_pct,pacc_pct
0,2FA01E4A,20201223225806,0000,PDQX083M,KJ1C03,Z07E,2000,0,3951,267858273,,,278491976,278491976,1,333662709.0,83.465119,83.465119
1,2FA06EXA,20201229141813,0000,PDQX083M,KJ1C03,NPIF,1000,1,4079,242078443,,,251773400,251649405,1,273906721.0,91.919395,91.874126
2,2FA02DKA,20201223104205,6SAT,PDQX083M,KJ1C03,Z07E,2000,0,3951,275461728,,,286158107,286158107,1,333662709.0,85.762688,85.762688
3,2FA02DNA,20201223224858,0000,PDQX083M,KJ1C03,Z07E,1000,0,3951,240051867,,,250321994,248961665,1,273906721.0,91.389504,90.892865
4,2FA02EAA,20201223223927,0000,PDQX083M,KJ1C03,Z07E,2000,0,3951,279880003,,,290631492,290631492,1,333662709.0,87.103378,87.103378
5,2FA00WRA,20201223220407,0000,PDQX083M,KJ1C03,Z07E,2000,1,3951,278558564,,,288904727,288904727,1,333662709.0,86.58586,86.58586
6,2FA00WRA,20201223220407,0000,PDQX083M,KJ1C03,Z07E,1000,0,4079,245470719,,,254384313,252810374,1,273906721.0,92.872607,92.297981
7,2FA02E8A,20201223223939,0000,PDQX083M,KJ1C03,Z07E,1000,1,3951,233789486,,,246438788,241675766,1,273906721.0,89.971793,88.232872
8,2FA01EHA,20201223225255,0000,PDQX083M,KJ1C03,Z07E,2000,0,3951,279827033,,,288744137,288744137,1,333662709.0,86.537731,86.537731
9,2FA01E4A,20201223225806,0000,PDQX083M,KJ1C03,Z07E,1000,1,4079,234932816,,,245784406,242022474,1,273906721.0,89.732886,88.359451


In [30]:
pd.pivot_table(dfUfoHdArry.query("qualifier=='1000'"), index=['hddsn', 'mfgid', 'lhd'], columns=['testcode'], values=['pacc_pct'], 
               aggfunc=np.mean)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pacc_pct
Unnamed: 0_level_1,Unnamed: 1_level_1,testcode,PDQX083M
hddsn,mfgid,lhd,Unnamed: 3_level_2
2FA01DHA,KJ1C01,0,83.170553
2FA01DHA,KJ1C01,1,84.05975
2FA01E1A,KJ1C01,0,82.908505
2FA01E1A,KJ1C01,1,84.368206
2FA01E9A,KJ1C01,0,83.037982
2FA01E9A,KJ1C01,1,84.274434
2FA02EGA,KJ1C01,0,90.242004
2FA02EGA,KJ1C01,1,92.146839
2FA02ELA,KJ1C01,0,90.461266
2FA02ELA,KJ1C01,1,88.53991


In [31]:
# UFO SER, TPC
inputParamGhl2UfoSubCmd1 = [snlist_csv, 
                            listTestcode,
                            listEnddt, 
                            listHddTrial,
                            ['6400'],
                            ['1000', '2000']
                           ]
dfUfoSubCmd1 = get_ghl2_table('ccb_ci_ufo_scmd1', inputParamGhl2UfoSubCmd1, 
                              '* ')
dfUfoSubCmd1 = pd.read_pickle('df_ccb_ci_ufo_scmd1.pkl')

current sql is select *  from ghl2.ccb_ci_ufo_scmd1 where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6400') and qualifier in ('1000', '2000') and pfcode not in ('9999', '99V6', '99V7') 
(7699, 62)


In [32]:
# UFO Sqz Margin, otrc, track pitch
inputParamGhl2UfoSubCmd7 = [snlist_csv, 
                            listTestcode,
                            listEnddt, 
                            listHddTrial,
                            ['6400', '6600'],
                            ['3000', '9000', '4000', 'K000']
                           ]
dfUfoSubCmd7 = get_ghl2_table('ccb_ci_ufo_scmd7', 
                              inputParamGhl2UfoSubCmd7, 
                              'hddsn, pfcode, testcode, mfgid, hddtrial, qualifier, lhd, test_band, tpmtpcdbl, tpmtpcod, tpmtpcid, otrc, finkfci, finktpi, defaultrate, procid, enddt, \
                               row_number() over (partition by hddsn, testcode, qualifier, lhd, test_band order by enddate desc) as row_num')
#dfUfoSubCmd7 = pd.read_pickle('df_ccb_ci_ufo_scmd7.pkl')

dfUfoSubCmd7.query("qualifier in ['3000', '9000', '4000', 'K000']", inplace=True)
dfUfoSubCmd7.loc[:,'tp_nm']  = (dfUfoSubCmd7.loc[:,'defaultrate'].values / 4096.0 * 25.4/stpi * 1e3).astype(np.float64)
if dfUfoSubCmd7.query("qualifier in ['3000', '9000']").shape[0] > 0:
    dfUfoSubCmd7.loc[(dfUfoSubCmd7['qualifier']=='3000') | (dfUfoSubCmd7['qualifier']=='9000'),'dtpi'] = dtpi_cmr
if dfUfoSubCmd7.query("qualifier in ['4000', 'K000']").shape[0] > 0:    
    dfUfoSubCmd7.loc[(dfUfoSubCmd7['qualifier']=='4000') | (dfUfoSubCmd7['qualifier']=='K000'),'dtpi'] = dtpi_smr

dfUfoSubCmd7.loc[:,'otrc'] = (dfUfoSubCmd7.loc[:,'otrc'].values * 25.4/dfUfoSubCmd7.loc[:,'dtpi'].values).astype(np.float64)     
dfUfoSubCmd7.loc[:,'tpf_dbl'] = (dfUfoSubCmd7.loc[:,'tpmtpcdbl'].values / 10000.0 * 25.4/dfUfoSubCmd7.loc[:,'dtpi'].values * 1e3).astype(np.float64)
dfUfoSubCmd7.loc[:,'tpf_od'] = (dfUfoSubCmd7.loc[:,'tpmtpcod'].values / 10000.0 * 25.4/dfUfoSubCmd7.loc[:,'dtpi'].values * 1e3).astype(np.float64)
dfUfoSubCmd7.loc[:,'tpf_id'] = (dfUfoSubCmd7.loc[:,'tpmtpcid'].values / 10000.0 * 25.4/dfUfoSubCmd7.loc[:,'dtpi'].values * 1e3).astype(np.float64)

#valid calc for only SMR
dfUfoSubCmd7.loc[:,'tpf_smr'] = (dfUfoSubCmd7.loc[:,'tpf_od'].values + dfUfoSubCmd7.loc[:,'tpf_id']).astype(np.float64)
dfUfoSubCmd7.loc[:,'sqm_smr'] = (dfUfoSubCmd7.loc[:,'tp_nm'].values - dfUfoSubCmd7.loc[:,'tpf_smr'].values).astype(np.float64)                            

#valid calc for only CMR
dfUfoSubCmd7.loc[:,'sqm_dbl'] = (dfUfoSubCmd7.loc[:,'tp_nm'].values - dfUfoSubCmd7.loc[:,'tpf_dbl'].values).astype(np.float64)
dfUfoSubCmd7.loc[:,'sqm_od']  = (dfUfoSubCmd7.loc[:,'tp_nm'].values - dfUfoSubCmd7.loc[:,'tpf_od'].values).astype(np.float64)
dfUfoSubCmd7.loc[:,'sqm_id']  = (dfUfoSubCmd7.loc[:,'tp_nm'].values - dfUfoSubCmd7.loc[:,'tpf_id'].values).astype(np.float64)

dfUfoSubCmd7.to_pickle('dfUfoSubCmd7.pkl')

"""
dfSqmFlat = pd.pivot_table(dfUfoSubCmd7.query("row_num==1"), 
                               index=['hddsn', 'mfgid', 'hddtrial', 'testcode', 'lhd'], 
                               columns=['qualifier', 'test_band'], 
                               values=['tp_nm', 'tpf_dbl', 'tpf_od', 'tpf_id', 'tpf_smr', 'sqm_dbl', 'sqm_od', 'sqm_id', 'sqm_smr', 'otrc', 'finkfci', 'finktpi'], 
                               aggfunc=np.sum).reset_index(drop=False)
        
cols = [x[0] for x in list(dfSqmFlat.columns)[:5]]+[str(x[0])+'_'+str(x[1])+'_'+str(x[2]) for x in list(dfSqmFlat.columns)[5:]]
dfSqmFlat.columns = cols
del dfUfoSubCmd7, cols
print('dfSqmFlat')
"""
dfUfoSubCmd7

current sql is select hddsn, pfcode, testcode, mfgid, hddtrial, qualifier, lhd, test_band, tpmtpcdbl, tpmtpcod, tpmtpcid, otrc, finkfci, finktpi, defaultrate, procid, enddt,                                row_number() over (partition by hddsn, testcode, qualifier, lhd, test_band order by enddate desc) as row_num from ghl2.ccb_ci_ufo_scmd7 where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6400', '6600') and qualifier in ('3000', '9000', '4000', 'K000') and pfcode not in ('9999', '99V6', '99V7') 
(6656, 18)


Unnamed: 0,hddsn,pfcode,testcode,mfgid,hddtrial,qualifier,lhd,test_band,tpmtpcdbl,tpmtpcod,...,tp_nm,dtpi,tpf_dbl,tpf_od,tpf_id,tpf_smr,sqm_smr,sqm_dbl,sqm_od,sqm_id
0,2FA02EUA,0000,PDQX083M,KJ1C03,Z07E,9000,0,16,10103,8007,...,57.435682,500.0,51.32324,40.675560,37.891720,78.567280,-21.131598,6.112442,16.760122,19.543962
1,2FA00WRA,0000,PDQX083M,KJ1C03,Z07E,3000,1,18,8950,7342,...,52.731344,500.0,45.46600,37.297360,34.985960,72.283320,-19.551976,7.265344,15.433984,17.745384
2,2FA06EXA,0000,PDQX083M,KJ1C03,NPIF,K000,1,60,0,7380,...,42.082435,602.0,0.00000,31.138206,0.000000,31.138206,10.944229,42.082435,10.944229,42.082435
3,2FA01E4A,0000,PDQX083M,KJ1C03,Z07E,3000,1,56,10456,7636,...,59.873378,500.0,53.11648,38.790880,40.502840,79.293720,-19.420342,6.756898,21.082498,19.370538
4,2FA01EHA,0000,PDQX083M,KJ1C03,Z07E,3000,0,54,9951,7426,...,57.606748,500.0,50.55108,37.724080,39.263320,76.987400,-19.380652,7.055668,19.882668,18.343428
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6651,2FA06G5A,0000,PDQX083M,KJ1C03,NPIF,4000,0,16,0,0,...,43.579270,602.0,0.00000,0.000000,32.973588,32.973588,10.605682,43.579270,43.579270,10.605682
6652,2FA06H4A,0000,PDQX083M,KJ1C03,NPIF,3000,0,35,9354,7434,...,54.442012,500.0,47.51832,37.764720,37.465000,75.229720,-20.787708,6.923692,16.677292,16.977012
6653,2FA01E4A,0000,PDQX083M,KJ1C03,Z07E,4000,0,5,0,0,...,42.082435,602.0,0.00000,0.000000,30.707841,30.707841,11.374595,42.082435,42.082435,11.374595
6654,2FA06G5A,0000,PDQX083M,KJ1C03,NPIF,9000,1,11,8891,7510,...,52.047077,500.0,45.16628,38.150800,36.398200,74.549000,-22.501923,6.880797,13.896277,15.648877


In [33]:
# UFO ADC, BPI, TPI
inputParamGhl2UfoHdBandArray = [snlist_csv, 
                                listTestcode,
                                listEnddt, 
                                listHddTrial,
                                ['6400'],
                                ['1000', '2000']
                               ]
dfUfoHdBandArray = get_ghl2_table('ccb_ci_ufo_hd_band_array', inputParamGhl2UfoHdBandArray, 
                                  'hddsn, pfcode, testcode, mfgid, hddtrial, qualifier, lhd, band, adc, adckbpi, adcktpi, \
                                   row_number() over (partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num')
dfUfoHdBandArray = pd.read_pickle('df_ccb_ci_ufo_hd_band_array.pkl')
dfUfoHdBandArray.loc[:,'adc']     = dfUfoHdBandArray.loc[:,'adc'].values.astype(np.float64)
dfUfoHdBandArray.loc[:,'adckbpi'] = dfUfoHdBandArray.loc[:,'adckbpi'].values.astype(np.float64)
dfUfoHdBandArray.loc[:,'adcktpi'] = dfUfoHdBandArray.loc[:,'adcktpi'].values.astype(np.float64)
dfUfoHdBandArray.to_pickle('dfUfoHdBandArray.pkl')

"""
dfAdcFlat = pd.pivot_table(dfUfoHdBandArray.query("band<7 and row_num==1"), 
                          index=['hddsn', 'mfgid', 'hddtrial', 'testcode', 'lhd'], 
                          columns=['qualifier', 'band'], 
                          values=['adc', 'adckbpi', 'adcktpi'], 
                          aggfunc=np.sum).reset_index(drop=False)
cols = [x[0] for x in list(dfAdcFlat.columns)[:5]]+[str(x[0])+'_'+str(x[1])+'_'+str(x[2]) for x in list(dfAdcFlat.columns)[5:]]
dfAdcFlat.columns = cols
del dfUfoHdBandArray, cols
print('dfAdcFlat')
"""
dfUfoHdBandArray

current sql is select hddsn, pfcode, testcode, mfgid, hddtrial, qualifier, lhd, band, adc, adckbpi, adcktpi,                                    row_number() over (partition by hddsn, testcode, qualifier, lhd, band order by enddate desc) as row_num from ghl2.ccb_ci_ufo_hd_band_array where product='pdq' and testcode in ('PDQX083M') and enddt between '20201223' and '20210101' and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6400') and qualifier in ('1000', '2000') and pfcode not in ('9999', '99V6', '99V7') 
(349, 12)


Unnamed: 0,hddsn,pfcode,testcode,mfgid,hddtrial,qualifier,lhd,band,adc,adckbpi,adcktpi,row_num
0,2FA06EXA,0000,PDQX083M,KJ1C03,NPIF,1000,0,6,1034.0,2155.0,480.0,1
1,2FA06G3A,0000,PDQX083M,KJ1C03,NPIF,1000,0,3,978.0,2100.0,466.0,1
2,2FA01EHA,0000,PDQX083M,KJ1C03,Z07E,1000,1,6,1055.0,2582.0,408.0,1
3,2FA01EHA,0000,PDQX083M,KJ1C03,Z07E,1000,1,5,1041.0,2494.0,417.0,1
4,2FA01E4A,0000,PDQX083M,KJ1C03,Z07E,1000,1,3,924.0,2365.0,391.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
344,2FA06H1A,0000,PDQX083M,KJ1C03,NPIF,2000,0,3,1028.0,2027.0,507.0,1
345,2FA06G3A,0000,PDQX083M,KJ1C03,NPIF,1000,0,0,849.0,1834.0,463.0,1
346,2FA06GNA,0000,PDQX083M,KJ1C03,NPIF,1000,1,4,1043.0,2275.0,458.0,1
347,2FA02EUA,0000,PDQX083M,KJ1C03,Z07E,2000,0,3,1144.0,2184.0,524.0,1


# RWDC

In [81]:
#rwdc
inputParamGhl2Dati = [snlist_csv, 
                     listTestcode,
                     listEnddt, 
                     listHddTrial,
                     ['6600'],
                     ['60H0', 'G0H0']
                    ]
dfRwdc = get_ghl2_table('ccb_ci_rwdc', inputParamGhl2Dati, 
                        'hddsn, lhd, qualifier, phd, testcode, mfgid, hddtrial, band, rwdcmin, rwdcmax, \
                         row_number() over (partition by hddsn, qualifier, lhd, band order by enddate desc) as row_num')
dfRwdc = pd.read_pickle('df_ccb_ci_rwdc.pkl')
#dfRwdc.loc[:,['hddsn', 'lhd', 'phd', 'testcode', 'mfgid', 'band', 'rwdcmin', 'rwdcmax']]
dfRwdcFlat = pd.pivot_table(dfRwdc.query("row_num==1"),
                            index=['hddsn', 'mfgid', 'hddtrial', 'testcode', 'lhd'], 
                            columns=['qualifier', 'band'], 
                            values=['rwdcmin', 'rwdcmax'], 
                            aggfunc=np.sum).reset_index(drop=False)
cols = [x[0] for x in list(dfRwdcFlat.columns)[:5]]+[str(x[0])+'_'+str(x[1])+'_'+str(x[2]) for x in list(dfRwdcFlat.columns)[5:]]
dfRwdcFlat.columns = cols
dfRwdcFlat.to_pickle('dfRwdcFlat.pkl')

current sql is select hddsn, lhd, qualifier, phd, testcode, mfgid, hddtrial, band, rwdcmin, rwdcmax,                          row_number() over (partition by hddsn, qualifier, lhd, band order by enddate desc) as row_num from ghl2.ccb_ci_rwdc where product='pdq' and testcode in ('PDQX054M') and enddt between '20200829' and '20200902' and hddsn in ('2FA03AVA', '2FA014WA', '2FA01ELA', '2FA01DVA', '2FA01E9A', '2FA03DLA', '2FA00EHA', '2FA03D9A', '2FA01E4A', '2FA01DWA', '2FA01E0A', '2FA03BYA', '2FA03AXA', '2FA012GA', '2FA01DXA', '2FA01E3A', '2FA013UA', '2FA012RA', '2FA01EEA', '2FA03D2A', '2FA014RA', '2FA014VA', '2FA01E1A', '2FA01EGA', '2FA01E8A', '2FA00U9A') and procid in ('6600') and qualifier in ('60H0', 'G0H0') and pfcode not in ('9999', '99V6', '99V7')
(3200, 11)


In [85]:
# Merge all tables

dfParamAll = dfHeadMap.copy(deep = True)
for df in [dfRsbnFlat, dfSerFlat, dfSerOffsetFlat, 
           dfOwFlat, dfMcwFlat, dfMrwFlat, dfAmpFlat, 
           dfAsymFlat, dfRwipFlat, dfSatiSerFlat, dfNmaxTempFlat, dfAsb]:
    dfParamAll = dfParamAll.merge(df,
                             left_on  = ['hddsn', 'lhd', 'phd', 'mfgid', 'testcode'],
                             right_on = ['hddsn', 'lhd', 'phd', 'mfgid', 'testcode'],
                             how='left',
                            )
    print(dfParamAll.shape)
    
for df in [dfSqmFlat, dfAdcFlat]:
    dfParamAll = dfParamAll.merge(df,
                             left_on  = ['hddsn', 'lhd', 'mfgid', 'testcode'],
                             right_on = ['hddsn', 'lhd', 'mfgid', 'testcode'],
                             how='left',
                            )
    print(dfParamAll.shape)

dfParamAll.to_pickle('dfParamAll.pkl')
dfParamAll.to_csv('dfParamAll.csv')

(52, 263)
(52, 1415)
(52, 2567)
(52, 2615)
(52, 2635)
(52, 2659)
(52, 2803)
(52, 2839)
(52, 2875)
(52, 3308)
(52, 3437)
(52, 3439)
(52, 6512)
(52, 6555)


In [86]:
pd.pivot_table(dfParamAll, index=['testcode', 'mfgid'], columns='lhd', values='hddsn', aggfunc='count')

Unnamed: 0_level_0,lhd,0,1
testcode,mfgid,Unnamed: 2_level_1,Unnamed: 3_level_1
PDQX054M,KJ1Y01,7,7
PDQX054M,KJ1Y03,6,6
PDQX054M,KJBY01,13,13


# DET Data

In [15]:
# Association

inputParamVqaa = [snlist_csv, 
                  listTestcode,
                  listEnddt, 
                  listHddTrial,
                  ['6400']
                 ]
get_vqaa_table ('fact_hdd_headops', inputParamVqaa, 'product, slidersn, pheadno, hddsn, procid, mfgid, hddtrial, enddate, \
                 row_number() over ( partition by slidersn order by enddate desc) as row_num', whereAdded = None)

current sql is select product, slidersn, pheadno, hddsn, procid, mfgid, hddtrial, enddate,                  row_number() over ( partition by slidersn order by enddate desc) as row_num from vqaa.fact_hdd_headops where product='pdq' and testpgmver in ('PDQX083M') and enddt between '20201223' and '20210101' and hddtrial in ('Z07E', 'NPIF') and hddsn in ('2FA00WRA', '2FA02E8A', '2FA06EXA', '2FA06GNA', '2FA06H4A', '2FA06H1A', '2FA06G5A', '2FA06G3A', '2FA02DKA', '2FA01E4A', '2FA02EUA', '2FA02EAA', '2FA02DNA', '2FA01EHA') and procid in ('6400') 
(306, 9)


Unnamed: 0,product,slidersn,pheadno,hddsn,procid,mfgid,hddtrial,enddate,row_num
0,pdq,49FB51932F,7,2FA02DKA,6400,KJ1C03,Z07E,20201223104205,1
1,pdq,4AF083B607,4,2FA06G3A,6400,KJ1C03,NPIF,20201229143527,1
2,pdq,49FBF2D223,12,2FA02DKA,6400,KJ1C03,Z07E,20201223104205,1
3,pdq,49FBF1B432,0,2FA02DKA,6400,KJ1C03,Z07E,20201223104205,1
4,pdq,49F8D1182C,10,2FA06H4A,6400,KJ1C03,NPIF,20201229142426,1
...,...,...,...,...,...,...,...,...,...
301,pdq,49F491C709,7,2FA01EHA,6400,KJ1C03,Z07E,20201223225255,1
302,pdq,49F6A28B38,7,2FA02DNA,6400,KJ1C03,Z07E,20201223224858,1
303,pdq,4AFF60B71B,7,2FA06GNA,6400,KJ1C03,NPIF,20201229141542,1
304,pdq,4AFF60B71B,7,2FA06GNA,6400,KJ1C03,NPIF,20201228132753,2


In [19]:
## sdet ##
inputParamSldr = ['df_fact_hdd_headops.csv', 
                  ['2020-01-01', '2020-07-31'],
                 ]
get_sdet_table('sldr.sdet_special_trans', inputParamSldr, 
               'slidersn, product, storeday, \
                wew_r0_nm_sto_2, wew_r1_nm_sto_2, wew_r0_mww_nm_sto_2, wew_r1_mww_nm_sto_2, wew_r0_umrw_nm_sto_2, wew_r1_umrw_nm_sto_2, wew_r0_eb_nm_sto_2, wew_r1_eb_nm_sto_2, \
                row_number() over ( partition by slidersn order by storeday desc) as row_num',
                whereAdded = None)

current sql is select slidersn, product, storeday,                 wew_r0_nm_sto_2, wew_r1_nm_sto_2, wew_r0_mww_nm_sto_2, wew_r1_mww_nm_sto_2, wew_r0_umrw_nm_sto_2, wew_r1_umrw_nm_sto_2, wew_r0_eb_nm_sto_2, wew_r1_eb_nm_sto_2,                 row_number() over ( partition by slidersn order by storeday desc) as row_num from hive.sldr.sdet_special_trans where storeday between '2020-01-01' and '2020-07-31' and slidersn in ('49FB51932F', '4AF083B607', '49FBF2D223', '49FBF1B432', '49F8D1182C', '4AF081CC0A', '4AF081CC0A', '4AF081CC0A', '49F8D11835', '49FBF3FF22', '4AF0823428', '4AFF60B339', '49FBF3A615', '4AFF61130A', '4AFF61130A', '49F6A28627', '49F8D1183B', '49FBF0330B', '49F8818902', '4AF080AC1D', '4AF080AC1D', '4AF080AC1D', '4AFF60B338', '4AF0821A19', '4AFF605126', '49F491C70C', '49FBF2D221', '49FBF0F215', '49F491C715', '49FBF03510', '49FCC2130D', '49FCC2130D', '4AFF61591C', '4AFF61591C', '49F5508532', '49F5536420', '49F8A33C1D', '4A2B82810B', '49F4910703', '49FBF03B0F', '49FBF0351D', '4

Unnamed: 0,slidersn,product,storeday,wew_r0_nm_sto_2,wew_r1_nm_sto_2,wew_r0_mww_nm_sto_2,wew_r1_mww_nm_sto_2,wew_r0_umrw_nm_sto_2,wew_r1_umrw_nm_sto_2,wew_r0_eb_nm_sto_2,wew_r1_eb_nm_sto_2,row_num
0,4AF0823428,C3_B_D,2020-07-29,,,,,,,,,1
1,4AFF60B339,C3_T_D,2020-07-28,,,,,,,,,1
2,49F8A33C1D,C3_B_D,2020-03-10,,,,,,,,,1
3,4AFF61591C,C3_T_D,2020-07-30,,,,,,,,,1
4,49F8D11835,C3_B_D,2020-06-07,,,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...
86,49FBF2DD0F,C3_T_D,2020-03-10,,,,,,,,,1
0,49F491C709,C3_T_D,2020-03-11,,,,,,,,,1
1,49F6A28B38,C3_T_D,2020-03-12,,,,,,,,,1
2,4AFF60B71B,C3_T_D,2020-07-28,,,,,,,,,1


In [20]:
#Combine slider sn, pheadno, and HDD SN
df = pd.read_pickle('df_fact_hdd_headops.pkl').query("row_num==1").merge(pd.read_pickle('df_sldr.sdet_special_trans.pkl').loc[:,['slidersn', 'storeday', 'wew_r0_nm_sto_2', 'row_num']].query("row_num==1"),
                                                                         left_on=['slidersn', 'row_num'], 
                                                                         right_on=['slidersn', 'row_num'], 
                                                                         how='left')
df.to_pickle('hddops_association.pkl')
df

Unnamed: 0,product,slidersn,pheadno,hddsn,procid,mfgid,hddtrial,enddate,row_num,storeday,wew_r0_nm_sto_2
0,pdq,49FB51932F,7,2FA02DKA,6400,KJ1C03,Z07E,20201223104205,1,2020-03-09,
1,pdq,4AF083B607,4,2FA06G3A,6400,KJ1C03,NPIF,20201229143527,1,2020-07-28,
2,pdq,49FBF2D223,12,2FA02DKA,6400,KJ1C03,Z07E,20201223104205,1,2020-03-14,
3,pdq,49FBF1B432,0,2FA02DKA,6400,KJ1C03,Z07E,20201223104205,1,2020-03-14,
4,pdq,49F8D1182C,10,2FA06H4A,6400,KJ1C03,NPIF,20201229142426,1,2020-06-07,
...,...,...,...,...,...,...,...,...,...,...,...
247,pdq,49FB530725,15,2FA02DKA,6400,KJ1C03,Z07E,20201223104205,1,2020-03-09,
248,pdq,49F8A34E11,10,2FA02E8A,6400,KJ1C03,Z07E,20201223223939,1,2020-03-08,
249,pdq,49F491C709,7,2FA01EHA,6400,KJ1C03,Z07E,20201223225255,1,2020-03-11,
250,pdq,49F6A28B38,7,2FA02DNA,6400,KJ1C03,Z07E,20201223224858,1,2020-03-12,
