# Create Training Table

## Tool information: rev_module and PM Counters etc. 

In [1]:
import pandas as pd
import numpy as np
from scipy import interpolate, stats
import matplotlib.pyplot as plt
import PyUber
from datetime import datetime, timedelta, date
import glob, time
import os
    
def convert_to_date(df, column1='MEAS_SET_DATA_COLLECT_DATE', column2='LOT_DATA_COLLECT_DATE', \
                    column3='CURRENT_MOVEIN_DATE', column4='END_DATE'):
    if column1 in df.columns:
        df[column1] = pd.to_datetime(df[column1])
    if column2 in df.columns:
        df[column2] = pd.to_datetime(df[column2])
    if column3 in df.columns:
        df[column3] = pd.to_datetime(df[column3])
    if column4 in df.columns:
        df[column4] = pd.to_datetime(df[column4])
    return df

def SQL_DataFrame(sql, source='D1D_PROD_XEUS'):
    conn = PyUber.connect(source)
    df = pd.read_sql(sql, conn)
    return df

def get90d(ss, datestr): 
    end = pd.Timestamp(datestr) + timedelta(days=1)
    start = end - timedelta(days=90)
    try:
        ss = ss[(ss['LOT_DATA_COLLECT_DATE'] >= start) & (ss['LOT_DATA_COLLECT_DATE'] < end)]
    except:
        ss = convert_to_date(ss)
        ss = ss[(ss['LOT_DATA_COLLECT_DATE'] >= start) & (ss['LOT_DATA_COLLECT_DATE'] < end)]
    return ss

sql="""
SELECT  DISTINCT 
          a1.entity AS entity
         ,a5.value AS chart_value
         ,To_Char(a0.data_collection_time,'yyyy-mm-dd hh24:mi:ss') AS lot_data_collect_date
         ,a3.measurement_set_name AS measurement_set_name
         ,To_Char(a3.data_collection_time,'yyyy-mm-dd hh24:mi:ss') AS meas_set_data_collect_date
         ,a2.monitor_type AS monitor_type
         ,a3.parameter_class AS parameter_class
         ,a2.monitor_set_name AS monitor_set_name
         ,a0.lotoperkey AS lotoperkey
         ,a5.incontrol_flag AS incontrol_flag
         ,a5.standard_flag AS chart_pt_standard_flag
         ,a10.centerline AS centerline
         ,a10.lo_control_lmt AS lo_control_lmt
         ,a10.up_control_lmt AS up_control_lmt
         ,a5.chart_type AS chart_type
         ,a5.spc_chart_subset AS spc_chart_subset
         ,a2.test_name AS test_name
         ,a3.parameter_header AS parameter_header
         ,a2.module AS module
FROM 
P_SPC_MEASUREMENT_SET a3
INNER JOIN P_SPC_SESSION a2 ON a2.spcs_id = a3.spcs_id
LEFT JOIN P_SPC_LOT a0 ON a0.spcs_id = a2.spcs_id
INNER JOIN P_SPC_ENTITY a1 ON a2.spcs_id = a1.spcs_id AND a1.entity_sequence=1
LEFT JOIN P_SPC_CHART_POINT a5 ON a5.spcs_id = a3.spcs_id AND a5.measurement_set_name = a3.measurement_set_name
LEFT JOIN P_SPC_CHART_LIMIT a10 ON a10.chart_id = a5.chart_id AND a10.limit_id = a5.limit_id
WHERE
              (a1.entity LIKE 'LAT%' 
              OR a1.entity LIKE 'PAT%')
 AND      a5.value Is Not Null  
 AND      a3.data_collection_time >= TRUNC(SYSDATE) - {} 
 AND      a3.data_collection_time <= TRUNC(SYSDATE) - {} 
 AND      a2.monitor_type = 'TOOL MONITOR' 
 AND      a3.parameter_class = 'DEFECT_PARTICLE' 
 AND      a5.spc_chart_subset = 'PARTICLE_SIZE=TOTAL_ADDERS'
"""

sql2= '''SELECT 
          e.entity AS entity
         ,ea.attribute_value AS attribute_value
         ,e.ceid AS ceid
         ,ea.attribute_name AS attribute_name
         ,e.rev_module AS rev_module
FROM 
F_ENTITY e
LEFT JOIN F_ENTITYATTRIBUTE ea ON ea.entity = e.entity AND ea.history_deleted_flag='N'
WHERE
              (e.entity Like 'PAT%' 
              OR e.entity Like 'LAT%')
 AND      ea.attribute_name Like 'PM_Counter' 
ORDER BY
           1 Asc'''

sql2= '''SELECT 
          e.entity AS entity
         ,ea.attribute_value AS attribute_value
         ,e.ceid AS ceid
         ,ea.attribute_name AS attribute_name
         ,e.rev_module AS rev_module
FROM 
F_ENTITY e
LEFT JOIN F_ENTITYATTRIBUTE ea ON ea.entity = e.entity AND ea.history_deleted_flag='N'
WHERE
              (e.entity Like 'LAT%' 
              OR e.entity Like 'PAT%'
              OR e.entity Like 'REX%')
 AND      (ea.attribute_name Like 'PM_Counter' 
             OR ea.attribute_name Like 'Hit%Counter')
ORDER BY
           1 Asc'''



In [2]:
tool_r = SQL_DataFrame(sql2)
tool_r['ATTRIBUTE_VALUE'] = tool_r['ATTRIBUTE_VALUE'].astype(int)
tools = pd.pivot_table(tool_r, values = 'ATTRIBUTE_VALUE', index = ['REV_MODULE', 'CEID' ,'ENTITY'], columns = 'ATTRIBUTE_NAME')
tools = tools.reset_index()
#tools[tools['ENTITY'] == 'LATXX']['REV_MODULE'].values[0]

camp = os.getcwd()

In [10]:
tool_r.head()

Unnamed: 0,ENTITY,ATTRIBUTE_VALUE,CEID,ATTRIBUTE_NAME,REV_MODULE
0,LAT01_PM1,0,LATne,PMGCounter,FE PAT
1,LAT01_PM1,1132,LATne,PMACounter,FE PAT
2,LAT01_PM1,0,LATne,PMECounter,FE PAT
3,LAT01_PM1,77064,LATne,PMDCounter,FE PAT
4,LAT01_PM1,51538,LATne,PMCCounter,FE PAT


In [3]:
#pasts = [210,  270, 300, 330, 360, 390, 420, 450]
pasts = [0]

start =  time.time()

try:
    assert len(tool_r)>0
    print('already have tool_r')
except:
    tool_r = SQL_DataFrame(sql2)
    tool_r['ATTRIBUTE_VALUE'] = tool_r['ATTRIBUTE_VALUE'].astype(int)
    tools = pd.pivot_table(tool_r, values = 'ATTRIBUTE_VALUE', index = ['REV_MODULE', 'CEID' ,'ENTITY'], columns = 'ATTRIBUTE_NAME')
    tools = tools.reset_index()
    tools.set_index(['ENTITY'], inplace = True)

# Collect long term data 
for past in pasts:
    ss = SQL_DataFrame(sql.format(str(past+600), str(past)))

    camp = os.getcwd()

    dstr = ss['LOT_DATA_COLLECT_DATE'].max()[:10]
    dstr
    fname = 'LAT.SPC.defect.' + dstr + '.csv'
    fname
    ss.to_csv(fname, index=False)
    

done1 = time.time()
print('done in: ', done1-start)

already have tool_r
done in:  424.2910952568054


In [4]:
tools.set_index(['ENTITY'], inplace = True)

In [5]:
tools.head()

ATTRIBUTE_NAME,REV_MODULE,CEID,HitAlESCounter,HitAlGECounter,HitCEWaferCounter,HitChuckLifeCounter,HitClean1Counter,HitClean2Counter,HitClean3Counter,HitClean4Counter,...,HitTMPCounter,HitV1Counter,HitV2Counter,PMACounter,PMBCounter,PMCCounter,PMDCounter,PMECounter,PMFCounter,PMGCounter
ENTITY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
REX415_EU4,BE MM3,MM3ck,0.0,0.0,0.0,0.0,5625.0,17004.0,117443.0,117443.0,...,131668.0,117341.0,117341.0,,,,,,,
REX405_EU1,BE R3X,R3Xcb,0.0,297252.0,0.0,0.0,609.0,6566.0,6566.0,238771.0,...,173921.0,584497.0,584497.0,,,,,,,
REX401_EU2,BE R3X,R3Xch,0.0,0.0,0.0,0.0,2913.0,24418.0,91829.0,265495.0,...,96642.0,262786.0,262786.0,,,,,,,
REX406_EU1,BE R3X,R3Xch,0.0,0.0,5.0,0.0,4008.0,4133.0,10016.0,193451.0,...,18214.0,43434.0,43434.0,,,,,,,
REX406_EU2,BE R3X,R3Xch,0.0,0.0,0.0,0.0,5421.0,33117.0,33245.0,206135.0,...,35414.0,56943.0,56943.0,,,,,,,


In [6]:
tools.loc['LAT01_PM2']['REV_MODULE']

'FE  PAT'

## Create cleaned, smoothed surfscan tables and plot
60 days, 20 points 

time ends at last spc data point

In [18]:
import time

start = time.time()
sta=pd.DataFrame()
st=[0]
csvs = glob.glob('training_data/LAT*.csv')
csvs = ['training_data/LATREX.SPC.defect.2021-04-22.csv']
csvs.reverse()

existing_dates = []
for csv in csvs:
    d = csv.split('.')[-2]
    existing_dates.append(d)


for fnode in csvs:


    dstr = fnode.split('.')[-2]
    ss=pd.read_csv(fnode)
    entities = ss.ENTITY.unique()
    
    taa = ss[ss['SPC_CHART_SUBSET'] == 'PARTICLE_SIZE=TOTAL_ADDERS']
    taa = pd.merge(taa, tools, on='ENTITY')
    taa=taa[['REV_MODULE', 'CHART_VALUE']]
    taa=taa.rename(columns={'REV_MODULE': 'module', 'CHART_VALUE': 'TA'})
    taac = taa[taa.groupby('module').TA.transform(lambda x: stats.zscore(x)<0.5)]
    baselines = taac.groupby('module').mean()
    
    for entity in entities:
        #if len(st)==20 : break
            
        #entity=entities[0]
        try:
            fname = tools.loc[entity]['REV_MODULE']+'.TA.'+entity+'.'+dstr
            baseline = baselines.loc[tools.loc[entity]['REV_MODULE']].values
        except:
            fname = 'NONE.TA.'+entity+'.'+dstr
            baseline = baselines['TA'].mean()
        sst = ss[ss['ENTITY']==entity]
        st = sst[sst['SPC_CHART_SUBSET'] == 'PARTICLE_SIZE=TOTAL_ADDERS']

        st=st[['LOT_DATA_COLLECT_DATE', 'CHART_VALUE']]
        st=st.rename(columns={'LOT_DATA_COLLECT_DATE': 't', 'CHART_VALUE': fname})
        st.index = pd.to_datetime(st.t)
        st.drop(['t'], axis=1, inplace = True)
        if len(st)<20: continue # need enough data to interpolate properly
        st=st.sort_index()
        st=st.resample('3D').mean()
        try:
            st=st.interpolate(method='spline', order=2)
            st[st<0] = 0
        except:
            continue
        #st=np.log(st+1)
        st = st[len(st)-20:]
        if len(st)<20: continue #sometimes data doesn't extend back 60 days
        #print(fname+' len: ', len(st))
        
        st = st/baseline
        
        want_figs = True
        if ((len(st)==20) & (st.mean() < np.inf) & want_figs) : #do plot
            # PLOT the smoothed data
            fig = plt.figure()
            ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
            #ax = plt.subplot(111)
            x = np.linspace(-len(st)+1,0,len(st)).reshape(-1,1)*3
            ax.plot(x,st.values, 'o-')
            ax.legend([entity+': '+dstr])
            
            # PLOT spline
            try: 
                xn = np.linspace(x[0], x[-1], 100)
                non_fliers = st.values.astype(float)<4
                yp = st[non_fliers]
                xp = x[non_fliers]
                y_BSpline = interpolate.UnivariateSpline(xp,yp,s=20.)
                yn = y_BSpline(xn)
                ax.plot(xn, yn, '-')
            except:
                pass
            
            ax.set_xlabel('day')
            ax.set_ylabel('TA')
            ax.set_xticks([-60,-30,0])
            plt.ylim([-0.1, 4])
            
            fig.savefig('figs/'+fname+'.png')
            plt.close()
            #break

        #if len(st)==20 : break
        st = st.reset_index().T.drop('t')
        if sta.empty: sta=st
        else: sta = sta.append(st)
        #break
    
    #break

print('seconds: ', time.time()-start)

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [20]:
sta

In [21]:
ss.head()

Unnamed: 0,ENTITY,CHART_VALUE,LOT_DATA_COLLECT_DATE,MEASUREMENT_SET_NAME,MEAS_SET_DATA_COLLECT_DATE,MONITOR_TYPE,PARAMETER_CLASS,MONITOR_SET_NAME,LOTOPERKEY,INCONTROL_FLAG,CHART_PT_STANDARD_FLAG,CENTERLINE,LO_CONTROL_LMT,UP_CONTROL_LMT,CHART_TYPE,SPC_CHART_SUBSET,TEST_NAME,PARAMETER_HEADER,MODULE
0,LAT426_PM3,0.0,9/17/2019 17:45,LAT.DSA_TIN_PST.74.DER,9/17/2019 17:45,TOOL MONITOR,DEFECT_PARTICLE,LAT.DSA_TIN_PST.74.MON,1847754675,N,N,1.01,0.0,3.6,X-BAR,PARTICLE_SIZE=TOTAL_ADDERS,4LATTINNTSSPS,PARTICLE_SIZE,LAT
1,LAT426_PM3,0.0,9/26/2019 09:09,LAT.DSA_TIN_PST.74.DER,9/26/2019 09:09,TOOL MONITOR,DEFECT_PARTICLE,LAT.DSA_TIN_PST.74.MON,1864650392,N,N,1.01,0.0,3.6,X-BAR,PARTICLE_SIZE=TOTAL_ADDERS,4LATTINNTSSPS,PARTICLE_SIZE,LAT
2,PAT408_PM5,0.0,1/28/2020 11:31,LAT.DSA_TIN_PST.74.DER,1/28/2020 11:31,TOOL MONITOR,DEFECT_PARTICLE,LAT.DSA_TIN_PST.74.MON,2053656495,Y,Y,1.01,0.0,3.6,X-BAR,PARTICLE_SIZE=TOTAL_ADDERS,4LATTINNTSSPS,PARTICLE_SIZE,LAT
3,LAT416_PM1,0.0,1/29/2020 05:08,LAT.DSA_PST.74.DER,1/29/2020 05:08,TOOL MONITOR,DEFECT_PARTICLE,LAT.DSA_PST.74.MON,2053656495,Y,Y,1.01,0.0,3.2,X-BAR,PARTICLE_SIZE=TOTAL_ADDERS,4LATNTSSPS,PARTICLE_SIZE,LAT
4,PAT430_PM4,0.0,1/18/2020 21:06,PATBE.DSA_PST.76.DER,1/18/2020 21:06,TOOL MONITOR,DEFECT_PARTICLE,PATBE.DSA_PST.76.MON,2053972317,Y,Y,0.5,0.0,3.6,X-BAR,PARTICLE_SIZE=TOTAL_ADDERS,6PATBESSPST,PARTICLE_SIZE,PAT BE


In [22]:
sql2rex= '''SELECT 
          e.entity AS entity
         ,ea.attribute_value AS attribute_value
         ,e.ceid AS ceid
         ,ea.attribute_name AS attribute_name
         ,e.rev_module AS rev_module
FROM 
F_ENTITY e
LEFT JOIN F_ENTITYATTRIBUTE ea ON ea.entity = e.entity AND ea.history_deleted_flag='N'
WHERE
    e.entity Like 'REX%' 
AND      ea.attribute_name Like 'Hit%Counter' 

ORDER BY
           1 Asc'''

In [23]:
import time

start = time.time()
sta=pd.DataFrame()
st=[0]

ssb = pd.read_csv('training_data/LATREX.SPC.defect.2021-04-22.csv')

# snapdates = ['2021-04-14', '2021-03-31', '2021-03-17', '2021-01-21', '2020-12-23', '2020-11-24', '2020-10-18', \
#             '2020-09-20', '2020-08-23', '2020-07-21', '2020-06-23', '2020-05-25', '2020-04-22', '2020-03-25', \
#             '2020-02-25']

training_dates=pd.read_csv('training_data/training_dates.csv')
snapdates = training_dates[training_dates['Module'] == 'LAT']['Date']

# csvs = glob.glob('training_data/LAT*.csv')
# csvs.reverse()

#Get module/tool information
tool_r = SQL_DataFrame(sql2)
tool_r['ATTRIBUTE_VALUE'] = tool_r['ATTRIBUTE_VALUE'].astype(int)
tools = pd.pivot_table(tool_r, values = 'ATTRIBUTE_VALUE', index = ['REV_MODULE', 'CEID' ,'ENTITY'], columns = 'ATTRIBUTE_NAME')
tools = tools.reset_index()
tools.set_index(['ENTITY'], inplace = True)

#Calculate module baselines
taa = ssb[ssb['SPC_CHART_SUBSET'] == 'PARTICLE_SIZE=TOTAL_ADDERS']
taa = pd.merge(taa, tools, on='ENTITY')
taa=taa[['REV_MODULE', 'CHART_VALUE']]
taa=taa.rename(columns={'REV_MODULE': 'module', 'CHART_VALUE': 'TA'})
taac = taa[taa.groupby('module').TA.transform(lambda x: stats.zscore(x)<0.5)]
baselines = taac.groupby('module').mean()

#existing_dates = []

for dstr in snapdates:
    ss = get90d(ssb, dstr)
    entities = ss.ENTITY.unique()
    
#     taa = ss[ss['SPC_CHART_SUBSET'] == 'PARTICLE_SIZE=TOTAL_ADDERS']
#     taa = pd.merge(taa, tools, on='ENTITY')
#     taa=taa[['REV_MODULE', 'CHART_VALUE']]
#     taa=taa.rename(columns={'REV_MODULE': 'module', 'CHART_VALUE': 'TA'})
#     taac = taa[taa.groupby('module').TA.transform(lambda x: stats.zscore(x)<0.5)]
#     baselines = taac.groupby('module').mean()
    
    for entity in entities:
        #if len(st)==20 : break
            
        #entity=entities[0]
        try:
            fname = tools.loc[entity]['REV_MODULE']+'.TA.'+entity+'.'+dstr
            baseline = baselines.loc[tools.loc[entity]['REV_MODULE']].values
        except:
            fname = 'NONE.TA.'+entity+'.'+dstr
            baseline = baselines['TA'].mean()
        sst = ss[ss['ENTITY']==entity]
        st = sst[sst['SPC_CHART_SUBSET'] == 'PARTICLE_SIZE=TOTAL_ADDERS']

        st=st[['LOT_DATA_COLLECT_DATE', 'CHART_VALUE']]
        st=st.rename(columns={'LOT_DATA_COLLECT_DATE': 't', 'CHART_VALUE': fname})
        st.index = pd.to_datetime(st.t)
        st.drop(['t'], axis=1, inplace = True)
        if len(st)<20: continue # need enough data to interpolate properly
        st=st.sort_index()
        st=st.resample('3D').mean()
        try:
            st=st.interpolate(method='spline', order=2)
            st[st<0] = 0
        except:
            continue
        #st=np.log(st+1)
        st = st[len(st)-20:]
        if len(st)<20: continue #sometimes data doesn't extend back 60 days
        #print(fname+' len: ', len(st))
        
        st = st/baseline
        
        want_figs = False
        if ((len(st)==20) & (st.mean() < np.inf)[0] & want_figs) : #do plot
            # PLOT the smoothed data
            fig = plt.figure()
            ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
            #ax = plt.subplot(111)
            x = np.linspace(-len(st)+1,0,len(st)).reshape(-1,1)*3
            ax.plot(x,st.values, 'o-')
            ax.legend([entity+': '+dstr])
            
            # PLOT spline
            try: 
                xn = np.linspace(x[0], x[-1], 100)
                non_fliers = st.values.astype(float)<4
                yp = st[non_fliers]
                xp = x[non_fliers]
                y_BSpline = interpolate.UnivariateSpline(xp,yp,s=20.)
                yn = y_BSpline(xn)
                ax.plot(xn, yn, '-')
            except:
                pass
            
            ax.set_xlabel('day')
            ax.set_ylabel('TA')
            ax.set_xticks([-60,-30,0])
            plt.ylim([-0.1, 4])
            
            fig.savefig('figs/'+fname+'.png')
            plt.close()
            #break

        #if len(st)==20 : break
        st = st.reset_index().T.drop('t')
        if sta.empty: sta=st
        else: sta = sta.append(st)
        #break
    
    #break

print('seconds: ', time.time()-start)

FileNotFoundError: [Errno 2] No such file or directory: 'training_data/training_dates.csv'

In [None]:
sta.to_csv('training_data/LATREX.SPC.defect.2021-04-22.sta.csv')
baselines.to_csv('training_data/LATREX.baselines.csv')

In [None]:
sta

In [None]:
sta.head()

In [None]:
st

In [None]:
sta.index[0]

In [None]:
os.chdir(camp+'/figs/1trending')
trending = pd.Series(glob.glob('*.png'), name = 'key').apply(lambda x: x.split('.png')[0])
trending = pd.DataFrame(trending)
trending['trending'] = 1

os.chdir(camp+'/figs/2burst')
burst = pd.Series(glob.glob('*.png'), name = 'key').apply(lambda x: x.split('.png')[0])
burst = pd.DataFrame(burst)
burst['burst'] = 1

os.chdir(camp+'/figs/3elevated')
elevated = pd.Series(glob.glob('*.png'), name = 'key').apply(lambda x: x.split('.png')[0])
elevated = pd.DataFrame(elevated)
elevated['elevated'] = 1

os.chdir(camp+'/figs/4clean')
clean = pd.Series(glob.glob('*.png'), name = 'key').apply(lambda x: x.split('.png')[0])
clean = pd.DataFrame(clean)
clean['clean'] = 1

binned = pd.concat([trending, burst, elevated, clean]).fillna(0).set_index('key')

os.chdir(camp)
#trending.head()

In [None]:
binned.to_csv('training_data/LATREX.binned.csv', index=False)

In [None]:
os.chdir(camp+'/figs/dirty')
dirtylist = glob.glob('*.png')
os.chdir(camp)

In [None]:
#binned = {'trending': trending, 'burst': burst, 'elevated': elevated, 'clean': clean}
keys = pd.concat([trending, burst, elevated, clean])

In [None]:
binned=pd.DataFrame()
binned['keys'] = keys
binned['trending'] = binned['keys'].apply(lambda x: 1 if x in trending else 0)

In [None]:
'DE-LAT-NVE.TA.LAT424_PM2.2020-03-25' in trending.values

In [None]:
dirty.split('.png')[0]

In [None]:
#dirtylist=pd.Series(dirtylist)

In [None]:
#dirtylist = dirtylist.apply(lambda x: x.split('.png')[0])

In [None]:
#dirtylist[0]

In [None]:
#sta['is_dirty'] =  pd.Series(sta.index).apply(lambda x: (x in dirtylist.values)*1).values

In [None]:
sta = sta.rename_axis('key')
stay = pd.merge(sta, binned, on=['key'], how='inner')
stay.to_csv('training_data/cleaned_charts_training.csv')

In [None]:
binned['trending']

In [None]:
sta.tail()

In [None]:
sta.to_csv('training_data/cleaned_charts_training.csv')

In [None]:
y = sta.T[sta.index[0]]

In [None]:
x = sta.columns.values

In [None]:
x_new = np.linspace(x[0], x[-1]+1, 100)

In [None]:
y

In [None]:
yp.shape

In [None]:
yp

In [None]:
non_fliers = stats.zscore(y.astype(float))<1
yp = y[non_fliers]
xp = x[non_fliers]
y_BSpline = interpolate.UnivariateSpline(xp,yp,s=40.)
y_new = y_BSpline(x_new)
plt.plot(x_new, y_new)

In [None]:
plt.plot(x,y)

In [None]:
xn = np.linspace(x[0], x[-1], 100)
y_BSpline = interpolate.UnivariateSpline(x,st.values,s=40.)
yn = y_BSpline(xn)
plt.plot(xn, yn)

In [None]:
fig = plt.figure()
ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
#ax = plt.subplot(111)
x = np.linspace(-len(st)+1,0,len(st)).reshape(-1,1)*3
ax.plot(x,st.values, 'o-')


xn = np.linspace(x[0], x[-1], 100)
y_BSpline = interpolate.UnivariateSpline(x,st.values,s=20.)
yn = y_BSpline(xn)
ax.plot(xn, yn, '-')

ax.set_xlabel('date')
ax.set_ylabel('TA')
ax.set_xticks([-60,-30,0])
ax.legend([entity+': '+dstr])
plt.ylim([-0.1, 4])

#fig.savefig('figs/'+fname+'.png')
#plt.close()

In [None]:
np.arange(-60,1,30)

In [None]:
taa = ss[ss['SPC_CHART_SUBSET'] == 'PARTICLE_SIZE=TOTAL_ADDERS']
taa = pd.merge(taa, tools, on='ENTITY')
taa=taa[['REV_MODULE', 'CHART_VALUE']]
taa=taa.rename(columns={'REV_MODULE': 'module', 'CHART_VALUE': 'TA'})
taac = taa[taa.groupby('module').TA.transform(lambda x: stats.zscore(x)<0.3)]
baselines = taac.groupby('module').mean()

In [None]:
baselines

In [None]:
st/baseline.loc['DE-LAT-TNC'].values

In [None]:
baseline.loc['DE-LAT-TNC'].values

In [None]:
baseline['TA'].mean()

In [None]:
baseline.loc[tools.loc[entity]['REV_MODULE']].values

In [None]:
sta.head()

In [None]:
st = sta.loc['DE-PAT-XD.TA.PAT458_PM3.2021-02-05']

In [None]:
len(st)==20 & (st.mean() < np.inf)

In [None]:
ssa=pd.read_csv('LAT.SPC.defect.2021-03-17.csv')

In [None]:
len(ssa)

In [None]:
len(ss)

In [None]:
def convert_to_date(df, column1='MEAS_SET_DATA_COLLECT_DATE', column2='LOT_DATA_COLLECT_DATE', \
                    column3='CURRENT_MOVEIN_DATE', column4='END_DATE'):
    if column1 in df.columns:
        df[column1] = pd.to_datetime(df[column1])
    if column2 in df.columns:
        df[column2] = pd.to_datetime(df[column2])
    if column3 in df.columns:
        df[column3] = pd.to_datetime(df[column3])
    if column4 in df.columns:
        df[column4] = pd.to_datetime(df[column4])
    return df

ssa=convert_to_date(ssa)

In [None]:
ssa.sort_values(by=['MEAS_SET_DATA_COLLECT_DATE'], inplace=True, ascending=False)

In [None]:
start = 9
ss=ssa[(ss['MEAS_SET_DATA_COLLECT_DATE']<datetime.now() - timedelta(days=start)) & (ss['MEAS_SET_DATA_COLLECT_DATE']>datetime.now() - timedelta(days= (90+start)))]

In [None]:
ss.head()

In [None]:
ds = []
for csv in csvs:
    d = csv.split('.')[-2]
    ds.append(d)


In [None]:
ds

In [None]:
os.getcwd()

In [None]:
ss1 = pd.read_csv('LAT.SPC.defect.2021-03-17.csv')
ss2 = pd.read_csv('LAT.SPC.defect.2021-03-16.csv')
ss3 = pd.read_csv('LAT.SPC.defect.2021-04-22.csv')

key_cols = ['ENTITY', 'CHART_VALUE', 'LOT_DATA_COLLECT_DATE',\
       'MEASUREMENT_SET_NAME', 'MEAS_SET_DATA_COLLECT_DATE', 'MONITOR_TYPE',\
       'PARAMETER_CLASS', 'MONITOR_SET_NAME', 'LOTOPERKEY', 'INCONTROL_FLAG',\
       'CHART_PT_STANDARD_FLAG', 'CENTERLINE', 'LO_CONTROL_LMT',\
       'UP_CONTROL_LMT', 'CHART_TYPE', 'SPC_CHART_SUBSET', 'TEST_NAME',\
       'PARAMETER_HEADER', 'MODULE']
#key_cols = ['ENTITY', 'CHART_VALUE', 'LOT_DATA_COLLECT_DATE', 'SPC_CHART_SUBSET']
LAT_ss = pd.merge(ss1, ss2, left_on=key_cols, right_on=key_cols, how='outer')
LAT_ss = pd.merge(LAT_ss, ss3, left_on=key_cols, right_on=key_cols, how='outer')
#LAT_ss.to_csv('LAT.SPC.defect.2021-04-22.csv')


In [None]:
# LAT_ss.to_csv('training_data/LAT.SPC.defect.2021-04-22.csv', index=False)

In [None]:
LAT_ss.columns

In [None]:
LAT_ss=convert_to_date(LAT_ss)

In [None]:
type(ss1.LOT_DATA_COLLECT_DATE[0])

In [None]:
len(LAT_ss)

In [None]:
lat=pd.read_csv('training_data/LAT.SPC.defect.2021-04-22.csv')
rex=pd.read_csv('training_data/REX.SPC.defect.2021-04-22.csv')

In [None]:
latrex=pd.concat([lat, rex])

In [None]:
# latrex.to_csv('training_data/LATREX.SPC.defect.2021-04-22.csv', index=False)

In [None]:
lat=pd.read_csv('training_data/LAT.SPC.defect.2021-04-22.sta.csv')
rex=pd.read_csv('training_data/REX.SPC.defect.2021-04-22.sta.csv')
latrex=pd.concat([lat, rex])

In [None]:
latrex.head()

In [None]:
LAT_ss = pd.concat([ss1, ss2])

In [None]:
len(ss3)

In [None]:
len(ss3)==len(ss1)+len(ss2)

In [None]:
len(LAT_ss)

In [None]:
len(ss1)

In [None]:
len(ss2)