In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

import sys
import os.path
sys.path.insert(0, os.getcwd()+"/..")
from SEMContour import *
sys.path.insert(0, os.getcwd()+"/../../common")
from PlotConfig import *
from FileUtil import gpfs2WinPath

# Directory holding the contour-extraction results, written as a GPFS path.
# NOTE(review): this is a hard-coded absolute path; the notebook only runs
# where this share is reachable.
CWD = r'/gpfs/WW/BD/MXP/SHARED/SEM_IMAGE/IMEC/Case02_calaveras_v3/3Tmp/CT_KPI_test/Calaveras_v3_regular_CT_KPI_003_slope_modified_copy/h/cache/dummydb/result/MXP/job1/ContourExtraction400result1'
# Rebind CWD to whatever gpfs2WinPath (imported from FileUtil) returns for it;
# from here on CWD no longer holds the GPFS form.
CWD = gpfs2WinPath(CWD)

class ContourAnalyzer(object):
    """Parse one contour file and keep both the contour and its DataFrame.

    After construction the instance exposes:
      * ``contour`` - the ``SEMContour`` object the file was parsed into
      * ``df``      - the value returned by ``contour.cvtToDf()``
    """

    def __init__(self, contourfile):
        self.__build(contourfile)

    def __build(self, contourfile):
        """Read ``contourfile`` and populate ``self.contour`` and ``self.df``.

        Terminates through ``sys.exit`` when the parsed object is falsy.
        """
        parsed = SEMContour()
        parsed.parseFile(contourfile)
        # NOTE(review): this guard only fires if SEMContour defines its own
        # truthiness (__bool__/__nonzero__/__len__) - confirm that it does.
        if not parsed:
            sys.exit("ERROR: read in contour file %s fails\n" % contourfile)
        self.contour = parsed
        self.df = parsed.cvtToDf()

def plot_corr(df):
    """Draw a scatter matrix of ``|slope|`` versus ``ridge_intensity``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``slope`` and ``ridge_intensity``. The frame
        itself is not modified; the function works on a private copy of the
        two columns.

    Notes
    -----
    Switches the global matplotlib style to ``'ggplot'`` as a side effect.
    """
    from pandas.plotting import scatter_matrix

    matplotlib.style.use('ggplot')
    # Other candidate columns: 'intensity', 'contrast'.
    cols = ['slope', 'ridge_intensity']
    print(df.columns)

    # Take an explicit copy before writing: assigning into the bare
    # ``df[cols]`` selection triggers pandas' SettingWithCopyWarning.
    pair_df = df[cols].copy()
    pair_df['slope'] = pair_df['slope'].abs()

    colors = ['red', 'blue']
    scatter_matrix(pair_df, alpha=0.2, figsize=(6, 6), diagonal='kde', color=colors)

In [38]:
# Parse a single contour file from the result directory and plot it.
# NOTE(review): plot_contour is not defined in the visible cells; presumably
# it arrives through one of the star imports at the top - confirm.
ca = ContourAnalyzer(CWD+r'/461_image_contour.txt')
plot_contour(ca.contour)

In [48]:
%matplotlib auto
def plot_reg(df, image_height=1024):
    """Fit ``ridge_intensity ~ |slope|`` by OLS and plot the fit and its outliers.

    Two figures are produced:

    1. The data points, the fitted line, and the lower/upper prediction
       bounds returned by ``wls_prediction_std`` (called with its defaults).
    2. The contour points at ``(offsetx, image_height - 1 - offsety)``,
       coloured by whether ``ridge_intensity`` lies inside those bounds.

    The regression summary, residual/total MSE, fitted parameters and the
    0.5-quantile prediction are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``slope``, ``ridge_intensity``, ``offsetx``
        and ``offsety``. The caller's frame is left untouched: the function
        works on a copy, so ``slope`` is not overwritten with its absolute
        value and no helper columns are appended to it.
    image_height : int, optional
        Height used to flip ``offsety`` for display. The default of 1024
        reproduces the previously hard-coded ``1024-1-offsety``.
    """
    import statsmodels.api as sm
    from statsmodels.sandbox.regression.predstd import wls_prediction_std

    # Private copy: everything below writes columns, and the input frame is
    # shared with the other cells of the notebook.
    df = df.copy()
    df['slope'] = df['slope'].abs()
    x, y = df['slope'], df['ridge_intensity']

    X = sm.add_constant(x, prepend=False)
    results = sm.OLS(y, X).fit()
    print(results.summary())
    print(results.mse_resid, results.mse_total)
    print(results.params, type(results.params))
    k, b = results.params.loc['slope'], results.params.loc['const']

    pred_std, predict_ci_low, predict_ci_upp = wls_prediction_std(results)
    y_pred = results.predict()

    # Line plots connect points in the order given; sort by x so the dashed
    # bound lines are drawn once, left to right, instead of zig-zagging.
    order = np.argsort(x.values)
    x_sorted = x.values[order]

    fig, ax = plt.subplots()
    ax.plot(x, y, 'o', label='original ridge_intensity v.s. slope')
    ax.plot(x_sorted, np.asarray(y_pred)[order], 'r',
            label='predicted ridge_intensity={:.3f}slope+{:.3f}, $R^2={:.3f}$'.format(k, b, results.rsquared))
    ax.plot(x_sorted, np.asarray(predict_ci_low)[order], 'b--', lw=1, label='predict lower')
    ax.plot(x_sorted, np.asarray(predict_ci_upp)[order], 'g--', lw=1, label='predict upper')
    ax.set_xlabel('slope')
    ax.set_ylabel('ridge_intensity')
    ax.legend()
    plt.show()

    df['predict_ci_low'] = predict_ci_low
    df['predict_ci_upp'] = predict_ci_upp
    in_range = (df['ridge_intensity'] >= df['predict_ci_low']) & \
               (df['ridge_intensity'] <= df['predict_ci_upp'])

    # Open a fresh figure rather than the fixed number 2, so re-running the
    # cell does not stack new axes onto an already existing window.
    fig, ax = plt.subplots()
    y_flip = image_height - 1
    ax.plot(df.loc[in_range, 'offsetx'], y_flip - df.loc[in_range, 'offsety'],
            'b.', markersize=3, label='ridge_intensity In prediction range')
    ax.plot(df.loc[~in_range, 'offsetx'], y_flip - df.loc[~in_range, 'offsety'],
            'r.', markersize=3, label='ridge_intensity Out prediction range')
    ax.legend()
    plt.show()

    def ols_quantile(m, X, q):
        """Return the ``q``-quantile prediction of fitted OLS results ``m`` at ``X``.

        Computed as the mean prediction plus ``norm.ppf(q)`` times the
        residual standard error ``sqrt(m.scale)``.
        """
        from scipy.stats import norm
        mean_pred = m.predict(X)
        se = np.sqrt(m.scale)
        return mean_pred + norm.ppf(q) * se

    # norm.ppf(0.5) is 0, so this prints the mean prediction itself.
    print(ols_quantile(results, X, 0.5))

    
plot_reg(ca.df)

Using matplotlib backend: Qt4Agg
                            OLS Regression Results                            
Dep. Variable:        ridge_intensity   R-squared:                       0.966
Model:                            OLS   Adj. R-squared:                  0.966
Method:                 Least Squares   F-statistic:                 2.318e+05
Date:                Wed, 19 Sep 2018   Prob (F-statistic):               0.00
Time:                        18:51:56   Log-Likelihood:                 45492.
No. Observations:                8187   AIC:                        -9.098e+04
Df Residuals:                    8185   BIC:                        -9.097e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
slope          1.98

In [49]:
%matplotlib auto
def plot_reg2(df):
    """Plot ``ridge_intensity`` against ``|slope|`` with a least-squares line.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``slope`` and ``ridge_intensity``. The frame
        is not modified; the two columns are copied before ``slope`` is
        replaced by its absolute value.
    """
    from scipy import stats

    colstr = 'slope  ridge_intensity'
    # Explicit copy: writing into the bare ``df[cols]`` selection triggers
    # pandas' SettingWithCopyWarning.
    data = df[colstr.split()].copy()
    data['slope'] = data['slope'].abs()
    x, y = data['slope'], data['ridge_intensity']

    # Only the first two fields (slope, intercept) of the result are needed.
    slope, intercept = stats.linregress(x, y)[:2]

    fig, ax = plt.subplots()
    ax.plot(x, y, 'o', label='original '+colstr)
    ax.plot(x, intercept + slope*x, 'r', label='ridge_intensity={:.2f}slope+{:.2f}'.format(slope, intercept))
    ax.set_xlim([0, x.max()*1.1])
    ax.set_ylabel(r"ridge_intensity")
    ax.set_xlabel("slope")
    ax.legend()
    plt.show()
plot_reg2(ca.df)

Using matplotlib backend: Qt4Agg


In [3]:
CWD

'\\\\devshare-brion.asml.com\\cnfs-WW\\BD\\MXP\\SHARED\\SEM_IMAGE\\IMEC\\Case02_calaveras_v3\\3Tmp\\CT_KPI_test\\Calaveras_v3_regular_CT_KPI_003_slope_modified_copy\\h\\cache\\dummydb\\result\\MXP\\job1\\ContourExtraction400result1'

In [6]:
import os
from subprocess import call

# Mirror every entry of the job's data directory into the result directory
# as a symbolic link.
datapath  = r'D:\code\Python\apps\MXP\ContourSelect\samplejob\h\data\dummydb\MXP\job1'
resultpath =r'D:\code\Python\apps\MXP\ContourSelect\samplejob\h\cache\dummydb\result\MXP\job1'
for item in os.listdir(datapath):
    src = os.path.join(datapath, item)
    dst = os.path.join(resultpath, item)
    if os.path.lexists(dst):
        # Already linked (e.g. the cell was run before); nothing to do.
        continue
    try:
        os.symlink(src, dst)
    except (OSError, AttributeError):
        # AttributeError: os.symlink does not exist on Windows under Python 2.
        # Fall back to an external `ln`, passing the paths as separate
        # arguments so spaces or shell metacharacters in them cannot be
        # misinterpreted by a shell.
        try:
            status = call(['ln', '-s', src, dst])
        except OSError:
            # `ln` itself could not be started.
            status = -1
        if status != 0:
            print('WARNING: could not link {} -> {}'.format(dst, src))

In [8]:
import re
# Drop the "." that precedes each "/" and then the leading "/".
# The dot is escaped so that it matches a literal "." only; an unescaped
# r'./' would also swallow any other character in front of a slash
# (e.g. 'a/b' -> '/b').
normalized_key = re.sub(r'\./', '/', './test./key')[1:]
sys.version

'2.7.12 |Anaconda custom (64-bit)| (default, Jun 29 2016, 11:07:13) [MSC v.1500 64 bit (AMD64)]'

In [9]:
# Splitting on '/' yields one segment per path level; any '@N' suffix stays
# attached to its segment.
'test@2/key@1/value@1'.split('/')

['test@2', 'key@1', 'value@1']

In [13]:
# A segment without '@' splits into a single-element list ...
print('test'.split('@'))
# ... while 'name@N' splits into [name, N].
print('test@2'.split('@'))
# list.append mutates in place and returns None, so this prints None.
print(['0'].append(1))

['test']
['test', '2']
None


Data Structure

A parent Node: (key, [])
A leaf node: (key, value)

Example:

1. 
{'test/value': 213.0, 'test/value@1': 212.0, 'test@1/value': 211.0, 'test@2/value': 210.0}
[(test, [(value, 213), (value, 212)]), (test, [(value, 211)]), (test, [(value, 210)])]
 
2. 
test/options/enable   test/value  test/value@1      test@2/key/option  test@2/value  
1-2000     213.0         212.0  revive_bug=Ticket111         210.0


Path indices, value
([(test, 0), (options, 0), (enable, 0)], 1-2000)
([(test, 0), (value, 0) ], 213.0)
([(test, 0), (value, 1) ], 212.0)
([(test, 2), (key, 0), (option, 0) ], revive_bug=Ticket111)
([(test, 2), (value, 0)], 210)


(test, [(options, [(enable, )])])


 
 