In [None]:
# Utilities
import numpy as np
import pandas as pd

# ML
#import sklearn
#sklearn.__version__# use this when reopening previously saved ML models
import matplotlib.pyplot as plt
import re
from sklearn.model_selection import cross_val_score, GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error, mean_absolute_error
    #pprint allows us to see current hyperparameter values of our model 
from pprint import pprint
# Load the raw measurements and discard the fields that are not used as features.
# "File Index" is dropped for now; it may be used later to include data from the wafer.
unused_fields = [
    "File Index", "Test Number", "??", "VDDPE (Range)", "VDDCE (Range)",
    "DVDD (Range)", "Period (Range)", "Value", "Number of Failed Pins", "Failed Pins",
]
memData = pd.read_csv("vminCkbNEW.csv").drop(columns=unused_fields)

# Rows without a "Shmoo Value" are removed; that column is later used as the regression target.
memData.dropna(subset=['Shmoo Value'], inplace=True)

# Strip the leading letter(s) from the EMA setting codes so the remainder can be cast to int below.
leading_letter = {'EMA#1': 'A', 'EMA#2': 'B', 'EMAW': 'A', 'EMAS': 'A'}
for field, letter in leading_letter.items():
    memData[field] = memData[field].map(lambda code, letter=letter: code.lstrip(letter))

memData["Shmoo Value"] = memData["Shmoo Value"].astype('float')

# Setting columns that must be integer-typed before one-hot encoding.
columns = ['EMA#1', 'EMA#2', 'EMAW', 'EMAS', 'EMAP', 'WABL', 'WABLM', 'RAWL', 'RAWLM', 'KEN']
memData = memData.astype({name: 'int' for name in columns})

# Non-numeric chip temperatures are coerced to NaN and replaced by -40.0,
# then every temperature is divided by 150.0.
chip_temp = pd.to_numeric(memData['Chip Temp'], errors='coerce').fillna(-40.0).astype(float)
memData['Chip Temp'] = chip_temp / 150.0

# Keep only the first alphanumeric run of each architecture string.
first_alnum_run = re.compile('[A-Za-z0-9]+')
memData['Architecture'] = memData['Architecture'].map(
    lambda name: first_alnum_run.search(name).group(0)
)

# One-hot encode the categorical fields, then remove "Arch. Type" and the "*_99" indicator columns.
categorical_fields = [
    "Architecture", "Chip Type", "A/S", "EMA#1", "EMA#2", "EMAW", "EMAS",
    "EMAP", "WABL", "WABLM", "RAWL", "RAWLM", "KEN",
]
memData = pd.get_dummies(memData, columns=categorical_fields)
memData = memData.drop(columns=[
    "Arch. Type", "WABL_99", "WABLM_99", "RAWLM_99", "RAWL_99",
    "KEN_99", "EMA#2_99", "EMAS_99", "EMAP_99", "EMAW_99",
])
memData.head()


In [None]:
# Scratch check: break an underscore-delimited name into its fields.
string = 'sc7p5mcpp96p_sfk_lvt_c16'
string.split('_')

In [None]:
# create training and testing sets
# Fraction of rows held out for testing. The previous hard-coded row counts
# (144931 train / 16103 test) are exactly this split of a 161034-row table, but for
# any other table size those fixed slices would overlap (leaking training rows into
# the test set) or skip rows. Deriving the split point from len(X) keeps the two
# sets disjoint and exhaustive.
TEST_FRACTION = 0.1

# Seeded shuffle so the split is reproducible across kernel restarts.
# shuffle() returns a shuffled copy; memData itself is left untouched.
X = shuffle(memData, random_state=42)
y = X["Shmoo Value"]
X = X.drop(columns=["Shmoo Value"])

n_test = int(len(X) * TEST_FRACTION)
n_train = len(X) - n_test

X_train, y_train = X.iloc[:n_train], y.iloc[:n_train]
X_test, y_test = X.iloc[n_train:], y.iloc[n_train:]

# Sanity check: every row lands in exactly one of the two sets.
assert len(X_train) + len(X_test) == len(X)

In [None]:
# randomized search for hyperparameter optimization
# Print the default hyperparameters of an untuned forest for reference.
rf = RandomForestRegressor()
pprint(rf.get_params())

# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split.
# 1.0 means "all features", which is what the legacy 'auto' value meant for regressors;
# 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3, so it is spelled as 1.0 here.
max_features = [1.0, 'sqrt']
# Maximum number of levels in tree (None = grow until leaves are pure / too small to split)
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True]

# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
pprint(random_grid)

# Use the random grid to search for best hyperparameters
# First create the base model to tune
rf = RandomForestRegressor(random_state=42)
# Random search of parameters, using 3 fold cross validation,
# search across 100 different combinations, and use all available cores.
# n_jobs=-1 is what actually enables the multi-core search described above;
# the sampled candidates are still fixed by random_state.
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100,
                               cv = 3, verbose=2, random_state=42, n_jobs = -1)
# Fit the random search model
rf_random.fit(X_train, y_train)
rf_random.best_params_


In [None]:
# Compare an untuned forest against the best estimator found by the randomized search.
from sklearn.metrics import mean_squared_error

# Baseline: default hyperparameters, fixed seed.
base_model = RandomForestRegressor(random_state = 42).fit(X_train, y_train)
y_predicted_base = base_model.predict(X_test)
mse_base = mean_squared_error(y_test, y_predicted_base)
print(mse_base)

# Tuned: the refitted winner of the randomized search.
best_random = rf_random.best_estimator_
y_predicted_random = best_random.predict(X_test)
mse_random = mean_squared_error(y_test, y_predicted_random)
print(mse_random)

# Show the first five test rows side by side for both models.
for idx in range(5):
    label = y_test.iloc[idx]
    print("For the base model:")
    print(f"The test label is {label!s} and the predicted value is {y_predicted_base[idx]!s}")
    print("For the randomized search optimal model:")
    print(f"The test label is still {label!s} and the predicted value is {y_predicted_random[idx]!s}")



In [None]:
# Exhaustive grid search over a small, hand-picked set of hyperparameter values.
# NOTE(review): the original comment said to ignore this cell, but the later plotting
# cell uses names suffixed "_grid", presumably derived from this search - confirm
# before deleting it.

param_grid = {
    'bootstrap': [True],
    'max_depth': [None],
    # 1.0 = consider all features at each split. This replaces the legacy 'auto' value,
    # which meant the same thing for regressors but was removed in scikit-learn 1.3.
    'max_features': [1.0],
    'min_samples_leaf': [3, 4],
    'min_samples_split': [9,10],
    'n_estimators': [1600]
}

# Seeded like the other forests in this notebook so the search result is reproducible.
rfGrid = RandomForestRegressor(random_state = 42)
grid_search = GridSearchCV(estimator = rfGrid, param_grid = param_grid, 
                          cv = 3, verbose = 2)

grid_search.fit(X_train, y_train)
grid_search.best_params_


In [None]:
# Evaluate, visualise and persist the model selected by the grid search.
#
# NOTE(review): best_grid, y_predicted_grid and feature_importances were used here
# without being defined anywhere in the notebook, so this cell raised NameError on a
# fresh kernel. They are reconstructed below from grid_search - confirm this matches
# the definitions that were originally intended.
best_grid = grid_search.best_estimator_
y_predicted_grid = best_grid.predict(X_test)
# Per-feature importance of the fitted forest, labelled by column and sorted high to low.
feature_importances = pd.Series(
    best_grid.feature_importances_, index=X_train.columns
).sort_values(ascending=False)

print(feature_importances)
ax = feature_importances.plot.bar(rot=65)
plt.show()

# Predicted vs. measured values; the black diagonal marks a perfect prediction.
plt.scatter(y_test, y_predicted_grid,  color='green', alpha=0.03)
plt.xlim([0.2,0.7])
plt.ylim([0.2,0.7])
plt.plot([0.2,0.7],[0.2,0.7], color='black')
plt.xlabel("Test Vmin")
plt.ylabel("Predicted Vmin")
plt.title("Memory Cell Vmin Prediction Error")
plt.show()

# Absolute error, then the same error relative to the mean test label.
print(mean_absolute_error(y_test, y_predicted_grid))
print(mean_absolute_error(y_test, y_predicted_grid)/y_test.mean())

# Persist the fitted model to disk.
from joblib import dump, load
dump(best_grid, 'RFVminMem.joblib')

In [None]:
# Utilities
import numpy as np
import pandas as pd

# ML
#import sklearn
#sklearn.__version__# use this when reopening previously saved ML models
import matplotlib.pyplot as plt
import re
from sklearn.model_selection import cross_val_score, GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error, mean_absolute_error
    #pprint allows us to see current hyperparameter values of our model 
from pprint import pprint

# Re-read the raw CSV into a separate frame (no columns dropped or encoded)
# so the original values can be inspected.
a = pd.read_csv("vminCkbNEW.csv")
a.head()

In [None]:
# List the column names of the raw frame.
a.columns

In [None]:

# Show the distinct raw 'Architecture' strings.
# The disabled line below would first reduce each string to its first alphanumeric run.
#a['Architecture'] = a['Architecture'].map(lambda x: re.search('[A-Za-z0-9]+', x).group(0))
a['Architecture'].unique()