In [None]:
# Utilities
import numpy as np
import pandas as pd

# ML
# Uncomment to check the installed scikit-learn version (use this when
# reopening previously saved ML models):
#import sklearn
#sklearn.__version__
import matplotlib.pyplot as plt
import re
from sklearn.model_selection import cross_val_score, GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error, mean_absolute_error
# pprint is used to pretty-print the hyperparameter grid before the search
from pprint import pprint
# Load the raw measurement export and turn it into a purely numeric feature table.

# Columns that are not used as model inputs. "File Index" is dropped for now;
# it may be used later to include data from the wafer.
unused_columns = [
    "Arch. Type", "A/S", "RAWLM", "RAWL", "WABLM", "WABL", "EMAP", "EMAS",
    "EMAW", "EMA#2", "File Index", "Test Number", "??", "VDDPE (Range)",
    "VDDCE (Range)", "DVDD (Range)", "Period (Range)", "Value",
    "Number of Failed Pins", "Failed Pins",
]
# Categorical columns that get expanded into indicator columns.
one_hot_columns = ["Architecture", "Chip Type", "EMA#1", "KEN"]
# Matches the first run of letters/digits in an architecture label.
leading_token = re.compile('[A-Za-z0-9]+')

measurements = (
    pd.read_csv("vminCkbNEW.csv")
    .drop(columns=unused_columns)
    # Rows without a target value cannot be used.
    .dropna(subset=['Shmoo Value'])
    .assign(**{
        # Strip the leading 'A' characters, then store the remainder as an integer.
        'EMA#1': lambda frame: frame['EMA#1'].map(lambda code: code.lstrip('A')).astype('int'),
        'Shmoo Value': lambda frame: frame['Shmoo Value'].astype('float'),
        'KEN': lambda frame: frame['KEN'].astype('int'),
        # Unparseable temperatures fall back to -40.0; the result is divided by 150.0
        # (presumably the upper end of the temperature range - TODO confirm).
        'Chip Temp': lambda frame: (
            pd.to_numeric(frame['Chip Temp'], errors='coerce').fillna(-40.0).astype(float) / 150.0
        ),
        # Keep only the first alphanumeric token of the architecture label.
        'Architecture': lambda frame: frame['Architecture'].map(
            lambda label: leading_token.search(label).group(0)
        ),
    })
)

memData = pd.get_dummies(measurements, columns=one_hot_columns)
memData.head()


In [None]:
# Create training and testing sets.
#
# The rows are shuffled once with a fixed seed so that a Restart & Run All
# reproduces exactly the same split, then the head of the shuffled table is
# used for training and the tail for testing.
#
# The split is proportional instead of using hard-coded row counts
# (previously 146514 training rows / 14490 test rows, i.e. roughly 91% / 9%).
# With fixed counts, a CSV with fewer than 161004 usable rows made the two
# slices overlap (test rows leaking into training), and a larger one silently
# left rows unused.
SPLIT_SEED = 42
TEST_FRACTION = 0.09

shuffled = shuffle(memData, random_state=SPLIT_SEED)
y = shuffled["Shmoo Value"]
X = shuffled.drop(columns=["Shmoo Value"])

# shuffle=False keeps the "first rows train, last rows test" layout of the
# already-shuffled table and guarantees the two sets are disjoint.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, shuffle=False
)


In [None]:
# Hyperparameter candidates for the random forest. Every combination is
# evaluated by the exhaustive grid search below (2 * 2 * 2 = 8 combinations,
# each cross-validated on 3 folds).

# Number of trees in random forest
n_estimators = [300, 500]
# Number of features to consider at every split.
# 1.0 means "use all features", which is what 'auto' meant for regressors;
# the 'auto' string was deprecated in scikit-learn 1.1 and removed in 1.3,
# so passing it makes the fit fail on current versions.
max_features = [1.0]
# Maximum number of levels in tree (None = no limit)
max_depth = [None]
# Minimum number of samples required to split a node
min_samples_split = [5, 6]
# Minimum number of samples required at each leaf node
min_samples_leaf = [2, 3]
# Method of selecting samples for training each tree
bootstrap = [True]

# Create the parameter grid
param_grid = {'n_estimators': n_estimators,
              'max_features': max_features,
              'max_depth': max_depth,
              'min_samples_split': min_samples_split,
              'min_samples_leaf': min_samples_leaf,
              'bootstrap': bootstrap}
pprint(param_grid)

# Fixed seed so the bootstrap sampling, and therefore the selected
# hyperparameters and the fitted model, are repeatable between runs.
RF_SEED = 42
rfGrid = RandomForestRegressor(random_state=RF_SEED)
grid_search = GridSearchCV(estimator=rfGrid, param_grid=param_grid,
                           cv=3, verbose=50, n_jobs=-1)

grid_search.fit(X_train, y_train)
grid_search.best_params_


In [None]:
from joblib import load

In [None]:
# Use the best estimator found by the grid search to predict the held-out rows.
best_grid = grid_search.best_estimator_
y_predicted_grid = best_grid.predict(X_test)

# One row per input column, ordered from most to least important.
importance_table = pd.DataFrame(
    {'importance': best_grid.feature_importances_},
    index=X_train.columns,
)
feature_importances = importance_table.sort_values(by='importance', ascending=False)


In [None]:
# Feature ranking: text table first, then the same data as a bar chart.
print(feature_importances)
ax = feature_importances.plot.bar(rot=65)
ax.set_title("Feature importance in Random forest model.")
plt.show()

# Predicted vs. measured values on the test set; points on the black
# diagonal are perfect predictions.
axis_low, axis_high = 0.2, 0.7
plt.scatter(y_test, y_predicted_grid, color='green', alpha=0.03)
plt.xlim(axis_low, axis_high)
plt.ylim(axis_low, axis_high)
plt.plot([axis_low, axis_high], [axis_low, axis_high], color='black')
plt.xlabel("Test Vmin")
plt.ylabel("Predicted Vmin")
plt.title("Memory Cell Vmin Prediction Error")
plt.show()

# Mean absolute error, first in target units, then relative to the mean
# test target value.
test_mae = mean_absolute_error(y_test, y_predicted_grid)
print(test_mae)
print(test_mae / y_test.mean())

# Persist the fitted model so it can be reloaded without repeating the search.
from joblib import dump, load
dump(best_grid, 'RFVminMemShort.joblib')

In [None]:
from joblib import load
# Reload the previously saved model and redo the predictions and plots
# without re-running the grid search.
# NOTE(review): joblib files are pickle-based - only load files you trust.
best_grid = load('RFVminMemShort.joblib')
y_predicted_grid = best_grid.predict(X_test)

# One row per input column, ordered from most to least important.
feature_importances = pd.DataFrame(
    {'importance': best_grid.feature_importances_},
    index=X_train.columns,
).sort_values(by='importance', ascending=False)
print(feature_importances)
ax = feature_importances.plot.bar(rot=65)
ax.set_title("Feature importance in Random forest model.")
plt.show()

# Predicted vs. measured values on the test set; the black diagonal marks
# perfect predictions.
axis_low, axis_high = 0.2, 0.7
plt.scatter(y_test, y_predicted_grid, color='green', alpha=0.03)
plt.xlim(axis_low, axis_high)
plt.ylim(axis_low, axis_high)
plt.plot([axis_low, axis_high], [axis_low, axis_high], color='black')
plt.xlabel("Test Vmin")
plt.ylabel("Predicted Vmin")
plt.title("Memory Cell Vmin Prediction Error")
plt.show()
