add gridsearch and random search

llinjupt · Apr 19, 2019 · 03056b5 · 03056b5
1 parent d6d8101
commit 03056b5
Show file tree

Hide file tree

Showing 8 changed files with 1,016 additions and 576 deletions.
diff --git a/footstone/crossvalid.py b/footstone/crossvalid.py
@@ -71,6 +71,13 @@ def data_split(X, y, ratio=0.3, random_state=0):
     # 'X_train, y_test, x_labels, y_labels = '
     return train_test_split(X, y, test_size=ratio, random_state=random_state)
 
+# extend features, e.g. degree=2 maps [x1, x2] to [1, x1, x2, x1^2, x1*x2, x2^2]
+def data_extend_feature(X, degree=2, interaction_only=False, bias=True):
+    from sklearn.preprocessing import PolynomialFeatures
+    poly = PolynomialFeatures(degree=degree, interaction_only=interaction_only,
+                              include_bias=bias)
+    return poly.fit_transform(X)
+
 if __name__ == "__main__":
     X,y = normal_dis_trainset(3, 3)
 

diff --git a/footstone/dbload.py b/footstone/dbload.py
@@ -76,16 +76,19 @@ def load_mnist_vector(count=100, test=100):
     X_train, X_labels = load_mnist(r"./db/mnist", kind='train', count=count)
     X_train = X_train.reshape(X_train.shape[0], X_train.shape[1] ** 2)
 
+    mean = np.mean(X_train, axis=0)
+    std = np.std(X_train, axis=0)
+    std[std == 0] = 1e-25
+
     ds = scaler.DataScaler(X_train)
     X_train = ds.sklearn_standard(X_train)
 
-    y_train, y_labels = load_mnist(r"./db/mnist", kind='t10k', count=test)
-    y_train = y_train.reshape(y_train.shape[0], y_train.shape[1] ** 2)
-
-    ds = scaler.DataScaler(y_train)
-    y_train = ds.sklearn_standard(y_train)
+    y_test, y_labels = load_mnist(r"./db/mnist", kind='t10k', count=test)
+    y_test = y_test.reshape(y_test.shape[0], y_test.shape[1] ** 2)
+    # Note: the test set must be standardized with the X_train mean and std
+    y_test = (y_test - mean)/std 
 
-    return X_train, X_labels, y_train, y_labels
+    return X_train, X_labels, y_test, y_labels
 
 def __load_kaggele_mnist(fname, labeled=True, count=-1):
     ''' Load Kaggle Mnist From csv file
@@ -196,8 +199,8 @@ def load_bmi_dataset(random_state=None, standard=True):
         X,y = scaler.shuffle(X, y)
 
     if not standard: return X,y
-    else: return scaler.standard(X), scaler.standard(y)
-    
+    else: return scaler.standard(X), y
+
 # generate normal distribution train set
 def load_nd_dataset(positive=100, negtive=100, type='normal'):
     np.random.seed(3)