/
Generate_Data.py
118 lines (95 loc) · 3.44 KB
/
Generate_Data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import numpy as np
import pandas as pd
import os
import errno
def my_relu(x):
    """Return the rectified linear unit of ``x``.

    ``x`` is multiplied by the boolean mask ``x > 0``, so positive entries
    are kept and every other entry becomes zero. Works for plain numbers and
    for NumPy arrays (element-wise).
    """
    return x * (x > 0)
#-----------------Regression Data------------------------#
# Weights of the per-row shared factor (a) and the per-feature factor (b).
a = 1
b = 1


def draw_truncated_normals(count, scale, bound=10):
    """Return ``count`` draws of ``scale * N(0, 1)`` lying in [-bound, bound].

    Values are taken from the global ``np.random`` stream in order; a draw
    outside the bounds is discarded and replaced by the next draw of the
    stream. Only as many values as are still missing are requested each
    time, so the stream is consumed exactly as a one-value-at-a-time
    rejection loop would consume it.

    Args:
        count: number of accepted values to return.
        scale: multiplier applied to each standard-normal draw.
        bound: values greater than ``bound`` or less than ``-bound`` are
            rejected.

    Returns:
        1-D float array of length ``count``.
    """
    chunks = []
    missing = count
    while missing > 0:
        draws = scale * np.random.normal(0, 1, size=missing)
        kept = draws[~((draws > bound) | (draws < -bound))]
        chunks.append(kept)
        missing -= kept.size
    return np.concatenate(chunks) if chunks else np.empty(0)


def generate_split(n_samples, n_features, a, b, sigma, progress_label=None):
    """Generate one (x, y) data split from the global ``np.random`` stream.

    For every row one shared value ``ee`` and ``n_features`` values ``zj``
    are drawn (each ``sqrt(sigma) * N(0, 1)``, restricted to [-10, 10]) and
    feature ``j`` is ``(a*ee + b*zj) / sqrt(a*a + b*b)``. The response uses
    only the first five features:
    ``5*x1/(1 + x0^2) + 5*sin(x2*x3) + 2*x4 + N(0, 1)``.

    Args:
        n_samples: number of rows to generate.
        n_features: number of feature columns; must be at least 5.
        a: weight of the shared per-row value.
        b: weight of the per-feature value.
        sigma: variance factor; draws are scaled by ``sqrt(sigma)``.
        progress_label: when given, ``print(progress_label, i)`` is called
            for every row index ``i`` that is a multiple of 1000.

    Returns:
        Tuple ``(x, y)`` of float arrays with shapes
        ``(n_samples, n_features)`` and ``(n_samples, 1)``.

    Raises:
        ValueError: if ``n_features`` is smaller than 5.
    """
    if n_features < 5:
        raise ValueError("n_features must be at least 5")
    scale = np.sqrt(sigma)
    norm = np.sqrt(a * a + b * b)
    x = np.zeros([n_samples, n_features])
    y = np.zeros([n_samples, 1])
    for i in range(n_samples):
        if progress_label is not None and i % 1000 == 0:
            print(progress_label, i)
        # First accepted draw is the shared factor, the rest are per-feature.
        draws = draw_truncated_normals(n_features + 1, scale)
        ee = draws[0]
        x[i, :] = (a * ee + b * draws[1:]) / norm
        x0, x1, x2, x3, x4 = x[i, :5]
        # The response noise is drawn after the row's features.
        y[i, 0] = 5 * x1 / (1 + x0 * x0) + 5 * np.sin(x2 * x3) + 2 * x4 + np.random.normal(0, 1)
    return x, y


def main():
    """Write train/val/test CSV files for seeds 1..10 under ``./data/<seed>/``."""
    for my_seed in range(1, 11):
        np.random.seed(my_seed)
        TotalP = 2000
        print('p = ', TotalP)
        sigma = 1.0

        # Splits are generated in this order from one seeded stream.
        x_train, y_train = generate_split(
            10000, TotalP, a, b, sigma, progress_label="x_train generate = ")
        x_val, y_val = generate_split(1000, TotalP, a, b, sigma)
        x_test, y_test = generate_split(1000, TotalP, a, b, sigma)

        x_train_df = pd.DataFrame(x_train)
        y_train_df = pd.DataFrame(y_train)
        x_val_df = pd.DataFrame(x_val)
        y_val_df = pd.DataFrame(y_val)
        x_test_df = pd.DataFrame(x_test)
        y_test_df = pd.DataFrame(y_test)

        PATH = './data/' + str(my_seed) + "/"
        # An already existing directory is fine; any other failure raises.
        os.makedirs(PATH, exist_ok=True)

        print("write train")
        x_train_df.to_csv(PATH + "x_train.csv")
        y_train_df.to_csv(PATH + "y_train.csv")
        print("write val")
        x_val_df.to_csv(PATH + "x_val.csv")
        y_val_df.to_csv(PATH + "y_val.csv")
        print('write test')
        x_test_df.to_csv(PATH + "x_test.csv")
        y_test_df.to_csv(PATH + "y_test.csv")


if __name__ == "__main__":
    main()