/
nn_coreml_ios.mm
186 lines (143 loc) · 6.76 KB
/
nn_coreml_ios.mm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#include "../../YaneuraOu/source/eval/deep/nn_coreml.h"
#if defined(YANEURAOU_ENGINE_DEEP) && defined(COREML)
//#include "dlshogi_types.h"
#import <Foundation/Foundation.h>
#import <CoreML/CoreML.h>
#include "../../YaneuraOu/source/usi.h"
using namespace std;
using namespace Tools;
// URL string locating the compiled Core ML model. NNCoreML::load() below turns
// it into an NSURL via +[NSURL URLWithString:]. Only declared here; the
// definition is not visible in this section of the file.
extern std::string modelc_url_cache;
// Compute-unit selection. NNCoreML::load() below casts this value to
// MLComputeUnits and stores it in the model configuration. Only declared here;
// the definition is not visible in this section of the file.
extern int coreml_compute_units_cache;
/// Feature provider that carries the model's single input feature, "input".
API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0))
__attribute__((visibility("hidden")))
@interface DlShogiResnetInput : NSObject <MLFeatureProvider>

/// The input tensor: a 4-dimensional array of floats (described as 1 × 119 × 9 × 9).
@property (nonatomic, strong, readwrite) MLMultiArray *input;

/// Plain -init is disabled; use -initWithInput: instead.
- (instancetype)init NS_UNAVAILABLE;

/// Designated initializer.
/// @param input The array to expose under the feature name "input".
- (instancetype)initWithInput:(MLMultiArray *)input NS_DESIGNATED_INITIALIZER;

@end
/// Feature provider that carries the model's two output features,
/// "output_policy" and "output_value".
API_AVAILABLE(macos(10.15), ios(13.0), watchos(6.0), tvos(13.0))
__attribute__((visibility("hidden")))
@interface DlShogiResnetOutput : NSObject <MLFeatureProvider>

/// Policy output: a multidimensional array of floats.
@property (nonatomic, strong, readwrite) MLMultiArray *output_policy;

/// Value output: a multidimensional array of floats.
@property (nonatomic, strong, readwrite) MLMultiArray *output_value;

/// Plain -init is disabled; use -initWithOutput_policy:output_value: instead.
- (instancetype)init NS_UNAVAILABLE;

/// Designated initializer.
/// @param output_policy The array to expose under the feature name "output_policy".
/// @param output_value  The array to expose under the feature name "output_value".
- (instancetype)initWithOutput_policy:(MLMultiArray *)output_policy
                         output_value:(MLMultiArray *)output_value NS_DESIGNATED_INITIALIZER;

@end
@implementation DlShogiResnetInput

/// Designated initializer: keeps a strong reference to the supplied array.
- (instancetype)initWithInput:(MLMultiArray *)input {
    if ((self = [super init])) {
        _input = input;
    }
    return self;
}

#pragma mark - MLFeatureProvider

/// The set of feature names this provider answers for: just "input".
- (NSSet<NSString *> *)featureNames {
    NSArray<NSString *> *names = @[@"input"];
    return [NSSet setWithArray:names];
}

/// Returns the stored array wrapped in an MLFeatureValue when asked for
/// "input"; returns nil for every other name.
- (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
    if (![featureName isEqualToString:@"input"]) {
        return nil;
    }
    return [MLFeatureValue featureValueWithMultiArray:_input];
}

@end
@implementation DlShogiResnetOutput

/// Designated initializer: keeps strong references to both supplied arrays.
- (instancetype)initWithOutput_policy:(MLMultiArray *)output_policy
                         output_value:(MLMultiArray *)output_value {
    if ((self = [super init])) {
        _output_policy = output_policy;
        _output_value = output_value;
    }
    return self;
}

#pragma mark - MLFeatureProvider

/// The set of feature names this provider answers for:
/// "output_policy" and "output_value".
- (NSSet<NSString *> *)featureNames {
    NSArray<NSString *> *names = @[@"output_policy", @"output_value"];
    return [NSSet setWithArray:names];
}

/// Returns the matching stored array wrapped in an MLFeatureValue for
/// "output_policy" or "output_value"; returns nil for every other name.
- (nullable MLFeatureValue *)featureValueForName:(NSString *)featureName {
    MLMultiArray *selected = nil;
    if ([featureName isEqualToString:@"output_policy"]) {
        selected = _output_policy;
    } else if ([featureName isEqualToString:@"output_value"]) {
        selected = _output_value;
    } else {
        return nil;
    }
    return [MLFeatureValue featureValueWithMultiArray:selected];
}

@end
namespace Eval::dlshogi
{
// Loads the Core ML model and allocates the input staging buffer.
//
//   model_filename : not referenced in this function; the model location is
//                    taken from the global modelc_url_cache instead.
//   gpu_id         : not referenced in this function.
//   batch_size     : stored in fixed_batch_size; forward() always presents a
//                    batch of this size to the model, and input_buf is sized
//                    for this many samples.
//
// Returns ResultCode::Ok. On any failure an "info string ..." line is printed
// and Tools::exit() is called.
Result NNCoreML::load(const std::string& model_filename , int gpu_id , int batch_size)
{
    fixed_batch_size = batch_size;

    MLModelConfiguration* config = [[MLModelConfiguration alloc] init];
    // Device selection:
    //   MLComputeUnitsCPUOnly   = 0,
    //   MLComputeUnitsCPUAndGPU = 1,
    //   MLComputeUnitsAll       = 2
    // No case has been found where "All" is a disadvantage, but making it
    // selectable is also conceivable.
    config.computeUnits = (MLComputeUnits)coreml_compute_units_cache;

    // +stringWithUTF8String: and +URLWithString: both yield nil for input they
    // cannot parse; report that as a load failure instead of handing nil to
    // Core ML.
    NSString* url_string = [NSString stringWithUTF8String:modelc_url_cache.c_str()];
    NSURL* model_url = url_string ? [NSURL URLWithString:url_string] : nil;
    if (!model_url) {
        sync_cout << "info string Failed to load model, invalid model URL: " << modelc_url_cache << sync_endl;
        Tools::exit();
    }

    NSError *error = nil;
    MLModel *model = [MLModel modelWithContentsOfURL:model_url configuration:config error:&error];
    if (!model) {
        sync_cout << [[NSString stringWithFormat:@"info string Failed to load model, %@", error] UTF8String] << sync_endl;
        Tools::exit();
    }
    // The factory method above returns a fully initialised object, so -init is
    // deliberately NOT sent to it a second time.

    // Move ownership from ARC to the programmer; the destructor hands it back.
    this->model = (void*)CFBridgingRetain(model);

    // Staging buffer holding, per sample, one NN_Input1 followed by one NN_Input2.
    input_buf = new DType[(sizeof(NN_Input1) + sizeof(NN_Input2)) / sizeof(DType) * batch_size];

    return ResultCode::Ok;
}
// Returns the number of devices available to this backend.
int NNCoreML::get_device_count()
{
    // Always reported as one. (Open question from the original author:
    // could there be several when an eGPU is attached?)
    constexpr int device_count = 1;
    return device_count;
}
// Runs neural-network inference for one batch.
//
//   batch_size : number of valid samples; must not exceed fixed_batch_size.
//   p1, p2     : not referenced in this function.
//   x1, x2     : per-sample input features; batch_size elements of each are read.
//   y1         : receives batch_size * MAX_MOVE_LABEL_NUM * SQ_NB DType values
//                copied from the model's "output_policy".
//   y2         : receives batch_size DType values copied from the model's
//                "output_value".
//
// On any failure an "info string ..." line is printed and Tools::exit() is called.
void NNCoreML::forward(const int batch_size, PType* p1, PType* p2, NN_Input1* x1, NN_Input2* x2, NN_Output_Policy* y1, NN_Output_Value* y2)
{
    if (batch_size > fixed_batch_size) {
        sync_cout << "info string batch_size > fixed_batch_size" << sync_endl;
        Tools::exit();
    }

    @autoreleasepool { // required so that buffers Core ML allocates internally are released
        NSError *error = nil;

        // __bridge: ownership is not transferred (it stays with the programmer).
        MLModel* model = (__bridge MLModel*)(this->model);

        // x1 arrives as [batch_size, 62 (MAX_FEATURES1_NUM * COLOR_NB), 9, 9] and
        // x2 as [batch_size, 57 (MAX_FEATURES2_NUM), 9, 9]; repack them into
        // [batch_size, 119, 9, 9]. Only the meaningful first batch_size samples
        // are refreshed, regardless of fixed_batch_size.
        const size_t input1_elems = sizeof(NN_Input1) / sizeof(DType);
        const size_t sample_elems = (sizeof(NN_Input1) + sizeof(NN_Input2)) / sizeof(DType);
        for (int i = 0; i < batch_size; i++) {
            DType* sample = &input_buf[sample_elems * i];
            memcpy(sample, &x1[i], sizeof(NN_Input1));
            memcpy(sample + input1_elems, &x2[i], sizeof(NN_Input2));
        }

        // Wrap input_buf without copying. The deallocator is NULL because
        // input_buf is owned by this object, not by the MLMultiArray.
        const size_t channels = (size_t)COLOR_NB * MAX_FEATURES1_NUM + MAX_FEATURES2_NUM;
        MLMultiArray *model_input = [[MLMultiArray alloc] initWithDataPointer:input_buf
                                                                        shape:@[@(fixed_batch_size), @(channels), @9, @9]
                                                                     dataType:MLMultiArrayDataTypeFloat32
                                                                      strides:@[@(channels * 9 * 9), @(9 * 9), @9, @1]
                                                                  deallocator:NULL
                                                                        error:&error];
        // Cocoa convention: failure is signalled by the nil return value, not by
        // whether the NSError out-parameter was written.
        if (!model_input) {
            sync_cout << [[NSString stringWithFormat:@"info string CoreML inference array allocation failed, %@", error] UTF8String] << sync_endl;
            Tools::exit();
        }

        DlShogiResnetInput *input_ = [[DlShogiResnetInput alloc] initWithInput:model_input];
        id<MLFeatureProvider> out_features = [model predictionFromFeatures:input_ options:[[MLPredictionOptions alloc] init] error:&error];
        if (!out_features) {
            sync_cout << [[NSString stringWithFormat:@"info string CoreML inference failed, %@", error] UTF8String] << sync_endl;
            Tools::exit();
        }

        MLMultiArray *policy = [out_features featureValueForName:@"output_policy"].multiArrayValue;
        MLMultiArray *value = [out_features featureValueForName:@"output_value"].multiArrayValue;
        // A missing feature would give a nil array and a null dataPointer below.
        if (!policy || !value) {
            sync_cout << "info string CoreML inference failed, model output lacks output_policy or output_value" << sync_endl;
            Tools::exit();
        }

        // The outputs live in memory allocated by Core ML, so copy them into the
        // caller-supplied buffers. The count is widened to size_t before
        // multiplying.
        memcpy(y1, policy.dataPointer, (size_t)batch_size * MAX_MOVE_LABEL_NUM * (size_t)SQ_NB * sizeof(DType));
        memcpy(y2, value.dataPointer, (size_t)batch_size * sizeof(DType));
    }
}
// Releases what load() acquired: the retained MLModel and the input staging
// buffer.
// NOTE(review): this assumes load() has run, or that both members are
// initialised to null by the class declaration (not visible here). The model
// release already depended on that same assumption.
NNCoreML::~NNCoreML() {
    // Hand ownership back to ARC. The returned reference is not kept, so ARC
    // releases the model.
    (void)CFBridgingRelease(this->model);
    this->model = nullptr;

    // Free the buffer allocated with new[] in load(); it was previously leaked.
    delete[] input_buf;
    input_buf = nullptr;
}
} // namespace Eval::dlshogi
#endif // defined(YANEURAOU_ENGINE_DEEP) && defined(COREML)