-
Notifications
You must be signed in to change notification settings - Fork 13
/
sphynxdisk.go
183 lines (174 loc) · 4.98 KB
/
sphynxdisk.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
package main
import (
"bufio"
"context"
"fmt"
"github.com/apache/arrow/go/arrow/ipc"
"github.com/apache/arrow/go/arrow/memory"
pb "github.com/lynxkite/lynxkite/sphynx/proto"
"io/ioutil"
"log"
"os"
"strings"
)
// arrowAllocator is the Arrow memory allocator handed to both the IPC file
// writer and the IPC file reader used for ordered-disk data in this file.
var arrowAllocator = memory.NewGoAllocator()
// createEntity returns a pointer to a fresh, zero-valued entity of the
// concrete type named by typeName. A name that matches none of the supported
// entity types yields a nil entity and an error naming the offending value.
func createEntity(typeName string) (Entity, error) {
	var created Entity
	switch typeName {
	case "VertexSet":
		created = new(VertexSet)
	case "EdgeBundle":
		created = new(EdgeBundle)
	case "Scalar":
		created = new(Scalar)
	case "DoubleAttribute":
		created = new(DoubleAttribute)
	case "StringAttribute":
		created = new(StringAttribute)
	case "DoubleVectorAttribute":
		created = new(DoubleVectorAttribute)
	case "LongAttribute":
		created = new(LongAttribute)
	case "LongVectorAttribute":
		created = new(LongVectorAttribute)
	default:
		return nil, fmt.Errorf("Unknown entity to load: %v", typeName)
	}
	return created, nil
}
// saveToOrderedDisk writes entity e into the directory dataDir/guid.
//
// A type_name file containing e.typeName() is always (re)written first. Then:
//   - a TabularEntity is serialized as one Arrow record into data.arrow,
//     followed by an empty _SUCCESS marker file; if the marker already exists
//     the entity is treated as saved and nothing further is written;
//   - a *Scalar is written through its own write method;
//   - any other entity type is rejected with an error.
//
// The directory is created when missing and reused when it already exists.
// Every file opened here is closed on all paths, including error paths.
func saveToOrderedDisk(e Entity, dataDir string, guid GUID) error {
	dirName := fmt.Sprintf("%v/%v", dataDir, guid)
	// An existing directory (e.g. from an earlier write) is fine. Any other
	// failure is reported here instead of resurfacing on the first file create.
	if err := os.Mkdir(dirName, 0775); err != nil && !os.IsExist(err) {
		return fmt.Errorf("Failed to create directory: %v", err)
	}
	if err := writeTypeNameFile(dirName, string(e.typeName())); err != nil {
		return err
	}
	switch e := e.(type) {
	case TabularEntity:
		onDisk, err := hasOnDisk(dataDir, guid)
		if err != nil {
			return err
		}
		if onDisk {
			log.Printf("guid %v is already on disk", guid)
			return nil
		}
		fname := fmt.Sprintf("%v/data.arrow", dirName)
		if err := writeOrderedArrowFile(fname, e); err != nil {
			return err
		}
		// The marker is written only after the data file is complete and
		// closed; loadFromOrderedDisk refuses directories without it.
		successFile := fmt.Sprintf("%v/_SUCCESS", dirName)
		if err := ioutil.WriteFile(successFile, nil, 0775); err != nil {
			return fmt.Errorf("Failed to write success file: %v", err)
		}
		return nil
	case *Scalar:
		return e.write(dirName)
	default:
		return fmt.Errorf("Can't write entity with GUID %v to Ordered Sphynx Disk.", guid)
	}
}

// writeTypeNameFile stores typeName in dirName/type_name, reporting write,
// flush and close failures and never leaving the file handle open.
func writeTypeNameFile(dirName string, typeName string) error {
	typeFile, err := os.Create(fmt.Sprintf("%v/type_name", dirName))
	if err != nil {
		return err
	}
	tfw := bufio.NewWriter(typeFile)
	if _, err := tfw.WriteString(typeName); err != nil {
		typeFile.Close()
		return fmt.Errorf("Failed to create type file: %v", err)
	}
	// The buffered bytes only reach the file on Flush, so its error matters.
	if err := tfw.Flush(); err != nil {
		typeFile.Close()
		return fmt.Errorf("Failed to create type file: %v", err)
	}
	if err := typeFile.Close(); err != nil {
		return fmt.Errorf("Failed to create type file: %v", err)
	}
	return nil
}

// writeOrderedArrowFile serializes e.toOrderedRows() as a single record into
// a new Arrow IPC file at fname and closes the file on every path.
func writeOrderedArrowFile(fname string, e TabularEntity) error {
	f, err := os.Create(fname)
	if err != nil {
		return fmt.Errorf("Failed to create file: %v", err)
	}
	// On success the file is closed explicitly below so that the close error
	// is reported; this deferred close only covers the early-return paths.
	closed := false
	defer func() {
		if !closed {
			f.Close()
		}
	}()
	rec := e.toOrderedRows()
	w, err := ipc.NewFileWriter(
		f, ipc.WithSchema(rec.Schema()), ipc.WithAllocator(arrowAllocator))
	if err != nil {
		return fmt.Errorf("Failed to create Arrow writer: %v", err)
	}
	if err = w.Write(rec); err != nil {
		return fmt.Errorf("Failed to write Arrow file: %v", err)
	}
	if err = w.Close(); err != nil {
		return fmt.Errorf("Failed to write Arrow file: %v", err)
	}
	closed = true
	if err = f.Close(); err != nil {
		return fmt.Errorf("Failed to write Arrow file: %v", err)
	}
	return nil
}
// loadFromOrderedDisk reads the entity stored in the directory dataDir/guid.
//
// The entity type is taken from the type_name file (newlines are stripped)
// and an empty entity of that type is built with createEntity. Then:
//   - a TabularEntity requires the _SUCCESS marker to be present and is
//     filled from the single record of data.arrow; a file with zero records
//     leaves the entity empty, and more than one record is an error;
//   - a *Scalar is read with readScalar;
//   - any other entity type is rejected with an error.
func loadFromOrderedDisk(dataDir string, guid GUID) (Entity, error) {
	dirName := fmt.Sprintf("%v/%v", dataDir, guid)
	typeFName := fmt.Sprintf("%v/type_name", dirName)
	typeData, err := ioutil.ReadFile(typeFName)
	if err != nil {
		return nil, fmt.Errorf("Failed to read type of %v: %v", dirName, err)
	}
	// It may or may not have a newline.
	typeName := strings.ReplaceAll(string(typeData), "\n", "")
	log.Printf("Reading %v %v from ordered disk.", typeName, guid)
	e, err := createEntity(typeName)
	if err != nil {
		return nil, err
	}
	switch e := e.(type) {
	case TabularEntity:
		fname := fmt.Sprintf("%v/data.arrow", dirName)
		onDisk, err := hasOnDisk(dataDir, guid)
		if err != nil {
			return nil, err
		}
		if !onDisk {
			return nil, fmt.Errorf("Path is not present: %v", dirName)
		}
		f, err := os.Open(fname)
		if err != nil {
			return nil, err
		}
		defer f.Close()
		r, err := ipc.NewFileReader(f, ipc.WithAllocator(arrowAllocator))
		if err != nil {
			return nil, fmt.Errorf("Failed to open %v: %v", dirName, err)
		}
		defer r.Close()
		// Arrow files can have multiple records. We use zero records for empty
		// entities or one record. We never use more than one record.
		numRecords := r.NumRecords()
		if numRecords == 1 {
			rec, err := r.Record(0)
			if err != nil {
				return nil, fmt.Errorf("Failed to read %v: %v", dirName, err)
			}
			defer rec.Release()
			if err = e.readFromOrdered(rec); err != nil {
				return nil, fmt.Errorf("Could not read %v: %v", dirName, err)
			}
		} else if numRecords > 1 {
			return nil, fmt.Errorf("%v has %v records, expected 1.", dirName, numRecords)
		}
	case *Scalar:
		*e, err = readScalar(dirName)
		if err != nil {
			return nil, err
		}
	default:
		return nil, fmt.Errorf("Failed to read entity with GUID %v from Ordered Sphynx Disk.", guid)
	}
	// This is the outer e (the Entity interface value); the type-switch
	// variables above alias the same underlying entity.
	return e, nil
}
// WriteToOrderedDisk looks up the entity identified by in.Guid in the
// server's entity cache and saves it under s.dataDir with saveToOrderedDisk.
// It returns a NotInCacheError when the entity is not cached, and a wrapped
// error when saving fails. ctx is not used by this method.
func (s *Server) WriteToOrderedDisk(
	ctx context.Context, in *pb.WriteToOrderedDiskRequest) (*pb.WriteToOrderedDiskReply, error) {
	id := GUID(in.Guid)
	entity, found := s.entityCache.Get(id)
	if !found {
		return nil, NotInCacheError("entity", id)
	}
	saveErr := saveToOrderedDisk(entity, s.dataDir, id)
	if saveErr != nil {
		return nil, fmt.Errorf("failed to write %v to ordered disk: %v", id, saveErr)
	}
	reply := &pb.WriteToOrderedDiskReply{}
	return reply, nil
}
// hasOnDisk reports whether the _SUCCESS marker file exists in the directory
// dataDir/guid. A missing marker is not an error and yields (false, nil);
// any other failure to stat the marker is returned as (false, err).
func hasOnDisk(dataDir string, guid GUID) (bool, error) {
	marker := fmt.Sprintf("%v/%v/_SUCCESS", dataDir, guid)
	switch _, statErr := os.Stat(marker); {
	case statErr == nil:
		return true, nil
	case os.IsNotExist(statErr):
		return false, nil
	default:
		return false, statErr
	}
}