-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
315 lines (260 loc) · 7.47 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
package main
import (
"compress/gzip"
"database/sql"
"encoding/json"
"fmt"
"log"
"os"
"path/filepath"
"strings"
"sync"
"code.gitea.io/gitea/modules/emoji"
_ "github.com/mattn/go-sqlite3"
)
var (
	// db is the process-wide database handle; main opens it before anything
	// else in this file uses it.
	db *sql.DB

	// fileSaveWaitGroup counts the file-writing goroutines that are still
	// running: it is incremented before each saveToFile goroutine is started,
	// decremented by saveDataToGzipFile, and awaited at the end of main.
	fileSaveWaitGroup sync.WaitGroup
)

const (
	// JSON_PAGINATION_PAGE_SIZE is the number of records requested per page
	// (and therefore written per output file) by the export loops.
	JSON_PAGINATION_PAGE_SIZE = 500

	// MINIMUM_REPOSITORY_STARGAZERS is the stargazer threshold a repository
	// must reach to be included in the ActiveRepo view.
	MINIMUM_REPOSITORY_STARGAZERS = 5
)
// Record is a single exported row held as a generic string-keyed map.
// NOTE(review): presumably keyed by column name (records are built by
// rowsAsRecords, which is defined outside this view) — confirm.
type Record map[string]any
// closable is the minimal contract closeOrPanic needs: something with a
// Close method that can report an error. In this file it is satisfied by the
// query rows, the database handle, the output file and the gzip writer.
type closable interface {
Close() error
}
// closeOrPanic closes toClose and aborts the program if closing fails.
//
// Despite the name it does not panic: a failure is reported through
// log.Fatalln, which logs the error and then exits the process.
func closeOrPanic(toClose closable) {
	if closeErr := toClose.Close(); closeErr != nil {
		log.Fatalln("Could not call .Close(): ", closeErr)
	}
}
// programmingLanguages returns every distinct value of the Language column
// found in the Repo table.
//
// The query runs against "Repo" instead of the "ActiveRepo" view on purpose:
// that way the result JSON file gets overwritten for every programming
// language that has ever been encountered.
//
// Any failure while querying, scanning or iterating terminates the program
// via log.Fatalln.
func programmingLanguages() []string {
	rows, err := db.Query("SELECT DISTINCT Language FROM Repo")
	if err != nil {
		log.Fatalln("Error executing query:", err)
	}
	defer closeOrPanic(rows)

	var languages []string
	for rows.Next() {
		var language string
		if err := rows.Scan(&language); err != nil {
			log.Fatalln("Error scanning row:", err)
		}
		languages = append(languages, language)
	}
	// rows.Next returns false both when the result set is exhausted and when
	// iteration failed; without this check a failure would silently produce a
	// truncated language list.
	if err := rows.Err(); err != nil {
		log.Fatalln("Error iterating rows:", err)
	}
	return languages
}
// createView creates a view of repos with fields actually used on the frontend.
// additionally, the repos have to have been found at least 15 search cycles back
// - this prevents displaying deleted repositories.
func createActiveRepoView() {
// Have to use sprintf here, because as sqlite points out: "parameters are not allowed in views"
_, err := db.Exec(fmt.Sprintf(`
begin transaction;
drop view if exists ActiveRepo;
create view ActiveRepo as
select
Id,
Archived,
CreatedAt,
Description,
GithubLink,
Homepage,
Language,
LicenseSpdxId,
LicenseName,
Name,
OwnerAvatarUrl,
OwnerLogin,
RepoPushedAt,
RepoUpdatedAt,
Stargazers
from Repo
where Repo.Stargazers >= %d;
end;
`, MINIMUM_REPOSITORY_STARGAZERS))
if err != nil {
log.Fatalln("Could not create the ActiveRepo view:", err)
}
}
// createIndices makes sure the two indices used by the export queries exist:
// LanguageStargazersId (Language, Stargazers DESC, Id, NotSeenSinceCounter)
// and StargazersId (Stargazers DESC, Id, NotSeenSinceCounter), both on Repo.
//
// Progress is logged around each statement; a failure terminates the program
// via log.Fatalln.
func createIndices() {
	const languageIndexSQL = `
create index if not exists LanguageStargazersId on Repo(Language, Stargazers DESC, Id, NotSeenSinceCounter);
`
	const stargazersIndexSQL = `
create index if not exists StargazersId on Repo(Stargazers DESC, Id, NotSeenSinceCounter);
`

	// ensure announces one step, executes its statement and reports the result.
	ensure := func(announcement, statement, failure string) {
		log.Print(announcement)
		if _, err := db.Exec(statement); err != nil {
			log.Fatalln(failure, err)
		}
		log.Println("done")
	}

	ensure(
		"Creating index on Repo(Language, Stargazers, Id, NotSeenSinceCounter)... ",
		languageIndexSQL,
		"\nCould not create index LanguageStargazers:",
	)
	ensure(
		"Creating index on Repo(Stargazers, Id, NotSeenSinceCounter)... ",
		stargazersIndexSQL,
		"\nCould not create index Stargazers:",
	)
}
// dropIndices removes the LanguageStargazersId and StargazersId indices that
// createIndices sets up.
//
// The statements use "if exists" so that the cleanup is idempotent, mirroring
// the "if not exists" used on creation: an index that is already gone no
// longer aborts the program. Any other failure still terminates the program
// via log.Fatalln.
func dropIndices() {
	log.Print("Dropping index on Repo(Language, Stargazers, Id)... ")
	if _, err := db.Exec(`drop index if exists LanguageStargazersId;`); err != nil {
		log.Fatalln("\nCould not drop index LanguageStargazersId", err)
	}
	log.Println("done")

	log.Print("Dropping index on Repo(Stargazers, Id)... ")
	if _, err := db.Exec(`drop index if exists StargazersId;`); err != nil {
		log.Fatalln("\nCould not drop index StargazersId", err)
	}
	log.Println("done")
}
// languageNameReplacer performs, in one pass, every character substitution
// escapeLanguageName applies.
var languageNameReplacer = strings.NewReplacer(
	"/", "-",
	" ", "-",
	"&", "-",
	"?", "-",
	"#", "-sharp-",
)

// escapeLanguageName turns a language name into the form used as a directory
// name in the output path: "/", " ", "&" and "?" each become "-", "#" becomes
// "-sharp-", and the empty name becomes "-empty-".
func escapeLanguageName(name string) string {
	if escaped := languageNameReplacer.Replace(name); escaped != "" {
		return escaped
	}
	return "-empty-"
}
// main exports the repository database as paginated, gzip-compressed JSON.
//
// It opens the SQLite database, recreates the ActiveRepo view, creates the
// helper indices (dropped again when main returns normally), reads the view's
// column names, calls saveMetadata, exports one set of pages per programming
// language plus one set for all repositories, and finally waits for every
// file-writing goroutine to finish.
//
// Every failure goes through log.Fatalln, which exits the process without
// running the deferred cleanup.
func main() {
	// Open a connection to the SQLite database.
	// _busy_timeout is given in milliseconds and must be positive: SQLite
	// turns busy handling off for values <= 0, which would make every lock
	// conflict fail immediately instead of waiting up to five seconds.
	var err error
	db, err = sql.Open("sqlite3", fmt.Sprintf("%s?mode=rw&_busy_timeout=5000&_journal_mode=WAL", *databasePath))
	if err != nil {
		log.Fatalln(err)
	}
	defer closeOrPanic(db)

	createActiveRepoView()
	createIndices()
	// Deferred after the db close above, so the indices are dropped while the
	// connection is still open.
	defer dropIndices()

	// Retrieve column names from the view
	columnNames, err := getColumnNames(db, "ActiveRepo")
	if err != nil {
		log.Fatalln(err)
	}

	saveMetadata()

	// Retrieve all possible languages from the Repo table
	languages := programmingLanguages()
	for _, language := range languages {
		exportForLanguage(language, columnNames)
	}
	exportForAll(columnNames)

	// Pages are written by background goroutines; do not return (and drop the
	// indices / close the database) before all of them are done.
	fileSaveWaitGroup.Wait()
}
// exportForAll writes the language-independent listing page by page. Pages
// are numbered from 1 and hold JSON_PAGINATION_PAGE_SIZE records each; the
// loop ends as soon as retrieveAndSaveAll reports that no further page is
// needed.
func exportForAll(columnNames []string) {
	const pageSize = JSON_PAGINATION_PAGE_SIZE

	for page := 1; ; page++ {
		// Page 1 starts at offset 0, page 2 at pageSize, and so on.
		offset := (page - 1) * pageSize
		if !retrieveAndSaveAll(columnNames, pageSize, offset, page) {
			return
		}
	}
}
// exportForLanguage writes the listing for a single programming language page
// by page. Pages are numbered from 1 and hold JSON_PAGINATION_PAGE_SIZE
// records each; the loop ends as soon as retrieveAndSaveByLanguage reports
// that no further page is needed.
func exportForLanguage(language string, columnNames []string) {
	const pageSize = JSON_PAGINATION_PAGE_SIZE

	for page := 1; ; page++ {
		// Page 1 starts at offset 0, page 2 at pageSize, and so on.
		offset := (page - 1) * pageSize
		if !retrieveAndSaveByLanguage(columnNames, pageSize, offset, page, language) {
			return
		}
	}
}
// emojify replaces emoji aliases in every record's "Description" with the
// corresponding unicode emoji, e.g. Description=":rocket: LGTM" becomes
// Description="🚀 LGTM".
//
// Records without a "Description" entry, or whose entry is not a string, are
// left untouched. The records are modified in place and the same slice is
// returned.
func emojify(records []Record) []Record {
	for _, rec := range records {
		// A missing key yields a nil value, which fails the assertion just
		// like any other non-string value.
		text, isString := rec["Description"].(string)
		if !isString {
			continue
		}
		rec["Description"] = emoji.ReplaceAliases(text)
	}
	return records
}
// retrieveAndSaveAll loads one page of ActiveRepo (ordered by Stargazers
// descending, then Id), emojifies the descriptions and hands the records to a
// background goroutine that writes them to "<outputDir>/all/<page>".
//
// pageSize and offset are passed to the query as LIMIT and OFFSET. The result
// is true when the page came back full, i.e. when another page may follow.
// A query failure terminates the program via log.Fatalln.
func retrieveAndSaveAll(columnNames []string, pageSize int, offset int, page int) (shouldContinue bool) {
	const pageQuery = `
SELECT * FROM ActiveRepo
ORDER BY Stargazers DESC, Id
LIMIT $1 OFFSET $2
`
	rows, err := db.Query(pageQuery, pageSize, offset)
	if err != nil {
		log.Fatalln(err)
	}
	defer closeOrPanic(rows)

	target := fmt.Sprintf("%s/all/%d", *outputDir, page)
	pageRecords := emojify(rowsAsRecords(rows, columnNames))

	// Register the writer before starting it so that main's Wait cannot miss it.
	fileSaveWaitGroup.Add(1)
	go saveToFile(target, pageRecords)

	// A page that is not full is the last one.
	return len(pageRecords) >= pageSize
}
// retrieveAndSaveByLanguage loads one page of the ActiveRepo rows whose
// Language equals language (ordered by Stargazers descending, then Id),
// emojifies the descriptions and hands the records to a background goroutine
// that writes them to "<outputDir>/language/<escaped language>/<page>".
//
// pageSize and offset are passed to the query as LIMIT and OFFSET. The result
// is true when the page came back full, i.e. when another page may follow.
// A query failure terminates the program via log.Fatalln.
func retrieveAndSaveByLanguage(columnNames []string, pageSize int, offset int, page int, language string) (shouldContinue bool) {
	const pageQuery = `
SELECT * FROM ActiveRepo
WHERE Language=$1
ORDER BY Stargazers DESC, Id
LIMIT $2 OFFSET $3
`
	rows, err := db.Query(pageQuery, language, pageSize, offset)
	if err != nil {
		log.Fatalln(err)
	}
	defer closeOrPanic(rows)

	target := fmt.Sprintf("%s/language/%s/%d", *outputDir, escapeLanguageName(language), page)
	pageRecords := emojify(rowsAsRecords(rows, columnNames))

	// Register the writer before starting it so that main's Wait cannot miss it.
	fileSaveWaitGroup.Add(1)
	go saveToFile(target, pageRecords)

	// A page that is not full is the last one.
	return len(pageRecords) >= pageSize
}
// saveToFile serializes records as JSON and passes the bytes on to
// saveDataToGzipFile, which writes them to fileName and signals
// fileSaveWaitGroup when done.
//
// A marshalling failure terminates the program via log.Fatalln.
func saveToFile(fileName string, records []Record) {
	payload, marshalErr := json.Marshal(records)
	if marshalErr != nil {
		log.Fatalln(marshalErr)
	}

	saveDataToGzipFile(fileName, payload)
}
// saveDataToGzipFile writes data, gzip-compressed, to fileName, creating the
// parent directory first if necessary. It marks one unit of
// fileSaveWaitGroup as done when it returns.
//
// Any failure terminates the program via log.Fatalln / log.Fatalf.
func saveDataToGzipFile(fileName string, data []byte) {
	defer fileSaveWaitGroup.Done()

	parentDir := filepath.Dir(fileName)
	if err := os.MkdirAll(parentDir, os.ModePerm); err != nil {
		log.Fatalf("Could not create directory %v: %v\n", parentDir, err)
	}

	out, err := os.Create(fileName)
	if err != nil {
		log.Fatalln(err)
	}
	defer closeOrPanic(out)

	// Deferred calls run in reverse order: the gzip writer is closed (and
	// thereby flushed) before the underlying file is.
	compressor := gzip.NewWriter(out)
	defer closeOrPanic(compressor)

	if _, err := compressor.Write(data); err != nil {
		log.Fatalln(err)
	}

	log.Printf("Created file '%s'\n", fileName)
}