This repository has been archived by the owner on Oct 12, 2022. It is now read-only.
/
parsing.go
138 lines (121 loc) · 3.18 KB
/
parsing.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
package main
import (
"bufio"
"compress/gzip"
"encoding/json"
"errors"
"net"
"os"
"path/filepath"
"time"
"github.com/oschwald/maxminddb-golang"
)
// logEntry is the decoded form of a single JSON log line. parseLogs
// unmarshals every scanned line into one of these and passes it to
// addToStats. Only the keys named in the struct tags are decoded.
// NOTE(review): the key names look like a Caddy structured access log —
// confirm against the log producer.
type logEntry struct {
// Value of the top-level "ts" key.
// NOTE(review): presumably a Unix timestamp in seconds — confirm.
Stamp float64 `json:"ts"`
// Value of the top-level "status" key (presumably the HTTP status code).
Status uint16 `json:"status"`
// Value of the top-level "duration" key.
// NOTE(review): unit is not visible here, presumably seconds — confirm.
Duration float64 `json:"duration"`
// Value of the top-level "size" key.
// NOTE(review): presumably the response size in bytes — confirm.
Size int64 `json:"size"`
// Fields decoded from the nested "request" object.
Request struct {
// "remote_addr" as logged.
// NOTE(review): may include a port; verify how addToStats treats it.
Address string `json:"remote_addr"`
// "proto", "method", "host" and "uri" of the request, as logged.
Protocol string `json:"proto"`
Method string `json:"method"`
Host string `json:"host"`
Location string `json:"uri"`
// Fields decoded from the nested "tls" object; both are kept as the raw
// numeric identifiers found in the log.
Encryption struct {
Version uint16 `json:"version"`
Cipher uint16 `json:"cipher_suite"`
} `json:"tls"`
// Selected request headers from the nested "headers" object. Each header
// is decoded as a list of strings.
Headers struct {
Languages []string `json:"Accept-Language"`
Encodings []string `json:"Accept-Encoding"`
UserAgent []string `json:"User-Agent"`
} `json:"headers"`
} `json:"request"`
// Selected response headers from the top-level "resp_headers" object.
Response struct {
ContentType []string `json:"Content-Type"`
} `json:"resp_headers"`
}
// parseLogs reads every log file in logDir, adds each JSON line to a fresh
// statistics instance, and then resolves the country of every observed
// visitor using the MaxMind database at geoFile.
//
// Files matching "*.log*" directly inside logDir are processed in the order
// returned by filepath.Glob; files whose extension is ".gz" are decompressed
// while reading. The first error encountered (invalid directory, bad glob
// pattern, unreadable file, malformed JSON line, scanner failure, or
// geolocation failure) aborts the parse and is returned together with a nil
// *statistics.
func parseLogs(logDir string, geoFile string) (*statistics, error) {
	// Create statistics instance
	stats := newStatistics()
	stats.Directory = logDir

	// Validate log directory
	info, err := os.Stat(logDir)
	if err != nil {
		return nil, err
	}
	if !info.IsDir() {
		return nil, errors.New("stat " + logDir + ": not a directory")
	}

	// Find log files with log extension. Glob only fails on a malformed
	// pattern, which can happen when logDir itself contains glob
	// metacharacters; report that instead of silently parsing nothing.
	logFiles, err := filepath.Glob(filepath.Join(logDir, "*.log*"))
	if err != nil {
		return nil, err
	}

	// Start the timer
	startTime := time.Now()

	// Read all files one by one. Each file is handled in its own function
	// call so that its handles are released before the next file is opened.
	for _, logFile := range logFiles {
		if err := parseLogFile(logFile, stats); err != nil {
			return nil, err
		}
	}

	// Open geolocation database
	geo, err := maxminddb.Open(geoFile)
	if err != nil {
		return nil, err
	}
	defer geo.Close()

	// Get countries of all visitors observed
	for _, counter := range stats.Hosts {
		for visitor := range counter.Total.ObservedUsers {
			// Declared per iteration so every lookup decodes into a zero value
			// and cannot inherit names from the previous visitor.
			var record struct {
				Country struct {
					Names map[string]string `maxminddb:"names"`
				} `maxminddb:"country"`
			}

			// net.ParseIP yields nil for an unparseable address; the value is
			// handed to Lookup unchanged and any error it reports aborts the parse.
			ip := net.ParseIP(visitor.IPAddress)
			if err := geo.Lookup(ip, &record); err != nil {
				return nil, err
			}

			country := record.Country.Names["en"]
			if country == "" {
				country = "Unknown"
			}
			counter.Visitors.Countries[country]++
		}
	}

	// Save parse duration and return result
	stats.ParseDuration = time.Since(startTime).Seconds()
	return stats, nil
}

// parseLogFile adds the on-disk size and every JSON entry of one log file to
// stats. The file, and the gzip reader when one is used, are closed exactly
// once before the function returns.
func parseLogFile(logFile string, stats *statistics) error {
	// Open the log file
	file, err := os.Open(logFile)
	if err != nil {
		return err
	}
	defer file.Close()

	// Get log file size in bytes (the size on disk, i.e. the compressed size
	// for gzipped files, because it is taken from the file itself)
	info, err := file.Stat()
	if err != nil {
		return err
	}
	stats.SizeBytes += info.Size()

	// Create line scanner and decompress if the file is gzipped
	scanner := bufio.NewScanner(file)
	if filepath.Ext(logFile) == ".gz" {
		decompressed, err := gzip.NewReader(file)
		if err != nil {
			return err
		}
		defer decompressed.Close()
		scanner = bufio.NewScanner(decompressed)
	}

	// Scan line by line and add them to the statistics instance
	for scanner.Scan() {
		var line logEntry
		if err := json.Unmarshal(scanner.Bytes(), &line); err != nil {
			return err
		}
		if err := addToStats(&line, stats); err != nil {
			return err
		}
	}

	// Scan returns false both at end of input and on failure (read error,
	// corrupt gzip stream, over-long line); surface the failure instead of
	// treating it as a normal end of file.
	return scanner.Err()
}