/
filedetect.go
242 lines (229 loc) · 7.46 KB
/
filedetect.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
package main
import (
"net/http"
"path/filepath"
"strings"
"golang.org/x/net/html/charset"
)
// extensionList maps a file extension, written in lower case and including
// the leading dot, to a human-readable description of the file type.
// Descriptions for well-known complete file names live in fileNameList.
var extensionList = map[string]string{
	// File extensions from https://www.computerhope.com/issues/ch001789.htm
	".aif": "AIF audio",
	".cda": "CD audio track",
	".mid": "MIDI audio",
	".midi": "MIDI audio",
	".mp3": "MP3 audio",
	".mpa": "MPEG-2 audio",
	".ogg": "Ogg Vorbis audio",
	".wav": "WAV",
	".wma": "WMA audio",
	".wpl": "Windows Media Player playlist",
	".7z": "7-Zip compressed",
	".arj": "ARJ compressed",
	".deb": "Debian software package",
	".pkg": "Package",
	".rar": "RAR",
	".rpm": "Red Hat Package Manager",
	".gz": "Gzip compressed",
	".bz2": "Bzip2 compressed",
	".z": "Z compressed",
	".zip": "Zip compressed",
	".dmg": "macOS X disk image",
	".iso": "ISO disc image",
	".toast": "Toast disc image",
	".vcd": "Virtual CD",
	".csv": "Comma separated value",
	".dat": "Data",
	".dbf": "Database",
	".log": "Log",
	".mdb": "Microsoft Access database",
	".sav": "Save",
	".sql": "SQL database",
	".tar": "Linux / Unix tarball archive",
	".tgz": "Linux / Unix tarball gzipped archive",
	".xml": "XML",
	".apk": "Android package",
	".bat": "Batch",
	".bin": "Binary",
	".cgi": "Perl script",
	".pl": "Perl script",
	".com": "MS-DOS command",
	".exe": "Executable",
	".gadget": "Windows gadget",
	".jar": "Java Archive",
	".wsf": "Windows Script",
	".fnt": "Windows font",
	".fon": "Generic font",
	".otf": "Open type font",
	".ttf": "TrueType font",
	".ai": "Adobe Illustrator",
	".bmp": "Bitmap image",
	".gif": "GIF image",
	".jpg": "JPEG image",
	".jpeg": "JPEG image",
	".png": "PNG image",
	".ps": "PostScript",
	".ps1": "PowerShell Script",
	".reg": "Registry",
	".psd": "PSD image",
	".svg": "Scalable Vector Graphics",
	".tif": "TIFF image",
	".tiff": "TIFF image",
	".gitignore": "Git ignore",
	".rspec": "RSpec",
	".asp": "Active Server Page",
	".aspx": "Active Server Page",
	".cer": "Internet security certificate",
	".cfm": "ColdFusion Markup",
	".css": "Cascading Style Sheet",
	".htm": "HTML",
	".html": "HTML",
	".js": "JavaScript",
	".jsp": "Java Server Page",
	".part": "Partially downloaded",
	".php": "PHP",
	".py": "Python",
	".go": "Go",
	".rb": "Ruby",
	".rs": "Rust",
	".lock": "Lock",
	".yml": "YAML",
	".yaml": "YAML",
	".toml": "TOML",
	".json": "JSON",
	".rss": "RSS",
	".xhtml": "XHTML",
	".md": "Markdown",
	".markdown": "Markdown",
	".key": "Key",
	".odp": "OpenOffice Impress presentation",
	".pps": "PowerPoint slide show",
	".ppt": "PowerPoint presentation",
	".pptx": "PowerPoint Open XML presentation",
	".c": "C, C++ source code",
	".class": "Java class",
	".cpp": "C++ source code",
	".cs": "Visual C# source code",
	".h": "C, C++, and Objective-C header",
	".java": "Java Source code",
	".sh": "Bash shell script",
	".swift": "Swift source code",
	".vb": "Visual Basic",
	".ods": "OpenOffice Calc spreadsheet",
	".xlr": "Microsoft Works spreadsheet",
	".xls": "Microsoft Excel",
	".xlsx": "Microsoft Excel Open XML spreadsheet",
	".bak": "Backup",
	".cab": "Windows Cabinet",
	".cfg": "Config",
	".cpl": "Windows Control panel",
	".cur": "Windows cursor",
	".dll": "DLL",
	".dmp": "Dump",
	".drv": "Device driver",
	".icns": "macOS X icon resource",
	".ico": "Icon",
	".ini": "Initialization",
	".lnk": "Windows shortcut",
	".msi": "Windows installer package",
	".sys": "Windows system",
	".tmp": "Temporary",
	".3g2": "3GPP2 multimedia",
	".3gp": "3GPP multimedia",
	".avi": "AVI",
	".flv": "Adobe Flash",
	".h264": "H.264 video",
	".m4v": "Apple MP4 video",
	".mkv": "Matroska Multimedia Container",
	".mov": "Apple QuickTime movie",
	".mp4": "MPEG4 video",
	".mpeg": "MPEG video",
	".mpg": "MPEG video",
	".rm": "RealMedia",
	".swf": "Shockwave flash",
	".vob": "DVD Video Object",
	".wmv": "Windows Media Video",
	".docx": "Microsoft Word",
	".doc": "Microsoft Word",
	".odt": "OpenOffice Writer document",
	".pdf": "PDF",
	// RTF stands for Rich Text Format.
	".rtf": "Rich Text Format",
	".tex": "A LaTeX document",
	".txt": "Plain text",
	".wps": "Microsoft Works",
	".wks": "Microsoft Works",
	".wpd": "WordPerfect document",
	".tf": "Terraform HCL",
	".repo": "Repository configuration",
	".db": "Database",
	".tmpl": "Template",
}
// fileNameList maps a well-known file name to a human-readable description
// of the file type. Keys are complete, case-sensitive file names (dot-files
// include their leading dot).
var fileNameList = map[string]string{
	".dockerignore": "Docker Ignore",
	// Git reads ".gitattributes"; the singular key is kept so that any
	// existing lookup of it still resolves.
	".gitattribute": "Git attribute",
	".gitattributes": "Git attribute",
	".bash_profile": "Bash Profile",
	".profile": "Bash Profile",
	".bash_history": "Bash History",
	".bash_logout": "Bash Logout",
	".bashrc": "Bash RC",
	".gemrc": "Ruby Gem Config",
	".minttyrc": "MinTTY Config",
	// npm's per-user/per-project config file is ".npmrc"; ".npmjs" is kept
	// for backward compatibility.
	".npmjs": "NPM Config",
	".npmrc": "NPM Config",
	".yarnrc": "Yarn Config",
	".vim": "Vim Config",
	".vimrc": "Vim Config",
	".vimtags": "Vim Tags Config",
	".babelrc": "Babel Config",
	".wget-hsts": "Wget HSTS Config",
	".tmux.conf": "Tmux Config",
	"webpack.config.js": "Webpack Configuration",
	"Dockerfile": "Dockerfile",
	"LICENSE": "License",
	"CONTRIBUTE": "Contributor README",
	"README": "README",
	"README.md": "README Markdown",
	"README.markdown": "README Markdown",
	"Makefile": "GNU Make",
	"Makefile.inc": "GNU Make include",
	"Gemfile": "Ruby Gem",
	"Rakefile": "Ruby Rake",
	"config.ru": "Ruby Config",
	// Vagrant's configuration file is named "Vagrantfile"; the bare
	// "Vagrant" key is kept for backward compatibility.
	"Vagrant": "Vagrant VM",
	"Vagrantfile": "Vagrant VM",
	"config": "Config",
	"go.mod": "Go Module File",
}
// detectByName returns a human-readable file type for name, or "" when
// nothing matches. Lookups are tried in this order:
//
//  1. the full name in fileNameList (e.g. "Makefile", "go.mod");
//  2. the extension in extensionList, compared case-insensitively so that
//     "PHOTO.JPG" resolves the same way as "photo.jpg";
//  3. the name with its extension removed in fileNameList
//     (e.g. "Dockerfile.dev" resolves through "Dockerfile").
func detectByName(name string) string {
	// An exact file-name match is the most specific signal, so it wins.
	if kind, ok := fileNameList[name]; ok {
		return kind
	}

	// Every key in extensionList is lower case; fold the extension so that
	// upper- or mixed-case extensions are still recognized.
	ext := filepath.Ext(name)
	if kind, ok := extensionList[strings.ToLower(ext)]; ok {
		return kind
	}

	// Fall back to the name without its extension. The extension is trimmed
	// in its original case because TrimSuffix is case-sensitive.
	if kind, ok := fileNameList[strings.TrimSuffix(name, ext)]; ok {
		return kind
	}
	return ""
}
// overrideCTypeExtension holds content-type overrides: when the name passed
// to generateContentTypeCharset is a key here, the mapped value is returned
// as-is instead of a sniffed content type. It starts out empty.
// NOTE(review): despite "Extension" in the identifier, the lookup key is the
// full name argument, not a file extension — confirm the intended key.
var overrideCTypeExtension = make(map[string]string)
// generateContentTypeCharset returns a content type for the given content.
//
// If name is a key in overrideCTypeExtension, the mapped value is returned
// unchanged. Otherwise the type is sniffed with http.DetectContentType; when
// the sniffed type has no parameter (no ";") and charset.DetermineEncoding
// reports the encoding as certain, "; charset=<encoding name>" is appended.
func generateContentTypeCharset(name string, content []byte) string {
	if override, ok := overrideCTypeExtension[name]; ok {
		return override
	}

	ctype := http.DetectContentType(content)
	_, encName, certain := charset.DetermineEncoding(content, ctype)

	// Leave the sniffed type alone when the encoding is only a guess or when
	// the type already carries a parameter.
	if !certain || strings.Contains(ctype, ";") {
		return ctype
	}
	return ctype + "; charset=" + encName
}