-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtokei_service.cr
More file actions
143 lines (120 loc) · 5.22 KB
/
tokei_service.cr
File metadata and controls
143 lines (120 loc) · 5.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
require "json"
require "file_utils"
require "random"
require "dotenv"
module Tokei::Api::Services
# Service class for executing tokei command
class TokeiService
# Load environment variables through Dotenv, except when CRYSTAL_ENV is "test"
Dotenv.load if ENV["CRYSTAL_ENV"]? != "test"
# Git configuration defaults; a value already present in the environment is kept
{
  "GIT_TERMINAL_PROMPT" => "0",
  "GIT_ASKPASS"         => "/bin/true",
  "GIT_SSH_COMMAND"     => "ssh -o BatchMode=yes",
}.each do |name, fallback|
  ENV[name] ||= fallback
end
# Base path for temporary directory (TEMP_DIR overrides the built-in default)
TEMP_DIR_BASE = ENV["TEMP_DIR"]? || "/tmp/tokei-api"
# Timeout for git clone operation (in seconds).
# to_i? returns nil for a blank or non-numeric value, so a malformed
# CLONE_TIMEOUT_SECONDS falls back to 30 instead of raising.
CLONE_TIMEOUT = ENV["CLONE_TIMEOUT_SECONDS"]?.try(&.to_i?) || 30
# Timeout for tokei analysis operation (in seconds).
# Parsed the same way: an unparsable TOKEI_TIMEOUT_SECONDS falls back to 30.
TOKEI_TIMEOUT = ENV["TOKEI_TIMEOUT_SECONDS"]?.try(&.to_i?) || 30
# Common URL patterns
# Patterns end with \z rather than "$": "$" also matches just before a trailing
# newline, which would let "url\n" through. Validation patterns start with \A
# so the whole string has to match.
# GitHub URL patterns
GITHUB_HTTPS_VALIDATION = /\Ahttps:\/\/github\.com\/[\w.-]+\/[\w.-]+(?:\.git|\/)?\z/
GITHUB_SSH_VALIDATION = /\Agit@github\.com:[\w.-]+\/[\w.-]+(?:\.git|\/)?\z/
# GitHub URL patterns with capture groups for owner (group 1) and repo (group 2)
# The repo group is lazy, so a single .git extension at the end is left out of the capture.
# An optional trailing slash is tolerated because the validation patterns accept one too.
GITHUB_HTTPS_EXTRACTION = /https?:\/\/(?:www\.)?github\.com\/([^\/]+)\/([a-zA-Z0-9._-]+?)(?:\.git)?\/?\z/
GITHUB_SSH_EXTRACTION = /git@github\.com:([^\/]+)\/([a-zA-Z0-9._-]+?)(?:\.git)?\/?\z/
# GitLab URL patterns
GITLAB_HTTPS = /\Ahttps:\/\/gitlab\.com\/[\w.-]+\/[\w.-]+(?:\.git|\/)?\z/
GITLAB_SSH = /\Agit@gitlab\.com:[\w.-]+\/[\w.-]+(?:\.git|\/)?\z/
# Bitbucket URL patterns
BITBUCKET_HTTPS = /\Ahttps:\/\/bitbucket\.org\/[\w.-]+\/[\w.-]+(?:\.git|\/)?\z/
BITBUCKET_SSH = /\Agit@bitbucket\.org:[\w.-]+\/[\w.-]+(?:\.git|\/)?\z/
# Generic git URL patterns
# Allow ~ and other common user/repo characters
GENERIC_HTTPS = /\Ahttps:\/\/[\w.~:-]+\.[\w.~:-]+\/[\w.~:-]+\/[\w.~:-]+(?:\.git|\/)?\z/
GENERIC_SSH = /\Agit@[\w.~:-]+\.[\w.~:-]+:[\w.~:-]+\/[\w.~:-]+(?:\.git|\/)?\z/
# Returns true when *url* matches at least one of the accepted repository
# URL patterns (GitHub, GitLab, Bitbucket or generic, each in HTTPS and SSH form)
def self.valid_repo_url?(url : String) : Bool
  accepted_patterns = [
    GITHUB_HTTPS_VALIDATION, GITHUB_SSH_VALIDATION,
    GITLAB_HTTPS, GITLAB_SSH,
    BITBUCKET_HTTPS, BITBUCKET_SSH,
    GENERIC_HTTPS, GENERIC_SSH,
  ]
  accepted_patterns.any? { |pattern| !url.match(pattern).nil? }
end
# Returns true when *url* matches either GitHub extraction pattern (HTTPS or SSH)
def self.github_repo?(url : String) : Bool
  return true if url.match(GITHUB_HTTPS_EXTRACTION)

  !url.match(GITHUB_SSH_EXTRACTION).nil?
end
# Returns {owner, repo} for a GitHub URL, or nil when *url* matches neither
# the HTTPS nor the SSH extraction pattern
def self.extract_github_info(url : String) : {String, String}?
  # The HTTPS form is tried first, then the SSH form
  found = url.match(GITHUB_HTTPS_EXTRACTION) || url.match(GITHUB_SSH_EXTRACTION)
  return nil if found.nil?

  owner_name = found[1]
  # Drop every .git extension still left at the end of the captured repo name
  repo_name = found[2].gsub(/\.git(?:\.git)*$/, "")
  {owner_name, repo_name}
end
# Clones *repo_url* into a fresh temporary directory, runs tokei on the
# checkout and returns tokei's JSON output as a string.
#
# Raises when the URL is rejected by valid_repo_url?, when git clone or tokei
# fails or exceeds its timeout, or when tokei produces no output.
# The temporary directory is removed whether or not the analysis succeeds.
def self.analyze_repo(repo_url : String) : String
  # URL validation
  raise "Invalid repository URL: #{repo_url}" unless valid_repo_url?(repo_url)

  # Each run works in its own randomly named directory under TEMP_DIR_BASE
  random_suffix = Random::Secure.hex(8)
  temp_dir = File.join(TEMP_DIR_BASE, random_suffix)

  begin
    # Create the base directory if it doesn't exist
    FileUtils.mkdir_p(TEMP_DIR_BASE) unless Dir.exists?(TEMP_DIR_BASE)

    # Clone repository with timeout and single-branch options
    # Use Process.run for safety (no shell interpretation)
    # "--" ends git's option parsing, so the URL and the target directory
    # are always treated as positional arguments
    clone_result = Process.run(
      "timeout",
      ["#{CLONE_TIMEOUT}s", "git", "clone", "--depth", "1", "--single-branch", "--no-tags", "--", repo_url, temp_dir],
      output: Process::Redirect::Close,
      error: Process::Redirect::Close
    )

    unless clone_result.success?
      if timeout_exit?(clone_result)
        raise "Repository cloning timed out after #{CLONE_TIMEOUT} seconds. The repository may be too large."
      else
        raise "Failed to clone repository: #{repo_url}. Please check the URL and try again."
      end
    end

    # Execute tokei command inside the checkout, capturing its stdout
    # Use Process.run for safety (no shell interpretation)
    output = IO::Memory.new
    tokei_result = Process.run(
      "timeout",
      ["#{TOKEI_TIMEOUT}s", "tokei", "--output", "json"],
      chdir: temp_dir,
      output: output,
      error: Process::Redirect::Close
    )

    unless tokei_result.success?
      if timeout_exit?(tokei_result)
        raise "Repository analysis timed out after #{TOKEI_TIMEOUT} seconds. The repository may be too large."
      else
        raise "Failed to analyze repository with tokei"
      end
    end

    # A successful run that printed nothing is treated as a failure too
    output_string = output.to_s
    if output_string.empty?
      raise "Failed to analyze repository with tokei"
    end

    output_string
  ensure
    # Remove temporary directory
    FileUtils.rm_rf(temp_dir) if Dir.exists?(temp_dir)
  end
end

# Returns true when *status* is a normal exit with code 124, the status this
# service treats as "the timeout wrapper gave up".
# normal_exit? is checked first so exit_code is never read for a process
# that was terminated by a signal.
private def self.timeout_exit?(status : Process::Status) : Bool
  status.normal_exit? && status.exit_code == 124
end
end
end