From 8a3661605c31be9812f5459ea295af1a29ca55df Mon Sep 17 00:00:00 2001 From: Simon Neutert Date: Wed, 20 Dec 2023 21:27:23 +0100 Subject: [PATCH] Add test-runner.clj, bb.edn, and test_main.clj files --- bb.edn | 10 +- git-hire.clj => src/git_hire/main.clj | 205 +++++++++++++++----------- test-runner.clj | 13 ++ test/git_hire/test_main.clj | 132 +++++++++++++++++ 4 files changed, 268 insertions(+), 92 deletions(-) rename git-hire.clj => src/git_hire/main.clj (52%) create mode 100644 test-runner.clj create mode 100644 test/git_hire/test_main.clj diff --git a/bb.edn b/bb.edn index e30053d..c66e429 100644 --- a/bb.edn +++ b/bb.edn @@ -1,10 +1,14 @@ -{:tasks +{:paths ["src"] + :min-bb-version "1.1.171" + :tasks {:requires ([babashka.fs :as fs] [babashka.process :refer [process]] [clojure.string :as str] [clojure.edn :as edn]) + scrape {:doc " Scrape all items from a user's profile " + :task git-hire.main/-main} search-keyword (let [search (first *command-line-args*)] (-> - (str "grep -r -i -l " search " profiles") + (str " grep -r -i -l " search " profiles ") (babashka.process/process) - :out slurp str/split-lines clojure.pprint/pprint))}} + :out slurp str/split-lines clojure.pprint/pprint))}} " diff --git a/git-hire.clj b/src/git_hire/main.clj similarity index 52% rename from git-hire.clj rename to src/git_hire/main.clj index 39d9f93..4033dd4 100644 --- a/git-hire.clj +++ b/src/git_hire/main.clj @@ -1,5 +1,17 @@ +(ns git-hire.main + (:require [clojure.string :as str] + [babashka.http-client :as http] + [cheshire.core :as json] + [babashka.fs :as fs])) + (import [java.net URLEncoder]) +(defn pretty-spit + [file content] + (prn file) + (prn content) + (spit file (with-out-str (clojure.pprint/pprint content)))) + (def auth "Sets up the headers for requests depending on a set ENV" @@ -11,23 +23,60 @@ (def base-url "https://api.github.com") -(defn base-url-search-users - [] - (str base-url "/search/users")) +(def user-search-path + "/search/users") -(defn get-json - 
"returns json as edn (keywordized)" - [url] - (let [req (curl/get url auth)] - (json/parse-string (:body req) true))) +(defn ->utf8 + [s] + (URLEncoder/encode s "UTF-8")) -(defn file-path-profiles - [] +(defn get-json-with-params + "GETs base-url + path with auth merged into the given request options and returns the JSON body as edn (keywordized)" + [path query-params] + (let [url (str base-url path) + request-spec (merge auth query-params)] + (-> (http/get url request-spec) + :body + (json/parse-string true)))) + +(defn sanitize-user-input + "Sanitizes user input" + [user-input] + (-> user-input + str/trim + str/lower-case + ->utf8)) + +(defn add-outer-quotes + [s] + (str "\"" s "\"")) + +(defn user-location-search-params-location + "hammers out the query params for user search location + the location needs to be wrapped in quotes" + [per-page more-repos-than location] + (let [location-str (add-outer-quotes (sanitize-user-input location))] + {:query-params {"per_page" per-page + "q" (str "location:" location-str + "+repos:" ">=" more-repos-than)}})) + +(defn user-location-search-params-location-lang + "hammers out the query params for user search location + the location needs to be wrapped in quotes" + [per-page more-repos-than location lang] + (let [location-str (add-outer-quotes (sanitize-user-input location)) + lang-str (add-outer-quotes (sanitize-user-input lang))] + {:query-params {"per_page" per-page + "q" (str "location:" location-str + "+repos:" ">=" more-repos-than + "+language:" lang-str)}})) + +(def file-path-profiles (str/lower-case "./profiles/")) (defn file-path-location [location] - (str (file-path-profiles) location)) + (str file-path-profiles (str/lower-case location))) (defn file-path-location-lang [location lang] @@ -44,38 +93,19 @@ (defn recursive-user-search "user results are kept in :items keyword so it seems reasonable to have this in a non-generalised function" - [url runs existing-results] + [query-params runs existing-results] (loop [run 2 
results existing-results] (if (> run runs) results - (let [new-url (str url 
"&page=" run) - new-results (:items (get-json new-url))] + (let [res (get-json-with-params user-search-path (assoc-in query-params [:query-params "page"] run)) + new-results (:items res)] (recur (inc run) (concat results new-results)))))) -(defn user-location-search-url - "hammers out the url for user search location" - [per-page more-repos-than encoded-location] - (str (base-url-search-users) - "?per_page=" per-page - "&q=location:" encoded-location - "+repos:" ">" more-repos-than)) - -(defn user-location-language-search-url - "hammers out the url for user search location and language" - [per-page more-repos-than encoded-location encoded-lang] - (str - (user-location-search-url per-page more-repos-than encoded-location) - "+language:" encoded-lang)) - (defn per-page->runs [total divisor] (int (Math/ceil (double (/ total divisor))))) -(defn ->utf8 - [s] - (URLEncoder/encode s "UTF-8")) - (defn search-users-by-location-lang-rich "1000 total results is the current user limit" [location lang more-repos-than] @@ -84,11 +114,8 @@ lang lang more-repos-than more-repos-than] (let [per-page 50 - more-repos-than more-repos-than - encoded-location (->utf8 location) - encoded-lang (->utf8 lang) - url (user-location-language-search-url per-page more-repos-than encoded-location encoded-lang) - res (get-json url) + q (user-location-search-params-location-lang per-page more-repos-than location lang) + res (get-json-with-params user-search-path q) total-user-count (:total_count res) runs (per-page->runs total-user-count per-page) users (:items res)] @@ -97,7 +124,7 @@ (recur location lang (+ 1 more-repos-than))) (if (> runs 1) (do (prn "getting users with more than " more-repos-than " repos") - (recursive-user-search url runs users)) + (recursive-user-search q runs users)) users))))) (defn search-users-by-location-lang @@ -110,10 +137,8 @@ (loop [location location more-repos-than more-repos-than] (let [per-page 50 - more-repos-than more-repos-than - encoded-location (->utf8 location) - 
url (user-location-search-url 
per-page more-repos-than encoded-location) - res (get-json url) + q (user-location-search-params-location per-page more-repos-than location) + res (get-json-with-params user-search-path q) total-user-count (:total_count res) runs (per-page->runs total-user-count per-page) users (:items res)] @@ -123,7 +148,7 @@ (do (file-path-location-all location) (if (> runs 1) (do (prn "getting users with more than " more-repos-than " repos") - (recursive-user-search url runs users)) + (recursive-user-search q runs users)) users)))))) (defn search-users-by-location @@ -131,8 +156,12 @@ (search-users-by-location-rich location 1)) (defn repo-slim + [user-repo] + (select-keys user-repo [:html_url :name :description :homepage :topics :language :updated_at])) + +(defn repos-slim [user-repos] - (mapv #(select-keys % [:html_url :name :description :homepage :topics :language :updated_at]) user-repos)) + (mapv repo-slim user-repos)) (defn user-languages [user-repos] @@ -143,51 +172,42 @@ (defn user-with-clean-repos [user-repos] (let [first-repo (first user-repos) - cleaned-repos (repo-slim user-repos)] + cleaned-repos (repos-slim user-repos)] {:name (get-in first-repo [:owner :login]) :owner_url (get-in first-repo [:owner :html_url]) :languages (user-languages cleaned-repos) :repositories cleaned-repos})) -(defn url-has-query? - [url] - (str/includes? url "?")) - -(defn url-add-query-param - [url k v] - (let [sign (if (url-has-query? url) "&" "?")] - (str url sign k "=" v))) - (defn recursive-curl [url] - (loop [run 1 - results []] - (let [page-url (url-add-query-param url "page" run) - res (get-json page-url)] - (if (= (count res) 0) - results - (recur (inc run) (concat results res)))))) + (let [path (last (str/split url #"api.github.com")) + per-page 25] + (loop [run 1 + results []] + (let [res (get-json-with-params path + {:query-params {"page" run + "per_page" per-page + "sort" "updated" + "direction" "desc"}})] + (if (zero? 
(count res)) + results + (recur (inc run) (concat results res))))))) (defn get-user-repos [user] - (let [per-page 25 - {:keys [repos_url]} user - url (-> repos_url - (url-add-query-param "sort" "updated") - (url-add-query-param "per_page" per-page))] - (if repos_url (recursive-curl url) nil))) + (let [{:keys [repos_url]} user + url repos_url] + (if url + (recursive-curl url) + nil))) (defn get-users-repos [users] - (pmap get-user-repos users)) + (map get-user-repos users)) (defn get-users-repos-without-forks [users] - (pmap remove-forks (get-users-repos users))) - -(defn pretty-spit - [file content] - (spit file (with-out-str (clojure.pprint/pprint content)))) + (map remove-forks (get-users-repos users))) (defn build-rich-user [user user-data] @@ -202,9 +222,8 @@ (defn enrich-user-data [file-path users] - (pmap (fn [user] - (let [user-url (str base-url "/users/" (:name user)) - user-data (get-json user-url) + (mapv (fn [user] + (let [user-data (get-json-with-params (str "/users/" (:name user)) {}) user-rich (build-rich-user user user-data) name (:name user-rich) file-name (str file-path name ".edn")] @@ -217,28 +236,36 @@ [file-path users] (->> users (get-users-repos-without-forks) - (pmap #(user-with-clean-repos %)) + (mapv #(user-with-clean-repos %)) (remove #(nil? 
(:name %))) (enrich-user-data file-path))) +(defn print-profile-count-for-location + [profiles location] + (let [count (reduce + (map count profiles))] + (prn (str "Found " count " users in " location)))) + (defn save-profiles-location "this will output the profiles matching into the `profiles` dir as formatted edn data" [location] (let [file-path (file-path-location-all location) - users (search-users-by-location location)] - (map #(prepare-user-data file-path %) (partition 100 100 nil users)))) + users (search-users-by-location location) + res (map #(prepare-user-data file-path %) (partition 10 10 nil users))] + (print-profile-count-for-location res location) + res)) (defn save-profiles-location-lang "this will output the profiles matching into the `profiles` dir as formatted edn data" [location lang] (let [file-path (file-path-location-lang location lang) users (search-users-by-location-lang location lang)] - (map #(prepare-user-data file-path %) (partition 100 100 nil users)))) - -;; entrypoint -(let [search-term-location (first *command-line-args*) - search-term-lang (second *command-line-args*)] - (case (count *command-line-args*) - 1 (save-profiles-location search-term-location) - 2 (save-profiles-location-lang search-term-location search-term-lang) - :done)) + (mapv #(prepare-user-data file-path %) (partition 100 100 nil users)))) + +(defn -main + [& args] + (let [search-term-location (first args) + search-term-lang (second args)] + (case (count args) + 1 (save-profiles-location search-term-location) + 2 (save-profiles-location-lang search-term-location search-term-lang) + :done))) \ No newline at end of file diff --git a/test-runner.clj b/test-runner.clj new file mode 100644 index 0000000..137dbce --- /dev/null +++ b/test-runner.clj @@ -0,0 +1,13 @@ +(require '[clojure.test :as t] + '[babashka.classpath :as cp]) + +(cp/add-classpath "src:test") + +(require 'git-hire.test-main) + +(def test-results + (t/run-tests 'git-hire.test-main)) + +(let [{:keys [fail 
error]} test-results] + (when (pos? (+ fail error)) + (System/exit 1))) \ No newline at end of file diff --git a/test/git_hire/test_main.clj b/test/git_hire/test_main.clj new file mode 100644 index 0000000..b3a3ead --- /dev/null +++ b/test/git_hire/test_main.clj @@ -0,0 +1,132 @@ +(ns git-hire.test-main) +(require '[clojure.test :as t] + '[babashka.classpath :as cp] + '[git-hire.main :as main]) + +(t/deftest test-definitions + (t/is (= main/base-url + "https://api.github.com")) + (t/is (= main/user-search-path + "/search/users")) + (t/is (= true + (map? main/auth))) + (t/is (= true + (map? (:headers main/auth)))) + (t/is (= ["Accept" "Authorization"] + (keys (:headers main/auth))))) + +(t/deftest utf8-conversion + (t/is (= "foo+bar" + (main/->utf8 "foo bar")))) + +(t/deftest add-outer-quotes + (t/is (= "\"foo\"" + (main/add-outer-quotes "foo")))) + +(t/deftest sanitize-user-input + (t/is (= "foo+bar" + (main/sanitize-user-input " Foo Bar ")))) + +(t/deftest per-page->runs + (t/is (= 1 + (main/per-page->runs 10 10))) + (t/is (= 2 + (main/per-page->runs 10 5))) + (t/is (= 4 + (main/per-page->runs 11 3))) + (t/is (= 5 + (main/per-page->runs 101 25)))) + +(t/deftest repos-slim + "This test checks if the function repos-slim + returns a vector of maps with the correct keys" + (t/is (= [{:html_url "bar" + :name "foo" + :description "baz" + :homepage "www.foo.bar" + :topics ["foo" "bar"] + :language "clojure" + :updated_at "2020-01-01T00:00:00Z"}] + (main/repos-slim [{:name "foo" + :html_url "bar" + :description "baz" + :homepage "www.foo.bar" + :topics ["foo" "bar"] + :language "clojure" + :updated_at "2020-01-01T00:00:00Z" + :stargazers_count 10 + :forks_count 5 + :open_issues_count 2 + :license "MIT"}])))) + +(t/deftest repo-slim + "This test checks if the function repo-slim + returns a map with the whitelisted/pre-defined keys" + (t/is (= {:html_url "bar" + :name "foo" + :description "baz" + :homepage "www.foo.bar" + :topics ["foo" "bar"] + :language "clojure" + 
:updated_at "2020-01-01T00:00:00Z"} + (main/repo-slim {:name "foo" + :html_url "bar" + :description "baz" + :homepage "www.foo.bar" + :topics ["foo" "bar"] + :language "clojure" + :updated_at "2020-01-01T00:00:00Z" + :stargazers_count 10 + :forks_count 5 + :open_issues_count 2 + :license "MIT"})))) + +(t/deftest user-languages + "This test checks if the function user-languages + returns a vector of maps with the users used + languages as a set of strings. + Empty languages are removed." + (t/is (= #{"clojure" "ruby"} + (main/user-languages [{:name "foo" + :html_url "bar" + :language "clojure" + :open_issues_count 2 + :license "MIT"} + {:name "foo" + :html_url "bar" + :language "ruby" + :open_issues_count 2 + :license "MIT"} + {:name "foo" + :pizza "turtles"}]))) + (t/is (= true + (set? (main/user-languages + [{:name "foo" + :html_url "bar" + :language "clojure" + :open_issues_count 2 + :license "MIT"} + {:name "foo" + :html_url "bar" + :language "ruby" + :open_issues_count 2 + :license "MIT"} + {:name "foo" + :pizza "turtles"}]))))) + +(t/deftest user-location-search-params-location + (t/is (= {:query-params {"per_page" 10, "q" "location:\"bad+kissingen\"+repos:>=0"}} + (main/user-location-search-params-location 10 0 "Bad Kissingen"))) + (t/is (= {:query-params {"per_page" 20, "q" "location:\"mainz\"+repos:>=0"}} + (main/user-location-search-params-location 20 0 "Mainz")))) + +(t/deftest file-path-location-all + (t/is (= "./profiles/mainz/all/" + (main/file-path-location-all "Mainz"))) + (t/is (= "./profiles/bad kissingen/all/" + (main/file-path-location-all "Bad Kissingen")))) + +(t/deftest user-location-search-params-location-lang + (t/is (= {:query-params {"per_page" 10, + "q" "location:\"bad+kissingen\"+repos:>=0+language:\"clojure\""}} + (main/user-location-search-params-location-lang 10 0 "Bad Kissingen" "clojure")))) \ No newline at end of file