Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

automate collection and reporting of leiningen download statistics #1252

Merged
merged 1 commit into from

2 participants

@gphil

I wrote some code to fetch and parse the leiningen download logs and report some summary statistics.

It can be run with:

lein run -m leiningen.downloads 

But for it to work, you'll need to put the appropriate AWS credentials in
~/.secrets/leiningen_downloads_aws_cred.clj:

{:access-key "AWS_ACCESS_KEY"
 :secret-key "AWS_SECRET_KEY"}

It takes a while to run, since it has to fetch all the logs from S3 (there are a lot of log files, so it has to make a lot of requests). It also requires a fair amount of memory to process all the files, so you may need to set JVM_OPTS=-Xmx512m.

Additionally, it uses Pomegranate to download some additional dependencies at runtime so as not to pollute the project.clj.

I had to add the bin/ directory to the classpath in order to run the code using leiningen. I'm not sure if this is OK. If not, there may be a better place to put this file, or a better way of running it that I'm not aware of. I'm happy to change this given a better solution.

@technomancy technomancy merged commit f4b489b into from
@technomancy
Owner

Thanks!

Putting it in :source-paths will actually cause it to be included in the jar, so I'll find a better place for it. But this is good to have.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
This page is out of date. Refresh to see the latest.
Showing with 114 additions and 84 deletions.
  1. +0 −83 bin/downloads.clj
  2. +113 −0 bin/leiningen/downloads.clj
  3. +1 −1  project.clj
View
83 bin/downloads.clj
@@ -1,83 +0,0 @@
-(ns leiningen.downloads
- "Calculate download statistics from logs."
- (:require [clojure.java.io]
- [clojure.pprint :refer [pprint]]
- [clojure.java.shell :refer [sh]]))
-
-;; Before GitHub shut down its download service all uberjars were
-;; hosted there. Here's the latest data we have on it.
-(def github {"leiningen-1.6.1.1-standalone.jar" 15143,
- "leiningen-1.6.2-standalone.jar" 16640,
- "leiningen-1.7.1-standalone.jar" 64026,
- "leiningen-full.jpg" 519,
- "leiningen-1.5.2-standalone.jar" 24865,
- "leiningen-1.6.1-standalone.jar" 9405,
- "leiningen-1.7.0-standalone.jar" 10969,
- "leiningen-1.4.2-standalone.jar" 31651,
- "leiningen-1.5.1-standalone.jar" 290,
- "leiningen-1.6.0-standalone.jar" 1065,
- "leiningen-1.4.1-standalone.jar" 1606,
- "leiningen-1.5.0-standalone.jar" 9575,
- "leiningen-1.3.1-standalone.jar" 7905,
- "leiningen-1.4.0-standalone.jar" 1589,
- "leiningen-1.3.0-SNAPSHOT-standalone.jar" 280,
- "leiningen-1.4.0-SNAPSHOT-standalone.jar" 423,
- "leiningen-1.3.0-standalone.jar" 2442,
- "leiningen-banner.png" 399328,
- "leiningen-1.2.0-standalone.jar" 3617,
- "leiningen-1.1.0-standalone.jar" 12858,
- "leiningen-1.7.0-SNAPSHOT-standalone.jar" 434,
- "leiningen-1.6.2-SNAPSHOT-standalone.jar" 637,
- "leiningen-1.7.1-SNAPSHOT-standalone.jar" 971,
- "leiningen-2.0.0-preview10-standalone.jar" 555530, ; huh?
- "leiningen-1.4.0-RC2-standalone.jar" 188,
- "leiningen-1.5.0-RC1-standalone.jar" 177,
- "leiningen-2.0.0-preview10-standalone.jar.asc" 272,
- "leiningen-1.4.0-RC1-standalone.jar" 200,
- "leiningen-1.3.0-RC1-standalone.jar" 103,
- "leiningen-2.0.0-preview9-standalone.jar" 442,
- "leiningen-2.0.0-preview8-standalone.jar" 2050,
- "leiningen-2.0.0-preview7-standalone.jar" 8022,
- "leiningen-2.0.0-preview6-standalone.jar" 2839,
- "leiningen-2.0.0-preview9-standalone.jar.asc" 41,
- "leiningen-1.4.0-win32.zip" 70,
- "leiningen-2.0.0-preview8-standalone.jar.asc" 37,
- "leiningen-1.5.0-win32.zip" 464,
- "leiningen-1.4.1-win32.zip" 260,
- "leiningen-2.0.0-preview5-standalone.jar" 200,
- "leiningen-1.4.2-win32.zip" 1108,
- "leiningen-2.0.0-preview4-standalone.jar" 1701,
- "leiningen-2.0.0-preview3-standalone.jar" 2029,
- "leiningen-2.0.0-preview2-standalone.jar" 1437,
- "leiningen-1.5.2-win.zip" 4346,
- "lein-win32.zip" 1502,
- "leiningen-2.0.0-preview1-standalone.jar" 282})
-
-;; filter out non-release-jars
-(def github-releases
- (into {} (remove (comp (partial re-find #"SNAPSHOT|RC|zip|jpg|png|asc") key)
- github)))
-
-(def total (apply + (vals github-releases))) ; 788178
-
-(defn file-for-line [line]
- (let [[_ file] (re-find #"\"GET ([^ ]+) " line)]
- (if file
- (last (.split file "/")))))
-
-(defn parse-line [sums line]
- (if-let [file (file-for-line line)]
- (update-in sums [file] (fnil inc 0))
- sums))
-
-(defn parse-file [f]
- (with-open [rdr (clojure.java.io/reader f)]
- (reduce parse-line {} (line-seq rdr))))
-
-(defn parse-dir [d]
- (apply merge-with + (->> (.listFiles (java.io.File. d))
- (filter (memfn isFile))
- (map parse-file))))
-
-;; TODO: fetch S3 logs?
-(def -main parse-dir)
View
113 bin/leiningen/downloads.clj
@@ -0,0 +1,113 @@
+;; Runtime dependency bootstrap: fetch the extra libraries this script needs
+;; through Pomegranate so they do not have to be declared in project.clj.
+;; This runs before the ns form below, which requires namespaces that are
+;; expected to come from these libraries.
+(use '[cemerick.pomegranate :only (add-dependencies)])
+;; Load the aether namespace explicitly instead of relying on it having been
+;; loaded as a side effect of loading pomegranate.
+(require 'cemerick.pomegranate.aether)
+
+(add-dependencies :coordinates '[[clj-aws-s3 "0.3.6"]
+                                 [tentacles "0.2.4"]]
+                  ;; Clojars is addressed over HTTPS so that jars fetched at
+                  ;; runtime cannot be altered in transit.
+                  :repositories (merge cemerick.pomegranate.aether/maven-central
+                                       {"clojars" "https://clojars.org/repo"}))
+
+(ns leiningen.downloads
+ "Calculate download statistics from logs."
+ (:require [aws.sdk.s3 :as s3]
+ [clojure.java.io :as io]
+ [tentacles.repos :as repo]
+ [clojure.pprint :refer [pprint]])
+ (:import [java.io File]))
+
+(def ^:internal aws-cred
+
+  ;; AWS credentials passed to every S3 call in this namespace. The file is
+  ;; read once, when this def is evaluated.
+  ;;
+  ;; In order to run, you need to define a map with the appropriate AWS
+  ;; credentials in ~/.secrets/leiningen_downloads_aws_cred.clj:
+  ;;
+  ;;   {:access-key "AWS_ACCESS_KEY"
+  ;;    :secret-key "AWS_SECRET_KEY"}
+
+  ;; *read-eval* is bound to false so the file is read as plain data and
+  ;; cannot trigger evaluation of #=(...) forms.
+  (binding [*read-eval* false]
+    (read-string
+     ;; The home directory comes from the JVM's user.home property; the HOME
+     ;; environment variable is not set on every platform.
+     (slurp (File. (System/getProperty "user.home")
+                   "/.secrets/leiningen_downloads_aws_cred.clj")))))
+
+(defn- list-all-objects
+  "Returns the :objects of every listing page of `bucket`, requesting the
+  next page for as long as the response is flagged :truncated?.
+
+  `objects` and `next-marker` are optional: they seed the objects collected
+  so far and the marker to resume from. The caller in this file passes only
+  `bucket`."
+  [bucket & [objects next-marker]]
+  (loop [collected objects
+         marker next-marker]
+    (let [page (s3/list-objects aws-cred bucket {:marker marker})
+          collected (concat collected (:objects page))]
+      (if (:truncated? page)
+        ;; more pages remain: resume from the marker this page handed back
+        (recur collected (:next-marker page))
+        collected))))
+
+(defn- fetch-all-objects
+  "Returns a lazy sequence holding the result of `s3/get-object` for each
+  object listed in `bucket`. A \"Processing: <key>\" line is printed as each
+  element is realized."
+  [bucket]
+  (map (fn [{object-key :key}]
+         (println (str "Processing: " object-key))
+         (s3/get-object aws-cred bucket object-key))
+       (list-all-objects bucket)))
+
+(defn- file-for-line
+  "Returns the last \"/\"-separated segment of the path requested by the GET
+  request found in log `line`, or nil when the line contains no GET request."
+  [line]
+  (when-let [[_ path] (re-find #"\"GET ([^ ]+) " line)]
+    (last (.split path "/"))))
+
+(defn- ip-for-line
+  "Returns the first dotted-quad (IPv4-shaped) substring of log `line`, or
+  nil when the line contains none."
+  [line]
+  (let [dotted-quad #"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b"]
+    (re-find dotted-quad line)))
+
+(defn- status-for-line
+  "Returns, as a string, the three digits captured at the first place in log
+  `line` where a double quote and a space are followed by three digits (the
+  spot where a status code follows the quoted request), or nil when there is
+  no such place."
+  [line]
+  (let [[_ status] (re-find #"\" (\d\d\d)" line)]
+    status))
+
+(defn- parse-files
+  "Reads `content` line by line and returns one map per line holding the
+  :file, :status and :ip extracted from that line. The result is fully
+  realized before returning, because the reader is closed on exit."
+  [content]
+  (let [line->entry (fn [line]
+                      {:file (file-for-line line)
+                       :status (status-for-line line)
+                       :ip (ip-for-line line)})]
+    (with-open [rdr (io/reader content)]
+      (doall (map line->entry (line-seq rdr))))))
+
+(defn- s3-downloads
+  "Returns one parsed log entry (a map with :file, :status and :ip) for each
+  successful jar download found in the logs of the \"leiningen-logs\" bucket."
+  []
+  (let [jar-download?
+        (fn [{:keys [file status]}]
+          (and file                          ;; file is present
+               ;; file is a jar: the name must end in ".jar", optionally
+               ;; followed by a query string. Names that merely contain
+               ;; ".jar" (such as ".jar.asc" signatures) are not counted.
+               (re-find #"\.jar(?:\?|$)" file)
+               (= "200" status)))]           ;; and only HTTP 200 responses
+    ;; each log file yields its own sequence of entries; mapcat joins them
+    (mapcat (fn [logfile] (filter jar-download? (parse-files logfile)))
+            (map :content (fetch-all-objects "leiningen-logs")))))
+
+(defn- github-downloads
+  "Returns a sequence of single-entry maps {name download-count}, one for
+  each technomancy/leiningen download whose name matches \\.jar$, in the
+  reverse of ascending download-count order."
+  []
+  (->> (repo/downloads "technomancy" "leiningen")
+       (map (fn [download]
+              {(:name download) (:download_count download)}))
+       (filter (fn [entry] (re-find #"\.jar$" (first (keys entry)))))
+       (sort-by (fn [entry] (first (vals entry))))
+       reverse))
+
+(defn print-report
+  "Prints the download report to *out*: the GitHub and S3 totals, the number
+  of distinct IP addresses among the S3 downloads, the combined total, and
+  then the per-file breakdown for each source."
+  []
+  (let [s3-entries (s3-downloads)
+        s3-total (count s3-entries)
+        github-entries (github-downloads)
+        github-total (apply + (map (fn [entry] (first (vals entry)))
+                                   github-entries))
+        unique-ips (fn [] (count (distinct (map :ip s3-entries))))
+        gap (fn [] (print "\n\n"))]
+    ;; summary figures
+    (println (str "GitHub Downloads: " github-total))
+    (println (str "S3 Downloads: " s3-total))
+    (println (str "Unique IP Addresses (S3 Downloads Only): " (unique-ips)))
+    (println (str "Total Downloads: " (+ github-total s3-total)))
+    ;; per-file breakdown, GitHub first
+    (gap)
+    (println "GitHub downloads by file:")
+    (gap)
+    (pprint github-entries)
+    ;; then S3, counted by file name
+    (gap)
+    (println "S3 downloads by file:")
+    (gap)
+    (pprint (frequencies (map :file s3-entries)))
+    ;; Original author's note: without this last println the output above
+    ;; does not show up when run through lein run (cause not established).
+    (println "")))
+
+(defn -main
+  "Entry point for `lein run -m leiningen.downloads`: prints the download
+  report. Command-line arguments are accepted and ignored, so passing extra
+  arguments does not fail with an arity error."
+  [& _args]
+  (print-report))
View
2  project.clj
@@ -34,7 +34,7 @@
:test-selectors {:default (complement :disabled)
:offline (comp (partial not-any? identity)
(juxt :online :disabled))}
- :source-paths ["leiningen-core/src" "src"]
+ :source-paths ["leiningen-core/src" "src" "bin"]
;; work around Clojure bug http://dev.clojure.org/jira/browse/CLJ-1034
:uberjar-exclusions [#"^data_readers.clj$"]
:eval-in :leiningen)
Something went wrong with that request. Please try again.