Permalink
Browse files

Merge pull request #8 from fweez/master

Linux source tree file sizes
  • Loading branch information...
2 parents 9848726 + 62d78e9 commit 36183cfc5a4b3bda0c7fd2166293517ec7bbcbb3 @blackant blackant committed Jun 27, 2011
Showing with 50 additions and 0 deletions.
  1. +32 −0 ben.py
  2. +1 −0 js/datasets/index.json
  3. +17 −0 js/datasets/linux-filesizes.json
View
32 ben.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+
+import os
+import subprocess
+
+# First, build up the results file: one `wc -c` output line per path that find
+# visits, of the form "<byte count> <name>", e.g.: 110 vidioc-g-dv-preset.xml
+linecount_cmd = "rm results; for i in *; do find $i -execdir wc -c '{}' \; " + \
+ ">> results; done;"
+os.system(linecount_cmd)  # old ./results is removed first; walks every entry of the current directory
+
+# Then figure out how many are in each bucket (the leading digit of the byte count, not a line count)
+bucket_cmd = 'for i in `jot 9`; do egrep "^[ ]*$i" results| ' + \
+ 'wc -l; done > counts'  # NOTE(review): relies on `jot 9` printing 1..9 (BSD utility) - confirm it exists on the target platform
+os.system(bucket_cmd)  # ./counts now holds one tally per digit, one per line, in loop order
+
+# And how many total records (lines in results) there are; this is the denominator for the percentages
+total_cmd = 'wc -l results'
+p = subprocess.Popen(total_cmd, stdout=subprocess.PIPE, shell=True)
+(total, _) = p.communicate()  # stderr is not piped, so the second element is unused
+
+total = int(total.split()[0])  # first whitespace-separated field of the wc output is the line count
+
+for i,count in enumerate(file('counts', 'r').readlines()):  # line i of counts is the tally for leading digit i + 1
+ print '"%d": %0.5f,' % (i + 1, 100 * (float(count) / total))  # share of all records, as a percentage
+
+print "Record count:", total
+
+os.system('echo "biggest:" && sort --general-numeric-sort -b results '
+ '| tail -n 1')  # last line after a numeric sort is the record with the largest byte count
+
View
@@ -5,4 +5,5 @@
"loan-amounts-on-kiva-org": "Loan amounts on kiva.org",
"total-number-of-print-materials-in-us-libraries": "Total number of print materials in US libraries",
"population-of-spanish-cities": "Population of Spanish cities",
+ "linux-filesizes": "File sizes in the Linux 2.6.39.2 source tree"
}
@@ -0,0 +1,17 @@
+{
+ "values": {
+ "1": 30.30625,
+ "2": 17.37780,
+ "3": 13.03471,
+ "4": 9.65342,
+ "5": 7.78159,
+ "6": 6.36750,
+ "7": 5.74356,
+ "8": 5.09237,
+ "9": 4.64280
+ },
+ "num_records": "36,702",
+ "min_value": "1",
+ "max_value": "950,746",
+ "source": "http://kernel.org"
+}

0 comments on commit 36183cf

Please sign in to comment.