Permalink
Browse files

added sections 1-3 for lesson 6

  • Loading branch information...
1 parent d4b023d commit 22a47294ed97ed4826e21b440bf94068ae6fd9bd @pauldix committed Aug 30, 2012
View
@@ -0,0 +1,83 @@
/**
 * Creates a reusable, configurable histogram chart (d3 v2 API).
 *
 * The returned `chart` function is meant to be invoked through
 * `selection.call(chart)`: for every element in the selection it bins the
 * bound data with `d3.layout.histogram`, creates the `<svg>` skeleton on first
 * use, and then updates the bars and the x-axis in place.
 *
 * Configuration is done through chainable getter/setters on the returned
 * function: `margin`, `width`, `height`, plus `value`, `range` and `bins`
 * (forwarded to the histogram layout) and `tickFormat` (forwarded to the axis).
 *
 * @returns {function} the chart function
 */
function histogramChart() {
  var margin = {top: 0, right: 0, bottom: 20, left: 0},
      width = 960,
      height = 500;

  var histogram = d3.layout.histogram(),
      x = d3.scale.ordinal(),
      y = d3.scale.linear(),
      xAxis = d3.svg.axis().scale(x).orient("bottom").tickSize(6, 0);

  function chart(selection) {
    selection.each(function(data) {

      // Compute the histogram bins from the raw values.
      data = histogram(data);

      // Update the x-scale: one band per bin, keyed by the bin's lower bound.
      x.domain(data.map(function(d) { return d.x; }))
          .rangeRoundBands([0, width - margin.left - margin.right], .1);

      // Update the y-scale. The range is inverted because SVG's y grows downward.
      y.domain([0, d3.max(data, function(d) { return d.y; })])
          .range([height - margin.top - margin.bottom, 0]);

      // Select the svg element, if it exists.
      var svg = d3.select(this).selectAll("svg").data([data]);

      // Otherwise, create the skeletal chart.
      var gEnter = svg.enter().append("svg").append("g");
      gEnter.append("g").attr("class", "bars");
      gEnter.append("g").attr("class", "x axis");

      // Update the outer dimensions.
      svg.attr("width", width)
          .attr("height", height);

      // Update the inner dimensions.
      var g = svg.select("g")
          .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

      // Update the bars. Entering rects must carry the "bar" class: the join
      // above selects on ".bar", so unclassed rects would never be matched on a
      // later render and a fresh set of rects would be appended every time.
      var bar = svg.select(".bars").selectAll(".bar").data(data);
      bar.enter().append("rect").attr("class", "bar");
      bar.exit().remove();
      bar.attr("width", x.rangeBand())
          .attr("x", function(d) { return x(d.x); })
          .attr("y", function(d) { return y(d.y); })
          .attr("height", function(d) { return y.range()[0] - y(d.y); })
          .order();

      // Update the x-axis, positioned at the bottom of the plotting area.
      g.select(".x.axis")
          .attr("transform", "translate(0," + y.range()[0] + ")")
          .call(xAxis);
    });
  }

  // Getter/setter for the margin object ({top, right, bottom, left}).
  chart.margin = function(_) {
    if (!arguments.length) return margin;
    margin = _;
    return chart;
  };

  // Getter/setter for the outer width in pixels.
  chart.width = function(_) {
    if (!arguments.length) return width;
    width = _;
    return chart;
  };

  // Getter/setter for the outer height in pixels.
  chart.height = function(_) {
    if (!arguments.length) return height;
    height = _;
    return chart;
  };

  // Expose the histogram's value, range and bins method.
  d3.rebind(chart, histogram, "value", "range", "bins");

  // Expose the x-axis' tickFormat method.
  d3.rebind(chart, xAxis, "tickFormat");

  return chart;
}
@@ -0,0 +1,13 @@
+#!/usr/bin/env ruby
+
+require "#{File.dirname(__FILE__)}/post_parser.rb"
+
# Map step: for each input line that PostParser recognises as a post, print one
# "<tag>\t1\t<created epoch seconds>" record per tag on standard output.
STDIN.each do |line|
  post = PostParser.from_xml_fragment(line)
  next unless post

  post[:tags].each { |tag| puts "#{tag}\t1\t#{post[:created_date].to_i}" }
end
View
@@ -0,0 +1,45 @@
+require 'date'
+
# Pulls the fields the tag pipeline needs out of a single line of a posts XML
# dump, using regular expressions on the raw line instead of an XML parser.
class PostParser
  # A line only counts as a post when it carries an Id attribute.
  ID_PATTERN = /\sId="(.*?)"/
  # Raw value of the Tags attribute (without the surrounding quotes).
  TAGS_PATTERN = /Tags="(.*?)"/
  # Raw value of the CreationDate attribute (without the surrounding quotes).
  CREATION_DATE_PATTERN = /\sCreationDate="(.*?)"/
  # One tag inside the Tags value, delimited either by the XML entities
  # &lt; ... &gt; or by literal angle brackets.
  TAG_PATTERN = /(?:&lt;|<)(.+?)(?:&gt;|>)/

  # Parses one line of the dump.
  #
  # @param line [String] a single line of input
  # @return [Hash, nil] nil when the line has no Id attribute; otherwise a hash
  #   with :tags (Array of lower-cased tag names, empty when the line has no
  #   Tags attribute) and :created_date (Time; the current time when the line
  #   has no CreationDate attribute)
  # @raise [ArgumentError] when CreationDate is present but DateTime.parse
  #   cannot parse it
  def self.from_xml_fragment(line)
    return nil unless line =~ ID_PATTERN

    # Scan for every delimited tag rather than splitting and trimming by
    # position, so that the last tag (or the only tag) is never dropped.
    tags_match = line.match(TAGS_PATTERN)
    tags = tags_match ? tags_match[1].downcase.scan(TAG_PATTERN).flatten : []

    date_match = line.match(CREATION_DATE_PATTERN)
    created_date = date_match ? DateTime.parse(date_match[1]).to_time : Time.now

    {
      :tags => tags,
      :created_date => created_date
    }
  end

  # Reduces a chunk of markup-laden text to lower-case words separated by
  # single spaces.
  #
  # The replacements run in this order: double quotes become spaces, "<p>" is
  # deleted, remaining angle brackets become spaces (which turns "</p>\n" into
  # " /p \n"), that "/p \n" residue and any remaining newlines are deleted,
  # digits are deleted, and every other non-word character becomes a space.
  # Note that deleting newlines joins the words on either side of a bare line
  # break.
  #
  # @param string [String] text to clean
  # @return [String] the cleaned, lower-cased, whitespace-normalised text
  def self.clean_string(string)
    string.gsub('"', ' ')
          .gsub("<p>", '')
          .gsub("<", ' ')
          .gsub(">", ' ')
          .gsub("/p \n", '')
          .gsub("\n", '')
          .gsub(/\d/, '')
          .gsub(/[^\w]/, ' ')
          .downcase
          .split
          .join(' ')
  end
end
@@ -0,0 +1,30 @@
+#!/usr/bin/env ruby
+
+require 'rubygems'
+require 'active_support/core_ext/numeric/time'
+require 'active_support/core_ext/time/calculations'
+require 'active_support/core_ext/date/calculations'
+require 'json'
+
# Builds the output record for one tag: its weekly totals sorted by week, with
# each week start expressed in milliseconds since the epoch.
def weekly_series(tag, week_counts)
  data = week_counts.sort_by { |week, _total| week }
                    .map { |week, total| [week.to_i * 1000, total] }
  {:tag => tag, :data => data}
end

# Reduce step: walks "<tag> <count> <epoch seconds>" lines that are already
# grouped by tag (e.g. piped through `sort`) and yields one record per tag,
# as produced by weekly_series.
#
# lines - any object whose #each yields line Strings (STDIN, an Array, ...).
#
# Counts are bucketed by Time#beginning_of_week, which comes from
# active_support. Blank lines are skipped.
def each_tag_weekly_series(lines)
  current_tag = nil
  week_counts = Hash.new(0)

  lines.each do |line|
    tag, count, time = line.split
    next if tag.nil?

    # A new tag starts: flush the series collected for the previous one.
    if current_tag && current_tag != tag
      yield weekly_series(current_tag, week_counts)
      week_counts = Hash.new(0)
    end

    week = Time.at(time.to_i).beginning_of_week
    week_counts[week] += count.to_i
    current_tag = tag
  end

  # Flush the final tag too; without this the last tag in the input would
  # never be written.
  yield weekly_series(current_tag, week_counts) if current_tag
end

each_tag_weekly_series(STDIN) { |series| puts series.to_json }
@@ -0,0 +1,26 @@
+#!/usr/bin/env ruby
+
+require 'rubygems'
+require 'json'
+require 'active_support/core_ext/numeric/time'
+
# Reduce step: sums the count column of "<tag> <count> <epoch seconds>" lines
# per tag and returns [total, tag] pairs ordered from the largest total down.
#
# lines - any object whose #each yields line Strings (STDIN, an Array, ...).
#
# Totals are accumulated in a hash, so every tag is counted, including the
# last one in the input, and the input does not have to be grouped by tag.
# The timestamp column is ignored: no time window is applied here. Blank
# lines are skipped.
def total_counts_by_tag(lines)
  totals = Hash.new(0)

  lines.each do |line|
    tag, count = line.split
    next if tag.nil?

    totals[tag] += count.to_i
  end

  totals.map { |tag, total| [total, tag] }.sort_by { |total, _tag| -total }
end

puts total_counts_by_tag(STDIN).to_json
View
@@ -0,0 +1,15 @@
+<!DOCTYPE html>
+<html>
+<head>
+ <meta charset="utf-8">
+ <script src="http://d3js.org/d3.v2.js"></script>
+</head>
+
+<body>
+ <script>
+ // hot visualization action here
+ </script>
+</body>
+</html>
+<!DOCTYPE html>
+<html>
View
@@ -0,0 +1,46 @@
+<head>
+ <meta charset="utf-8">
+ <script src="http://d3js.org/d3.v2.js"></script>
+ <style>
+ ul.chart {
+ list-style: none;
+ }
+ li.bar {
+ background-color:steelBlue;
+ color:white;
+ height:2em;
+ line-height:2em;
+ padding-right:1em;
+ margin-bottom:2px;
+ text-align:right;
+ }
+ </style>
+</head>
+
+<body>
+ <script>
+ console.log('hi')
+ d3.json("tag_counts_last_30_days.json", function(allTags) {
+ var topTags = allTags.slice(0, 20);
+
+ var width = 600;
+ var countExtent = d3.extent(topTags, function(t) {
+ return t[0];
+ });
+
+ var scale = d3.scale.linear().domain(countExtent).range([50, width]);
+
+ d3.select("body")
+ .append("ul")
+ .attr("class", "chart")
+ .selectAll("li")
+ .data(topTags)
+ .enter()
+ .append("li")
+ .attr("class", "bar")
+ .style("width", function(d) {console.log(d[0]); return scale(d[0])})
+ .text(function(d) {return d[1]})
+ });
+ </script>
+</body>
+</html>
View
@@ -0,0 +1,18 @@
+require 'rubygems'
+require 'sinatra'
+
# Catch-all route for the lesson's demo server.
#
# * If the requested path names a regular file inside the directory the server
#   was started from, the file's contents are returned.
# * Otherwise, if the path starts with "tags/", the last path segment (minus
#   anything after its first ".") is taken as a tag name and every line of
#   tags_by_week.txt containing that name in double quotes is returned.
# * Anything else yields a 404.
get '/*' do
  file_name = params[:splat].first.to_s
  file_parts = file_name.split("/")

  puts "GET: #{file_name}"

  # Resolve the request against the working directory and refuse anything
  # that escapes it (e.g. "../../etc/passwd").
  root = File.expand_path(Dir.pwd)
  path = File.expand_path(file_name, root)
  inside_root = path.start_with?(root + File::SEPARATOR)

  tag_name = file_parts.first == "tags" ? file_parts.last.split(".").first : nil
  tags_file = "tags_by_week.txt"

  if inside_root && File.file?(path)
    File.read(path)
  elsif tag_name && !tag_name.empty? && File.file?(tags_file)
    # Filter in Ruby instead of shelling out to grep: the tag name comes
    # straight from the URL and must never reach a shell command line.
    needle = %("#{tag_name}")
    File.foreach(tags_file).select { |line| line.include?(needle) }.join
  else
    [404, "#{file_name} not found\n"]
  end
end
View
@@ -0,0 +1,81 @@
+Step 1: Prepare raw data
+
+go over the code for post_parser.rb (don't write)
+write mapper_tag_counter, but start with the STDIN loop already defined
+
+gem install activesupport
+write the reducer_tag_aggregator. talk about whether we want to return raw data or rollups
+
+tail -n 3000000 posts.xml | ./mapper_tag_counter.rb | sort | ./reducer_tags_by_week.rb > tags_by_week.txt
+tail -n 3000000 posts.xml | ./mapper_tag_counter.rb | sort | ./reducer_tags_last_30_days.rb | sort > tag_counts_last_30_days.json
+
+
+******************************************************************************************************************
+Step 2: D3 Basics
+
+bring up D3 website
+
+* open source javascript visualization toolkit created by Mike Bostock
+* evolution of years of experience writing Protovis
+* more than a charting library
+* you can build charting libraries with it
+
+(first install JSONView on the presenter machine)
+
+show the server (don't write)
+ruby server.rb -p 3001
+http://localhost:3001/tags/javascript
+
+show section2.html and bring it up along with jsconsole
+
+// show some data
+d3.json("tags/javascript", function(data) {console.log(data)});
+
+// put somewhere to access
+d3.json("tags/javascript", function(data) {window.tagHistory = data});
+
+// do drawing of chart to show scale, domain, range.
+// scale converts the domain (the input) to a range (the output/pixels)
+
+// range determined by the size of the chart
+
+// domain determined by the min and max of the data. or fixed bounds
+tagCountExtent = d3.extent(tagHistory.data, function(d) {return d[1];})
+
+// d3 scales make it easy to map values to their corresponding pixel location in a visualization
+tagCountScale = d3.scale.linear()
+tagCountScale.domain(tagCountExtent)
+tagCountScale.range([0, 300])
+
+// or use the more idiomatic d3 way: chaining
+tagCountScale.domain(tagCountExtent).range([0, 300])
+
+tagCountExtent
+tagCountScale(754)
+tagCountScale(2067)
+tagCountScale(1000)
+tagCountScale(3000)
+
+// bound the output
+tagCountScale.clamp(true)
+tagCountScale(3000)
+
+// selecting elements
+d3.select("body").append("h1").text("d3 ftw")
+d3.select("body").append("h1").text("moar h1")
+d3.select("h1")
+d3.selectAll("h1")
+
+d3.select("body").append("h1").text("h1 with class").attr("class", "foo")
+d3.selectAll("h1.foo")
+
+d3.json("tag_counts_last_30_days.json", function(d) {window.tagCounts = d})
+d3.select("body").append("ol").selectAll("li").data(tagCounts.slice(0, 20)).enter().append("li").text(function(d) {return d[1]})
+
+
+******************************************************************************************************************
+Step 3: Create a Histogram
+
+
+******************************************************************************************************************
+Step 4: Create a time series

0 comments on commit 22a4729

Please sign in to comment.