Skip to content


Subversion checkout URL

You can clone with
Download ZIP
branch: gh-pages
Fetching contributors…

Cannot retrieve contributors at this time

155 lines (128 sloc) 4.895 kB
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
  <!-- Fixed 1280px layout viewport for the slides; zoom stays enabled so
       viewers can enlarge slide text. -->
  <meta name="viewport" content="width=1280">
  <title>Working with Big Data: Infrastructure, Algorithms, and Visualizations</title>

  <!-- Required stylesheet -->
  <link rel="stylesheet" href="core/deck.core.css">

  <!-- Extension CSS files go here. Remove or add as needed.
       Each href must name the stylesheet file itself, not its directory. -->
  <link rel="stylesheet" href="extensions/goto/deck.goto.css">
  <link rel="stylesheet" href="extensions/menu/deck.menu.css">
  <link rel="stylesheet" href="extensions/navigation/deck.navigation.css">
  <link rel="stylesheet" href="extensions/status/deck.status.css">
  <link rel="stylesheet" href="extensions/hash/deck.hash.css">
  <link rel="stylesheet" href="extensions/scale/deck.scale.css">

  <!-- Style theme. More available in /themes/style/ or create your own. -->
  <link rel="stylesheet" href="themes/style/swiss.css">

  <!-- Transition theme. More available in /themes/transition/ or create your own. -->
  <link rel="stylesheet" href="themes/transition/fade.css">

  <!-- Required Modernizr file. Loaded synchronously in the head, ahead of
       the body markup. -->
  <script src="modernizr.custom.js"></script>
</head>
<body class="deck-container">
<!-- Begin slides. Just make elements with a class of slide.
     Every top-level <section class="slide"> is one slide and must be closed
     before the next one opens; otherwise the slides nest inside each other.
     <li class="slide"> items are sub-steps revealed one at a time within
     their parent slide. -->
<section class="slide">
  <h1>Lesson 1: Building a Big Data Infrastructure Part 1</h1>
</section>

<section class="slide">
  <h1>Unstructured Storage &amp; Hadoop</h1>
</section>

<section class="slide">
  <h2>Unstructured Data</h2>
  <ul>
    <li class="slide">
      <h3>Log Files</h3>
    </li>
    <li class="slide">
      <!-- TODO(review): this step has no content in the source; restore the
           missing bullet text or remove the step. -->
    </li>
    <li class="slide">
      <h3>Unknown Formats</h3>
    </li>
  </ul>
</section>

<section class="slide">
  <!-- TODO(review): this slide has no heading in the source; the bullets
       appear to describe Hadoop. Confirm the intended title and add it. -->
  <ul>
    <li class="slide"><h3>Open source</h3></li>
    <li class="slide"><h3>HDFS: Distributed file system modeled after GFS</h3></li>
    <li class="slide"><h3>MapReduce: Distributed batch processing modeled after Google's MapReduce</h3></li>
  </ul>
</section>

<section class="slide">
  <h2>Hadoop's Wider Ecosystem</h2>
  <ul>
    <li class="slide"><h3>HBase - A column oriented database modeled after Google's BigTable</h3></li>
    <li class="slide"><h3>ZooKeeper - A service for maintaining configuration and distributed synchronization</h3></li>
    <li class="slide"><h3>Hive - Provides a SQL like interface for querying data in Hadoop</h3></li>
    <li class="slide"><h3>Cascading - A framework for creating data processing workflows in Hadoop</h3></li>
    <li class="slide"><h3>Pig - A high level language for creating MapReduce programs</h3></li>
    <li class="slide"><h3>Flume - Useful for moving log data into Hadoop</h3></li>
  </ul>
</section>

<section class="slide">
  <h2>Batch Processing</h2>
  <ul>
    <li class="slide">
      <h3>Like cron</h3>
    </li>
    <li class="slide">
      <h3>Run once or frequently</h3>
    </li>
    <li class="slide">
      <h3>Ship code to data</h3>
    </li>
  </ul>
</section>

<section class="slide">
  <h2>What Hadoop &amp; Batch Processing are Good For</h2>
  <ul>
    <li class="slide"><h3>Storing copies of all data</h3></li>
    <li class="slide"><h3>Storing and grepping through log files</h3></li>
    <li class="slide"><h3>Joining data from disparate sources</h3></li>
    <li class="slide"><h3>Building Indexes</h3></li>
    <li class="slide"><h3>Building Models</h3></li>
  </ul>
</section>
<!-- End slides. -->
<!-- Begin extension snippets. Add or remove as needed. -->

<!-- deck.navigation snippet: previous/next links. The aria-label gives the
     arrow-only links an accessible name. -->
<a href="#" class="deck-prev-link" title="Previous" aria-label="Previous slide">&#8592;</a>
<a href="#" class="deck-next-link" title="Next" aria-label="Next slide">&#8594;</a>

<!-- deck.status snippet: "current / total" slide counter. Both spans are
     empty in the markup. -->
<p class="deck-status">
  <span class="deck-status-current"></span>
  /
  <span class="deck-status-total"></span>
</p>

<!-- deck.goto snippet: jump-to-slide form. The form is closed explicitly so
     the permalink and the script tags that follow are not nested inside it. -->
<form action="." method="get" class="goto-form">
  <label for="goto-slide">Go to slide:</label>
  <input type="text" name="slidenum" id="goto-slide" list="goto-datalist">
  <datalist id="goto-datalist"></datalist>
  <input type="submit" value="Go">
</form>

<!-- deck.hash snippet: permalink to the current slide. -->
<a href="." title="Permalink to this slide" class="deck-permalink" aria-label="Permalink to this slide">#</a>
<!-- End extension snippets. -->
<!-- Required JS files. jQuery must load before deck.core.js, and deck.core.js
     must load exactly once, before any extension script. -->
<script src="jquery-1.7.2.min.js"></script>
<script src="core/deck.core.js"></script>

<!-- Extension JS files. Add or remove as needed.
     Each src must name the script file itself, not its directory. -->
<script src="extensions/hash/deck.hash.js"></script>
<script src="extensions/menu/deck.menu.js"></script>
<script src="extensions/goto/deck.goto.js"></script>
<script src="extensions/status/deck.status.js"></script>
<script src="extensions/navigation/deck.navigation.js"></script>
<script src="extensions/scale/deck.scale.js"></script>
<!-- Initialize the deck. You can put this in an external file if desired. -->
$(function() {
Jump to Line
Something went wrong with that request. Please try again.