Skip to content
This repository has been archived by the owner on Mar 28, 2018. It is now read-only.

Commit

Permalink
cleanup to prep RC
Browse files Browse the repository at this point in the history
  • Loading branch information
Mirko Kämpf committed Nov 16, 2014
1 parent 8fb9671 commit 085bbd6
Show file tree
Hide file tree
Showing 17 changed files with 330 additions and 398 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Expand Up @@ -4,7 +4,7 @@
<artifactId>crunchTS</artifactId>
<packaging>jar</packaging>
<version>0.0.2-SNAPSHOT</version>
<name>crunchTS</name>
<name>crunch.TS</name>
<url>http://maven.apache.org</url>
<properties>
<!--hadoopclient.version>2.0.0-cdh4.5.0</hadoopclient.version-->
Expand Down
Expand Up @@ -114,7 +114,7 @@ public void initialize() {
fileName = ((CombineFileSplit) inputSplit).getPaths()[0].getName();
}

timestamp = wikipedia.TimeStampTool.getTimeInMillis(fileName);
timestamp = de.bitocean.util.wikipedia.TimeStampTool.getTimeInMillis(fileName);
}
catch (ClassCastException e) {
e.printStackTrace();
Expand Down
@@ -1,4 +1,4 @@
package org.apache.crunchts;
package de.bitocean.data;
import java.io.BufferedReader;
import java.io.DataInput;
import java.io.DataOutput;
Expand Down
Expand Up @@ -113,7 +113,7 @@ public void initialize() {
fileName = ((CombineFileSplit) inputSplit).getPaths()[0].getName();
}

timestamp = wikipedia.TimeStampTool.getTimeInMillis(fileName);
timestamp = de.bitocean.util.wikipedia.TimeStampTool.getTimeInMillis(fileName);
}
catch (ClassCastException e) {
e.printStackTrace();
Expand Down
@@ -1,17 +1,35 @@
package wikipedia;
package de.bitocean.util.wikipedia;

import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;

/**
* Based on a filename like "pagecounts-20071210-010000.gz"
* the time in millis is calculated.
* Wikipedia Click-Count data is publicly available.
*
* Each file contains aggregated hourly click-counts for all
* Wikipedia pages.
*
* Based on the filename like "pagecounts-20071210-010000.gz"
* the time stamp in milliseconds is calculated.
*
* The timestamp represents the beginning of the hour for which data
* is provided. Due to inaccuracy one cannot rely on all digits.
*
* This implementation does not do any kind of correction.
* More advanced testing is required for future releases.
*
* @author root
* @author Mirko Kämpf
*
*/
public class TimeStampTool {

/**
* Extract time stamp from click-count file name.
*
* @param filename name of the click-count file, e.g. "pagecounts-20071210-010000.gz"
* @return timestamp in ms
*/
static public long getTimeInMillis(String filename) {
// System.out.println( filename );
String[] s = filename.split("-");
Expand Down

0 comments on commit 085bbd6

Please sign in to comment.