Skip to content

Commit

Permalink
Related #1230, 1231, 1244: integrate Timeline2 with quorascraper
Browse files Browse the repository at this point in the history
  • Loading branch information
vibhcool committed Jul 4, 2017
1 parent e2440e3 commit 28efecf
Show file tree
Hide file tree
Showing 9 changed files with 515 additions and 93 deletions.
11 changes: 7 additions & 4 deletions src/org/loklak/api/search/ConsoleService.java
Expand Up @@ -33,6 +33,7 @@
import org.loklak.objects.QueryEntry;
import org.loklak.objects.ResultList;
import org.loklak.objects.Timeline;
import org.loklak.objects.Timeline2;
import org.loklak.objects.UserEntry;
import org.loklak.server.APIException;
import org.loklak.server.APIHandler;
Expand All @@ -43,7 +44,6 @@
import org.loklak.susi.SusiProcedures;
import org.loklak.susi.SusiThought;
import org.loklak.susi.SusiTransfer;
import org.loklak.harvester.Post;
import org.loklak.harvester.BaseScraper;

import org.loklak.tools.storage.JSONObjectWithDefault;
Expand Down Expand Up @@ -236,13 +236,16 @@ public String getAPIPath() {
json.setData(transfer.conclude(json.getData()));
return json;
});
/*

dbAccess.put(Pattern.compile("SELECT +?(.*?) +?FROM +?quoraprofile +?WHERE +?profile ??= ??'(.*?)' ??;"), matcher -> {
BaseScraper quoraScrape = new QuoraProfileScraper(matcher.group(2));
Timeline2 dataList = quoraScrape.getData();
SusiThought json = new SusiThought(dataList.toJSON());
SusiTransfer transfer = new SusiTransfer(matcher.group(1));
return quoraScrape.getData().toJSON(transfer.conclude(json.getData()));
json.setData(transfer.conclude(json.getData()));
return json;
});
*/

dbAccess.put(Pattern.compile("SELECT +?(.*?) +?FROM +?wikigeodata +?WHERE +?place ??= ??'(.*?)' ??;"), matcher -> {
SusiThought json = WikiGeoData.wikiGeoData(matcher.group(2));
SusiTransfer transfer = new SusiTransfer(matcher.group(1));
Expand Down
27 changes: 10 additions & 17 deletions src/org/loklak/api/search/QuoraProfileScraper.java
Expand Up @@ -30,17 +30,10 @@
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.loklak.data.DAO;
import org.loklak.harvester.Post;
import org.loklak.harvester.BaseScraper;
import org.loklak.objects.Timeline;
import org.loklak.server.APIException;
import org.loklak.server.APIHandler;
import org.loklak.server.AbstractAPIHandler;
import org.loklak.server.Authorization;
import org.loklak.harvester.Post;
import org.loklak.objects.Timeline2;
import org.loklak.server.BaseUserRole;
import org.loklak.server.Query;
import org.loklak.susi.SusiThought;
import org.loklak.tools.storage.JSONObjectWithDefault;

public class QuoraProfileScraper extends BaseScraper {

Expand Down Expand Up @@ -135,9 +128,9 @@ private Post scrapeProfile() {

//TODO: this method shall return Timeline object
@Override
// protected Timeline scrape(BufferedReader br) {
protected Post scrape(BufferedReader br) {
// Timeline dataSet = new Timeline(order);
protected Timeline2 scrape(BufferedReader br) {
// protected Post scrape(BufferedReader br) {
Timeline2 dataSet = new Timeline2(order);
//for profile
Post qPost;
try {
Expand All @@ -147,8 +140,8 @@ protected Post scrape(BufferedReader br) {
}
qPost = scrapeProfile();

// return dataSet.add(qPost);
return qPost;
return dataSet.add(qPost);
// return qPost;
}


Expand All @@ -164,6 +157,7 @@ public QuoraPost(String _quoraId, int _quoraPostNo) {
super();
this.quoraId = _quoraId;
this.quoraPostNo = _quoraPostNo;
this.postId = this.timestamp + this.quoraPostNo + this.quoraId;
}

public void getQuoraId(String _quoraId) {
Expand All @@ -174,12 +168,11 @@ public void getQuoraPostNo(int _quoraPostNo) {
this.quoraPostNo = _quoraPostNo;
}

public void getPostId() {
/**
 * Recomputes {@code postId} from the post's timestamp, its post number and
 * the Quora id, concatenated in that order.
 *
 * The first operand is converted to a String explicitly. Without that,
 * {@code timestamp + quoraPostNo} is evaluated as numeric addition (the
 * {@code +} operator is left-associative), so different
 * (timestamp, post number) pairs could yield the same id.
 *
 * NOTE(review): the QuoraPost constructor builds postId with the same
 * expression inline; it should delegate to this method so both agree.
 */
public void setPostId() {
    this.postId = String.valueOf(this.timestamp) + this.quoraPostNo + this.quoraId;
}

public String setPostId() {
this.postId = this.timestamp + this.quoraPostNo + this.quoraId;
/**
 * Returns the post id as a String.
 *
 * @return the current {@code postId}, or the literal text {@code "null"}
 *         when no id has been assigned yet
 */
public String getPostId() {
    final Object id = this.postId;
    return id == null ? "null" : id.toString();
}
//clean data
Expand Down
52 changes: 0 additions & 52 deletions src/org/loklak/data/logger.java

This file was deleted.

20 changes: 10 additions & 10 deletions src/org/loklak/harvester/BaseScraper.java
Expand Up @@ -11,11 +11,11 @@
import org.loklak.http.ClientConnection;
import org.loklak.objects.ProviderType;
import org.loklak.objects.SourceType;
import org.loklak.objects.Timeline;
import org.loklak.server.AbstractAPIHandler;
import org.loklak.server.APIException;
import org.loklak.server.Authorization;
import org.loklak.server.Query;
import org.loklak.objects.Timeline2;
import org.loklak.tools.storage.JSONObjectWithDefault;

/**
Expand All @@ -40,7 +40,7 @@ public abstract class BaseScraper extends AbstractAPIHandler {
//TODO: dummy variable, add datastructure for filter, type_of_posts, location, etc
protected String extra = "";
//TODO: setup Timeline for Post
protected final Timeline.Order order = Timeline.parseOrder("timestamp");
protected final Timeline2.Order order = Timeline2.parseOrder("timestamp");

@Override
public JSONObject serviceImpl(Query call, HttpServletResponse response, Authorization rights,
Expand All @@ -50,19 +50,19 @@ public JSONObject serviceImpl(Query call, HttpServletResponse response, Authoriz
//TODO: add different extra parameters; this is a dummy variable
this.extra = call.get("extra", "");
//TODO: to be implemented to use Timeline
//return getData().toJSON;
return this.getData();
return getData().toJSON(false, "metadata_base", "statuses_base");
//return this.getData();
}

/**
 * Converts the given "extra" string into a map; the concrete meaning of the
 * keys and values is left to each scraper implementation.
 *
 * NOTE(review): presumably receives the value of the "extra" request
 * parameter read in serviceImpl - confirm against the callers.
 *
 * @param _extra raw extra-options string
 * @return a map derived from {@code _extra}
 */
protected abstract Map<?, ?> getExtra(String _extra);

// public Timeline getData() {
public Post getData() {
public Timeline2 getData() {
// public Post getData() {
ClientConnection connection;
BufferedReader br;

// Timeline tl = new Timeline(order);
Post tl = null;
Timeline2 tl = new Timeline2(order);
// Post tl = null;
this.url = this.baseUrl + this.midUrl + this.query;

try {
Expand Down Expand Up @@ -92,8 +92,8 @@ public BufferedReader getHtml(ClientConnection connection) {
return br;
}

//protected abstract Timeline scrape(BufferedReader br);
protected abstract Post scrape(BufferedReader br);
/**
 * Extracts posts from the supplied reader and returns them as a timeline.
 * Implemented by the concrete scrapers (e.g. QuoraProfileScraper).
 *
 * NOTE(review): br is presumably the reader produced by getHtml(...) for
 * the scraper's URL - confirm in getData().
 *
 * @param br reader over the content to be scraped
 * @return the timeline holding the scraped posts
 */
protected abstract Timeline2 scrape(BufferedReader br);
//protected abstract Post scrape(BufferedReader br);

public String bufferedReaderToString(BufferedReader br) throws IOException {
StringBuilder everything = new StringBuilder();
Expand Down
14 changes: 12 additions & 2 deletions src/org/loklak/harvester/Post.java
@@ -1,6 +1,7 @@
package org.loklak.harvester;

import org.json.JSONObject;
import java.util.Date;

/**
* @author vibhcool (Vibhor Verma)
Expand All @@ -13,6 +14,7 @@ public abstract class Post extends JSONObject {

protected long timestamp = 0;
protected String postId;

protected Post() {
this.setTimestamp();
}
Expand All @@ -35,8 +37,16 @@ public void setTimestamp() {
this.setTimestamp(timestamp);
}

//public abstract void getPostId();
/**
 * Returns this post's timestamp as a {@link Date}.
 *
 * @return a new {@code Date} built from the stored {@code timestamp} value
 */
public Date getTimestampDate() {
    final long millis = this.timestamp;
    return new Date(millis);
}

/**
 * Placeholder for computing {@code postId}; intentionally empty in this
 * base class. Being private, it is not overridden by the public
 * {@code setPostId()} methods that subclasses declare.
 */
//TODO: Set up TwitterTweet before setting this as abstract
private void setPostId() { }

//public abstract String setPostId();
/**
 * Returns the identifier of this post.
 *
 * This base implementation always returns the empty string; subclasses are
 * expected to override it with a real id.
 *
 * @return the empty string
 */
//TODO: Set up TwitterTweet before setting this as abstract
public String getPostId() {
    final String noId = "";
    return noId;
}
}

3 changes: 2 additions & 1 deletion src/org/loklak/objects/AbstractObjectEntry.java
Expand Up @@ -30,10 +30,11 @@
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.loklak.harvester.Post;

import com.fasterxml.jackson.core.JsonGenerator;

public abstract class AbstractObjectEntry implements ObjectEntry {
public abstract class AbstractObjectEntry extends Post implements ObjectEntry {

public final static String TIMESTAMP_FIELDNAME = "timestamp";
public final static String CREATED_AT_FIELDNAME = "created_at";
Expand Down
22 changes: 16 additions & 6 deletions src/org/loklak/objects/MessageEntry.java
Expand Up @@ -48,7 +48,7 @@ public class MessageEntry extends AbstractObjectEntry implements ObjectEntry {

public static final String RICH_TEXT_SEPARATOR = "\n***\n";

protected Date timestamp; // a time stamp that is given in loklak upon the arrival of the tweet which is the current local time
protected Date timestampDate; // a time stamp that is given in loklak upon the arrival of the tweet which is the current local time
protected Date created_at; // the time given in the tweet which is the time when the user created it. This is also use to do the index partition into minute, hour, week
protected Date on; // on means 'valid from'
protected Date to; // 'to' means 'valid_until' and may not be set
Expand All @@ -74,7 +74,8 @@ public class MessageEntry extends AbstractObjectEntry implements ObjectEntry {
private Map<Context, Classification<String, Category>> classifier;

public MessageEntry() throws MalformedURLException {
this.timestamp = new Date();
this.timestamp = new Date().getTime();
this.timestampDate = new Date(this.timestamp);
this.created_at = new Date();
this.on = null;
this.to = null;
Expand Down Expand Up @@ -113,7 +114,8 @@ public MessageEntry() throws MalformedURLException {
}

public MessageEntry(JSONObject json) {
Object timestamp_obj = lazyGet(json, AbstractObjectEntry.TIMESTAMP_FIELDNAME); this.timestamp = parseDate(timestamp_obj);
Object timestamp_obj = lazyGet(json, AbstractObjectEntry.TIMESTAMP_FIELDNAME); this.timestampDate = parseDate(timestamp_obj);
this.timestamp = this.timestampDate.getTime();
Object created_at_obj = lazyGet(json, AbstractObjectEntry.CREATED_AT_FIELDNAME); this.created_at = parseDate(created_at_obj);
Object on_obj = lazyGet(json, "on"); this.on = on_obj == null ? null : parseDate(on);
Object to_obj = lazyGet(json, "to"); this.to = to_obj == null ? null : parseDate(to);
Expand Down Expand Up @@ -174,8 +176,8 @@ public MessageEntry(JSONObject json) {
enrich();
}

public Date getTimestamp() {
return this.timestamp == null ? new Date() : this.timestamp;
/**
 * Returns the arrival timestamp of this message as a {@link Date}.
 *
 * @return the stored {@code timestampDate}, or a freshly created
 *         {@code Date} (current time) when none has been set
 */
public Date getTimestampDate() {
    if (this.timestampDate != null) {
        return this.timestampDate;
    }
    return new Date();
}

public Date getCreatedAt() {
Expand Down Expand Up @@ -310,6 +312,14 @@ public void setLocationPoint(double[] location_point) {
this.location_point = location_point;
}

/**
 * Sets {@code postId} to the decimal text of {@code timestamp} immediately
 * followed by the decimal text of {@code created_at}'s epoch milliseconds.
 *
 * @throws NullPointerException if {@code created_at} is null
 */
public void setPostId() {
    final String arrivalPart = Long.toString(this.timestamp);
    final String createdPart = Long.toString(this.created_at.getTime());
    this.postId = arrivalPart + createdPart;
}

/**
 * Returns the post id as a String.
 *
 * @return the current {@code postId}, or the literal text {@code "null"}
 *         when {@code setPostId()} has not assigned one yet
 */
public String getPostId() {
    final Object id = this.postId;
    return id == null ? "null" : id.toString();
}

/**
* @return [longitude, latitude] which is inside of getLocationRadius() from getLocationPoint()
*/
Expand Down Expand Up @@ -561,7 +571,7 @@ public JSONObject toJSON(final UserEntry user, final boolean calculatedData, fin
JSONObject m = new JSONObject(true);

// tweet data
m.put(AbstractObjectEntry.TIMESTAMP_FIELDNAME, utcFormatter.print(getTimestamp().getTime()));
m.put(AbstractObjectEntry.TIMESTAMP_FIELDNAME, utcFormatter.print(getTimestampDate().getTime()));
m.put(AbstractObjectEntry.CREATED_AT_FIELDNAME, utcFormatter.print(getCreatedAt().getTime()));
if (this.on != null) m.put("on", utcFormatter.print(this.on.getTime()));
if (this.to != null) m.put("to", utcFormatter.print(this.to.getTime()));
Expand Down

0 comments on commit 28efecf

Please sign in to comment.