Skip to content

Commit

Permalink
[dbs-leipzig#951] add degree distribution and average degree to sampl…
Browse files Browse the repository at this point in the history
…ing statistics
  • Loading branch information
foerster-finsternis committed Aug 26, 2018
1 parent 066df4f commit c112033
Show file tree
Hide file tree
Showing 12 changed files with 633 additions and 0 deletions.
@@ -0,0 +1,66 @@
/*
* Copyright © 2014 - 2018 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.utils.sampling.statistics;

import org.apache.flink.api.common.ProgramDescription;
import org.apache.flink.api.java.DataSet;
import org.gradoop.examples.AbstractRunner;
import org.gradoop.flink.model.api.epgm.LogicalGraph;
import org.gradoop.flink.model.impl.functions.tuple.ObjectTo1;
import org.gradoop.flink.model.impl.operators.sampling.statistics.AverageDegree;
import org.gradoop.flink.model.impl.operators.sampling.statistics.SamplingEvaluationConstants;
import org.gradoop.flink.model.impl.operators.statistics.writer.StatisticWriter;

/**
 * Calls the average degree computation for a logical graph. Writes the result to a csv-file
 * named {@value SamplingEvaluationConstants#FILE_AVERAGE_DEGREE}
 * in the output directory, containing a single line with the average degree value, e.g.:
 *
 * BOF
 * 4
 * EOF
 */
public class AverageDegreeRunner extends AbstractRunner implements ProgramDescription {

  /**
   * Calls the average degree computation for the graph.
   *
   * args[0] - path to graph
   * args[1] - format of graph (csv, json, indexed)
   * args[2] - output path
   *
   * Additional trailing arguments are ignored.
   *
   * @param args command line arguments
   * @throws IllegalArgumentException if fewer than three arguments are given
   * @throws Exception in case of read/write failure
   */
  public static void main(String[] args) throws Exception {
    // Fail early with a usage hint instead of an ArrayIndexOutOfBoundsException further down.
    if (args == null || args.length < 3) {
      throw new IllegalArgumentException(
        "Expected 3 arguments (path to graph, format of graph, output path) but got " +
          (args == null ? 0 : args.length));
    }

    LogicalGraph graph = readLogicalGraph(args[0], args[1]);

    // The operator stores the result on the graph head; extract it as a plain long value.
    DataSet<Long> averageDegree = graph.callForGraph(new AverageDegree()).getGraphHead()
      .map(gh -> gh.getPropertyValue(SamplingEvaluationConstants.PROPERTY_KEY_AVERAGE_DEGREE)
        .getLong());

    StatisticWriter.writeCSV(averageDegree.map(new ObjectTo1<>()),
      appendSeparator(args[2]) + SamplingEvaluationConstants.FILE_AVERAGE_DEGREE);

    getExecutionEnvironment().execute("Sampling Statistics: Average degree");
  }

  @Override
  public String getDescription() {
    return AverageDegreeRunner.class.getName();
  }
}
@@ -0,0 +1,67 @@
/*
* Copyright © 2014 - 2018 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.utils.sampling.statistics;

import org.apache.flink.api.common.ProgramDescription;
import org.apache.flink.api.java.DataSet;
import org.gradoop.examples.AbstractRunner;
import org.gradoop.flink.model.api.epgm.LogicalGraph;
import org.gradoop.flink.model.impl.functions.tuple.ObjectTo1;
import org.gradoop.flink.model.impl.operators.sampling.statistics.AverageIncomingDegree;
import org.gradoop.flink.model.impl.operators.sampling.statistics.SamplingEvaluationConstants;
import org.gradoop.flink.model.impl.operators.statistics.writer.StatisticWriter;

/**
 * Calls the average incoming degree computation for a logical graph. Writes the result to a
 * csv-file named {@value SamplingEvaluationConstants#FILE_AVERAGE_INCOMING_DEGREE}
 * in the output directory, containing a single line with the average incoming degree value, e.g.:
 *
 * BOF
 * 4
 * EOF
 */
public class AverageIncomingDegreeRunner extends AbstractRunner implements ProgramDescription {

  /**
   * Calls the average incoming degree computation for the graph.
   *
   * args[0] - path to graph
   * args[1] - format of graph (csv, json, indexed)
   * args[2] - output path
   *
   * Additional trailing arguments are ignored.
   *
   * @param args command line arguments
   * @throws IllegalArgumentException if fewer than three arguments are given
   * @throws Exception in case of read/write failure
   */
  public static void main(String[] args) throws Exception {
    // Fail early with a usage hint instead of an ArrayIndexOutOfBoundsException further down.
    if (args == null || args.length < 3) {
      throw new IllegalArgumentException(
        "Expected 3 arguments (path to graph, format of graph, output path) but got " +
          (args == null ? 0 : args.length));
    }

    LogicalGraph graph = readLogicalGraph(args[0], args[1]);

    // The operator stores the result on the graph head; extract it as a plain long value.
    DataSet<Long> averageIncomingDegree = graph.callForGraph(new AverageIncomingDegree())
      .getGraphHead()
      .map(gh -> gh.getPropertyValue(
        SamplingEvaluationConstants.PROPERTY_KEY_AVERAGE_INCOMING_DEGREE).getLong());

    StatisticWriter.writeCSV(averageIncomingDegree.map(new ObjectTo1<>()),
      appendSeparator(args[2]) + SamplingEvaluationConstants.FILE_AVERAGE_INCOMING_DEGREE);

    getExecutionEnvironment().execute("Sampling Statistics: Average incoming degree");
  }

  @Override
  public String getDescription() {
    return AverageIncomingDegreeRunner.class.getName();
  }
}
@@ -0,0 +1,67 @@
/*
* Copyright © 2014 - 2018 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.utils.sampling.statistics;

import org.apache.flink.api.common.ProgramDescription;
import org.apache.flink.api.java.DataSet;
import org.gradoop.examples.AbstractRunner;
import org.gradoop.flink.model.api.epgm.LogicalGraph;
import org.gradoop.flink.model.impl.functions.tuple.ObjectTo1;
import org.gradoop.flink.model.impl.operators.sampling.statistics.AverageOutgoingDegree;
import org.gradoop.flink.model.impl.operators.sampling.statistics.SamplingEvaluationConstants;
import org.gradoop.flink.model.impl.operators.statistics.writer.StatisticWriter;

/**
 * Calls the average outgoing degree computation for a logical graph. Writes the result to a
 * csv-file named {@value SamplingEvaluationConstants#FILE_AVERAGE_OUTGOING_DEGREE}
 * in the output directory, containing a single line with the average outgoing degree value, e.g.:
 *
 * BOF
 * 4
 * EOF
 */
public class AverageOutgoingDegreeRunner extends AbstractRunner implements ProgramDescription {

  /**
   * Calls the average outgoing degree computation for the graph.
   *
   * args[0] - path to graph
   * args[1] - format of graph (csv, json, indexed)
   * args[2] - output path
   *
   * Additional trailing arguments are ignored.
   *
   * @param args command line arguments
   * @throws IllegalArgumentException if fewer than three arguments are given
   * @throws Exception in case of read/write failure
   */
  public static void main(String[] args) throws Exception {
    // Fail early with a usage hint instead of an ArrayIndexOutOfBoundsException further down.
    if (args == null || args.length < 3) {
      throw new IllegalArgumentException(
        "Expected 3 arguments (path to graph, format of graph, output path) but got " +
          (args == null ? 0 : args.length));
    }

    LogicalGraph graph = readLogicalGraph(args[0], args[1]);

    // The operator stores the result on the graph head; extract it as a plain long value.
    DataSet<Long> averageOutgoingDegree = graph.callForGraph(new AverageOutgoingDegree())
      .getGraphHead()
      .map(gh -> gh.getPropertyValue(
        SamplingEvaluationConstants.PROPERTY_KEY_AVERAGE_OUTGOING_DEGREE).getLong());

    StatisticWriter.writeCSV(averageOutgoingDegree.map(new ObjectTo1<>()),
      appendSeparator(args[2]) + SamplingEvaluationConstants.FILE_AVERAGE_OUTGOING_DEGREE);

    getExecutionEnvironment().execute("Sampling Statistics: Average outgoing degree");
  }

  @Override
  public String getDescription() {
    return AverageOutgoingDegreeRunner.class.getName();
  }
}
Expand Up @@ -17,6 +17,9 @@

import org.apache.flink.api.common.ProgramDescription;
import org.gradoop.examples.AbstractRunner;
import org.gradoop.utils.statistics.VertexDegreeDistributionRunner;
import org.gradoop.utils.statistics.VertexIncomingDegreeDistributionRunner;
import org.gradoop.utils.statistics.VertexOutgoingDegreeDistributionRunner;

/**
* Calls the computation of all given graph properties for a logical graph. Results are written
Expand All @@ -37,6 +40,12 @@ public class SamplingStatisticsRunner extends AbstractRunner implements ProgramD
*/
public static void main(String[] args) throws Exception {
// Runs every statistics runner in sequence; each one receives the same, unmodified arguments.
// NOTE(review): GraphDensityRunner and the Vertex*DegreeDistributionRunner classes are not
// visible here - presumably they expect the same (graph path, format, output path) layout as
// the Average*DegreeRunner classes; confirm before changing the argument handling.
GraphDensityRunner.main(args);
VertexDegreeDistributionRunner.main(args);
VertexOutgoingDegreeDistributionRunner.main(args);
VertexIncomingDegreeDistributionRunner.main(args);
// Each average degree runner reads the graph again from args[0]/args[1], writes its own file
// below the output path args[2] and triggers a separate job execution.
AverageDegreeRunner.main(args);
AverageIncomingDegreeRunner.main(args);
AverageOutgoingDegreeRunner.main(args);
}

@Override
Expand Down
@@ -0,0 +1,62 @@
/*
* Copyright © 2014 - 2018 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.flink.model.impl.operators.sampling.statistics;

import org.apache.flink.api.common.functions.CrossFunction;
import org.apache.flink.api.java.DataSet;
import org.gradoop.common.model.impl.id.GradoopId;
import org.gradoop.common.model.impl.pojo.GraphHead;
import org.gradoop.flink.model.api.epgm.LogicalGraph;
import org.gradoop.flink.model.api.operators.UnaryGraphToGraphOperator;
import org.gradoop.flink.model.impl.operators.aggregation.functions.count.VertexCount;
import org.gradoop.flink.model.impl.operators.sampling.statistics.functions.CalculateAverageDegree;
import org.gradoop.flink.model.impl.operators.statistics.VertexDegrees;
import org.gradoop.flink.model.impl.tuples.WithCount;

/**
 * Graph operator that determines the average vertex degree of a logical graph and attaches the
 * result to the graph head.
 * Formula: ceiling( sum(vertex degrees) / |vertices| )
 */
public class AverageDegree implements UnaryGraphToGraphOperator {

  /**
   * {@inheritDoc}
   */
  @Override
  public LogicalGraph execute(LogicalGraph graph) {
    // Apply the vertex count aggregation first; all further steps work on the aggregated graph.
    LogicalGraph countedGraph = graph.aggregate(new VertexCount());

    // Sum up tuple field 1 of the per-vertex degree tuples.
    DataSet<WithCount<GradoopId>> degreeSum = new VertexDegrees().execute(countedGraph).sum(1);
    DataSet<GraphHead> singleHead = countedGraph.getGraphHead().first(1);

    // Attach the degree sum to the graph head as a property.
    DataSet<GraphHead> headWithSum = degreeSum
      .cross(singleHead)
      .with(new CrossFunction<WithCount<GradoopId>, GraphHead, GraphHead>() {
        @Override
        public GraphHead cross(WithCount<GradoopId> total, GraphHead head) throws Exception {
          head.setProperty(SamplingEvaluationConstants.PROPERTY_KEY_SUM_DEGREES, total.getCount());
          return head;
        }
      });

    DataSet<GraphHead> headWithAverage = headWithSum
      .map(new CalculateAverageDegree(SamplingEvaluationConstants.PROPERTY_KEY_AVERAGE_DEGREE));

    // Vertices and edges are passed on unchanged, only the graph head is replaced.
    return countedGraph.getConfig().getLogicalGraphFactory()
      .fromDataSets(headWithAverage, countedGraph.getVertices(), countedGraph.getEdges());
  }

  @Override
  public String getName() {
    return AverageDegree.class.getName();
  }
}
@@ -0,0 +1,64 @@
/*
* Copyright © 2014 - 2018 Leipzig University (Database Research Group)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gradoop.flink.model.impl.operators.sampling.statistics;

import org.apache.flink.api.common.functions.CrossFunction;
import org.apache.flink.api.java.DataSet;
import org.gradoop.common.model.impl.id.GradoopId;
import org.gradoop.common.model.impl.pojo.GraphHead;
import org.gradoop.flink.model.api.epgm.LogicalGraph;
import org.gradoop.flink.model.api.operators.UnaryGraphToGraphOperator;
import org.gradoop.flink.model.impl.operators.aggregation.functions.count.VertexCount;
import org.gradoop.flink.model.impl.operators.sampling.statistics.functions.CalculateAverageDegree;
import org.gradoop.flink.model.impl.operators.statistics.IncomingVertexDegrees;
import org.gradoop.flink.model.impl.tuples.WithCount;

/**
 * Graph operator that determines the average incoming vertex degree of a logical graph and
 * attaches the result to the graph head.
 * Formula: ceiling( sum(vertex incoming degrees) / |vertices| )
 */
public class AverageIncomingDegree implements UnaryGraphToGraphOperator {

  /**
   * {@inheritDoc}
   */
  @Override
  public LogicalGraph execute(LogicalGraph graph) {
    // Apply the vertex count aggregation first; all further steps work on the aggregated graph.
    LogicalGraph countedGraph = graph.aggregate(new VertexCount());

    // Sum up tuple field 1 of the per-vertex incoming degree tuples.
    DataSet<WithCount<GradoopId>> incomingDegreeSum =
      new IncomingVertexDegrees().execute(countedGraph).sum(1);
    DataSet<GraphHead> singleHead = countedGraph.getGraphHead().first(1);

    // Attach the degree sum to the graph head as a property.
    DataSet<GraphHead> headWithSum = incomingDegreeSum
      .cross(singleHead)
      .with(new CrossFunction<WithCount<GradoopId>, GraphHead, GraphHead>() {
        @Override
        public GraphHead cross(WithCount<GradoopId> total, GraphHead head) throws Exception {
          head.setProperty(SamplingEvaluationConstants.PROPERTY_KEY_SUM_DEGREES, total.getCount());
          return head;
        }
      });

    DataSet<GraphHead> headWithAverage = headWithSum.map(
      new CalculateAverageDegree(SamplingEvaluationConstants.PROPERTY_KEY_AVERAGE_INCOMING_DEGREE));

    // Vertices and edges are passed on unchanged, only the graph head is replaced.
    return countedGraph.getConfig().getLogicalGraphFactory()
      .fromDataSets(headWithAverage, countedGraph.getVertices(), countedGraph.getEdges());
  }

  @Override
  public String getName() {
    return AverageIncomingDegree.class.getName();
  }
}

0 comments on commit c112033

Please sign in to comment.