Skip to content

Commit

Permalink
资产探测任务分析
Browse files Browse the repository at this point in the history
  • Loading branch information
babymm authored and babymm committed Dec 27, 2019
1 parent a1c7f2d commit 1938ffd
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 4 deletions.
6 changes: 4 additions & 2 deletions mumu-spark.iml
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@
<orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-yarn-client:2.6.5" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.spark:spark-mesos_2.11:2.2.0" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.mesos:mesos:shaded-protobuf:1.0.0" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.mesos:mesos:1.5.0" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.mesos:mesos:1.5.2" level="project" />
<orderEntry type="library" name="Maven: com.google.protobuf:protobuf-java:3.5.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.spark:spark-streaming_2.11:2.2.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.spark:spark-streaming-kafka_2.11:1.6.3" level="project" />
Expand Down Expand Up @@ -221,7 +221,6 @@
<orderEntry type="library" name="Maven: org.codehaus.janino:janino:3.0.0" level="project" />
<orderEntry type="library" name="Maven: org.codehaus.janino:commons-compiler:3.0.0" level="project" />
<orderEntry type="library" name="Maven: org.antlr:antlr4-runtime:4.5.3" level="project" />
<orderEntry type="library" name="Maven: mysql:mysql-connector-java:5.1.38" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.spark:spark-mllib_2.11:2.2.0" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.scalanlp:breeze_2.11:0.13.1" level="project" />
<orderEntry type="library" scope="PROVIDED" name="Maven: org.scalanlp:breeze-macros_2.11:0.13.1" level="project" />
Expand Down Expand Up @@ -292,5 +291,8 @@
<orderEntry type="library" name="Maven: org.apache.commons:commons-compress:1.4.1" level="project" />
<orderEntry type="library" name="Maven: org.tukaani:xz:1.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-math3:3.6.1" level="project" />
<orderEntry type="library" name="Maven: com.gbase:gbase-connector-java:8.8" level="project" />
<orderEntry type="library" name="Maven: org.postgresql:postgresql:42.2.5" level="project" />
<orderEntry type="library" name="Maven: mysql:mysql-connector-java:5.1.38" level="project" />
</component>
</module>
30 changes: 30 additions & 0 deletions src/main/java/com/lovecws/mumu/spark/sql/AtdSparkAnalyze.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package com.lovecws.mumu.spark.sql;

import com.lovecws.mumu.spark.MumuSparkConfiguration;
import org.apache.spark.sql.*;

/**
 * Spark SQL analysis over ATD (asset threat detection) parquet data: reports
 * row counts split by whether corp_name / src_corp_name are populated, and
 * exports the distinct Hunan-province IPs that have no corp name to CSV.
 *
 * @program: mumu-spark
 * @author: 甘亮
 * @create: 2019-12-12 20:00
 **/
public class AtdSparkAnalyze {

    /** Default parquet input directory (local dev path — adjust per environment). */
    private static final String DEFAULT_INPUT_PATH =
            "E:\\data\\industrydataprocessing\\atd\\storage\\20191212";

    /** Default CSV output directory for the unmatched-IP export. */
    private static final String DEFAULT_EXPORT_PATH =
            "E:\\data\\industrydataprocessing\\atd\\corpiphunan";

    /**
     * Runs the analysis against the default input/output paths.
     */
    public void analyze() {
        analyze(DEFAULT_INPUT_PATH, DEFAULT_EXPORT_PATH);
    }

    /**
     * Runs the analysis against explicit paths.
     *
     * @param filePath   parquet directory to read the "atd" records from
     * @param exportPath directory the distinct-IP CSV export is written to
     *                   (overwritten if present)
     * @throws IllegalStateException if the temp view cannot be created or a
     *                               query fails to resolve (wraps {@link AnalysisException})
     */
    public void analyze(final String filePath, final String exportPath) {
        SQLContext sqlContext = new MumuSparkConfiguration().sqlContext();
        Dataset<Row> rowDataset = sqlContext.read().parquet(filePath);

        // Quick sanity peek at the data before running the aggregate queries.
        rowDataset.show(10);
        try {
            rowDataset.createTempView("atd");
            rowDataset.sqlContext().sql("select count(1) as counter from atd").show(100);
            rowDataset.sqlContext().sql("select count(1) as counter from atd where corp_name is not null or src_corp_name is not null").show(100);
            rowDataset.sqlContext().sql("select count(1) as counter from atd where corp_name is null and src_corp_name is null").show(100);
            // coalesce(1) so the export lands in a single CSV part file.
            rowDataset.sqlContext().sql("select distinct dst_ip from atd where corp_name is null and src_corp_name is null and dst_ip_province='湖南省' union select distinct src_ip from atd where corp_name is null and src_corp_name is null and src_ip_province='湖南省'").coalesce(1).write().mode(SaveMode.Overwrite).csv(exportPath);
        } catch (AnalysisException e) {
            // Do not swallow: a failed view/query means the analysis produced
            // nothing useful, so surface it with the cause preserved.
            throw new IllegalStateException("atd analysis failed for " + filePath, e);
        }
    }
}
22 changes: 20 additions & 2 deletions src/main/java/com/lovecws/mumu/spark/sql/GynetresSparkAnalyze.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.storage.StorageLevel;

import java.io.Serializable;

Expand All @@ -29,14 +30,31 @@ public void analyze() {
}

public void deviceSrcondaryNamecnAnalyze() {
String filePath = "G:\\ACTPython\\trunk\\test\\cache\\gynetres.json";
//String filePath = "G:\\databack\\gynetres\\20191203\\industry_gynetres_20190807.json";
String filePath = "G:\\ACTPython\\gynetres\\trunk\\test\\cache\\gynetres.json";
// String filePath = "G:\\ACTPython\\gynetres\\trunk\\test\\cache\\bugynetres.json";
// String filePath = "E:\\data\\mumuflink\\atd\\localfile\\gynetres";
//String filePath = "G:\\\\databack\\\\gynetres\\\\20191203\\industry_gynetres_20191100.json";
// String filePath = "G:\\databack\\gynetres\\20191204\\industry_gynetres_20191100";

SQLContext sqlContext = new MumuSparkConfiguration().sqlContext();
Dataset<Row> rowDataset = sqlContext.read().json(filePath);
rowDataset.persist(StorageLevel.MEMORY_AND_DISK_2());

try {
rowDataset.printSchema();
rowDataset.createTempView("gynetres");
rowDataset.sqlContext().sql("select device.secondary.namecn,count(1) as counter from gynetres group by device.secondary.namecn order by counter desc").show(100);
// rowDataset.sqlContext().sql("select device.primary.name as devicePrimaryName,device.primary.namecn as devicePrimaryNamecn,device.secondary.namecn as deviceSecondaryNamecn,service as service,count(1) as counter from gynetres group by device.primary.name,device.primary.namecn,device.secondary.name,device.secondary.namecn,service order by counter desc").show(100);
// rowDataset.sqlContext().sql("select device.primary.namecn as devicePrimaryName,count(1) as counter from gynetres group by device.primary.namecn order by counter desc").show(100);
// rowDataset.sqlContext().sql("select service as service,count(1) as counter from gynetres where device.primary.namecn='未知' group by service order by counter desc").show(100);
// rowDataset.sqlContext().sql("select device.secondary.namecn,service,count(1) as counter from gynetres group by device.secondary.namecn,service order by counter desc").show(100);
// rowDataset.sqlContext().sql("select device.secondary.namecn,service,count(1) as counter from gynetres where device.primary.namecn='物联网设备' group by device.secondary.namecn,service order by counter desc").show(100);
// rowDataset.sqlContext().sql("select vendor,vendor_source,count(1) as counter from gynetres group by vendor,vendor_source order by counter desc").show(100);
// rowDataset.sqlContext().sql("select distinct vendor from gynetres").coalesce(1).show(100);
// rowDataset.sqlContext().sql("select device.primary.name as devicePrimaryName,device.primary.namecn as devicePrimaryNamecn,device.secondary.name as deviceSecondaryName,device.secondary.namecn as deviceSecondaryNamecn from gynetres where service='http'").coalesce(1).show(100);
rowDataset.sqlContext().sql("select to_date(create_time,'yyyy-MM-dd'),count(1) counter from gynetres group by to_date(create_time,'yyyy-MM-dd')").coalesce(1).show(100);
// rowDataset.sqlContext().sql("select count(1) counter from gynetres where create_time>update_time").coalesce(1).show(100);

} catch (AnalysisException e) {
e.printStackTrace();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import org.junit.Test;

import java.util.Map;
import java.util.concurrent.Executors;

/**
* @author babymm
Expand Down
17 changes: 17 additions & 0 deletions src/test/java/com/lovecws/mumu/spark/sql/AtdSparkAnalyzeTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package com.lovecws.mumu.spark.sql;

import org.junit.Test;

/**
 * Drives {@link AtdSparkAnalyze#analyze()} end to end against the default
 * local data set.
 *
 * @program: mumu-spark
 * @author: 甘亮
 * @create: 2019-12-12 20:02
 **/
public class AtdSparkAnalyzeTest {

    /** Runs the full ATD parquet analysis; passes if no exception escapes. */
    @Test
    public void analyze() {
        final AtdSparkAnalyze analyzer = new AtdSparkAnalyze();
        analyzer.analyze();
    }
}

0 comments on commit 1938ffd

Please sign in to comment.