In [None]:
# Enable the autoreload extension in mode 2 so imported modules are
# re-imported automatically before each cell executes.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Pretty-print floating-point results with 4 digits of precision.
%precision 4
# NOTE(review): autoreload is already loaded at the top of this cell, so this
# reload looks redundant — confirm before removing.
%reload_ext autoreload
# Load the TensorBoard notebook extension.
%load_ext tensorboard

import re
import sys
import math
import random
import os
from datetime import datetime

import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns
from IPython.core.interactiveshell import InteractiveShell
import warnings


# Report each distinct warning only the first time it is raised.
warnings.simplefilter('once')
# Display the value of every top-level expression in a cell, not just the last one.
InteractiveShell.ast_node_interactivity = "all"
# Never truncate cell contents or hide columns when displaying DataFrames.
# `None` means "no limit" for both options. The legacy `-1` sentinel for
# `display.max_colwidth` was deprecated in pandas 1.0 and is rejected by
# later releases, so `None` is used instead.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)

In [None]:
import scrd_tool

In [None]:
from scrd_tool import feature_engineering

In [None]:
from elasticsearch import Elasticsearch, helpers
from pyspark import SparkContext
import pyspark
from pyspark import SparkConf
from pyspark.sql import SQLContext
import json
import findspark
import os,sys


# --- Elasticsearch settings (used below to build the connector config) ---
SCP_ES_HOST = '192.168.10.150'
SCP_ES_PORT = 9200
SCP_DNS_NETFLOW_INDEX = '*-chewbacca-session*'
# NOTE(review): SCROLL and SIZE are not referenced in the visible cells;
# presumably scroll/page settings for the `elasticsearch` client — confirm.
SCROLL = "12h"
SIZE = 1000

# --- Spark settings ---
# Jar registered via `spark.jars` below (elasticsearch-hadoop Spark connector).
SPARK_JARS = '/root/spark_folder/elasticsearch-hadoop-7.3.1/dist/elasticsearch-spark-20_2.11-7.3.1.jar'
SCP_SPARK_MASTER_URL = "local[8]"
APP_NAME = "chewbacca session flow"
os.environ['SPARK_HOME'] = "/root/spark"

# Make the Spark Python bindings importable. The membership check keeps
# sys.path from accumulating duplicate entries when this cell is re-run.
for spark_py_path in ("/root/spark/python/",
                      "/root/spark/python/lib/py4j-0.10.7-src.zip"):
    if spark_py_path not in sys.path:
        sys.path.append(spark_py_path)


# NOTE(review): `pyspark` is already imported at the top of this cell, i.e.
# before SPARK_HOME is set and before findspark.init() runs. If the intent is
# for findspark to locate pyspark, the pyspark imports need to move below
# this call — confirm which pyspark installation is expected to be used.
findspark.init()


sc_conf = (
    SparkConf()
    .setMaster(SCP_SPARK_MASTER_URL)
    .setAppName(APP_NAME)
    .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    .set("spark.jars", SPARK_JARS)
)
# getOrCreate returns the already-running context when this cell is executed
# again in the same kernel; calling the SparkContext constructor a second
# time raises because only one context may be active per process.
sc = SparkContext.getOrCreate(conf=sc_conf)

In [None]:
# DNS log query: Elasticsearch request body.
# One `term` clause per source address of interest.
source_ip_terms = [
    {"term": {"ipSrc": src_ip}}
    for src_ip in ("192.168.0.247", "192.168.10.100")
]

query = dict(
    # Fields to return for every hit.
    _source=["dns.status-term", "dns.status-term-cnt", "timestamp", "dnsho"],
    # Filter: keep documents whose ipSrc matches at least one listed address.
    query={
        "bool": {
            "should": source_ip_terms,
            "minimum_should_match": 1,
        }
    },
)

# Connector settings handed to the Hadoop input format; every value is a
# string, including the port and the JSON-serialised query.
conf = {
    "es.nodes": SCP_ES_HOST,
    "es.port": str(SCP_ES_PORT),
    "es.nodes.data.only": "true",
    "es.resource.read": SCP_DNS_NETFLOW_INDEX,
    "es.query": json.dumps(query),
}

# Create the RDD backed by the Elasticsearch index pattern configured above.
# Positional arguments are, in order: input format class, key class, value class.
rdd = sc.newAPIHadoopRDD(
    "org.elasticsearch.hadoop.mr.EsInputFormat",
    "org.apache.hadoop.io.NullWritable",
    "org.elasticsearch.hadoop.mr.LinkedMapWritable",
    conf=conf,
)

In [None]:
# Tiny two-row frame (three integer columns plus one categorical column)
# used to exercise feature_engineering.run.
df = pd.DataFrame({'a': [1, 4], 'b': [2, 5], 'c': [3, 6]})
df['cat'] = pd.Categorical(['cat', 'dog'])
feature_engineering.run(df)

In [None]:
import tensorflow as tf
from packaging import version
from tensorflow import keras

# Report the installed TensorFlow version.
print("TensorFlow version: ", tf.__version__)

# Fail fast when the major version is below 2.
tf_major = version.parse(tf.__version__).release[0]
assert tf_major >= 2, "This notebook requires TensorFlow 2.0 or above."

In [None]:
# Define the model: flatten 28x28 inputs, one 32-unit ReLU hidden layer with
# dropout, and a 10-way softmax output. Layers are added in order.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=(28, 28)))
model.add(keras.layers.Dense(32, activation='relu'))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Dense(10, activation='softmax'))

# Configure training: Adam optimizer, sparse categorical cross-entropy loss,
# and accuracy as the reported metric.
compile_options = {
    'optimizer': 'adam',
    'loss': 'sparse_categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

In [None]:
# Load Fashion-MNIST; only the first (training) pair is kept, the second
# element of the returned tuple is discarded.
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), _ = fashion_mnist.load_data()

# Divide by 255.0 — presumably to scale 8-bit pixel values into [0, 1].
train_images = train_images / 255.0

In [None]:
# Create log folder hierarchy: <cwd>/logs/fit/<YYYYmmdd-HHMMSS>.
# All three paths are computed first, then created from the outermost
# directory inwards; directories that already exist are left alone.
logs_base_dir = os.path.abspath("./logs/")
logs_fitbase_dir = os.path.join(logs_base_dir, "fit")
logdir = os.path.join(logs_fitbase_dir, datetime.now().strftime("%Y%m%d-%H%M%S"))

for log_path in (logs_base_dir, logs_fitbase_dir, logdir):
    os.makedirs(log_path, exist_ok=True)

# Keras callback that writes TensorBoard event files into this run's
# timestamped log directory.
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

# Train the model for 5 epochs in mini-batches of 64, logging via the callback.
# The call is left as the final expression so its return value is displayed.
model.fit(
    x=train_images,
    y=train_labels,
    epochs=5,
    batch_size=64,
    callbacks=[tensorboard_callback],
)

In [None]:
# Launch TensorBoard inside the notebook, pointed at the log root.
# NOTE(review): this rebinds `logs_base_dir` to the relative path "./logs/",
# whereas the training cell assigns it the absolute form of the same path;
# the two agree only while the working directory is unchanged.
logs_base_dir = "./logs/"
# Ensure the directory exists so TensorBoard has something to open even if
# no training run has written logs yet.
os.makedirs(logs_base_dir, exist_ok=True)
%tensorboard --logdir {logs_base_dir}