In [1]:
!pip install tldextract

[33mYou are using pip version 10.0.1, however version 20.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
import boto3
import json
import time
import tldextract
from pprint import pprint

In [3]:
def predict_one_dga_value(sm_client, features, endpoint_name):
    """Classify one encoded domain as DGA or benign via a model endpoint.

    Args:
        sm_client: Object exposing ``invoke_endpoint(EndpointName=...,
            ContentType=..., Body=...)``; the notebook passes a boto3
            ``sagemaker-runtime`` client.
        features: Comma-separated feature string for a single sample.
        endpoint_name: Name of the endpoint to invoke.

    Returns:
        bool: True when the value returned by the endpoint is strictly
        greater than 0.5, otherwise False.
    """
    response = sm_client.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType='text/csv',
        # The single CSV record is sent newline-terminated.
        Body=features + '\n')
    # The response body is parsed as JSON and compared as a number.
    predicted_value = json.loads(response['Body'].read())
    return predicted_value > 0.5
    

In [4]:

# Characters the encoder knows about; a character's feature value is its
# 1-based position in this string.
VALID_CHARS = 'abcdefghijklmnopqrstuvwxyz0123456789-_'
# Built eagerly so encode_fqdn needs no lazy initialisation or global mutation.
LOOKUP_TABLE = {c: i for i, c in enumerate(VALID_CHARS, start=1)}
# Fixed length of the emitted feature vector.
MAX_DOMAIN_LEN = 63
# Value used for padding and for characters that are not in VALID_CHARS.
PAD_VALUE = 0


def encode_fqdn(fqdn='www.google.com'):
    """Encode the registered-domain part of ``fqdn`` as a CSV feature string.

    The domain is taken from ``tldextract.extract(fqdn).domain``, lower-cased,
    and each character is replaced by its 1-based index in ``VALID_CHARS``.
    The result always holds exactly ``MAX_DOMAIN_LEN`` values: longer domains
    are truncated and shorter ones are right-padded with ``PAD_VALUE``.

    Characters outside ``VALID_CHARS`` are encoded as ``PAD_VALUE`` instead of
    raising ``KeyError``.
    NOTE(review): whether the model treats 0 as "unknown" as well as padding
    cannot be confirmed from this notebook -- verify against training code.

    Args:
        fqdn: Fully qualified domain name, e.g. ``'www.google.com'``.

    Returns:
        str: ``MAX_DOMAIN_LEN`` comma-separated integers.
    """
    # Lower-case before truncating: lower() can change the string length for
    # some non-ASCII characters.
    domain = tldextract.extract(fqdn).domain.lower()[:MAX_DOMAIN_LEN]
    codes = [str(LOOKUP_TABLE.get(c, PAD_VALUE)) for c in domain]
    codes.extend([str(PAD_VALUE)] * (MAX_DOMAIN_LEN - len(codes)))
    return ','.join(codes)

    

In [5]:
# Score the deployed endpoint against the labelled domains in the CSV file.
# Each data row is read as "<domain>,<threat>,..."; the first line of the file
# is skipped.
ENDPOINT_NAME = 'dga-endpoint-0'
ASSIGNMENT_CSV = 'assignment.csv'

runtime_sm_client = boto3.client(service_name='sagemaker-runtime')
right = float(0)
wrong = float(0)
with open(ASSIGNMENT_CSV, mode='r', encoding='utf-8') as ih:
    ih.readline()  # skip the first line (treated as a header row)
    for line in ih:
        tokens = line.split(',')
        if len(tokens) < 2:
            # Blank or single-column line: there is no label to compare with,
            # so skip it instead of failing on tokens[1].
            continue
        domain = tokens[0].strip().lower()
        threat = tokens[1].strip().lower()
        features = encode_fqdn(fqdn=domain)
        p = predict_one_dga_value(sm_client=runtime_sm_client, features=features, endpoint_name=ENDPOINT_NAME)
        if p:
            print(domain + ', dga')
        else:
            print(domain + ', benign')

        # Any label other than 'dga' or 'benign' is counted as wrong.
        if (p and threat == 'dga') or (not p and threat == 'benign'):
            right += 1.0
        else:
            wrong += 1.0

# Guard against a file with no data rows, which would divide by zero.
total = right + wrong
score = (right / total) * 100.0 if total else 0.0
print('Right: ', right, ' Wrong: ', wrong, 'Score: ', score)
        
        


www.dutchwebdesign.com, benign
www.pploulsosohvmkv.com, dga
www.cfsjdqwdhmwkiv.com, dga
www.uocoqgiusyeciouaimcauykqswsymo.com, dga
www.total-toolbar.com, benign
www.australianwoodenboatfestival.com, benign
www.okdbnxoauhzrawyu.com, dga
www.409485.com, benign
www.m8sdetc0u81lgdcpshoxsvy.com, dga
www.qkdccn.com, dga
www.bangsfamilydentistry.com, benign
www.g2g0kfwnw65lqri.com, dga
www.rukketsports.com, benign
www.eayprkbngtpgmoi.com, benign
www.docheads.com, benign
www.xretmysxptli.com, dga
www.trialbriefs.com, benign
www.cewesckwaqc.com, dga
www.jknwld.com, dga
www.kuekgowceqaomwkyeyys.com, dga
www.cyneugril.com, benign
www.grqbkcqbjjrncdoxv.com, dga
www.eikmcosam.com, benign
www.rohrhofer.com, benign
www.votegivegrow.com, dga
www.mamysymikazadoci.com, dga
www.kosgkkammkaflsdhy.com, benign
www.stewed.com, benign
www.motransfer.com, benign
www.paulinepisano.com, benign
www.73666.com, benign
www.phunuthudo.com, benign
www.84807.com, benign
www.abexteriors.com, benign
www.qquorah.com, dga