<a href="https://colab.research.google.com/github/mibeaum/ML/blob/master/Domain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Domain Classification**

The proposal is to split a given URL into its initial subdomain name and its domain name.
![domain](https://github.com/mibeaum/ML/blob/master/domain.png?raw=1)

The data CSV columns:
*   'querysub1' = domain name
*   'querysub(n)' = initial subdomain (the last 'querysub(n)' column that is populated)



In [1]:
import numpy as np  # Arrays, matrices and functions on them. Required by Pandas, below
import pandas as pd # A data analysis library
from sklearn.model_selection import train_test_split # scikit-learn, machine learning tools
import matplotlib.pyplot as plt # A plotting library
import seaborn as sns # Built on matplotlib, facilitates aesthetically pleasing plots
import datetime
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn import metrics

# General settings
sns.set_style('whitegrid') # Plots will have a white grid
# Fixed seed so the stochastic estimator below gives the same result on every
# Restart & Run All.
RANDOM_SEED = 42
# Variables that will help us work with the classes
class_names = ['workstation', 'server']
class_colors = ['darkorange', 'steelblue']

# Candidate classifiers, constructed once here so later cells can reuse them.
classifier_3NN = KNeighborsClassifier(n_neighbors=3, metric='euclidean')
# The random forest bootstraps samples and sub-samples features at random;
# pinning random_state makes its fitted trees reproducible.
classifier_rf = RandomForestClassifier(random_state=RANDOM_SEED)
classifier_svc = SVC()

  import pandas.util.testing as tm


Functions

In [2]:
def load_data(filename, url_root='https://raw.githubusercontent.com/mibeaum/ML/master'):
    ''' Load a comma-separated data file into a DataFrame.

    By default the file is fetched from the GitHub repo used by this notebook.

    Parameters
    ----------
    filename : str
        Name (or relative path) of the CSV file below ``url_root``.
    url_root : str, optional
        Base location the file is read from. Anything ``pd.read_csv`` accepts
        as a prefix works here (an http(s) URL or a local directory path).

    Returns
    -------
    pandas.DataFrame
        The parsed file; the first line of the file is used as the header.
    '''
    # Normalise the slashes at the join so that neither a trailing '/' on the
    # root nor a leading '/' on the filename produces a doubled separator.
    url = url_root.rstrip('/') + '/' + filename.lstrip('/')
    df = pd.read_csv(url, sep=',', header=0)
    print('Loaded from', url)
    return df

In [13]:
# Load the dataset CSV from the repository into `df` for the cells below.
filename = 'data.csv'
df = load_data(filename)

# Preview only the first rows: the frame is very wide, and dumping all of it
# bloats the saved notebook without adding information.
df.head(10)

Loaded from https://raw.githubusercontent.com/mibeaum/ML/master/data.csv


Unnamed: 0,server,dateTime,threadID,context,internalPacketIdentifier,tcpipIndicator,sendReceiveIndicatorinFile,remoteIP,xid,response,query,querytld,querysub1,querysub2,querysub3,querysub4,querysub5,querysub6,querysub7,querysub8,querysub9,querysub10,udpQuestionInfoAt,socket,remoteIPAddr,remotePort,timeQuery,queued,expire,bufLength,msgLength,messageXID,messageFlags,messageFlagsQR,messageFlagsOPCODE,messageFlagsAA,messageFlagsTC,messageFlagsRD,messageFlagsRA,messageFlagsZ,messageFlagsCD,messageFlagsAD,messageFlagsRCODE,messageFlagsQCOUNT,messageFlagsACOUNT,messageFlagsNSCOUNT,messageFlagsARCOUNT,Server
0,0,01/06/202008:11:10,1014,PACKET,0000026335FA1560,UDP,Rcv,10.0.0.1,b698,,CR+O54yt4mUy90jzcodsOg==.uxaI8rtakc2a3NYkhCZsG...,h0wobGq/qu26mbwmaSuEfQ==,eamMvh7i9p8yS1FnyeNr9w==,3nj7le0qJ7hrwsByvmH/1w==,Or4JIvm/JaRpBwX7P1phWg==,uxaI8rtakc2a3NYkhCZsGQ==,CR+O54yt4mUy90jzcodsOg==,,,,,0000026335FA1560,740,10.50.35.204,34348,751243,0,0,0x0fa0,0x0029,0xb698,0x0100,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,N,
1,0,01/06/202009:23:02,1014,PACKET,0000026335FA1560,UDP,Snd,10.0.0.1,b698,Q,CR+O54yt4mUy90jzcodsOg==.uxaI8rtakc2a3NYkhCZsG...,h0wobGq/qu26mbwmaSuEfQ==,eamMvh7i9p8yS1FnyeNr9w==,3nj7le0qJ7hrwsByvmH/1w==,Or4JIvm/JaRpBwX7P1phWg==,uxaI8rtakc2a3NYkhCZsGQ==,CR+O54yt4mUy90jzcodsOg==,,,,,0000026335FA1560,740,10.50.35.204,34348,751243,0,0,0x0fa0,0x007c,0xb698,0x8583,1,0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,3,1.0,0.0,1.0,0.0,N,
2,1,01/06/202019:30:22,1014,PACKET,0000026335C83DB0,UDP,Rcv,10.0.0.1,6507,,IgIrw9+rtg0TJ5WH7dtOZQ==.uxaI8rtakc2a3NYkhCZsG...,h0wobGq/qu26mbwmaSuEfQ==,eamMvh7i9p8yS1FnyeNr9w==,3nj7le0qJ7hrwsByvmH/1w==,Or4JIvm/JaRpBwX7P1phWg==,uxaI8rtakc2a3NYkhCZsGQ==,IgIrw9+rtg0TJ5WH7dtOZQ==,,,,,0000026335C83DB0,740,10.50.35.204,54758,751243,0,0,0x0fa0,0x0029,0x6507,0x0100,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,N,
3,1,01/06/202020:03:23,1014,PACKET,0000026335C83DB0,UDP,Snd,10.0.0.1,6507,Q,IgIrw9+rtg0TJ5WH7dtOZQ==.uxaI8rtakc2a3NYkhCZsG...,h0wobGq/qu26mbwmaSuEfQ==,eamMvh7i9p8yS1FnyeNr9w==,3nj7le0qJ7hrwsByvmH/1w==,Or4JIvm/JaRpBwX7P1phWg==,uxaI8rtakc2a3NYkhCZsGQ==,IgIrw9+rtg0TJ5WH7dtOZQ==,,,,,0000026335C83DB0,740,10.50.35.204,54758,751243,0,0,0x0fa0,0x007c,0x6507,0x8583,1,0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,3,1.0,0.0,1.0,0.0,N,
4,0,01/06/202010:19:31,1014,PACKET,0000026338FC9920,UDP,Rcv,10.0.0.1,452c,,af1s9DODJiTLlpIlPcKHqg==.uxaI8rtakc2a3NYkhCZsG...,h0wobGq/qu26mbwmaSuEfQ==,eamMvh7i9p8yS1FnyeNr9w==,3nj7le0qJ7hrwsByvmH/1w==,Or4JIvm/JaRpBwX7P1phWg==,uxaI8rtakc2a3NYkhCZsGQ==,af1s9DODJiTLlpIlPcKHqg==,,,,,0000026338FC9920,740,10.50.35.204,35367,751243,0,0,0x0fa0,0x0029,0x452c,0x0100,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,N,
5,0,01/06/202011:23:21,1014,PACKET,0000026338FC9920,UDP,Snd,10.0.0.1,452c,Q,af1s9DODJiTLlpIlPcKHqg==.uxaI8rtakc2a3NYkhCZsG...,h0wobGq/qu26mbwmaSuEfQ==,eamMvh7i9p8yS1FnyeNr9w==,3nj7le0qJ7hrwsByvmH/1w==,Or4JIvm/JaRpBwX7P1phWg==,uxaI8rtakc2a3NYkhCZsGQ==,af1s9DODJiTLlpIlPcKHqg==,,,,,0000026338FC9920,740,10.50.35.204,35367,751243,0,0,0x0fa0,0x007c,0x452c,0x8583,1,0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,3,1.0,0.0,1.0,0.0,N,
6,0,01/06/202011:30:12,1014,PACKET,00000263351894F0,UDP,Rcv,10.0.0.2,,,LQc/ynfQRCJMd7TSrLnN5Q==.q0YYXIYqpVjaaa+o8FhvP...,BrDDM5zokQiZhrbLyyiNYQ==,q0YYXIYqpVjaaa+o8FhvPA==,LQc/ynfQRCJMd7TSrLnN5Q==,,,,,,,,00000263351894F0,740,10.50.56.21,53568,751243,0,0,0x0fa0,0x0021,0xa056,0x0100,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,N,
7,0,01/06/202012:23:48,1014,PACKET,00000263351894F0,UDP,Snd,10.0.0.2,,R,LQc/ynfQRCJMd7TSrLnN5Q==.q0YYXIYqpVjaaa+o8FhvP...,BrDDM5zokQiZhrbLyyiNYQ==,q0YYXIYqpVjaaa+o8FhvPA==,LQc/ynfQRCJMd7TSrLnN5Q==,,,,,,,,00000263351894F0,740,10.50.56.21,53568,751243,0,0,0x0fa0,0x0062,0xa056,0x8583,1,0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,3,1.0,0.0,1.0,0.0,N,
8,1,01/06/202020:27:51,1014,PACKET,000002633810C960,UDP,Rcv,10.0.0.1,25ca,,49Y/LJd4619AIg8gZBG/8A==.uxaI8rtakc2a3NYkhCZsG...,h0wobGq/qu26mbwmaSuEfQ==,eamMvh7i9p8yS1FnyeNr9w==,3nj7le0qJ7hrwsByvmH/1w==,Or4JIvm/JaRpBwX7P1phWg==,uxaI8rtakc2a3NYkhCZsGQ==,49Y/LJd4619AIg8gZBG/8A==,,,,,000002633810C960,740,10.50.35.204,38395,751243,0,0,0x0fa0,0x0029,0x25ca,0x0100,0,0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,N,
9,1,01/06/202020:49:12,1014,PACKET,000002633810C960,UDP,Snd,10.0.0.1,25ca,Q,49Y/LJd4619AIg8gZBG/8A==.uxaI8rtakc2a3NYkhCZsG...,h0wobGq/qu26mbwmaSuEfQ==,eamMvh7i9p8yS1FnyeNr9w==,3nj7le0qJ7hrwsByvmH/1w==,Or4JIvm/JaRpBwX7P1phWg==,uxaI8rtakc2a3NYkhCZsGQ==,49Y/LJd4619AIg8gZBG/8A==,,,,,000002633810C960,740,10.50.35.204,38395,751243,0,0,0x0fa0,0x007c,0x25ca,0x8583,1,0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,3,1.0,0.0,1.0,0.0,N,


In [16]:
# Print 'querysub1' next to 'querysub10' for every row that has a 'querysub1'.
# Rows are filtered once up front instead of re-reading df.loc[index, ...]
# inside the loop, since `row` already carries the same values.
for _, row in df.loc[df['querysub1'].notna()].iterrows():
    # f-string formatting (rather than '+') keeps the loop from raising
    # TypeError when 'querysub10' is missing (NaN) or not a string; a missing
    # value is rendered as 'nan'.
    print(f"1: {row['querysub1']},10:{row['querysub10']}")
    

1: eamMvh7i9p8yS1FnyeNr9w==,10:0000026335FA1560
1: eamMvh7i9p8yS1FnyeNr9w==,10:0000026335FA1560
1: eamMvh7i9p8yS1FnyeNr9w==,10:0000026335C83DB0
1: eamMvh7i9p8yS1FnyeNr9w==,10:0000026335C83DB0
1: eamMvh7i9p8yS1FnyeNr9w==,10:0000026338FC9920
1: eamMvh7i9p8yS1FnyeNr9w==,10:0000026338FC9920
1: q0YYXIYqpVjaaa+o8FhvPA==,10:00000263351894F0
1: q0YYXIYqpVjaaa+o8FhvPA==,10:00000263351894F0
1: eamMvh7i9p8yS1FnyeNr9w==,10:000002633810C960
1: eamMvh7i9p8yS1FnyeNr9w==,10:000002633810C960
1: eamMvh7i9p8yS1FnyeNr9w==,10:0000026335FA1560
1: LQc/ynfQRCJMd7TSrLnN5Q==,10:0000026335C83DB0
1: LQc/ynfQRCJMd7TSrLnN5Q==,10:0000026335C83DB0
1: eamMvh7i9p8yS1FnyeNr9w==,10:0000026338FC9920
1: eamMvh7i9p8yS1FnyeNr9w==,10:00000263351894F0
1: eamMvh7i9p8yS1FnyeNr9w==,10:000002633810C960
1: eamMvh7i9p8yS1FnyeNr9w==,10:000002633810C960
1: eamMvh7i9p8yS1FnyeNr9w==,10:0000026335FA1560
1: eamMvh7i9p8yS1FnyeNr9w==,10:0000026335FA1560
1: eamMvh7i9p8yS1FnyeNr9w==,10:0000026335C83DB0
1: eamMvh7i9p8yS1FnyeNr9w==,10:000002633