In [17]:
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import matplotlib as mpl
import json
import requests
import geoip2.database
from datetime import datetime  
from datetime import timedelta  
from dateutil import parser

from scipy.stats import spearmanr
from scipy.stats import rankdata
from scipy.stats import chisquare

In [13]:
files = [ "database/cmds_sequence_2016-07-01.csv", "database/cmds_sequence_2016-07-02.csv", "database/cmds_sequence_2016-07-03.csv", "database/cmds_sequence_2016-07-04.csv", 
    "database/cmds_sequence_2016-07-05.csv", "database/cmds_sequence_2016-07-06.csv", "database/cmds_sequence_2016-07-07.csv", "database/cmds_sequence_2016-07-08.csv", 
    "database/cmds_sequence_2016-07-09.csv", "database/cmds_sequence_2016-07-10.csv", "database/cmds_sequence_2016-07-11.csv", "database/cmds_sequence_2016-07-12.csv", 
    "database/cmds_sequence_2016-07-13.csv", "database/cmds_sequence_2016-07-14.csv", "database/cmds_sequence_2016-07-15.csv", "database/cmds_sequence_2016-07-16.csv", 
    "database/cmds_sequence_2016-07-17.csv", "database/cmds_sequence_2016-07-18.csv", "database/cmds_sequence_2016-07-19.csv", "database/cmds_sequence_2016-07-20.csv", 
    "database/cmds_sequence_2016-07-21.csv", "database/cmds_sequence_2016-07-22.csv", "database/cmds_sequence_2016-07-23.csv", "database/cmds_sequence_2016-07-24.csv", 
    "database/cmds_sequence_2016-07-25.csv", "database/cmds_sequence_2016-07-26.csv", "database/cmds_sequence_2016-07-27.csv", "database/cmds_sequence_2016-07-28.csv", 
    "database/cmds_sequence_2016-07-29.csv", "database/cmds_sequence_2016-07-30.csv", "database/cmds_sequence_2016-07-31.csv", "database/cmds_sequence_2016-08-29.csv", 
    "database/cmds_sequence_2016-08-30.csv", "database/cmds_sequence_2016-08-31.csv", "database/cmds_sequence_2016-09-01.csv", "database/cmds_sequence_2016-09-02.csv", 
    "database/cmds_sequence_2016-09-03.csv", "database/cmds_sequence_2016-09-04.csv", "database/cmds_sequence_2016-09-05.csv", "database/cmds_sequence_2016-09-06.csv", 
    "database/cmds_sequence_2016-09-07.csv", "database/cmds_sequence_2016-09-08.csv", "database/cmds_sequence_2016-09-09.csv", "database/cmds_sequence_2016-09-10.csv", 
    "database/cmds_sequence_2016-09-11.csv", "database/cmds_sequence_2016-09-12.csv", "database/cmds_sequence_2016-09-13.csv"
]

# Read in IP geolocation database
reader = geoip2.database.Reader('GeoLite2-Country.mmdb')

# create countrycode => # of IP addresses mapping
ip_mapping_df = pd.read_csv('ip_addresses_country.csv')
ip_mapping = ip_mapping_df['IP addresses']
ip_mapping.index = ip_mapping_df['ISO country code']
time_mapping = ip_mapping_df['Timezone']

def get_country_code(ip):
    try:
        return reader.country(ip).country.iso_code
    except geoip2.errors.AddressNotFoundError:
        return "Not in Database"

def read_data(path: str) -> pd.DataFrame:
    """ Reads csv at `path` into a pandas dataframe """
    df = pd.read_csv(path, 
                   sep="\$\$",
                   header=None, 
                   names=["Timestamp", "Src IP", "Src Port", "Dest IP", "Dest Port", "Commandlist"], 
                   index_col=False)
    df['Commandlist'] = df['Commandlist'].apply(parse_command_string)
    return df

def login_attempts(data: pd.DataFrame) -> pd.Series:
    """ Returns amount of login attempts the honeypot got for a day """
    return len(data)

def parse_command_string(commandstring: str) -> list:
    """ Parses a commandstring from the dataset into a python list """
    # Remove square brackets and first/last apostrophe
    commandstring = commandstring[2:-2]
    
    # Split on ', '
    commandstring = commandstring.split(r"', '")
    
    return commandstring

def get_isp(ip: str) -> str:
    """ Returns the ISP given the IP address """
    url = "http://ip-api.com/json/" + ip
    response = requests.get(url=url)
    json = response.json()
    if json["status"] == "fail":
        return "unknown"
    return json["isp"]

def country_counts(data: pd.DataFrame) -> pd.Series:
    """ Return value counts of src IP countries """
    return data['country'].value_counts()

In [15]:
data = read_data(files[0])
print("Login attempts", login_attempts(data))



Login attempts 54857


In [4]:
print(time_mapping)

0       5.0
1       2.0
2       1.0
3       1.0
4     -12.0
       ... 
245    12.0
246     NaN
247     3.0
248     2.0
249     2.0
Name: Timezone, Length: 250, dtype: float64


In [6]:
morning = 0    # 06:00-12:00
afternoon = 0  # 12:00-18:00
evening = 0    # 18:00-00:00
night = 0      # 00:00-06:00
for f in files:
    print("Reading file:", f)
    
    # Read data
    data = read_data(f)
    
    for index, row in data.iterrows():
        try:
            time = row['Timestamp']
            country_code = get_country_code(row['Src IP'])
            time_zone = ip_mapping_df.loc[ip_mapping_df['ISO country code'] == country_code]['Timezone'].values[0]
            time = parser.parse(time)
            time = time + timedelta(hours=time_zone)
            
            if time.hour >= 6 and time.hour < 12:
                morning += 1
            elif time.hour >= 12 and time.hour < 18:
                afternoon += 1
            elif time.hour >= 18 and time.hour < 24:
                evening += 1
            elif time.hour >= 0 and time.hour < 6:
                night += 1
        except:
            print("Something went wrong")
    print("Morning: ", morning)
    print("Afternoon: ", afternoon)
    print("Evening: ", evening)
    print("Night: ", night)
print("Morning: ", morning)
print("Afternoon: ", afternoon)
print("Evening: ", evening)
print("Night: ", night)

Reading file: database/cmds_sequence_2016-07-01.csv




Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Morning:  11140
Afternoon:  14174
Evening:  14319
Night:  15184
Reading file: database/cmds_sequence_2016-07-02.csv
Something went wrong
Something went wrong
So

Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Morning:  97572
Afternoon:  110639
Evening:  121884
Night:  104056
Reading file: database/cmds_sequence_2016-07-09.csv
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Morning:  107389
Afternoon:  122664
Evening:  134014
Night:  114175
Reading file: database/cmds_sequence

Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something wen

Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Morning:  267743
Afternoon:  302473
Evening:  326525
Night:  295909
Reading file: database/cmds_sequence_2016-07-24.csv
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wron

Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Morning:  408844
Afternoon:  43935

Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Morning:  512283
Afternoon:  540504
Evening:  624732
Night:  556894
Reading file: database/cmds_sequence_2016-09-01.csv
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wron

Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something wen

Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Morning:  696080
Afternoon:  730170
Evening:  881838
Night:  752078
Reading file: database/cmds_sequence_2016-09-07.csv
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Morning:  707686
Afternoon:  742493
Evening:  896417
Night:  764354
Reading file: database/cmds_sequence_2016-09-08.csv
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Something went wrong
Some

In [36]:
chi_result = chisquare([780430, 810375, 989159, 831268])
print(chi_result)
chi_result = chisquare([13,9,8,11,5,14])
print(chi_result)

Power_divergenceResult(statistic=30598.56517997017, pvalue=0.0)
Power_divergenceResult(statistic=5.6, pvalue=0.34710506828171545)
