# Requests Analysis per hour

This analysis focuses on the hour of the day at which requests occur.



In [None]:
# Human-readable title of this analysis.
# NOTE(review): presumably read by the scripts loaded in the following cells - confirm.
APP_NAME = "Request Analysis per hour of SSH and Telnet requests"


In [None]:
%load configuration.py

In [None]:
%load setup-pd.py


In [None]:
%load data.py


In [None]:
# Split the log by protocol category
# ================

# One frame per protocol (SSH, Telnet), so that the requests of each protocol
# can be analysed separately. Only the columns needed further down are kept,
# and each frame is indexed by the request timestamp.

df_ssh, df_tel = (
    df_log.loc[
        df_log['category'] == protocol,
        ['origin', 'origin_id', 'timestamp', 'ip'],
    ].set_index('timestamp')
    for protocol in ('ssh', 'telnet')
)


In [None]:
# Count requests per honeypot and hour of the day
# ================

# For every (origin, origin_id) pair the requests are counted per hour of the
# day (taken from the timestamp index), accumulated over all days in the log.
#
# The counts are grouped directly on the hour of the day. An intermediate
# per-clock-hour bucketing (pd.Grouper) is not needed: summing those bucket
# counts per hour of the day yields exactly the same numbers, and skipping it
# avoids the deprecated uppercase 'H' frequency alias.
#
# Result: frames indexed by 'hour' with the columns 'origin', 'origin_id' and
# 'requests'.

# The hour level inherits the name of the timestamp index; 'ip' holds the
# per-group count after count().
columns = {
    'timestamp': 'hour',
    'ip': 'requests',
}

df_ts_ssh, df_ts_tel = (
    frame.groupby(['origin', 'origin_id', frame.index.hour])
    .count()
    .reset_index()
    .rename(columns=columns)
    .set_index('hour')
    for frame in (df_ssh, df_tel)
)

In [None]:
# Compare SSH and Telnet requests per hour of the day
# ================

# Builds one frame with a column per protocol so both can be shown in a
# single bar plot.
#
# Only the 'requests' column is averaged: the frames also carry the textual
# 'origin' column, and recent pandas versions raise a TypeError when mean()
# is applied to non-numeric columns instead of silently dropping them.
#
# The join is an outer join, so hours in which only one of the two protocols
# received requests are kept (the other protocol is NaN for that hour).
#
# NOTE: the mean is taken over the rows present for an hour. A honeypot
# without any request in that hour has no row and is not part of the average.

df = (
    df_ts_ssh.groupby(df_ts_ssh.index)[['requests']]
    .mean()
    .rename(columns={'requests': 'SSH'})
    .join(
        df_ts_tel.groupby(df_ts_tel.index)[['requests']]
        .mean()
        .rename(columns={'requests': 'Telnet'}),
        how='outer',
    )
)

df.plot.bar(
    figsize=(12, 8),
    title="Comparison SSH & Telnet",
    xlabel="Hour of the day",
    ylabel="No. of requests (avg. per honeypot)",
)

In [None]:
# Group dataframes according to specific network types
# ================
#
# Only considers the SSH requests.
#
# Produces one plot with the average number of requests per network type and
# one plot per network type with the requests of each individual honeypot.
# NOTE(review): `origins` is not defined in this cell; presumably it comes from
# the scripts loaded at the top of the notebook - confirm.


def _column_label(levels):
    """Build a flat column label from a tuple of column levels.

    Every level is capitalized and joined with a space; the constant
    'requests' level is left out.
    """
    return ' '.join(
        str(level).capitalize() for level in levels if str(level) != 'requests'
    )


plots = {}

# Average over the honeypots of each network type. Only 'requests' is
# averaged, so no mean of the identifier column is computed and discarded.
df = (
    df_ts_ssh.groupby([df_ts_ssh.index, 'origin'])[['requests']]
    .mean()
    .unstack('origin')
)
df.columns = df.columns.map(_column_label)

plots['Average SSH requests per network type'] = df

for net in origins:
    df = df_ts_ssh[df_ts_ssh.origin == net]
    if df.empty:
        # A network type without any SSH request cannot be plotted.
        print(f"No SSH requests for {net} networks, skipping plot.")
        continue

    df = df.groupby(['hour', 'origin', 'origin_id']).sum().unstack(['origin', 'origin_id'])
    df.columns = df.columns.map(_column_label)

    plots[f"SSH requests per honeypot in {net} networks"] = df


for k, v in plots.items():
    if v.empty:
        # plot.bar raises on frames without data.
        continue

    v.plot.bar(
        figsize=(12, 8),
        title=k,
        xlabel="Hour of the day",
        ylabel="No. of requests",
    )



In [None]:
# Group dataframes according to specific network types
# ================
#
# Only considers the Telnet requests.
#
# Produces one plot with the average number of requests per network type and
# one plot per network type with the requests of each individual honeypot.
# NOTE(review): `origins` is not defined in this cell; presumably it comes from
# the scripts loaded at the top of the notebook - confirm.


def _column_label(levels):
    """Build a flat column label from a tuple of column levels.

    Every level is capitalized and joined with a space; the constant
    'requests' level is left out.
    """
    return ' '.join(
        str(level).capitalize() for level in levels if str(level) != 'requests'
    )


plots = {}

# Average over the honeypots of each network type. Only 'requests' is
# averaged, so no mean of the identifier column is computed and discarded.
df = (
    df_ts_tel.groupby([df_ts_tel.index, 'origin'])[['requests']]
    .mean()
    .unstack('origin')
)
df.columns = df.columns.map(_column_label)

plots['Average Telnet requests per network type'] = df

for net in origins:
    df = df_ts_tel[df_ts_tel.origin == net]
    if df.empty:
        # A network type without any Telnet request cannot be plotted.
        print(f"No Telnet requests for {net} networks, skipping plot.")
        continue

    df = df.groupby(['hour', 'origin', 'origin_id']).sum().unstack(['origin', 'origin_id'])
    df.columns = df.columns.map(_column_label)

    plots[f"Telnet requests per honeypot in {net} networks"] = df


for k, v in plots.items():
    if v.empty:
        # plot.bar raises on frames without data.
        continue

    v.plot.bar(
        figsize=(12, 8),
        title=k,
        xlabel="Hour of the day",
        ylabel="No. of requests",
    )

