# Request Analysis per Date

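This analysis focuses on how the requests are distributed over time: requests are counted per day and compared across protocols (SSH and Telnet), network types, and individual honeypots.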



In [None]:
# Display name of this analysis.
# NOTE(review): presumably read by the scripts loaded in the following cells
# (e.g. configuration.py) — confirm against those files.
APP_NAME = "Request Analysis per date"


In [None]:
%load configuration.py


In [None]:
%load setup-pd.py


In [None]:
%load data.py


In [None]:
# Get logs per protocol category
# ================
#
# Splitting the log by category lets the SSH and Telnet requests be analysed
# separately. For each protocol two frames are built:
#   df_<proto>     raw log rows of that protocol, indexed by timestamp
#   df_ts_<proto>  number of requests per day, network type ('origin') and
#                  honeypot ('origin_id'), indexed by the day

_log_columns = ['origin', 'origin_id', 'timestamp', 'ip']

df_ssh = df_log.loc[df_log.category == 'ssh', _log_columns].set_index('timestamp')
df_tel = df_log.loc[df_log.category == 'telnet', _log_columns].set_index('timestamp')

# After grouping, 'ip' is the only remaining column, so its count is the
# number of logged requests (rows with a non-null ip) in each group.
df_ts_ssh = (
    df_ssh
    .groupby([pd.Grouper(freq='D'), 'origin', 'origin_id'])
    .count()
    .reset_index()
    .set_index('timestamp')
    .rename(columns={'ip': 'requests'})
)

df_ts_tel = (
    df_tel
    .groupby([pd.Grouper(freq='D'), 'origin', 'origin_id'])
    .count()
    .reset_index()
    .set_index('timestamp')
    .rename(columns={'ip': 'requests'})
)


In [None]:
# Compare SSH and Telnet: average number of requests per honeypot and day
# ================
#
# Only the numeric 'requests' column is averaged. Calling mean() on the whole
# frame would also include the non-numeric 'origin' column; older pandas
# silently dropped such columns, but pandas >= 2.0 raises a TypeError instead.

df = df_ts_ssh.groupby(df_ts_ssh.index)['requests'].mean().to_frame('SSH')

# Left join: the days present in the SSH data define the rows of the chart.
df = df.join(df_ts_tel.groupby(df_ts_tel.index)['requests'].mean().rename('Telnet'))

df.plot.bar(
    figsize=(12,8),
    title="Comparison SSH & Telnet",
    xlabel="Date",
    ylabel="No. of requests (avg. per honeypot)",
)


#df = df_ts_ssh.groupby(df_ts_ssh.index).mean().drop(['origin_id'], axis=1).rename(columns())

#df = df_ts_ssh.unstack(['origin', 'origin_id'])


#df.columns = df.columns.map(
#    lambda x: ' '.join([str(i).capitalize() for i in x if str(i) != 'ip'])
#)
#df[df.origin == 'campus']

In [None]:
# Group dataframes according to specific network types
# ================
#
# SSH only: one chart with the daily average per network type, followed by
# one chart per network type showing the daily totals of every honeypot.


def _column_label(levels):
    """Join the capitalised column levels into one label, skipping 'requests'."""
    parts = (str(level) for level in levels)
    return ' '.join(part.capitalize() for part in parts if part != 'requests')


avg_by_origin = df_ts_ssh.groupby([df_ts_ssh.index, 'origin']).mean()
avg_by_origin = avg_by_origin.drop(columns=['origin_id']).unstack('origin')
avg_by_origin.columns = avg_by_origin.columns.map(_column_label)

plots = {'Average SSH requests per network type': avg_by_origin}

for net in origins:
    per_honeypot = (
        df_ts_ssh[df_ts_ssh.origin == net]
        .groupby(['timestamp', 'origin', 'origin_id'])
        .sum()
        .unstack(['origin', 'origin_id'])
    )
    per_honeypot.columns = per_honeypot.columns.map(_column_label)
    plots[f"SSH requests per honeypot in {net} networks"] = per_honeypot

for title, frame in plots.items():
    frame.plot.bar(figsize=(12, 8), title=title, xlabel="Date", ylabel="No. of requests")




In [None]:
# Group dataframes according to specific network types
# ================
#
# Telnet only: one chart with the daily average per network type, followed by
# one chart per network type showing the daily totals of every honeypot.


def _column_label(levels):
    """Join the capitalised column levels into one label, skipping 'requests'."""
    parts = (str(level) for level in levels)
    return ' '.join(part.capitalize() for part in parts if part != 'requests')


avg_by_origin = df_ts_tel.groupby([df_ts_tel.index, 'origin']).mean()
avg_by_origin = avg_by_origin.drop(columns=['origin_id']).unstack('origin')
avg_by_origin.columns = avg_by_origin.columns.map(_column_label)

plots = {'Average Telnet requests per network type': avg_by_origin}

for net in origins:
    per_honeypot = (
        df_ts_tel[df_ts_tel.origin == net]
        .groupby(['timestamp', 'origin', 'origin_id'])
        .sum()
        .unstack(['origin', 'origin_id'])
    )
    per_honeypot.columns = per_honeypot.columns.map(_column_label)
    plots[f"Telnet requests per honeypot in {net} networks"] = per_honeypot

for title, frame in plots.items():
    frame.plot.bar(figsize=(12, 8), title=title, xlabel="Date", ylabel="No. of requests")


