# Requests Analysis per hour

This analysis focuses on the hour of the day at which requests occur.



In [None]:
# Title of this analysis. Not referenced in the cells visible here; presumably
# read by one of the scripts loaded below - TODO confirm.
APP_NAME = "Request Analysis per hour of SSH and Telnet requests"


In [None]:
%load configuration.py

In [None]:
%load setup-pd.py

In [None]:
%load data.py


In [None]:
# Split the log by protocol category
# ================

# One frame per protocol, so that SSH and Telnet requests can be told apart.
# Only the origin, origin_id, timestamp and ip columns are kept.

df_ssh = df_log.loc[
    df_log['category'] == 'ssh',
    ['origin', 'origin_id', 'timestamp', 'ip'],
]
df_tel = df_log.loc[
    df_log['category'] == 'telnet',
    ['origin', 'origin_id', 'timestamp', 'ip'],
]

# Last expression of the cell: displays the SSH frame in the notebook.
df_ssh


In [None]:
# Group dataframes according to specific groups
# ================

# Requests per hour of the day: in total, per network type (h[0] of every
# entry in honeypots) and per honeypot (keyed "<h[0]> <h[1]>").


def _count_per_hourly_bin(df_requests):
    """Count the non-null values of every column per hourly bin of 'timestamp'."""
    return df_requests.resample('H', on='timestamp').count()


def _sum_per_hour_of_day(df_hourly):
    """Fold hourly bins into hours of the day, keeping one 'requests' column.

    Sums all bins that share the same hour of the day, drops the counts of
    'origin', 'origin_id' and 'ip', and renames the 'timestamp' count to
    'requests'.
    """
    per_hour = df_hourly.groupby(df_hourly.index.hour).sum()
    per_hour = per_hour.drop(['origin', 'origin_id', 'ip'], axis=1)
    return per_hour.rename(columns={'timestamp': 'requests'})


df_ts_ssh = {'total': _count_per_hourly_bin(df_ssh)}
df_ts_tel = {'total': _count_per_hourly_bin(df_tel)}

for h in honeypots:
    network, honeypot = h[0], h[1]

    for df_requests, df_ts in ((df_ssh, df_ts_ssh), (df_tel, df_ts_tel)):
        in_network = df_requests.origin == network

        # Add the total of the current network type the first time it is seen
        if network not in df_ts:
            df_ts[network] = _count_per_hourly_bin(df_requests[in_network])

        # Both conditions are combined into a single mask built from the same
        # frame. Chaining two selections (df[a][b]) applies a mask of the full
        # frame to an already filtered one, which pandas has to reindex and
        # reports with a "Boolean Series key will be reindexed" warning.
        is_honeypot = in_network & (df_requests.origin_id == honeypot)
        df_ts[f"{network} {honeypot}"] = _count_per_hourly_bin(df_requests[is_honeypot])

# Reduce every group from hourly bins to the 24 hours of the day
df_ts_ssh = {k: _sum_per_hour_of_day(v) for k, v in df_ts_ssh.items()}
df_ts_tel = {k: _sum_per_hour_of_day(v) for k, v in df_ts_tel.items()}
    

In [None]:
# Compare the total requests of both protocols in one plot
# ================

# Puts the SSH and the Telnet totals next to each other in a single frame,
# one column per protocol, so that both end up in the same bar chart.

plots = {}
df = (
    df_ts_ssh['total']
    .rename(columns={'requests': 'SSH'})
    .join(df_ts_tel['total'])
    .rename(columns={'requests': 'Telnet'})
)

df.plot.bar(
    title="Comparison SSH & Telnet",
    xlabel="Hour of the day",
    ylabel="No. of requests",
    figsize=(12, 8),
)


In [None]:
# Summarize data frames to plots of SSH requests
# ================

# This section collects several per-hour frames into one frame per plot,
# so that related groups are drawn side by side in a single bar chart.


def _join_request_columns(frames, keys):
    """Join the 'requests' columns of frames[k] for every k in keys.

    Every column is named after its capitalized key. The frames stored in
    *frames* are not modified: rename() returns a new frame here, whereas an
    in-place rename of the first frame would change the shared data itself.
    Returns None if *keys* is empty.

    NOTE(review): join() is a left join, so hours missing from the first
    frame are dropped from the result.
    """
    joined = None
    for key in keys:
        column = frames[key].rename(columns={'requests': key.capitalize()})
        joined = column if joined is None else joined.join(column)
    return joined


plots = {}

# Overview of networks
# Prepend 'total' to the list to show the overall sum as an own column.
keys = ['cloud', 'residential', 'campus']
df = _join_request_columns(df_ts_ssh, keys)

plots['SSH Requests per network type'] = df


# Overview specific network types
network_types = ['cloud', 'residential', 'campus']

for net in network_types:
    keys = [k for k in df_ts_ssh if net in k]
    # Comment out the following line if the total of the network type
    # should be included
    keys = [k for k in keys if k != net]

    df = _join_request_columns(df_ts_ssh, keys)
    # A network type without any honeypot has nothing to plot
    if df is not None:
        plots[f"SSH requests per honeypot in {net} networks"] = df


for title, frame in plots.items():
    frame.plot.bar(
        figsize=(12, 8),
        title=title,
        xlabel="Hour of the day",
        ylabel="No. of requests",
    )


In [None]:
# Summarize data frames to plots of Telnet requests
# ================

# This section collects several per-hour frames into one frame per plot,
# so that related groups are drawn side by side in a single bar chart.


def _join_request_columns(frames, keys):
    """Join the 'requests' columns of frames[k] for every k in keys.

    Every column is named after its capitalized key. The frames stored in
    *frames* are not modified: rename() returns a new frame here, whereas an
    in-place rename of the first frame would change the shared data itself.
    Returns None if *keys* is empty.

    NOTE(review): join() is a left join, so hours missing from the first
    frame are dropped from the result.
    """
    joined = None
    for key in keys:
        column = frames[key].rename(columns={'requests': key.capitalize()})
        joined = column if joined is None else joined.join(column)
    return joined


plots = {}

# Overview of networks
# Prepend 'total' to the list to show the overall sum as an own column.
keys = ['cloud', 'residential', 'campus']
df = _join_request_columns(df_ts_tel, keys)

plots['Telnet requests per network type'] = df


# Overview specific network types
network_types = ['cloud', 'residential', 'campus']

for net in network_types:
    keys = [k for k in df_ts_tel if net in k]
    # Comment out the following line if the total of the network type
    # should be included
    keys = [k for k in keys if k != net]

    df = _join_request_columns(df_ts_tel, keys)
    # A network type without any honeypot has nothing to plot
    if df is not None:
        plots[f"Telnet requests per honeypot in {net} networks"] = df


for title, frame in plots.items():
    frame.plot.bar(
        figsize=(12, 8),
        title=title,
        xlabel="Hour of the day",
        ylabel="No. of requests",
    )
