# NSG Flow Logs Analysis
This notebook was created to analyse NSG flow log files.
It supports the following phases of an investigation:
* [Azure simple connection](#azure)
* [Azure connection with filters](#azure_filter)
    * [Summarizing data](#summarizing)
    * [Filtering data](#filtering)
* [DataFrame Creation](#dataframe)
* [Log analysis](#analysis):
    * Crossing of logs with IOCs
    * IP address behavior analysis

### <a name="azure"></a> 1 - Azure simple connection
Use this option when the amount of data is small enough to load and work with all of the available information.


In [62]:
#!pip install azure-storage-blob



In [22]:
import os, uuid
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient

# NOTE: `<storageaccount-connectionstring>` is a placeholder, not valid Python.
# Replace it with the storage account connection string (as a quoted string)
# before running this cell.
connect_str = <storageaccount-connectionstring>

# Client used by the menu functions of this notebook to list containers and download blobs.
blob_service_client = BlobServiceClient.from_connection_string(connect_str)

In [23]:
def azure_containers_menu():
    """Show a container picker and load every flow tuple of the chosen containers.

    The containers reachable through the notebook-level ``blob_service_client``
    are offered in a multi-select next to a button and an output area.
    Clicking the button downloads every blob of each selected container,
    parses it as JSON and collects one row per flow tuple (the tuple split on
    commas, with the rule name appended as last field). The rows are bound to
    the notebook global ``tuples_list``.
    """
    import ipywidgets as widgets
    from IPython.display import display, Markdown, clear_output

    available_containers = [item.name for item in blob_service_client.list_containers()]

    out_area = widgets.Output()
    container_select = widgets.SelectMultiple(
        options=available_containers,
        description='Containers:',
        disabled=False)
    read_button = widgets.Button(description='Read container')
    display(container_select, read_button, out_area)

    def _iter_rows(payload):
        """Yield one list of fields per flow tuple found in a parsed blob."""
        for record_group in payload.values():
            for entry in record_group:
                for rule_flows in entry["properties"]["flows"]:
                    rule_name = rule_flows["rule"]
                    for flow_group in rule_flows["flows"]:
                        for flow_tuple in flow_group["flowTuples"]:
                            yield f"{flow_tuple},{rule_name}".split(",")

    def _click_function(_):
        with out_area:
            import json
            collected = []
            for selected_name in list(container_select.value):
                client = blob_service_client.get_container_client(selected_name)
                for blob_item in client.list_blobs():
                    raw_content = client.download_blob(blob_item.name).readall()
                    collected.extend(_iter_rows(json.loads(raw_content)))

            from picatrix.lib import utils
            utils.ipython_bind_global('tuples_list', collected)
            display(Markdown(f'{len(collected)} results are stored in the **tuples_list**.'))

    read_button.on_click(_click_function)


In [24]:
# Render the container selection menu; results end up in the tuples_list global.
azure_containers_menu()

SelectMultiple(description='Containers:', options=('insights-logs-networksecuritygroupflowevent',), value=())

Button(description='Read container', style=ButtonStyle())

Output()

### <a name="azure_filter"></a> 2 - Azure connection with filters 
Use this option when you want to filter the data before analysing it.  
Date filtering is applied on the fly using each blob's last-modified time; value filtering relies on a summary of the data that is built first.

## <a name="summarizing"></a> 2.1 - Summarizing the data

In [25]:
import os, uuid
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient

# NOTE: `<storageaccount-connectionstring>` is a placeholder, not valid Python.
# Replace it with the storage account connection string (as a quoted string)
# before running this cell.
connect_str = <storageaccount-connectionstring>

# Client used by the menu functions of this notebook to list containers and download blobs.
blob_service_client = BlobServiceClient.from_connection_string(connect_str)

In [26]:
def azure_containers_menu_filtered_data():
    """Show a container picker and summarise the chosen containers.

    Clicking the button downloads every blob of each selected container and
    counts how often each value occurs in the fields at positions
    5, 6, 7, 8 and 13 of every flow tuple (the rule name is appended as field
    13). The summary is bound to the notebook global ``data_min`` and the
    selection widget to the global ``container_select``.
    """
    import ipywidgets as widgets
    from IPython.display import display, Markdown, clear_output

    available_containers = [item.name for item in blob_service_client.list_containers()]

    out_area = widgets.Output()
    container_select = widgets.SelectMultiple(
        options=available_containers,
        description='Containers:',
        disabled=False)
    read_button = widgets.Button(description='Read container')
    display(container_select, read_button, out_area)

    def _click_function(_):
        with out_area:
            import json

            # Field position -> {"name": field name, "values": {value: occurrences}}
            field_names = ("TimeStamp", "SourceIP", "DestinationIP", "SourcePort",
                           "DestinationPort", "Protocol", "TrafficFlow",
                           "TrafficDecision", "FlowState", "PacketsSent",
                           "BytesSent", "PacketsReceived", "BytesReceived", "Rule")
            data_min = {position: {"name": label, "values": {}}
                        for position, label in enumerate(field_names)}

            desired_indexes = (5, 6, 7, 8, 13)

            for selected_name in list(container_select.value):
                client = blob_service_client.get_container_client(selected_name)
                for blob_item in client.list_blobs():
                    payload = json.loads(client.download_blob(blob_item.name).readall())
                    for record_group in payload.values():
                        for entry in record_group:
                            for rule_flows in entry["properties"]["flows"]:
                                rule_name = rule_flows["rule"]
                                for flow_group in rule_flows["flows"]:
                                    for flow_tuple in flow_group["flowTuples"]:
                                        fields = f"{flow_tuple},{rule_name}".split(",")
                                        for position, field in enumerate(fields):
                                            if position not in desired_indexes:
                                                continue
                                            counts = data_min[position]["values"]
                                            counts[field] = counts.get(field, 0) + 1

            from picatrix.lib import utils
            utils.ipython_bind_global('data_min', data_min)
            utils.ipython_bind_global('container_select', container_select)
            display(Markdown('Summarized data are stored in the **data_min**.'))

    read_button.on_click(_click_function)


In [27]:
# Render the container selection menu; the summary ends up in the data_min global.
azure_containers_menu_filtered_data()

SelectMultiple(description='Containers:', options=('insights-logs-networksecuritygroupflowevent',), value=())

Button(description='Read container', style=ButtonStyle())

Output()

## <a name="filtering"></a> 2.2 - Filtering data

In [28]:
def filter_data_menu():
    """Display the filter widgets and load the filtered flow tuples.

    Reads the notebook globals ``data_min`` (value summary),
    ``container_select`` (container selection widget) and
    ``blob_service_client``. One multi-select is shown per summarised field,
    each option formatted as ``"<value>: <occurrences>"``, together with two
    UTC datetime pickers.

    Clicking *Filter* downloads the blobs of the selected containers and binds
    the resulting rows to the notebook global ``tuples_list``:

    * a blob is read only when its ``last_modified`` lies strictly after
      *From* and strictly before *To*; each bound is applied on its own, and
      a picker left empty does not restrict the range;
    * a flow tuple is dropped when any of its fields holds a value selected
      in the matching multi-select.
    """
    import ipywidgets as widgets
    from IPython.display import display, Markdown, clear_output
    from ipywidgets import GridspecLayout
    import ipydatetime
    import pytz

    desired_indexes = (5, 6, 7, 8, 13)
    grid_columns = 3
    widgets_list = []

    for index in desired_indexes:
        name = data_min[index]["name"]
        options = [f"{value}: {count}"
                   for value, count in data_min[index]["values"].items()]

        widgets_list.append(widgets.SelectMultiple(
            description=f'{name}:',
            disabled=False,
            options=options,
            layout={'height': '200px', 'width': '95%'}))

    grid_datetimes = GridspecLayout(1, 3)
    from_datetime_picker = ipydatetime.DatetimePicker(tzinfo=pytz.utc, layout=widgets.Layout(width='auto'),description='From:')
    to_datetime_picker = ipydatetime.DatetimePicker(tzinfo=pytz.utc, layout=widgets.Layout(width='auto'),description='To:')
    grid_datetimes[0, 0] = from_datetime_picker
    grid_datetimes[0, 1] = to_datetime_picker

    def _blob_in_range(blob):
        """Tell whether the blob's last modification lies inside the picked range."""
        start = from_datetime_picker.value
        end = to_datetime_picker.value
        if start is not None and blob.last_modified <= start:
            return False
        if end is not None and blob.last_modified >= end:
            return False
        return True

    def filter_data(_):
        with output:
            clear_output()

            # Field position -> set of values picked in the matching widget.
            # The option text is "<value>: <count>"; splitting from the right
            # keeps values that themselves contain a colon intact.
            query_vals = {}
            for index, widget in zip(desired_indexes, widgets_list):
                chosen = {option.rsplit(": ", 1)[0] for option in widget.value}
                if chosen:
                    query_vals[index] = chosen

            import json
            tuples_list = []
            for container_name in list(container_select.value):
                container_client = blob_service_client.get_container_client(container_name)

                for blob in container_client.list_blobs():
                    if not _blob_in_range(blob):
                        continue
                    blob_content = container_client.download_blob(blob.name)
                    ngs_dict = json.loads(blob_content.readall())

                    for records in ngs_dict.values():
                        for record in records:
                            for flow in record["properties"]["flows"]:
                                rule = flow["rule"]
                                for f in flow["flows"]:
                                    for flow_tuple in f["flowTuples"]:
                                        fields = f"{flow_tuple},{rule}".split(",")
                                        # NOTE(review): picked values are EXCLUDED from the
                                        # result (unchanged from the previous behaviour);
                                        # confirm that exclusion, not inclusion, is intended.
                                        if any(fields[index] in chosen
                                               for index, chosen in query_vals.items()):
                                            continue
                                        tuples_list.append(fields)

            from picatrix.lib import utils
            utils.ipython_bind_global('tuples_list', tuples_list)
            display(Markdown(f'{len(tuples_list)} results are stored in **tuples_list**.'))

    # Lay the widgets out row by row, at most ``grid_columns`` per row.
    n_rows = (len(widgets_list) + grid_columns - 1) // grid_columns
    grid = GridspecLayout(n_rows, min(len(widgets_list), grid_columns))
    for position, widget in enumerate(widgets_list):
        row, col = divmod(position, grid_columns)
        grid[row, col] = widget

    output = widgets.Output()
    button_filter = widgets.Button(description='Filter')
    button_filter.on_click(filter_data)
    hints = Markdown('''HINTS:  
                    *Protocol*: T (TCP), U (UDP)  
                    *FlowState*: B (Begin) C (Continuing) E (End)  
                    *Traffic Flow*: I (Inbound) O (Outbound)  
                    *Traffic Decission*: D (Denied) A (Allowed)''')
    display(hints, grid_datetimes, grid, button_filter, output)

In [29]:
# Render the filter menu; it needs the data_min and container_select globals
# bound by azure_containers_menu_filtered_data.
filter_data_menu()

HINTS:  
                    *Protocol*: T (TCP), U (UDP)  
                    *FlowState*: B (Begin) C (Continuing) E (End)  
                    *Traffic Flow*: I (Inbound) O (Outbound)  
                    *Traffic Decission*: D (Denied) A (Allowed)

GridspecLayout(children=(DatetimePicker(value=None, description='From:', layout=Layout(grid_area='widget001', …

GridspecLayout(children=(SelectMultiple(description='Protocol:', layout=Layout(grid_area='widget001', height='…

Button(description='Filter', style=ButtonStyle())

Output()

## <a name="dataframe"></a> 2.3 - DataFrame Creation

In [30]:
import pandas as pd

# Column name -> dtype for the 14 fields of every row in tuples_list.
# The key order of this mapping is also the column order of the DataFrame.
dtypes = {
    "TimeStamp": "object",
    "SourceIP": "object",
    "DestinationIP": "object",
    "SourcePort": "int",
    "DestinationPort": "int",
    "Protocol": "object",
    "TrafficFlow": "object",
    "TrafficDecision": "object",
    "FlowState": "object",
    "PacketsSent": "int",
    "BytesSent": "int",
    "PacketsReceived": "int",
    "BytesReceived": "int",
    "Rule": "object",
}

df = pd.DataFrame(tuples_list, columns=list(dtypes))

# Empty fields are replaced by 0 before the columns are cast to their dtypes.
df = df.replace("", 0).astype(dtype=dtypes)
# TimeStamp holds seconds since the epoch.
df['TimeStamp'] = pd.to_datetime(df['TimeStamp'], unit='s')


In [31]:
# Flows in state "E" (End) towards 20.106.153.175, largest BytesSent first.
df[(df['DestinationIP'] == '20.106.153.175') & (df["FlowState"] == "E")].sort_values(by="BytesSent", ascending=False)

Unnamed: 0,TimeStamp,SourceIP,DestinationIP,SourcePort,DestinationPort,Protocol,TrafficFlow,TrafficDecision,FlowState,PacketsSent,BytesSent,PacketsReceived,BytesReceived,Rule
17,2021-12-11 21:08:21,10.0.0.6,20.106.153.175,53388,443,T,O,A,E,11,1290,8,778,DefaultRule_AllowInternetOutBound
26,2021-12-11 21:13:23,10.0.0.6,20.106.153.175,53427,443,T,O,A,E,5,490,4,375,DefaultRule_AllowInternetOutBound
51,2021-12-11 21:23:27,10.0.0.6,20.106.153.175,53511,443,T,O,A,E,4,430,2,126,DefaultRule_AllowInternetOutBound
0,2021-12-11 21:00:19,10.0.0.6,20.106.153.175,53322,443,T,O,A,E,1,66,1,66,DefaultRule_AllowInternetOutBound
627,2021-12-12 23:44:20,10.0.0.6,20.106.153.175,50664,443,T,O,A,E,1,66,0,0,DefaultRule_AllowInternetOutBound
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
574,2021-12-12 23:42:48,10.0.0.6,20.106.153.175,50632,443,T,O,A,E,1,66,0,0,DefaultRule_AllowInternetOutBound
577,2021-12-12 23:42:50,10.0.0.6,20.106.153.175,50637,443,T,O,A,E,1,66,1,66,DefaultRule_AllowInternetOutBound
31,2021-12-11 21:15:35,10.0.0.6,20.106.153.175,53412,443,T,O,A,E,0,0,0,0,DefaultRule_AllowInternetOutBound
6,2021-12-11 21:02:19,10.0.0.6,20.106.153.175,53339,443,T,O,A,E,0,0,0,0,DefaultRule_AllowInternetOutBound


In [40]:
# Flows in state "E" (End) towards 52.152.170.33.
df[(df['DestinationIP'] == '52.152.170.33') & (df["FlowState"] == "E")]

Unnamed: 0,TimeStamp,SourceIP,DestinationIP,SourcePort,DestinationPort,Protocol,TrafficFlow,TrafficDecision,FlowState,PacketsSent,BytesSent,PacketsReceived,BytesReceived,Rule
683,2021-12-12 23:47:14,10.0.0.6,52.152.170.33,50707,443,T,O,A,E,1,66,19,16882,DefaultRule_AllowInternetOutBound
684,2021-12-12 23:47:14,10.0.0.6,52.152.170.33,50708,443,T,O,A,E,1,66,1,66,DefaultRule_AllowInternetOutBound
689,2021-12-12 23:48:15,10.0.0.6,52.152.170.33,50716,443,T,O,A,E,1,66,9,838,DefaultRule_AllowInternetOutBound
692,2021-12-12 23:49:15,10.0.0.6,52.152.170.33,50725,443,T,O,A,E,5,490,1,66,DefaultRule_AllowInternetOutBound
694,2021-12-12 23:50:15,10.0.0.6,52.152.170.33,50733,443,T,O,A,E,1,66,1,66,DefaultRule_AllowInternetOutBound
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1381,2021-12-13 02:39:36,10.0.0.6,52.152.170.33,52396,443,T,O,A,E,1,66,1,66,DefaultRule_AllowInternetOutBound
1384,2021-12-13 02:40:34,10.0.0.6,52.152.170.33,52406,443,T,O,A,E,1,66,1,66,DefaultRule_AllowInternetOutBound
1386,2021-12-13 02:41:13,10.0.0.6,52.152.170.33,52411,443,T,O,A,E,1,66,1,66,DefaultRule_AllowInternetOutBound
1387,2021-12-13 02:41:44,10.0.0.6,52.152.170.33,52414,443,T,O,A,E,1,66,1,66,DefaultRule_AllowInternetOutBound


### <a name="analysis"></a> 5 - Data analysis
This section covers the analysis of the data.  
First, the logs can be crossed with an IOC file located in the maxmind folder.  
These IOCs were obtained from the <a href="https://github.com/stamparm/maltrail">Maltrail project</a>.  
Second, a behavior analysis can be built for each destination IP address. It is useful for spotting suspicious patterns in information such as the number of connections, the amount of data sent, the duration of connections...   
It is also possible to dynamically create scatter charts from the data.

In [32]:
def maltrail_parser(database_path="maxmind/IOCS.csv"):
    """
    Parse the maltrails csv file, extract the domain or IP of every IOC and create a Pandas DataFrame.
    :param database_path: String: Path of the headerless csv file whose columns are read as ioc, hit and source.
    :return mal_df: DataFrame: Pandas DataFrame with the columns ioc, hit, source and domain_ip; rows without an extractable domain or IP are dropped.
    """
    import pandas as pd

    mal_df = pd.read_csv(database_path, names=['ioc', 'hit', 'source'])

    # Raw string so that the regex escapes (\. and \/) are not treated as
    # invalid Python string escapes. The pattern skips an optional scheme,
    # an optional "user@" part and an optional "www." prefix, then captures
    # everything up to the first ':', '/', '?' or newline.
    # expand=False returns the single capture group as a Series.
    mal_df["domain_ip"] = mal_df["ioc"].str.extract(
        r"^(?:http.*?)?(?:[^@\n]+@)?(?:www\.)?([^:\/\n?]+)", expand=False)
    return mal_df[mal_df["domain_ip"].notna()]

def geoip_maxmind(df, ip_columns, database_path="maxmind/GeoLite2-Country.mmdb"):
    """
    Geo-localize the IP addresses of the provided columns and add one "<column>_geo" column per input column.
    :param df: DataFrame: Logs Pandas DataFrame; the new columns are added to it in place.
    :param ip_columns: List: List of columns to geo-localize.
    :param database_path: String: Path of the MaxMind country database file.
    :return df: DataFrame: The same Pandas DataFrame with the geolocation columns included.
    """
    import geoip2.database
    import pandas as pd
    import numpy as np

    def get_country(ip):
        # Best effort: any lookup failure yields NaN. Exception (rather than
        # a bare except) keeps KeyboardInterrupt/SystemExit propagating.
        try:
            response = reader.country(ip)
            return response.country.name if response else np.nan
        except Exception:
            return np.nan

    with geoip2.database.Reader(database_path) as reader:
        for column in ip_columns:
            # Resolve every distinct address once, then map the result back
            # onto the full column.
            unique_ips = df[column].unique()
            unique_ips = pd.Series(unique_ips, index=unique_ips)
            df[f"{column}_geo"] = df[column].map(unique_ips.apply(get_country))
    return df

            
def asn_maxmind(df, ip_columns, database_path="maxmind/GeoLite2-ASN.mmdb"):
    """
    Obtain the ASN organization of the IP addresses of the provided columns and add one "<column>_asn" column per input column.
    :param df: DataFrame: Logs Pandas DataFrame; the new columns are added to it in place.
    :param ip_columns: List: List of columns to obtain the ASN of.
    :param database_path: String: Path of the MaxMind ASN database file.
    :return df: DataFrame: The same Pandas DataFrame with the ASN columns included.
    """
    import geoip2.database
    import pandas as pd
    import numpy as np

    def get_asn(ip):
        # Best effort: any lookup failure yields NaN. Exception (rather than
        # a bare except) keeps KeyboardInterrupt/SystemExit propagating.
        try:
            response = reader.asn(ip)
            return response.autonomous_system_organization if response else np.nan
        except Exception:
            return np.nan

    with geoip2.database.Reader(database_path) as reader:
        for column in ip_columns:
            # Resolve every distinct address once, then map the result back
            # onto the full column.
            unique_ips = df[column].unique()
            unique_ips = pd.Series(unique_ips, index=unique_ips)
            df[f"{column}_asn"] = df[column].map(unique_ips.apply(get_asn))

    return df

In [33]:
def model_logips(df, src_col, dst_col):
    """
    Build a per-destination summary of the traffic found in the logs.
    :param df: DataFrame: Logs Pandas DataFrame. It must contain src_col, dst_col and the columns PacketsSent, BytesSent, PacketsReceived and BytesReceived. It is not modified.
    :param src_col: String: Column holding the source of each connection.
    :param dst_col: String: Column holding the destination of each connection.
    :return df_final: DataFrame: One row per unique dst_col value (in order of first appearance) with n_connections, n_sourceips and, for each of the four numeric columns, its sum, min, max, mean, var and mode.
    """
    import pandas as pd

    n_cols = ["PacketsSent", "BytesSent", "PacketsReceived", "BytesReceived"]
    named_stats = ["sum", "min", "max", "mean", "var"]

    df_final = pd.Series(df[dst_col].unique(), name=dst_col).to_frame()

    by_dst = df.groupby(dst_col)
    df_final["n_connections"] = df_final[dst_col].map(by_dst.size())
    df_final["n_sourceips"] = df_final[dst_col].map(by_dst[src_col].nunique())

    for col in n_cols:
        # Missing values count as 0; fillna works on a copy so the caller's
        # DataFrame is left untouched.
        per_dst = df[col].fillna(0).groupby(df[dst_col])

        # One pass computes all the named statistics of the column.
        stats = per_dst.agg(named_stats)
        for stat in named_stats:
            df_final[f"{col}_{stat}"] = df_final[dst_col].map(stats[stat])

        # When several values tie, mode() returns them sorted; keep the smallest.
        modes = per_dst.agg(lambda values: values.mode().iloc[0])
        df_final[f"{col}_mode"] = df_final[dst_col].map(modes)

    return df_final

def is_in_maltrails_dask(df, column_names=None,
                    maltrails_path="", type=None):
    """
    Calls the maltrails parser function and searches the provided columns for values present in maltrails.
    :param df: DataFrame: Logs DataFrame. Lazy frames exposing ``compute()`` are computed; plain Pandas DataFrames are used as they are.
    :param column_names: List: List of columns to search in; every column of df when empty.
    :param maltrails_path: String: Maltrails file path. Currently unused; the parser reads its default file.
    :param type: String: Selected analysis; it must contain "Maltrails IOCs".
    :return result: DataFrame: Rows that contain an IOC, with one "<column>_hit" column per searched column, or False when nothing matches.
    :raises ValueError: When type does not contain "Maltrails IOCs".
    """
    import pandas as pd

    def _materialize(obj):
        """Return obj itself, or its computed value when it is lazy."""
        return obj.compute() if hasattr(obj, "compute") else obj

    if not type or "Maltrails IOCs" not in type:
        raise ValueError(f'Unsupported analysis type: {type!r}; expected "Maltrails IOCs".')

    mal_df = maltrail_parser()
    known_iocs = set(mal_df["domain_ip"].values)

    if not column_names:
        column_names = list(df.columns)

    # Collect, column by column, the rows whose value is a known IOC. A row
    # matching in several columns is collected once per matching column.
    matches = []
    for column in column_names:
        uniq_values = pd.Series(_materialize(df.loc[:, column].dropna().unique()))
        is_ioc = uniq_values.map(lambda value: value in known_iocs)
        if is_ioc.any():
            hits = uniq_values[is_ioc.astype(bool)]
            matches.append(_materialize(df[df.loc[:, column].isin(hits)]))

    if not matches:
        return False

    # concat replaces DataFrame.append (removed in pandas 2.0) and returns a
    # new frame, so adding the hit columns below does not write into df.
    df_match = pd.concat(matches)

    # First "hit" description recorded for every domain/IP.
    hit_by_ioc = mal_df.drop_duplicates(subset=['domain_ip']).set_index("domain_ip")["hit"]
    for column in column_names:
        df_match[f'{column}_hit'] = df_match[column].map(hit_by_ioc)

    return df_match

In [34]:
def disk_log_analysis_menu(click_function = None):
    """
    Display the log analysis menu: an analysis dropdown, two checkboxes, a button and an output area.
    The analyses work on the notebook-level ``df`` DataFrame.
    "Maltrails IOCs" searches the selected columns with is_in_maltrails_dask and binds a DataFrame result to the ``result`` global.
    "Graph Analysis" builds a model with model_logips, binds it to the ``df_model`` global and offers a scatter chart of it.
    When the checkboxes are ticked, geoip_maxmind / asn_maxmind add geolocation and ASN columns to the results.
    :param click_function: Callable: Optional handler for the "Analyze logs" button; when given it is registered instead of the built-in handler.
    """
    import ipywidgets as widgets
    from IPython.display import display, Markdown, clear_output
    from ipyfilechooser import FileChooser
    import datetime
    from time import time
    import pandas as pd

    options = ["Maltrails IOCs","Graph Analysis"]
    display(Markdown(f'### Log Analysis menu.'))

    output = widgets.Output()
    analysis_type = widgets.Dropdown(
        options=options,
        description='Analysis:',
        disabled=False,
    )

    geo_check = widgets.Checkbox(description='Geolocate IPs', value=True)
    asn_check = widgets.Checkbox(description='Obtain IP ASN', value=True)
    button_init = widgets.Button(description=f'Analyze logs')

    display(analysis_type, geo_check, asn_check, button_init, output)

    # Built-in handler of the "Analyze logs" button: renders the sub-menu of
    # the analysis chosen in the dropdown inside the output area.
    def button_init_click(_):
        from picatrix.lib import utils
        #df = utils.ipython_get_global('df_result')
        with output:
            if "Maltrails IOCs" in analysis_type.value:
                from ipywidgets import GridspecLayout
                grid = GridspecLayout(1, 2)
                display(Markdown(f'### IOCs log search'))

                # Let the user pick which columns of df are searched for IOCs.
                columns = df.columns
                mal_col_select = widgets.SelectMultiple(
                    description='Column:',
                    disabled=False,
                    options=columns,
                    layout={'height': '200px', 'width': '80%'})
                grid[0, 0] = mal_col_select

                # Handler of the "Search IOCs" button.
                def search_iocs(_):
                    with output:
                        # Remove the column selector and the button once the search starts.
                        grid.close()
                        button_search.close()
                        display(Markdown(f'Searching IOCs in logs please **wait**.'))
                        start_time = time()
                        columns = list(mal_col_select.value)

                        # is_in_maltrails_dask returns a DataFrame of matches, or False when there are none.
                        result = is_in_maltrails_dask(df, column_names=columns, type=analysis_type.value)
                        if isinstance(result, pd.DataFrame):
                            if geo_check.value:
                                result = geoip_maxmind(result, columns)
                            if asn_check.value:
                                result = asn_maxmind(result, columns)

                            utils.ipython_bind_global('result', result)
                            elapsed_time = time() - start_time
                            elapsed_time = str(datetime.timedelta(seconds=elapsed_time))
                            display(Markdown(f'Elapsed time: {elapsed_time}.'))
                            display(Markdown(f'Results are stored in **result** dataframe.'))
                        else:
                            clear_output()
                            display(Markdown(f'There are **not results** in data.'))

                button_search = widgets.Button(description=f'Search IOCs')
                button_search.on_click(search_iocs)

                display(grid, button_search)

            if "Graph Analysis" in analysis_type.value:
                from ipywidgets import GridspecLayout, Layout
                import numpy as np

                clear_output()

                display(Markdown('Graph Investigation'))
                columns = list(df.columns)

                # Column of df that holds the remote (destination) address.
                dst_widget = widgets.Dropdown(
                    options=columns,
                    description='Remote IP:',
                    disabled=False)

                # Column of df that holds the source address.
                src_widget = widgets.Dropdown(
                    options=columns,
                    description='Source IP:',
                    disabled=False)

                # Handler of the "Data model" button: builds the model and
                # then renders the chart controls.
                def model_data(_):
                    with output:
                        from pandas.api.types import is_numeric_dtype

                        clear_output()

                        display(Markdown('Modeling data please **wait**'))
                        df_model = model_logips(df, src_widget.value, dst_widget.value)

                        if geo_check.value:
                            df_model = geoip_maxmind(df_model, [dst_widget.value])
                        if asn_check.value:
                            df_model = asn_maxmind(df_model, [dst_widget.value])

                        from picatrix.lib import utils
                        utils.ipython_bind_global('df_model', df_model)
                        display(Markdown(f'Results are stored in **df_model** dataframe.'))

                        # Only the numeric columns of the model are offered for filtering and plotting.
                        columns = [i for i in df_model.columns if is_numeric_dtype(df_model[i])]

                        grid = GridspecLayout(2, 4)

                        filter_widget = widgets.Dropdown(
                            options=columns,
                            description='Column filter:',
                            disabled=False)

                        filter_order = widgets.ToggleButtons(
                            options=['Highest', 'Lowest'],
                            description='Selection:',
                            disabled=False,
                            value="Highest",
                            button_style='info',  # 'success', 'info', 'warning', 'danger' or ''
                            tooltips=['Highests events', 'Lowest events'],
                            layout=Layout(height='80px'))

                        # Number of model rows kept for the chart; the value is cast to int before use.
                        int_select = widgets.FloatSlider(
                            value=1000,
                            min=0,
                            max=5000,
                            step=500,
                            description='Events:',
                            disabled=False,
                            continuous_update=False,
                            orientation='horizontal',
                            readout=True,
                            readout_format='')

                        x_widget = widgets.Dropdown(
                            options=columns,
                            description='X:',
                            disabled=False)

                        y_widget = widgets.Dropdown(
                            options=columns,
                            description='Y:',
                            disabled=False)

                        # Handler of the "Generate Graph" button.
                        def generate_graph(_):
                            with output:
                                import plotly.express as px
                                output_graph = widgets.Output()
                                display(Markdown('Drawing graph, please **wait**'))
                                display(
                                    Markdown(f'Filtered {int(int_select.value)} values from {filter_widget.value} column'))

                                # Cut the dataframe from top or bottom depends on user selection
                                if "Highest" in filter_order.value:
                                    df_final_plot = df_model.nlargest(int(int_select.value), filter_widget.value)
                                elif "Lowest" in filter_order.value:
                                    df_final_plot = df_model.nsmallest(int(int_select.value), filter_widget.value)

                                # Draw a scatter graph
                                col_x = x_widget.value
                                col_y = y_widget.value

                                fig = px.scatter(df_final_plot, x=col_x, y=col_y, color=dst_widget.value,
                                                 title=f'Graph {col_x} VS {col_y}',
                                                 color_discrete_sequence=["blue"])
                                output_graph.clear_output()
                                # NOTE(review): the return value of fig.show() is passed to
                                # display() together with the still-empty output_graph widget -
                                # confirm this is intended.
                                display(fig.show(), output_graph)

                        grid[0, 0] = filter_widget
                        grid[0, 1] = filter_order
                        grid[0, 2] = int_select
                        grid[1, 0] = x_widget
                        grid[1, 1] = y_widget
                        button_graph = widgets.Button(description=f'Generate Graph')
                        button_graph.on_click(generate_graph)

                        display(grid, button_graph)

                button_model = widgets.Button(description=f'Data model')
                button_model.on_click(model_data)
                display(src_widget, dst_widget, button_model)
    # A caller-supplied handler replaces the built-in one.
    if click_function:
        button_init.on_click(click_function)
    else:
        button_init.on_click(button_init_click)

In [35]:
# Render the log analysis menu with its built-in button handler.
disk_log_analysis_menu()

### Log Analysis menu.

Dropdown(description='Analysis:', options=('Maltrails IOCs', 'Graph Analysis'), value='Maltrails IOCs')

Checkbox(value=True, description='Geolocate IPs')

Checkbox(value=True, description='Obtain IP ASN')

Button(description='Analyze logs', style=ButtonStyle())

Output()

In [36]:
import pandas as pd
# Remove the limit on the number of columns pandas shows when displaying a DataFrame.
pd.set_option("display.max_columns", None)

In [37]:
# Modeled destinations ordered by number of connections, highest first.
df_model.sort_values(by="n_connections", ascending=False)

Unnamed: 0,DestinationIP,n_connections,n_sourceips,PacketsSent_sum,PacketsSent_min,PacketsSent_max,PacketsSent_mean,PacketsSent_var,PacketsSent_mode,BytesSent_sum,BytesSent_min,BytesSent_max,BytesSent_mean,BytesSent_var,BytesSent_mode,PacketsReceived_sum,PacketsReceived_min,PacketsReceived_max,PacketsReceived_mean,PacketsReceived_var,PacketsReceived_mode,BytesReceived_sum,BytesReceived_min,BytesReceived_max,BytesReceived_mean,BytesReceived_var,BytesReceived_mode,DestinationIP_geo,DestinationIP_asn
143,52.152.170.33,263,1,350,0,10,1.330798,1.550460,1,27750,0,1236,105.513308,21711.662990,66,338,0,19,1.285171,2.334398,1,39628,0,16882,150.676806,1.082870e+06,66,United States,MICROSOFT-CORP-MSN-AS-BLOCK
4,52.239.141.132,202,1,3181,0,52,15.747525,135.921014,7,281261,0,3433,1392.381188,481757.062928,985,7965,0,100,39.430693,633.221541,43,10329619,0,132850,51136.727723,1.187090e+09,55966,Netherlands,MICROSOFT-CORP-MSN-AS-BLOCK
1,52.239.212.68,190,1,3306,0,53,17.400000,154.876190,7,283250,0,3433,1490.789474,517559.447508,985,6719,0,98,35.363158,481.629323,43,8638295,0,131355,45464.710526,9.092666e+08,55966,Netherlands,MICROSOFT-CORP-MSN-AS-BLOCK
5,52.239.141.164,184,1,4450,0,37,24.184783,81.954740,28,301586,0,2375,1639.054348,306117.734735,1883,2812,0,93,15.282609,62.630078,16,3431565,0,125600,18649.809783,1.164392e+08,19631,Netherlands,MICROSOFT-CORP-MSN-AS-BLOCK
0,20.106.153.175,149,1,163,0,11,1.093960,0.855977,1,11648,0,1290,78.174497,12171.834210,66,127,0,8,0.852349,0.599673,1,8737,0,778,58.637584,5.011462e+03,66,United States,MICROSOFT-CORP-MSN-AS-BLOCK
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165,216.58.208.100,1,1,0,0,0,0.000000,,0,0,0,0,0.000000,,0,1,1,1,1.000000,,1,60,60,60,60.000000,,60,United States,GOOGLE
168,95.100.141.210,1,1,0,0,0,0.000000,,0,0,0,0,0.000000,,0,0,0,0,0.000000,,0,0,0,0,0.000000,,0,Netherlands,AKAMAI-AS
169,50.17.214.10,1,1,0,0,0,0.000000,,0,0,0,0,0.000000,,0,0,0,0,0.000000,,0,0,0,0,0.000000,,0,United States,AMAZON-AES
173,52.58.9.178,1,1,1,1,1,1.000000,,1,66,66,66,66.000000,,66,1,1,1,1.000000,,1,66,66,66,66.000000,,66,Germany,AMAZON-02
