In [1]:
import os
import random
import warnings
from typing import Union, List

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import matplotlib.pyplot as plt

import torch

In [2]:
# GPUs that CUDA is allowed to see; the ids are exported as a comma-separated list.
gpu_id = [0, 1]
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(one_gpu) for one_gpu in gpu_id)

# Use CUDA when a device is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [3]:
# Root directory that holds one sub-directory of CSV files per attack scenario.
folder: str = "/data/experience/CSV"

# Sub-directory of each attack scenario, relative to `folder`.
# The list is ordered so that position i-1 holds scenario number i
# (load_dataset indexes it with `num-1`).
sub_folders: list = [
    '/1.Deauth',
    '/2.Disas',
    '/3.(Re)Assoc',
    '/4.Rogue_AP',
    '/5.Krack',
    '/6.Kr00k',
    '/7.SSH',
    '/8.Botnet',
    '/9.Malware',
    '/10.SQL_Injection',
    '/11.SSDP',
    '/12.Evil_Twin',
    '/13.Website_spoofing'
]

In [4]:
# Load dataset
def load_fusion_dataset(idx: Union[int, List[int]], all: bool = False) -> pd.DataFrame:
    """Build one frame that mixes CSV files taken from every attack sub-folder.

    Args:
        idx: Position (or list/tuple of positions) of the file(s) to read in each
            sub-folder. Positions wrap around with ``% len(files)``, so any
            integer is valid. Ignored when ``all`` is True.
        all: When True, read every file of every sub-folder. (The name shadows
            the builtin; it is kept so existing keyword callers keep working.)

    Returns:
        The row-wise concatenation of all selected files. Original row indices
        are kept, so the resulting index may contain duplicates.

    Raises:
        FileNotFoundError: If a sub-folder contains no files.
    """
    frames = []

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=pd.errors.DtypeWarning)

        for sub_folder in sub_folders:
            path: str = folder + sub_folder
            # os.listdir() returns entries in arbitrary order; sort them so that a
            # given idx always selects the same file.
            files: list = sorted(os.listdir(path))
            if not files:
                raise FileNotFoundError("no files found in " + path)

            if all:
                selected = files
            elif isinstance(idx, (list, tuple)):
                selected = [files[i % len(files)] for i in idx]
            else:
                selected = [files[idx % len(files)]]

            frames.extend(pd.read_csv(os.path.join(path, name)) for name in selected)

    return pd.concat(frames, axis=0)
        

def load_dataset(num: int = 1, all: bool = False) -> pd.DataFrame:
    """Load the CSV data of a single attack scenario.

    Args:
        num: 1-based scenario number, i.e. position in ``sub_folders`` plus one.
        all: When True, concatenate every file of the scenario (with a fresh
            0..n-1 index); otherwise read only the first file in sorted order.
            (The name shadows the builtin; kept for caller compatibility.)

    Returns:
        The loaded frame, or None when ``num`` is outside the valid range.
    """
    # Out-of-range requests keep returning None, as before; the upper bound now
    # follows the folder list instead of a hard-coded 13.
    if num < 1 or num > len(sub_folders): return None

    path: str = folder + sub_folders[num-1]
    # Sorted so that "the first file" is the same file on every run/machine.
    files: list = sorted(os.listdir(path))
    selected = files if all else files[:1]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=pd.errors.DtypeWarning)
        frames = [pd.read_csv(os.path.join(path, name)) for name in selected]

    return pd.concat(frames, ignore_index=True) if all else frames[0]

In [65]:
# Preprocessing
def wireless_preprocessing(df_copy: pd.DataFrame) -> pd.DataFrame:
    """Clean, encode and standardise a raw capture frame for the WiFi IDS models.

    Steps, in order: keep only ``frame*``/``radiotap*``/``wlan*``/``eapol*``
    columns plus ``Label``; drop rows without a label; derive presence flags;
    replace missing values with sentinels; parse string-encoded flag columns;
    drop raw timing/key columns; label-encode remaining object columns;
    standard-scale every feature.

    Args:
        df_copy: Raw frame. Columns that are accessed unconditionally below must
            exist, otherwise a KeyError is raised. The caller's frame is not
            modified.

    Returns:
        A frame with the scaled feature columns followed by the boolean ``Label``
        column (True = any label other than 'Normal'), with a fresh 0..n-1 index.

    Note:
        The LabelEncoders and the StandardScaler are fit on the frame passed in,
        and all-NaN columns are dropped. Frames processed in separate calls
        (e.g. train and test) are therefore not guaranteed to share encodings,
        scaling, or even the same set of columns.
    """
    # Filter protocols related with wireless attacks
    wireless_prefixes = ('frame', 'radiotap', 'wlan', 'eapol', 'Label')
    df_copy = df_copy[[col for col in df_copy.columns if col.startswith(wireless_prefixes)]]

    # .copy() so the column assignments below work on an independent frame
    # rather than on a slice of the caller's data.
    df_copy = df_copy.dropna(subset=['Label']).copy()

    # Must be captured BEFORE 'eapol.type' is NaN-filled below; computing it
    # afterwards (as the code used to) makes the flag True for every row.
    eapol_used = df_copy['eapol.type'].notna()

    # fill NaN data
    for col in ['frame.encap_type', 'frame.len', 'frame.number', 'frame.time_delta', 'frame.time_epoch',
                'radiotap.channel.freq', 'radiotap.length', 'radiotap.timestamp.ts', 'wlan.fc.frag',
                'wlan.fc.order', 'wlan.fc.moredata', 'wlan.fc.protected', 'wlan.fc.pwrmgt',
                'wlan.fc.type', 'wlan.fc.retry', 'wlan.fc.subtype', 'wlan_radio.duration',
                'wlan_radio.channel', 'wlan_radio.data_rate', 'wlan_radio.frequency',
                'wlan_radio.signal_dbm', 'radiotap.datarate', 'radiotap.mactime', 'eapol.type']:
        if col in df_copy.columns:
            df_copy[col] = df_copy[col].fillna(-10000.0)

    # 'eapol.type' is already filled with -10000.0 by the loop above; the former
    # follow-up fillna(-1.0) on it could never match anything and was removed.
    df_copy['eapol.len'] = df_copy['eapol.len'].fillna(0.0)
    df_copy['eapol.keydes.replay_counter'] = df_copy['eapol.keydes.replay_counter'].fillna(-1.0)
    df_copy['eapol.keydes.key_len'] = df_copy['eapol.keydes.key_len'].fillna(0.0)
    df_copy['wlan.duration'] = df_copy['wlan.duration'].fillna(0.0)
    df_copy['wlan_radio.timestamp'] = df_copy['wlan_radio.timestamp'].fillna(0.0)
    df_copy['radiotap.dbm_antsignal'] = df_copy['radiotap.dbm_antsignal'].fillna(-1000)
    df_copy['radiotap.rxflags'] = df_copy['radiotap.rxflags'].fillna('0xffffffff')
    df_copy['wlan.fc.ds'] = df_copy['wlan.fc.ds'].fillna('0xffffffff')
    df_copy['wlan_radio.phy'] = df_copy['wlan_radio.phy'].fillna(-1000.0)

    # Rows whose phy field holds the literal 'Normal' get the missing-value sentinel.
    df_copy.loc[df_copy['wlan_radio.phy'] == 'Normal', 'wlan_radio.phy'] = -1000.0

    # Change type of several flags columns
    df_copy.loc[:, 'radiotap.rxflags'] = df_copy['radiotap.rxflags'].apply(lambda x: int(x, 16) if isinstance(x, str) else x)
    df_copy.loc[:, 'wlan_radio.phy'] = df_copy['wlan_radio.phy'].apply(lambda x: float(x) if isinstance(x, str) else x)
    df_copy.loc[:, 'Label'] = df_copy['Label'].apply(lambda x: x != 'Normal')
    df_copy.loc[:, 'radiotap.present.tsft'] = df_copy['radiotap.present.tsft'].apply(lambda x: x == '1-0-0')
    # Numbers pass through; strings containing '.' are parsed as floats; any other
    # string is split on '-' and the second piece is negated.
    df_copy.loc[:, 'radiotap.dbm_antsignal'] = df_copy['radiotap.dbm_antsignal'].apply(lambda x: float(x) if isinstance(x, (int, float)) else (float(x) if '.' in x else -int(x.split('-')[1])))
    df_copy.loc[:, 'wlan.fc.ds'] = df_copy['wlan.fc.ds'].apply(lambda x: x if isinstance(x, (int, float)) else int(x, 16))

    # Presence flags derived from columns that are dropped further down.
    # has_key is True when a KCK value is present (it used to be isna(), i.e. inverted).
    df_copy.loc[:, 'wlan.analysis.has_key'] = df_copy['wlan.analysis.kck'].notna()
    df_copy.loc[:, 'wlan.fixed.used'] = df_copy['wlan.fixed.timestamp'].notna()
    df_copy.loc[:, 'wlan.rsn.used'] = df_copy['wlan.rsn.ie.gtk.key'].notna()
    df_copy.loc[:, 'eapol.used'] = eapol_used

    df_copy = df_copy.astype({
        'radiotap.channel.flags.cck': 'bool',
        'radiotap.channel.flags.ofdm': 'bool',
        'Label': 'bool',
        'radiotap.present.tsft': 'bool',
        'radiotap.dbm_antsignal': 'int64',
        'radiotap.rxflags': 'int64',
        'wlan.fc.ds': 'int64'
    })

    # delete unnecessary columns
    drop_columns = [
        'frame.time', 'frame.time_delta_displayed', 'frame.time_relative',
        'wlan.analysis.kck', 'wlan.analysis.kek', 'wlan_radio.end_tsf', 'wlan_radio.start_tsf'
    ]
    df_copy = df_copy.drop(columns=drop_columns)

    # Drop the raw wlan.fixed*/wlan.rsn*/wlan_rsn* columns but keep the two flags
    # derived above: they share the prefix and were previously discarded right
    # after being created.
    derived_flags = ('wlan.fixed.used', 'wlan.rsn.used')
    raw_prefixes = ('wlan.fixed', 'wlan.rsn', 'wlan_rsn')
    kept_columns = [col for col in df_copy.columns if col in derived_flags or not col.startswith(raw_prefixes)]
    df_copy = df_copy[kept_columns].copy()

    # Fill category data
    df_copy['wlan.bssid'] = df_copy['wlan.bssid'].fillna('-')
    df_copy['wlan.country_info.fnm'] = df_copy['wlan.country_info.fnm'].fillna('-')
    df_copy['wlan.country_info.code'] = df_copy['wlan.country_info.code'].fillna('-')
    df_copy['wlan.da'] = df_copy['wlan.da'].fillna('-')
    df_copy['wlan.sa'] = df_copy['wlan.sa'].fillna('-')
    df_copy['wlan.ta'] = df_copy['wlan.ta'].fillna('-')
    df_copy['wlan.tag'] = df_copy['wlan.tag'].fillna('-')
    df_copy['wlan.tag.length'] = df_copy['wlan.tag.length'].fillna('-')
    df_copy['wlan.seq'] = df_copy['wlan.seq'].fillna(-1.0)
    df_copy['wlan.ssid'] = df_copy['wlan.ssid'].fillna('Unknown')

    # Columns that are still entirely NaN carry no information.
    df_copy = df_copy.dropna(how='all', axis=1)

    # Integer-encode every remaining object column (one encoder per column).
    for col in df_copy.select_dtypes('object').columns:
        le = LabelEncoder()
        df_copy[col] = le.fit_transform(df_copy[col])

    features = df_copy.drop(columns=['Label'])
    label = df_copy['Label']

    scaler = StandardScaler().fit(features)
    scaled_features = pd.DataFrame(scaler.transform(features), columns=features.columns)

    return pd.concat([scaled_features.reset_index(drop=True), label.reset_index(drop=True)], axis=1)

In [6]:
def load_and_split_dataset(num: int = None) -> tuple:
    """Load a dataset, preprocess it and split it into normal and attack rows.

    Args:
        num: Scenario number passed to ``load_dataset`` (all files are read).
            When falsy, a fused dataset is loaded instead, using a random file
            index between 0 and 10 (inclusive).

    Returns:
        ``(normal_features, normal_labels, attack_features, attack_labels)``.
    """
    raw = load_dataset(num, True) if num else load_fusion_dataset(random.randint(0, 10))
    processed = wireless_preprocessing(raw)

    # Label is False for normal traffic and True for attack traffic.
    normal_rows = processed[processed['Label'].eq(False)]
    attack_rows = processed[processed['Label'].eq(True)]

    return (
        normal_rows.drop(columns=['Label']),
        normal_rows['Label'],
        attack_rows.drop(columns=['Label']),
        attack_rows['Label'],
    )

In [7]:
# Define the autoencoder model
class Autoencoder(nn.Module):
    """Fully connected autoencoder with a symmetric three-layer encoder/decoder.

    Args:
        input_dim: Number of input (and reconstructed) features.
        encoding_dim: Width of the bottleneck produced by ``self.encoder``.
        hidden_dim: Width of the intermediate layers (default 16).

    Invoke the model as ``model(x)``. ``__call__`` is deliberately NOT
    overridden: ``nn.Module.__call__`` is what dispatches to ``forward`` and
    runs registered hooks, and the previous override bypassed that machinery.

    NOTE(review): the decoder ends in a Sigmoid, so reconstructions lie in
    (0, 1). If the inputs are standard-scaled (as wireless_preprocessing
    appears to produce), targets outside that range cannot be reconstructed —
    confirm this is intended.
    """

    def __init__(self, input_dim, encoding_dim, hidden_dim=16):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, encoding_dim),
        )
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Encode ``x`` to the bottleneck and return its reconstruction."""
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

In [8]:
class AutoencoderClassifier(nn.Module):
    """A classifier that is trained and queried on encoder outputs.

    Args:
        encoder: Module mapping a float32 feature tensor to an embedding.
        clf: Estimator exposing ``fit``/``predict``/``predict_proba`` on NumPy
            arrays (scikit-learn style), or a callable usable on tensors.

    ``__call__`` is not overridden, so ``nn.Module.__call__`` (and its hooks)
    stays intact.
    """

    def __init__(self, encoder, clf):
        super().__init__()
        self.encoder = encoder
        self.clf = clf

    def _encode(self, X):
        """Return the encoder output for ``X`` as a NumPy array on the CPU."""
        values = X.values if hasattr(X, "values") else X
        # Run on whatever device the encoder lives on instead of relying on a
        # module-level `device` variable.
        first_param = next(self.encoder.parameters(), None)
        target = first_param.device if first_param is not None else torch.device("cpu")
        X_tensor = torch.as_tensor(values, dtype=torch.float32).to(target)
        # Inference only: no autograd graph is needed for the embeddings.
        with torch.no_grad():
            return self.encoder(X_tensor).cpu().numpy()

    def forward(self, x):
        """Apply encoder then classifier.

        A callable ``clf`` is applied directly to the encoder output (original
        behaviour). Estimators that are not callable, such as scikit-learn
        models, previously raised TypeError here; they are now served through
        ``predict``.
        """
        if callable(self.clf):
            return self.clf(self.encoder(x))
        return self.predict(x)

    def fit(self, X: pd.DataFrame, y: pd.Series):
        """Fit ``clf`` on the embeddings of ``X``; returns ``self``."""
        self.clf.fit(self._encode(X), y)
        return self

    def predict(self, x: pd.DataFrame):
        """Return ``clf.predict`` evaluated on the embeddings of ``x``."""
        return self.clf.predict(self._encode(x))

    def predict_proba(self, x: pd.DataFrame):
        """Return ``clf.predict_proba`` evaluated on the embeddings of ``x``."""
        return self.clf.predict_proba(self._encode(x))

In [9]:
def get_clf_eval(y_test, pred) -> list:
    """Score predictions against the true labels.

    Returns:
        ``[accuracy, precision, recall, f1]``, in that order.
    """
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    return [score(y_test, pred) for score in scorers]
    
def get_model_eval(model, X_test = None, y_test = None):
    """Predict on ``X_test`` with ``model`` and score the result against ``y_test``.

    Returns:
        The list produced by ``get_clf_eval`` for those predictions.
    """
    return get_clf_eval(y_test, model.predict(X_test))

In [38]:
def load_and_sample_dataset(idx: int = None, train: bool = True, num: int = 100) -> pd.DataFrame:
    """Read one of the pre-sampled per-attack CSV files.

    Args:
        idx: 1-based attack number (1..13). Required, despite the ``None``
            default kept for signature compatibility.
        train: When True read ``train/<attack>_<num>_train.csv``; otherwise read
            ``test/<attack>_test.csv``.
        num: Training-set size encoded in the train file name. Ignored for the
            test file.

    Returns:
        The frame read from the selected CSV file.

    Raises:
        ValueError: If ``idx`` is not an integer in 1..13. Previously ``None``
            failed with an opaque TypeError and ``0`` silently selected the last
            attack through negative indexing.
    """
    path: str = "/data/experience/wireless/CSV/"
    # File-name prefix of each attack, in attack-number order. Named differently
    # from the module-level `sub_folders` so it no longer shadows it.
    file_prefixes: list = [
        'Deauth_',
        'Disas_',
        '(Re)Assoc_',
        'Rogue_AP_',
        'Krack_',
        'Kr00k_',
        'SSH_',
        'Botnet_',
        'Malware_',
        'SQL_Injection_',
        'SSDP_',
        'Evil_Twin_',
        'Website_spoofing_'
    ]

    if not isinstance(idx, int) or not 1 <= idx <= len(file_prefixes):
        raise ValueError(
            "idx must be an integer between 1 and {}, got {!r}".format(len(file_prefixes), idx)
        )

    prefix = file_prefixes[idx-1]
    if train:
        csv_path = path + "train/" + prefix + str(num) + '_train.csv'
    else:
        csv_path = path + "test/" + prefix + 'test.csv'

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=pd.errors.DtypeWarning)
        df = pd.read_csv(csv_path)

    return df

In [39]:
def preprocessing(df_train: pd.DataFrame, df_test: pd.DataFrame, WiFIDS: bool) -> tuple:
    """Prepare a train/test pair for the WiFIDS pipeline or for a plain baseline.

    Args:
        df_train: Raw training frame containing a ``Label`` column.
        df_test: Raw test frame containing a ``Label`` column.
        WiFIDS: When True both frames go through ``wireless_preprocessing``.
            When False only a minimal baseline preparation is applied:
            unlabeled rows are dropped, ``Label`` becomes a boolean
            (True = anything other than 'Normal'), and every column that has
            object dtype in either frame is removed from both.

    Returns:
        ``(df_train, df_test)`` after preparation.
    """
    if WiFIDS:
        # Scope the FutureWarning suppression to these calls instead of
        # silencing it for the rest of the session.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=FutureWarning)
            df_train = wireless_preprocessing(df_train)
            df_test = wireless_preprocessing(df_test)
        return df_train, df_test

    def _binarize_label(df: pd.DataFrame) -> pd.DataFrame:
        # Drop unlabeled rows, then turn Label into a boolean attack flag.
        df = df.dropna(subset=['Label'])
        return df.assign(Label=df['Label'].ne('Normal').astype('bool'))

    df_train = _binarize_label(df_train)
    # Each frame is labelled from its OWN Label column; the test labels used to
    # be overwritten with values computed from df_train.
    df_test = _binarize_label(df_test)

    # Remove the union of object columns so both frames keep identical features.
    df_deletion = list(set(df_train.select_dtypes('object').columns)|set(df_test.select_dtypes('object').columns))

    df_train = df_train.drop(columns=df_deletion)
    df_test = df_test.drop(columns=df_deletion)

    return df_train, df_test

In [15]:
def plot_model_performance(model_data):
    """Draw a grouped bar chart comparing models on four metrics.

    Args:
        model_data: Mapping of model name to a sequence whose first four items
            are accuracy, precision, recall and F1 as fractions; they are
            plotted as percentages.
    """
    metrics = ['Accuracy', 'Precision', 'Recall', 'F1']
    bar_width = 0.2
    model_names = list(model_data.keys())
    metric_slots = np.arange(len(metrics))

    # Percent scores per model, listed in the same order as `metrics`.
    heights_by_model = {
        name: [model_data[name][position] * 100 for position in range(len(metrics))]
        for name in model_names
    }

    fig, ax = plt.subplots()

    # One bar group per metric; each model is shifted right by one bar width.
    for shift, name in enumerate(model_names):
        ax.bar(metric_slots + (shift * bar_width), heights_by_model[name], bar_width, label=name)

    ax.set_xlabel('Metrics')
    ax.set_ylabel('Scores (%)')
    ax.set_title('Model Performance Comparison')
    # Centre each tick under its group of bars.
    ax.set_xticks(metric_slots + bar_width * (len(model_names) - 1) / 2)
    ax.set_xticklabels(metrics)
    ax.set_ylim(0, 100)
    ax.legend()

    plt.show()

# Load Datasets

In [10]:
# Normal Datasets for pretrain
df_normal_features, df_normal_labels, _, _ = load_and_split_dataset()

# Draw at most 10000 normal rows without replacement. random.sample raises
# ValueError when asked for more items than exist, so cap the request.
n_pretrain_samples = min(10000, len(df_normal_features))
sampling_idx = random.sample(range(len(df_normal_features)), n_pretrain_samples)
df_normal_features_sampled = df_normal_features.iloc[sampling_idx]

# The whole sample is moved to `device` up front; the loader then yields
# shuffled mini-batches of 4 rows.
df_normal_features_tensor = torch.tensor(df_normal_features_sampled.values, dtype=torch.float32).to(device)
normal_dataset = TensorDataset(df_normal_features_tensor)
normal_loader = DataLoader(normal_dataset, batch_size=4, shuffle=True)

# 1. Pretrain Encoder

In [11]:
# Width of the bottleneck, and number of input features (one per column).
encoding_dim = 16
input_dim = len(df_normal_features.columns)

# Autoencoder on the selected device, trained with a mean-squared reconstruction loss.
model = Autoencoder(input_dim, encoding_dim)
model = model.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Train the autoencoder
num_epochs = 50
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    # Each batch is a one-element sequence because the dataset wraps one tensor.
    for (batch,) in normal_loader:
        optimizer.zero_grad()
        outputs = model(batch)
        loss = criterion(outputs, batch)
        loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller final batch does not skew the mean.
        running_loss += loss.item() * batch.size(0)
    # Report the mean loss over the epoch; the loss of the last 4-row mini-batch
    # alone (what was printed before) is too noisy to track convergence.
    epoch_loss = running_loss / len(normal_loader.dataset)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

In [None]:
# Persist the trained weights (state dict only) to the working directory.
torch.save(model.state_dict(), 'autoencoder.pth')

In [12]:
# model = Autoencoder(input_dim, encoding_dim).to(device)
# model.load_state_dict(torch.load('autoencoder.pth'))

# 2. Experiments

## 1) Comparison with pure classifier

In [None]:
# For each selected attack: train a decision tree on encoder embeddings (WiFIDS)
# and a decision tree on the minimally prepared features, then plot both scores.
# NOTE(review): the loop leaves `dt_encoding_clf` (and the other names) in the
# notebook namespace; later cells read `dt_encoding_clf` without redefining it.
for idx in [1, 2, 3, 4, 5, 6, 12]:
    print("[Test {}: {}] ==========".format(idx, sub_folders[idx-1].split(".")[1]))
    
    # Training file with num=1000, and the test file for the same attack.
    train_sampled = load_and_sample_dataset(idx, True, 1000)
    test_sampled = load_and_sample_dataset(idx, False)
    
    # Same raw data prepared twice: full wireless pipeline vs. baseline preparation.
    train_WiFIDS, test_WiFIDS = preprocessing(train_sampled, test_sampled, True)
    train_df, test_df = preprocessing(train_sampled, test_sampled, False)
    
    train_WiFIDS_features = train_WiFIDS.drop(columns=['Label'])
    train_WiFIDS_label = train_WiFIDS['Label']
    test_WiFIDS_features = test_WiFIDS.drop(columns=['Label'])
    test_WiFIDS_label = test_WiFIDS['Label']
    
    train_df_features = train_df.drop(columns=['Label'])
    train_df_label = train_df['Label']
    test_df_features = test_df.drop(columns=['Label'])
    test_df_label = test_df['Label']
    
    dt_encoding_clf = DecisionTreeClassifier()
    dt_clf = DecisionTreeClassifier()

    # The pretrained encoder is shared; only the decision tree is fit here.
    WiFIDS = AutoencoderClassifier(model.encoder, dt_encoding_clf)
    
    WiFIDS.fit(train_WiFIDS_features, train_WiFIDS_label)
    dt_clf.fit(train_df_features, train_df_label)
    
    WiFIDS_score = get_model_eval(WiFIDS, test_WiFIDS_features, test_WiFIDS_label)
    dt_clf_score = get_model_eval(dt_clf, test_df_features, test_df_label)
    
    print("==========================")
    
    plot_model_performance({
        'WiFIDS': WiFIDS_score,
        'DecisionTree': dt_clf_score
    })

## 2) Comparison of Total Detection Rate

In [None]:

# Pool the train files (num=1000) and the test files of all selected attacks,
# then compare WiFIDS against a plain decision tree on the pooled data.
train_sampled = []
test_sampled = []

for idx in [1, 2, 3, 4, 5, 6, 12]: train_sampled.append(load_and_sample_dataset(idx, True, 1000))
for idx in [1, 2, 3, 4, 5, 6, 12]: test_sampled.append(load_and_sample_dataset(idx, False))

# The lists are replaced by single concatenated frames with a fresh index.
train_sampled = pd.concat(train_sampled, axis=0, ignore_index=True)
test_sampled = pd.concat(test_sampled, axis=0, ignore_index=True)

# Same raw data prepared twice: full wireless pipeline vs. baseline preparation.
train_WiFIDS, test_WiFIDS = preprocessing(train_sampled, test_sampled, True)
train_df, test_df = preprocessing(train_sampled, test_sampled, False)

train_WiFIDS_features = train_WiFIDS.drop(columns=['Label'])
train_WiFIDS_label = train_WiFIDS['Label']
test_WiFIDS_features = test_WiFIDS.drop(columns=['Label'])
test_WiFIDS_label = test_WiFIDS['Label']

train_df_features = train_df.drop(columns=['Label'])
train_df_label = train_df['Label']
test_df_features = test_df.drop(columns=['Label'])
test_df_label = test_df['Label']

dt_encoding_clf = DecisionTreeClassifier()
dt_clf = DecisionTreeClassifier()

# The pretrained encoder is shared; only the decision tree is fit here.
WiFIDS = AutoencoderClassifier(model.encoder, dt_encoding_clf)

WiFIDS.fit(train_WiFIDS_features, train_WiFIDS_label)
dt_clf.fit(train_df_features, train_df_label)

WiFIDS_score = get_model_eval(WiFIDS, test_WiFIDS_features, test_WiFIDS_label)
dt_clf_score = get_model_eval(dt_clf, test_df_features, test_df_label)

print("==========================")

plot_model_performance({
    'WiFIDS': WiFIDS_score,
    'DecisionTree': dt_clf_score
})

## 3) Comparison with Other Models

In [None]:
# For each selected attack (training file with num=1000): compare a random
# forest on encoder embeddings (WiFIDS) with a random forest on the baseline features.
for idx in [1, 2, 3, 4, 5, 6, 12]:
    print("[Test {}: {}] ==========".format(idx, sub_folders[idx-1].split(".")[1]))

    train_sampled = load_and_sample_dataset(idx, True, 1000)
    test_sampled = load_and_sample_dataset(idx, False)

    # Same raw data prepared twice: full wireless pipeline vs. baseline preparation.
    train_WiFIDS, test_WiFIDS = preprocessing(train_sampled, test_sampled, True)
    train_df, test_df = preprocessing(train_sampled, test_sampled, False)

    train_WiFIDS_features = train_WiFIDS.drop(columns=['Label'])
    train_WiFIDS_label = train_WiFIDS['Label']
    test_WiFIDS_features = test_WiFIDS.drop(columns=['Label'])
    test_WiFIDS_label = test_WiFIDS['Label']

    train_df_features = train_df.drop(columns=['Label'])
    train_df_label = train_df['Label']
    test_df_features = test_df.drop(columns=['Label'])
    test_df_label = test_df['Label']

    # Both sides use a fresh random forest. The cell used to build WiFIDS_rf and
    # then evaluate a model wrapping `dt_encoding_clf`, a decision tree left
    # over from an earlier cell.
    WiFIDS_rf = AutoencoderClassifier(model.encoder, RandomForestClassifier())
    rf_clf = RandomForestClassifier()

    WiFIDS_rf.fit(train_WiFIDS_features, train_WiFIDS_label)
    rf_clf.fit(train_df_features, train_df_label)

    WiFIDS_score = get_model_eval(WiFIDS_rf, test_WiFIDS_features, test_WiFIDS_label)
    rf_clf_score = get_model_eval(rf_clf, test_df_features, test_df_label)

    print("==========================")

    # The baseline is a random forest, so label it as such in the chart.
    plot_model_performance({
        'WiFIDS': WiFIDS_score,
        'RandomForest': rf_clf_score
    })

In [None]:
# Same random-forest comparison, using the default training file
# (load_and_sample_dataset defaults to num=100).
for idx in [1, 2, 3, 4, 5, 6, 12]:
    print("[Test {}: {}] ==========".format(idx, sub_folders[idx-1].split(".")[1]))

    train_sampled = load_and_sample_dataset(idx)
    test_sampled = load_and_sample_dataset(idx, False)

    # Same raw data prepared twice: full wireless pipeline vs. baseline preparation.
    train_WiFIDS, test_WiFIDS = preprocessing(train_sampled, test_sampled, True)
    train_df, test_df = preprocessing(train_sampled, test_sampled, False)

    train_WiFIDS_features = train_WiFIDS.drop(columns=['Label'])
    train_WiFIDS_label = train_WiFIDS['Label']
    test_WiFIDS_features = test_WiFIDS.drop(columns=['Label'])
    test_WiFIDS_label = test_WiFIDS['Label']

    train_df_features = train_df.drop(columns=['Label'])
    train_df_label = train_df['Label']
    test_df_features = test_df.drop(columns=['Label'])
    test_df_label = test_df['Label']

    # Both sides use a fresh random forest. The cell used to build WiFIDS_rf and
    # then evaluate a model wrapping `dt_encoding_clf`, a decision tree left
    # over from an earlier cell.
    WiFIDS_rf = AutoencoderClassifier(model.encoder, RandomForestClassifier())
    rf_clf = RandomForestClassifier()

    WiFIDS_rf.fit(train_WiFIDS_features, train_WiFIDS_label)
    rf_clf.fit(train_df_features, train_df_label)

    WiFIDS_score = get_model_eval(WiFIDS_rf, test_WiFIDS_features, test_WiFIDS_label)
    rf_clf_score = get_model_eval(rf_clf, test_df_features, test_df_label)

    print("==========================")

    # The baseline is a random forest, so label it as such in the chart.
    plot_model_performance({
        'WiFIDS': WiFIDS_score,
        'RandomForest': rf_clf_score
    })

## 4) Comparison of Accuracy Based on Number of Training Data

In [None]:
# One {training-set size: [accuracy, precision, recall, f1]} dict per attack,
# appended in the order of the loop below.
shipsagi = []

for idx in [4, 5, 6, 12]:
    results = {}
    print("[Test {}: {}] ==========".format(idx, sub_folders[idx-1].split(".")[1]))

    for num in [3, 5, 10, 20, 50, 100, 150, 200, 300, 500, 800, 1000]:

        train_sampled = load_and_sample_dataset(idx, True, num)
        test_sampled = load_and_sample_dataset(idx, False)

        train_WiFIDS, test_WiFIDS = preprocessing(train_sampled, test_sampled, True)
        train_df, test_df = preprocessing(train_sampled, test_sampled, False)

        train_WiFIDS_features = train_WiFIDS.drop(columns=['Label'])
        train_WiFIDS_label = train_WiFIDS['Label']
        test_WiFIDS_features = test_WiFIDS.drop(columns=['Label'])
        test_WiFIDS_label = test_WiFIDS['Label']

        train_df_features = train_df.drop(columns=['Label'])
        train_df_label = train_df['Label']
        test_df_features = test_df.drop(columns=['Label'])
        test_df_label = test_df['Label']

        dt_encoding_clf = DecisionTreeClassifier()

        WiFIDS = AutoencoderClassifier(model.encoder, dt_encoding_clf)
        WiFIDS.fit(train_WiFIDS_features, train_WiFIDS_label)
        WiFIDS_score = get_model_eval(WiFIDS, test_WiFIDS_features, test_WiFIDS_label)

        results.update({num: WiFIDS_score})

    plt.figure(figsize=(10, 6))

    shipsagi.append(results)

    train_sizes = list(results.keys())
    accuracy = list(data[0] for data in results.values())
    precision = list(data[1] for data in results.values())
    recall = list(data[2] for data in results.values())
    f1 = list(data[3] for data in results.values())

    plt.plot(train_sizes, accuracy, marker='o', linestyle='-', color='b', label="Accuracy")
    plt.plot(train_sizes, precision, marker='o', linestyle='-', color='r', label="Precision")
    plt.plot(train_sizes, recall, marker='o', linestyle='-', color='g', label="Recall")
    plt.plot(train_sizes, f1, marker='o', linestyle='-', color='m', label="F1")

    plt.title('Accuracy vs Number of Training Rows')
    plt.xlabel('Number of Training Rows')
    plt.ylabel('Score')
    plt.ylim(0, 1)
    plt.grid(True)
    # The four curves are labelled, but without a legend they could not be told apart.
    plt.legend()
    plt.show()

In [38]:
# Unpack the four per-attack result dicts collected by the experiment above, in
# loop order (attacks 4, 5, 6, 12). Requires that cell to have run, so that
# `shipsagi` holds exactly four entries.
a, s, d, f = shipsagi

In [40]:
def average_sum(data1, data2):
    """Average two result mappings element-wise.

    Args:
        data1: Mapping of key to a sequence of numbers; its keys drive the result.
        data2: Mapping containing at least the same keys.

    Returns:
        A dict mapping each key of ``data1`` to the list of pairwise means of the
        two sequences (pairing stops at the shorter sequence).
    """
    return {
        key: [(left + right) / 2 for left, right in zip(data1[key], data2[key])]
        for key in data1
    }

In [None]:
# Re-plot the curves of the first result dict unpacked above (`a`).
results = a

plt.figure(figsize=(10, 6))

train_sizes = list(results.keys())
accuracy = list(data[0] for data in results.values())
precision = list(data[1] for data in results.values())
recall = list(data[2] for data in results.values())
f1 = list(data[3] for data in results.values())

plt.plot(train_sizes, accuracy, marker='o', linestyle='-', color='b', label="Accuracy")
plt.plot(train_sizes, precision, marker='o', linestyle='-', color='r', label="Precision")
plt.plot(train_sizes, recall, marker='o', linestyle='-', color='g', label="Recall")
plt.plot(train_sizes, f1, marker='o', linestyle='-', color='m', label="F1")

plt.title('Accuracy vs Number of Training Rows')
plt.xlabel('Number of Training Rows')
plt.ylabel('Score')
plt.ylim(0, 1)
plt.grid(True)
# The four curves are labelled, but without a legend they could not be told apart.
plt.legend()
plt.show()