In [9]:
import numpy as np
import pandas as pd
import csv
import os
import glob

In [10]:
# Timeout settings to process. Each value is interpolated into a
# `timeout{value}` directory name by the per-timeout loop further down.
# NOTE(review): presumably Argus flow timeouts in seconds — confirm the unit.
timeouts = [0.5, 1, 2, 3, 4, 5, 6, 10, 30, 60]

# Process data extracted by Argus

In [3]:
def process_benign(out_dir):
    """Load every benign feature CSV under ``out_dir`` into one labelled DataFrame.

    Parameters
    ----------
    out_dir : str
        Directory containing a ``Benign/features`` sub-directory with the
        ``*.csv`` files to combine.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all matched CSVs (index reset to 0..n-1)
        with a ``label`` column set to ``'normal'`` on every row.

    Raises
    ------
    ValueError
        If no CSV file matches. ``pd.concat`` would raise ValueError on an
        empty list anyway; raising here keeps the exception type but names
        the pattern that was searched.
    """
    pattern = os.path.join(out_dir, 'Benign', 'features', '*.csv')

    # glob yields files in arbitrary, filesystem-dependent order; sorting makes
    # the row order of the combined frame reproducible between runs.
    files = sorted(glob.glob(pattern))
    if not files:
        raise ValueError(f'No CSV files found matching {pattern!r}')

    dfs = [pd.read_csv(file) for file in files]

    # Combine all DataFrames into a single DataFrame
    combined_df_normal = pd.concat(dfs, ignore_index=True)
    combined_df_normal['label'] = 'normal'

    # NOTE(review): the message says "conn.log" although the files globbed are
    # *.csv; kept verbatim so the printed output matches the recorded runs.
    print(f'Combined {len(files)} conn.log files ...')
    return combined_df_normal


In [4]:
def process_malware(out_dir):
    """Load every malware feature CSV under ``out_dir`` into one labelled DataFrame.

    Each file's rows are labelled with the part of its file name that precedes
    the first underscore (``Zeus_1.csv`` -> ``'Zeus'``). The ``.csv`` extension
    is stripped first, so a name without an underscore (``Zeus.csv``) is
    labelled ``'Zeus'`` rather than ``'Zeus.csv'``.

    Parameters
    ----------
    out_dir : str
        Directory containing a ``Malware/features`` sub-directory with the
        ``*.csv`` files to combine.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all matched CSVs (index reset to 0..n-1)
        with a ``label`` column holding the name derived from each source file.

    Raises
    ------
    ValueError
        If no CSV file matches. ``pd.concat`` would raise ValueError on an
        empty list anyway; raising here keeps the exception type but names
        the pattern that was searched.
    """
    pattern = os.path.join(out_dir, 'Malware', 'features', '*.csv')

    # glob yields files in arbitrary, filesystem-dependent order; sorting makes
    # the row order of the combined frame reproducible between runs.
    files = sorted(glob.glob(pattern))
    if not files:
        raise ValueError(f'No CSV files found matching {pattern!r}')

    dfs = []
    for file in files:
        # os.path handles the platform's separator; splitting on '/' breaks on
        # Windows, where glob returns backslash-separated paths.
        stem = os.path.splitext(os.path.basename(file))[0]
        attack_name = stem.split('_')[0]
        df = pd.read_csv(file)
        df['label'] = attack_name
        dfs.append(df)

    # Combine all DataFrames into a single DataFrame
    combined_df_anomaly = pd.concat(dfs, ignore_index=True)

    # NOTE(review): the message says "conn.log" although the files globbed are
    # *.csv; kept verbatim so the printed output matches the recorded runs.
    print(f'Combined {len(files)} conn.log files ...')
    return combined_df_anomaly


In [6]:
# Combine benign and malware DataFrames for every configured timeout and
# write one merged CSV per timeout directory.
for timeout in timeouts:
    print('Processing timeout : ', timeout , '... ')
    # NOTE(review): absolute, user-specific path; the notebook only runs on a
    # machine where this directory layout exists.
    out_dir = f'/home/meryem.janati/lustre/nlp_team-um6p-st-sccs-id7fz1zvotk/IDS/janati/IDS/Datasets/ustc/Argus/timeout{timeout}'
    # Benign rows are labelled 'normal'; malware rows are labelled from their file name.
    combined_df_normal = process_benign(out_dir)
    combined_df_anomaly = process_malware(out_dir)

    # Stack benign and malware rows into one frame with a fresh 0..n-1 index.
    combined_df = pd.concat([combined_df_normal, combined_df_anomaly], ignore_index=True)

    # Save to CSV (written inside the same timeout directory the inputs came from)
    combined_df.to_csv(f'{out_dir}/USTC-TFC16_argus_{timeout}.csv', index=False, header=True)
    
    # Per-label row counts, as a quick sanity check of the merged frame.
    print(combined_df['label'].value_counts())
    print(f'Combined DataFrame saved to {out_dir}')
    print('_____________________________________________________________')

Processing timeout :  0.5 ... 
Combined 14 conn.log files ...


  combined_df_anomaly = process_malware(out_dir)


Combined 10 conn.log files ...
normal     309917
Geodo      132835
Neris      104237
Cridex      98425
Virut       52217
Miuref      34432
Htbot       18919
Zeus        18751
Shifu       18241
Nsis-ay     12745
Tinba       11279
Name: label, dtype: int64
Combined DataFrame saved to /home/meryem.janati/lustre/nlp_team-um6p-st-sccs-id7fz1zvotk/IDS/janati/IDS/Datasets/ustc/Argus/timeout0.5
_____________________________________________________________
Processing timeout :  1 ... 
Combined 14 conn.log files ...
Combined 10 conn.log files ...
normal     309917
Geodo      132835
Neris      104079
Cridex      98425
Virut       52217
Miuref      34432
Htbot       18919
Zeus        18751
Shifu       18241
Nsis-ay     12745
Tinba       11279
Name: label, dtype: int64
Combined DataFrame saved to /home/meryem.janati/lustre/nlp_team-um6p-st-sccs-id7fz1zvotk/IDS/janati/IDS/Datasets/ustc/Argus/timeout1
_____________________________________________________________
Processing timeout :  2 ... 
Combined 

In [8]:

# Same combine-and-save steps as the per-timeout loop, applied once to the
# `default` directory.
print('Processing timeout default: ')
# NOTE(review): absolute, user-specific path; the f-prefix has no placeholders.
out_dir = f'/home/meryem.janati/lustre/nlp_team-um6p-st-sccs-id7fz1zvotk/IDS/janati/IDS/Datasets/ustc/Argus/default'
# Benign rows are labelled 'normal'; malware rows are labelled from their file name.
combined_df_normal = process_benign(out_dir)
combined_df_anomaly = process_malware(out_dir)

# Stack benign and malware rows into one frame with a fresh 0..n-1 index.
combined_df = pd.concat([combined_df_normal, combined_df_anomaly], ignore_index=True)

# Save to CSV (written inside the same directory the inputs came from)
combined_df.to_csv(f'{out_dir}/USTC-TFC16_argus_default.csv', index=False, header=True)

# Per-label row counts, as a quick sanity check of the merged frame.
print(combined_df['label'].value_counts())
print(f'Combined DataFrame saved to {out_dir}')
print('_____________________________________________________________')

Processing timeout default: 
Combined 14 conn.log files ...


  combined_df_anomaly = process_malware(out_dir)


Combined 10 conn.log files ...
normal     309917
Geodo      132835
Neris      104237
Cridex      98425
Virut       52217
Miuref      34432
Htbot       18919
Zeus        18751
Shifu       18241
Nsis-ay     12745
Tinba       11279
Name: label, dtype: int64
Combined DataFrame saved to /home/meryem.janati/lustre/nlp_team-um6p-st-sccs-id7fz1zvotk/IDS/janati/IDS/Datasets/ustc/Argus/default
_____________________________________________________________
