In [None]:
import pandas as pd
df = pd.read_csv('room_1min.csv')

df['TIMESTAMP'] = pd.to_datetime(df['TIMESTAMP'])

grouped = df.groupby(['SADDR', 'DADDR', 'PROTOCOL'])

def calculate_inter_packet_time(df_group):
    df_group = df_group.sort_values(by='TIMESTAMP')
    inter_packet_time = (df_group['TIMESTAMP'] - df_group['TIMESTAMP'].shift()).dropna().mean()
    return inter_packet_time.total_seconds() * 1e3 if inter_packet_time is not pd.NaT else None

network_flows = grouped.agg({'I-COUNT': 'sum', 'O-COUNT': 'sum'})

network_flows['SBYTES'] = grouped.apply(lambda x: x[x['TYPE'] == 'IN']['BYTES'].sum())
network_flows['SBYTES_MEAN'] = grouped.apply(lambda x: x[x['TYPE'] == 'IN']['BYTES'].mean())
network_flows['DBYTES'] = grouped.apply(lambda x: x[x['TYPE'] == 'OUT']['BYTES'].sum())
network_flows['DBYTES_MEAN'] = grouped.apply(lambda x: x[x['TYPE'] == 'OUT']['BYTES'].mean())

network_flows['TIME_DIFF_SEC'] = grouped.apply(lambda x: (x['TIMESTAMP'].max() - x['TIMESTAMP'].min()).total_seconds())

network_flows['SINKPT'] = grouped.apply(lambda x: calculate_inter_packet_time(x[x['TYPE'] == 'IN']))
network_flows['DINKPT'] = grouped.apply(lambda x: calculate_inter_packet_time(x[x['TYPE'] == 'OUT']))

network_flows.reset_index(inplace=True)
print(network_flows)


In [16]:
import pandas as pd

df = pd.read_csv('room_30s.csv')
df['TIMESTAMP'] = pd.to_datetime(df['TIMESTAMP'])
grouped = df.groupby(['SADDR', 'DADDR', 'PROTOCOL'])
print(df.columns)


Index(['TYPE', 'SADDR', 'DADDR', 'PROTOCOL', 'I-COUNT', 'O-COUNT', 'TIMESTAMP',
       'BYTES'],
      dtype='object')


In [22]:
def calculate_inter_packet_time(df_group, packet_type):
    if df_group.empty:
        return 0
    df_group = df_group.sort_values(by='TIMESTAMP')
    inter_packet_time = (df_group['TIMESTAMP'] - df_group['TIMESTAMP'].shift()).dropna().abs().mean()
    return inter_packet_time.total_seconds() * 1e3 if inter_packet_time is not pd.NaT else 0

network_flows = grouped.agg({'I-COUNT': 'sum', 'O-COUNT': 'sum'})
network_flows['spkts'] = network_flows['I-COUNT']
network_flows['dpkts'] = network_flows['O-COUNT']
network_flows['sbytes'] = grouped.apply(lambda x: x[x['TYPE'] == 'IN']['BYTES'].sum())
network_flows['dbytes'] = grouped.apply(lambda x: x[x['TYPE'] == 'OUT']['BYTES'].sum())
network_flows['smean'] = grouped.apply(lambda x: x[x['TYPE'] == 'IN']['BYTES'].mean())
network_flows['dmean'] = grouped.apply(lambda x: x[x['TYPE'] == 'OUT']['BYTES'].mean())
network_flows['sinkpt'] = grouped.apply(lambda x: calculate_inter_packet_time(x[x['TYPE'] == 'IN'], 'IN'))
network_flows['dinkpt'] = grouped.apply(lambda x: calculate_inter_packet_time(x[x['TYPE'] == 'OUT'], 'OUT'))

# Drop 'SADDR' and 'DADDR' columns and rename 'PROTOCOL' column to 'PROTO'
network_flows.drop(columns=['I-COUNT' , 'O-COUNT'], inplace=True)
network_flows.rename(columns={'PROTOCOL': 'proto'}, inplace=True)

# Add 'ID' column
network_flows['id'] = range(1, len(network_flows) + 1)

# Make 'id' the first column
cols = list(network_flows.columns)
cols = [cols[-1]] + cols[:-1]
network_flows = network_flows[cols]

# Reset index without adding a new column
network_flows.reset_index(drop=True, inplace=True)

print(network_flows)

    id  spkts  dpkts  sbytes  dbytes   smean   dmean     sinkpt     dinkpt
0    1      1      0      72       0    72.0     NaN      0.000      0.000
1    2      0      1       0      72     NaN    72.0      0.000      0.000
2    3     61     61     416     364    52.0    52.0   4140.572   4825.170
3    4      9     14     156     156    52.0    52.0   1024.432   1023.978
4    5     26     13     795     477   159.0   159.0    338.715    709.565
5    6     13     26     477     795   159.0   159.0    709.566    338.718
6    7     40     42   29922   29922  4987.0  4987.0   1126.381   1126.385
7    8      1      2      52      52    52.0    52.0      0.000      0.000
8    9      1      2      52      52    52.0    52.0      0.000      0.000
9   10      1      2      52      52    52.0    52.0      0.000      0.000
10  11     73    218     483    1173    69.0    69.0   2721.725   1642.690
11  12    209    180    1200    1040    80.0    80.0   1514.715   1724.312
12  13      1      2     