In [None]:
import os
import pandas as pd
import numpy as np
from sklearn import tree, linear_model
from sklearn.feature_selection import SelectFromModel
import joblib
import warnings
warnings.filterwarnings(action = 'ignore')
from sklearn.naive_bayes import GaussianNB
from sklearn.cluster import KMeans
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import confusion_matrix,f1_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import roc_curve, auc, confusion_matrix, classification_report,accuracy_score

In [None]:
def LoadForecasting_analysis(y_true, y_pred, labels, ymap=None, figsize=(10,10)):
    """Draw a confusion-matrix heatmap annotated with row percentages and counts.

    Parameters
    ----------
    y_true, y_pred : iterable
        True and predicted class labels, in the same order.
    labels : iterable
        Class labels, in the order they should appear on both axes.
    ymap : mapping, optional
        When given, every value in ``y_true``, ``y_pred`` and ``labels`` is
        replaced by ``ymap[value]`` before the matrix is computed.
    figsize : tuple, optional
        Size passed to ``plt.subplots``.

    Returns
    -------
    None
        The heatmap is drawn on a newly created figure; nothing is returned.

    Notes
    -----
    Diagonal cells read ``"<pct>%\\n<count>/<row total>"``, non-zero
    off-diagonal cells read ``"<pct>%\\n<count>"`` and zero off-diagonal cells
    are left blank. Percentages are relative to the row (true-label) total.
    """
    # Identity check: `!= None` is evaluated element-wise by array-like maps.
    if ymap is not None:
        y_pred = [ymap[yi] for yi in y_pred]
        y_true = [ymap[yi] for yi in y_true]
        labels = [ymap[yi] for yi in labels]
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    cm_sum = np.sum(cm, axis=1, keepdims=True)
    # A label with no true samples has a zero row total; report 0% for it
    # instead of dividing by zero and annotating the cell with "nan%".
    cm_perc = np.divide(
        cm, cm_sum,
        out=np.zeros(cm.shape, dtype=float),
        where=cm_sum != 0,
    ) * 100
    # Object dtype keeps each annotation whole; a fixed-width string array
    # derived from the integer matrix silently truncates long annotations.
    annot = np.empty(cm.shape, dtype=object)
    nrows, ncols = cm.shape
    for i in range(nrows):
        # Scalar row total (cm_sum[i] alone is a 1-element array).
        row_total = cm_sum[i, 0]
        for j in range(ncols):
            c = cm[i, j]
            p = cm_perc[i, j]
            if i == j:
                annot[i, j] = '%.1f%%\n%d/%d' % (p, c, row_total)
            elif c == 0:
                annot[i, j] = ''
            else:
                annot[i, j] = '%.1f%%\n%d' % (p, c)
    cm = pd.DataFrame(cm, index=labels, columns=labels)
    cm.index.name = 'Actual'
    cm.columns.name = 'Predicted'
    fig, ax = plt.subplots(figsize=figsize)
    # fmt='' because the annotations are already fully formatted strings.
    sns.heatmap(cm, annot=annot, fmt='', ax=ax)

In [None]:
# Read the training set from the CSV file under dataset/.
df = pd.read_csv(filepath_or_buffer='dataset/TrainingSet.csv')

In [None]:
# Show the loaded frame as this cell's output.
df

In [None]:
# List the column labels of the loaded frame.
df.columns

In [None]:
# Summary statistics for the loaded frame.
df.describe()

In [None]:
# Print the frame's column dtypes and non-null counts.
df.info()

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Distinct values found in the attack_cat column.
df['attack_cat'].unique()

In [None]:
# Number of rows for each attack_cat value.
df['attack_cat'].value_counts()


In [None]:
# Keep only the rows labelled 'Normal' or 'Scaling Attack'.
df1 = df[df['attack_cat'].isin(['Normal', 'Scaling Attack'])]

In [None]:
# Show the Normal / Scaling Attack subset as this cell's output.
df1

In [None]:
# Check which attack_cat values are present in df1.
df1['attack_cat'].unique()

In [None]:
# Keep only the rows labelled 'Smooth Curve Attack' or 'Pulse Attack'.
df2 = df[df['attack_cat'].isin(['Smooth Curve Attack', 'Pulse Attack'])]

In [None]:
# Show the Smooth Curve Attack / Pulse Attack subset as this cell's output.
df2

In [None]:
# Check which attack_cat values are present in df2.
df2['attack_cat'].unique()

In [None]:
# Combine the two subsets with an outer join; with no `on=` given, pandas
# joins on every column the two frames share.
# NOTE(review): df1 and df2 are filtered on different attack_cat values, so
# presumably no rows pair up and this is just a row-wise union; pd.concat
# would say that more directly -- confirm nothing relies on the row order
# the merge produces before switching.
final_data = pd.merge(df1, df2, how='outer')

In [None]:
# Show the combined frame as this cell's output.
final_data

In [None]:
# Summary statistics for the combined frame.
final_data.describe()

In [None]:
# Print the combined frame's column dtypes and non-null counts.
final_data.info()

In [None]:
# A cell renders only its last expression, so the list of categories is
# printed explicitly; otherwise its value would be computed and discarded.
print(final_data['attack_cat'].unique())
# Rows per category in the combined frame (rendered as the cell output).
final_data['attack_cat'].value_counts()

In [None]:
# Draw a grid of per-column histograms for the combined frame.
final_data.hist(figsize=(15, 10))
# Adjust subplot spacing, then render the figure.
plt.tight_layout()
plt.show()

In [None]:
# Bar chart of the number of rows in each attack category.
plt.figure(figsize=(10, 6))
# The column is passed by keyword: current seaborn takes the first positional
# argument as `data`, not as the x variable, so a positional Series is no
# longer read as the categories to count along the x axis.
sns.countplot(x='attack_cat', data=final_data)
plt.title('Attack Category Distribution')
plt.xlabel('Attack Category')
plt.ylabel('Count')
# Rotate the category names so the tick labels do not overlap.
plt.xticks(rotation=45)
plt.show()


In [None]:
# Box plot of every column except the id and attack_cat columns.
plt.figure(figsize=(15, 10))
sns.boxplot(data=final_data.drop(['id', 'attack_cat'], axis='columns'))
# Rotate the column names so the tick labels stay readable.
plt.xticks(rotation=45)
plt.show()


In [None]:
# Correlation matrix of the numeric columns.
plt.figure(figsize=(12, 8))
# attack_cat holds strings, and DataFrame.corr() raises on non-numeric columns
# in pandas >= 2.0, so restrict the frame to numeric/bool columns first
# (select_dtypes works on older pandas versions as well).
sns.heatmap(
    final_data.select_dtypes(include=['number', 'bool']).corr(),
    annot=True,
    cmap='coolwarm',
)
plt.title('Correlation Matrix')
plt.show()

In [None]:
# Scatter plot of dur against rate, coloured by attack category.
plt.figure(figsize=(8, 6))
# x and y are keyword-only in seaborn >= 0.12; passing them positionally
# raises TypeError there, so name the columns and pass the frame as `data`.
sns.scatterplot(x='dur', y='rate', hue='attack_cat', data=final_data)
plt.title('Duration vs Rate')
plt.xlabel('Duration')
plt.ylabel('Rate')
plt.show()


In [None]:
# Write the combined frame to data.csv in the current working directory.
# NOTE(review): index=False is not passed, so the row index is written as an
# extra unnamed leading column -- confirm downstream readers expect that.
final_data.to_csv('data.csv')