In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix



In [8]:
# Load the dataset in Colab: pick the CSV from the local machine via the upload widget.
import io

from google.colab import files

DATASET_NAME = "cryptography_dataset_processed.csv"

# files.upload() returns a dict keyed by the uploaded file names, holding each file's raw bytes.
upload = files.upload()

if DATASET_NAME in upload:
    # Parse the bytes that were just uploaded instead of re-reading DATASET_NAME from disk:
    # when a file with that name already exists, Colab stores the new upload under a
    # de-duplicated name (e.g. "... (2).csv"), so the on-disk path would yield the stale copy.
    df = pd.read_csv(io.BytesIO(upload[DATASET_NAME]))
else:
    # No upload under the expected name (e.g. the dialog was cancelled): use the copy on disk.
    df = pd.read_csv(DATASET_NAME)


Saving cryptography_dataset_processed.csv to cryptography_dataset_processed (2).csv


AttributeError: 'dict' object has no attribute 'columns'

In [9]:
# Drop the ElGamal and Serpent samples; only the remaining algorithms are modelled.
# .copy() makes the result an independent DataFrame: later cells add columns to `df`,
# and assigning into a filtered slice can raise pandas' SettingWithCopyWarning.
df = df.loc[~df['Algorithm'].isin(['ElGamal', 'Serpent'])].copy()

print(f"Dataset after removing ElGamal and Serpent: {df.shape}")

Dataset after removing ElGamal and Serpent: (100000, 5)


In [10]:
# Feature 1: Ciphertext length (number of characters)
df["CipherLength"] = df["Ciphertext"].apply(len)


# Feature 2: HexRatio - share of characters that are hexadecimal *letters* (a-f / A-F).
# Digits 0-9 are not counted by this feature.
def hex_letter_ratio(text):
    """Return the fraction of characters in ``text`` that are hex letters (a-f, A-F).

    Args:
        text: The ciphertext string to inspect.

    Returns:
        A float in [0, 1]; 0.0 for an empty string (instead of raising
        ZeroDivisionError).
    """
    if not text:
        return 0.0
    return sum(c in "abcdefABCDEF" for c in text) / len(text)


df["HexRatio"] = df["Ciphertext"].apply(hex_letter_ratio)

# Feature 3: Entropy Calculation
def calculate_entropy(text):
    """Return the Shannon entropy of ``text`` in bits per character.

    Args:
        text: The string whose character distribution is measured.

    Returns:
        ``-sum(p * log2(p))`` taken over the relative frequency ``p`` of every
        distinct character, as a float. Zero for an empty string and for a
        string consisting of a single repeated character.
    """
    from collections import Counter  # local import keeps this cell self-contained

    total = len(text)
    if total == 0:
        # No characters, no uncertainty; also avoids dividing by zero below.
        return 0.0

    # Count all characters in a single pass instead of calling text.count()
    # once per distinct character, which rescans the whole string each time.
    counts = np.fromiter(Counter(text).values(), dtype=float)
    probabilities = counts / total
    return float(-np.sum(probabilities * np.log2(probabilities)))

# Shannon entropy of every ciphertext, stored as the "Entropy" feature column
df["Entropy"] = df["Ciphertext"].map(calculate_entropy)


In [11]:
# Map every algorithm name to an integer class id.
# The encoder is kept so the ids can be translated back to names later.
label_encoder = LabelEncoder()
label_encoder.fit(df["Algorithm"])
df["AlgorithmEncoded"] = label_encoder.transform(df["Algorithm"])
print("Algorithms:", label_encoder.classes_)


Algorithms: ['AES' 'Blowfish' 'Camellia' 'ChaCha20' 'DES' 'ECC' 'GOST' 'RC4' 'RSA'
 'Triple DES']


In [12]:
# Model inputs (four numeric feature columns) and the encoded class label to predict
features = df.loc[:, ["CipherLength", "HexRatio", "Key Length (bits)", "Entropy"]]
target = df.loc[:, "AlgorithmEncoded"]


In [13]:
# Hold out 20% of the rows for testing (fixed random_state for a repeatable split)
# and convert each resulting piece to a NumPy array for TensorFlow
X_train, X_test, y_train, y_test = (
    np.array(part)
    for part in train_test_split(features, target, test_size=0.2, random_state=42)
)

# Report the resulting shapes as a sanity check
print("Training data shape:", X_train.shape)
print("Test data shape:", X_test.shape)


Training data shape: (80000, 4)
Test data shape: (20000, 4)


In [14]:
# Seed Python, NumPy and the backend RNGs so weight initialisation and dropout are
# repeatable between runs (as far as the backend allows).
keras.utils.set_random_seed(42)

# Define a Sequential Neural Network Model
model = Sequential([
    # Explicit Input object rather than `input_shape=` on the first Dense layer,
    # which Keras warns about for Sequential models.
    keras.Input(shape=(X_train.shape[1],)),  # One input per feature column
    Dense(128, activation="relu"),  # First hidden layer
    Dropout(0.2),  # Prevent overfitting
    Dense(64, activation="relu"),  # Second hidden layer
    Dropout(0.2),
    Dense(32, activation="relu"),  # Third hidden layer
    Dense(len(label_encoder.classes_), activation="softmax")  # Output layer: one probability per class
])

# Compile the Model
# sparse_categorical_crossentropy expects integer class ids (not one-hot vectors) as targets.
model.compile(optimizer=Adam(learning_rate=0.001),
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

# Print the Model Summary
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
# Fit for 50 epochs with mini-batches of 16; the held-out split is scored after
# every epoch and the per-epoch metrics are kept in `history`
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=16,
    epochs=50,
    validation_data=(X_test, y_test),
)


Epoch 1/50
[1m5000/5000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 3ms/step - accuracy: 0.3635 - loss: 1.6281 - val_accuracy: 0.8939 - val_loss: 0.2191
Epoch 2/50
[1m5000/5000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - accuracy: 0.8670 - loss: 0.2455 - val_accuracy: 0.8907 - val_loss: 0.1654
Epoch 3/50
[1m5000/5000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 3ms/step - accuracy: 0.8852 - loss: 0.1946 - val_accuracy: 0.8997 - val_loss: 0.1560
Epoch 4/50
[1m5000/5000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 3ms/step - accuracy: 0.8876 - loss: 0.1873 - val_accuracy: 0.8923 - val_loss: 0.1565
Epoch 5/50
[1m5000/5000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - accuracy: 0.8871 - loss: 0.1854 - val_accuracy: 0.8963 - val_loss: 0.1501
Epoch 6/50
[1m5000/5000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 3ms/step - accuracy: 0.8873 - loss: 0.1822 - val_accuracy: 0.8916 - val_loss: 0.1629
Epoch 7/50

In [16]:
# Score the trained model on the held-out split: evaluate() yields the loss
# followed by the compiled metric (accuracy)
test_loss, test_acc = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {test_acc:.2f}")


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9040 - loss: 0.1439
Test Accuracy: 0.90


In [17]:
# Turn the softmax outputs into hard class ids (index of the largest probability per row)
y_pred = model.predict(X_test).argmax(axis=1)

# Per-class precision / recall / F1, labelled with the original algorithm names
print("\nClassification Report:")
print(classification_report(y_true=y_test, y_pred=y_pred, target_names=label_encoder.classes_))

# Rows are true classes, columns are predicted classes
print("\nConfusion Matrix:")
print(confusion_matrix(y_true=y_test, y_pred=y_pred))


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step

Classification Report:
              precision    recall  f1-score   support

         AES       0.52      0.91      0.66      2100
    Blowfish       1.00      1.00      1.00      1973
    Camellia       0.99      1.00      1.00      1992
    ChaCha20       1.00      1.00      1.00      1974
         DES       1.00      1.00      1.00      2023
         ECC       1.00      1.00      1.00      1917
        GOST       1.00      1.00      1.00      2032
         RC4       0.49      0.09      0.16      1951
         RSA       1.00      1.00      1.00      2000
  Triple DES       1.00      1.00      1.00      2038

    accuracy                           0.90     20000
   macro avg       0.90      0.90      0.88     20000
weighted avg       0.90      0.90      0.88     20000


Confusion Matrix:
[[1904    0    6    0    0    0    0  190    0    0]
 [   0 1973    0    0    0    0    0    0    0    0]
 [   0    0 1991 