In [85]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Read the complete dataset from the CSV file in the working directory
df = pd.read_csv("fullDataset.csv")

# The 'Cipher' column is the prediction target; every other column is a feature
y = df['Cipher']
X = df.drop(columns=['Cipher'])

# Work out up front which feature columns hold strings (pandas 'object' dtype)
text_columns = [name for name in X.columns if X[name].dtype == 'object']

# Replace each string column with integer codes and keep the fitted encoder,
# keyed by column name, so the same mapping can be reapplied to new samples.
# NOTE(review): the integer codes carry no meaning beyond identity -- confirm
# this is the intended representation for free-text columns.
label_encoders = {}
for column in text_columns:
    label_encoder = LabelEncoder()
    X[column] = label_encoder.fit_transform(X[column])
    label_encoders[column] = label_encoder

# Single seed shared by the split and the classifier. Without it both are
# re-randomised on every run, so the accuracy printed below could never be
# reproduced after a kernel restart.
RANDOM_STATE = 42

# Hold out 25% of the rows as a test set; the seed pins which rows those are
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=RANDOM_STATE
)

# Fit a decision tree on the training rows. The tree is seeded as well so
# that repeated fits on the same data produce the same model.
model = DecisionTreeClassifier(random_state=RANDOM_STATE)
model.fit(X_train, y_train)

# Predict the held-out rows
predictions = model.predict(X_test)

# Fraction of held-out rows whose predicted label matches the true label
score = accuracy_score(y_test, predictions)
print(f"Accuracy: {score}")

# Prepare new data for prediction (raw, un-encoded values)
new_data = {'cipher_text': 'dlgc mq cs ywexsre s hgnrr dlgxo gd amepb reto ambocn er kpj', 'some_numerical_feature': 0.04167}

# Code used for a categorical value the encoder never saw during fitting.
# LabelEncoder only produces codes from 0 upwards, so -1 cannot collide with
# a real category.
UNSEEN_CATEGORY_CODE = -1

# Encode categorical values on a copy so `new_data` keeps the raw input.
new_data_encoded = dict(new_data)
for column, encoder in label_encoders.items():
    if column not in new_data_encoded:
        continue
    raw_value = new_data_encoded[column]
    if raw_value in set(encoder.classes_):
        # Transform and take the single resulting code
        new_data_encoded[column] = encoder.transform([raw_value])[0]
    else:
        # encoder.transform() raises on unseen values; warn and fall back to
        # the sentinel instead so a genuinely new sample can still be scored.
        warnings.warn(
            f"Value {raw_value!r} for column {column!r} was not seen during "
            f"training; encoding it as {UNSEEN_CATEGORY_CODE}."
        )
        new_data_encoded[column] = UNSEEN_CATEGORY_CODE

# Surface any mismatch between the supplied keys and the training columns.
# The reindex below would otherwise drop unknown keys and invent zeros for
# absent ones without any indication that the prediction is based on them.
training_columns = list(X.columns)
ignored_features = [name for name in new_data_encoded if name not in training_columns]
missing_features = [name for name in training_columns if name not in new_data_encoded]
if ignored_features:
    warnings.warn(
        f"Ignoring features that are not training columns: {ignored_features}"
    )
if missing_features:
    warnings.warn(
        f"Training columns missing from new data, filled with 0: {missing_features}"
    )

# One-row DataFrame with exactly the training columns, in training order
new_data_df = pd.DataFrame([new_data_encoded]).reindex(columns=X.columns, fill_value=0)

# Make prediction
prediction = model.predict(new_data_df)
print(f"Prediction: {prediction[0]}")


Accuracy: 0.6360153256704981
Prediction: Vigenere
