In [2]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import joblib
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# 📥 Step 1: Load CSVs for multiple users
def load_multiple_user_data():
    num_users = int(input("Enter number of users (e.g. 4): "))
    dataframes = []
    for i in range(num_users):
        path = input(f"Enter CSV path for user {i+1} (e.g., user{i+1}.csv): ").strip('"').strip()
        if not os.path.exists(path):
            print(f"❌ File '{path}' does not exist. Skipping.")
            continue
        df = pd.read_csv(path)
        df['user'] = f'user{i+1}'  # Assign user label if not in file
        dataframes.append(df)
    return pd.concat(dataframes, ignore_index=True) if dataframes else pd.DataFrame()

# 📦 Step 2: Process the dataset
def prepare_data(df):
    if df.empty:
        raise ValueError("No valid data loaded.")
    
    # 🔍 Remove missing values
    df.dropna(inplace=True)

    # 🎯 Separate features and labels
    X = df.drop(columns=['user'])
    y = df['user']

    # 🔢 Encode labels
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    if len(y_encoded) == 0:
        raise ValueError("No labels found after encoding.")
    
    # 🏷️ One-hot encode labels
    y_categorical = to_categorical(y_encoded)

    # 💾 Save encoder
    joblib.dump(le, r"C:\Users\sagni\Downloads\KeyStrock Security\Data\label_encoder.pkl")

    return X, y_categorical

# 🧠 Step 3: Build a simple model
def build_model(input_shape, num_classes):
    model = Sequential([
        Dense(128, activation='relu', input_shape=(input_shape,)),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# 🚀 Run everything
try:
    df = load_multiple_user_data()
    X, y = prepare_data(df)
    
    model = build_model(X.shape[1], y.shape[1])
    model.fit(X, y, epochs=50, batch_size=32, validation_split=0.2,
              callbacks=[EarlyStopping(patience=5, restore_best_weights=True)])
    
    model.save(r"C:\Users\sagni\Downloads\KeyStrock Security\Data\keystroke_model.h5")
    print("✅ Model training complete and saved.")

except Exception as e:
    print(f"⚠️ Error: {e}")


Enter number of users (e.g. 4):  4
Enter CSV path for user 1 (e.g., user1.csv):  C:\Users\sagni\Downloads\KeyStrock Security\Data\Ankush_Ramteke.csv
Enter CSV path for user 2 (e.g., user2.csv):  C:\Users\sagni\Downloads\KeyStrock Security\Data\Sagnik_Patra.csv
Enter CSV path for user 3 (e.g., user3.csv):  C:\Users\sagni\Downloads\KeyStrock Security\Data\Rahi_Pandey.csv
Enter CSV path for user 4 (e.g., user4.csv):  C:\Users\sagni\Downloads\KeyStrock Security\Data\Annan_Sadr.csv


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.1250 - loss: 1.4175 - val_accuracy: 0.0000e+00 - val_loss: 1.4148
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step - accuracy: 0.1250 - loss: 1.3740 - val_accuracy: 0.0000e+00 - val_loss: 1.4435
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step - accuracy: 0.3125 - loss: 1.3712 - val_accuracy: 0.0000e+00 - val_loss: 1.4688
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 131ms/step - accuracy: 0.1875 - loss: 1.3925 - val_accuracy: 0.0000e+00 - val_loss: 1.4909
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step - accuracy: 0.3750 - loss: 1.3404 - val_accuracy: 0.0000e+00 - val_loss: 1.5135
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step - accuracy: 0.3750 - loss: 1.3424 - val_accuracy: 0.0000e+00 - val_loss: 1.5372




✅ Model training complete and saved.


In [3]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
import joblib

# Step 1: Input number of users and file paths
num_users = int(input("Enter number of users (e.g. 4): "))
file_paths = []
for i in range(num_users):
    path = input(f"Enter CSV path for user {i+1} (e.g., user{i+1}.csv): ")
    file_paths.append(path)

# Step 2: Load all CSVs and tag with user names
all_data = []
for path in file_paths:
    df = pd.read_csv(path)
    user_name = os.path.splitext(os.path.basename(path))[0]
    df['User'] = user_name
    all_data.append(df)

# Step 3: Combine all into one DataFrame
data = pd.concat(all_data, ignore_index=True)

# Step 4: Encode user labels
label_encoder = LabelEncoder()
data['UserEncoded'] = label_encoder.fit_transform(data['User'])

# Optional: Save encoder for prediction use
joblib.dump(label_encoder, "label_encoder.pkl")

# Step 5: Prepare features and labels
X = data.drop(columns=['User', 'UserEncoded'])
y = data['UserEncoded']

# Step 6: Normalize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Save scaler for inference
joblib.dump(scaler, 'scaler.pkl')

# Step 7: One-hot encode the target
y_categorical = to_categorical(y)

# Step 8: Split into training and testing
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y_categorical, test_size=0.2, random_state=42)

# Step 9: Define model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(num_users, activation='softmax')
])

# Step 10: Compile and train model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=50, validation_data=(X_val, y_val), verbose=1)

# Step 11: Save the model
model.save("keystroke_auth_model.h5")

print("✅ Model training complete and saved.")


Enter number of users (e.g. 4):  4
Enter CSV path for user 1 (e.g., user1.csv):  C:\Users\sagni\Downloads\KeyStrock Security\Data\Annan_Sadr.csv
Enter CSV path for user 2 (e.g., user2.csv):  C:\Users\sagni\Downloads\KeyStrock Security\Data\Rahi_Pandey.csv
Enter CSV path for user 3 (e.g., user3.csv):  C:\Users\sagni\Downloads\KeyStrock Security\Data\Ankush_Ramteke.csv
Enter CSV path for user 4 (e.g., user4.csv):  C:\Users\sagni\Downloads\KeyStrock Security\Data\Sagnik_Patra.csv


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.0625 - loss: 1.5706 - val_accuracy: 0.5000 - val_loss: 1.7310
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 137ms/step - accuracy: 0.1250 - loss: 1.4939 - val_accuracy: 0.5000 - val_loss: 1.6956
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step - accuracy: 0.1250 - loss: 1.4207 - val_accuracy: 0.5000 - val_loss: 1.6614
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step - accuracy: 0.1875 - loss: 1.3510 - val_accuracy: 0.5000 - val_loss: 1.6275
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step - accuracy: 0.3125 - loss: 1.2854 - val_accuracy: 0.5000 - val_loss: 1.5940
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step - accuracy: 0.5000 - loss: 1.2240 - val_accuracy: 0.2500 - val_loss: 1.5611
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[



✅ Model training complete and saved.


In [None]:
import pandas as pd
import numpy as np
from keras.models import load_model
from sklearn.preprocessing import LabelEncoder
import joblib

# Paths to your saved model and label encoder
MODEL_PATH = r"C:\Users\sagni\Downloads\KeyStrock Security\Data\keystroke_model.h5"
ENCODER_PATH = r"C:\Users\sagni\Downloads\KeyStrock Security\Data\label_encoder.pkl"

def load_user_input(file_path):
    df = pd.read_csv(file_path)

    # Drop columns that are not features
    if 'user' in df.columns:
        df = df.drop('user', axis=1)

    return df.values

def main():
    # Load the trained model
    print("🔁 Loading model and label encoder...")
    model = load_model(MODEL_PATH)
    label_encoder = joblib.load(ENCODER_PATH)

    # Ask for the path to the new keystroke input CSV
    test_path = input("📄 Enter path to the new keystroke input CSV file: ").strip()

    try:
        test_data = load_user_input(test_path)

        # Predict user
        predictions = model.predict(test_data)
        predicted_classes = np.argmax(predictions, axis=1)
        decoded_users = label_encoder.inverse_transform(predicted_classes)

        print("\n🔍 Prediction Results:")
        for i, user in enumerate(decoded_users):
            print(f"  ➤ Input {i+1} predicted as: {user}")
    except Exception as e:
        print(f"❌ Error: {e}")

if __name__ == "__main__":
    main()




🔁 Loading model and label encoder...
