<a href="https://colab.research.google.com/github/maryamelnahas/CSEN711-MS1-Weir-Algorithm/blob/main/BINF711_MS1_Password_Strength_Analyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import pandas as pd
import numpy as np
import math
import os
from google.colab import files
import hashlib
import secrets

# in-memory user database
user_database = {}

# This dictionary (loaded from your file) acts as the attacker's knowledge
# We define it globally so all functions can access it after it's loaded.
attacker_dictionary = {}

# Max dictionary size from your code
MAX_DICT_SIZE = 50000

# Password policy settings
MAX_FAILED_ATTEMPTS = 3
PASSWORD_EXPIRY_COUNT = 10 # Number of valid logins before expiry

# Map for reversing common l33t-speak substitutions
L33T_MAP = {
    '@': 'a', '4': 'a',
    '8': 'b',
    '(': 'c',
    '3': 'e',
    '9': 'g', '6': 'g',
    '1': 'i', '!': 'i', '|': 'i',
    '0': 'o',
    '5': 's', '$': 's',
    '7': 't', '+': 't',
    'z': 's'
}

# Thresholds are based on log2(Guess Count) for mapping.
STRENGTH_THRESHOLDS = {
    'very weak': 15,     # < ~32,000 guesses
    'weak': 25,          # < ~33 million guesses
    'medium': 35,        # < ~34 billion guesses
    'strong': 50,        # < ~1 quadrillion guesses
    'very strong': 50    # > ~1 quadrillion guesses
}

# --- 1. Password Strength Analyzer (Your Existing Code) ---

def load_ranked_dictionary(file_name, max_size):
    """Loads the attacker's dictionary from the uploaded file."""
    print(f"Loading ranked dictionary from {file_name}...")
    passwords = {}
    rank = 1
    try:
        with open(file_name, 'r', encoding='utf-8', errors='ignore') as f:
            for line in f:
                if rank > max_size:
                    break
                password = line.strip().split('\t')[0].lower()
                if password and password not in passwords:
                    passwords[password] = rank
                    rank += 1
        print(f"Loaded {len(passwords)} unique passwords into ranked dictionary.")
        return passwords
    except FileNotFoundError:
        print(f"Error: File '{file_name}' not found. Using a small placeholder dictionary.")
        return {"password": 1, "123456": 2, "qwerty": 3, "secret": 4, "iloveyou": 5}


def demangle_l33t(password):
    """Reverses common l33t-speak substitutions."""
    password_lower = password.lower()
    demangled = "".join(L33T_MAP.get(char, char) for char in password_lower)
    return demangled

def get_brute_force_entropy(password):
    """Calculates the brute-force guess count."""
    charset_size = 0
    if any(c.islower() for c in password): charset_size += 26
    if any(c.isupper() for c in password): charset_size += 26
    if any(c.isdigit() for c in password): charset_size += 10
    if any(c in "!@#$%^&*()_+-=[]{}|;:,.<>?~" for c in password): charset_size += 32
    if charset_size == 0: return 1.0
    return float(charset_size ** len(password))


def calculate_password_complexity(password, dictionary):
    """Estimates password complexity (guess count)"""
    if not password: return 1.0
    password_lower = password.lower()
    guess_counts = [get_brute_force_entropy(password)]

    # Attack 1: Direct Dictionary
    if password_lower in dictionary:
        guess_counts.append(dictionary[password_lower])
    # Attack 2: Capitalization
    if password_lower in dictionary and password != password_lower:
        guess_counts.append(dictionary[password_lower] * 10)
    # Attack 3: L33t-speak
    demangled_password = demangle_l33t(password)
    if demangled_password in dictionary:
        guess_counts.append(dictionary[demangled_password] * 100)
    # Attack 4: Reversed
    reversed_password = password_lower[::-1]
    if reversed_password in dictionary:
        guess_counts.append(dictionary[reversed_password] * 50)

    return max(1.0, min(guess_counts))


def get_strength_rating(guess_count):
    """Maps the final guess count to a human-readable strength rating."""
    try:
        complexity_bits = math.log2(guess_count)
    except ValueError:
        complexity_bits = 0

    if complexity_bits < STRENGTH_THRESHOLDS['very weak']: return "Very Weak"
    elif complexity_bits < STRENGTH_THRESHOLDS['weak']: return "Weak"
    elif complexity_bits < STRENGTH_THRESHOLDS['medium']: return "Medium"
    elif complexity_bits < STRENGTH_THRESHOLDS['strong']: return "Strong"
    else: return "Very Strong"

# --- 2. Hashing and Verification (New) ---

def get_password_hash(password, salt):
    """Hashes a password with a salt using PBKDF2."""
    # PBKDF2 is a standard for securely hashing passwords.
    # 100000 iterations is a good minimum.
    return hashlib.pbkdf2_hmac('sha256', password.encode('utf-8'), salt, 100000)

def verify_password(stored_hash, stored_salt, provided_password):
    """Checks if the provided password matches the stored hash."""
    new_hash = get_password_hash(provided_password, stored_salt)
    return secrets.compare_digest(new_hash, stored_hash)

# --- 3. Login System Functions (New) ---

def sign_up(username, password):
    """
    Creates a new user account.
    Enforces the password strength policy.
    """
    global user_database, attacker_dictionary

    # Check if username is taken
    if username in user_database:
        return f"🚨 Error: Username '{username}' already exists."

    # --- Password Policy Enforcement ---
    guess_count = calculate_password_complexity(password, attacker_dictionary)
    strength = get_strength_rating(guess_count)

    if strength in ["Very Weak", "Weak"]:
        return f"🚨 Error: Password is too weak ({strength}). Please choose a stronger password."
    # --- End of Policy ---

    # Generate a new salt and hash the password
    salt = os.urandom(16)
    password_hash = get_password_hash(password, salt)

    # Store the new user
    user_database[username] = {
        "password_hash": password_hash,
        "salt": salt,
        "is_locked": False,
        "failed_attempts": 0,
        "logins_remaining": PASSWORD_EXPIRY_COUNT
    }

    return f"✅ Success: Account '{username}' created. (Strength: {strength})"

def login(username, password):
    """
    Logs in a user, checking for lockouts, expiry, and correct password.
    """
    global user_database

    # 1. Check if user exists
    if username not in user_database:
        return "Login Failed: Invalid username or password." # Generic error

    user_data = user_database[username]

    # 2. Check for lockout
    if user_data["is_locked"]:
        return "Login Failed: Account is locked due to too many failed attempts."

    # 3. Check for password expiry
    if user_data["logins_remaining"] <= 0:
        return "Login Failed: Password has expired. Please reset."

    # 4. Verify the password
    if verify_password(user_data["password_hash"], user_data["salt"], password):
        # --- SUCCESS ---
        # Reset failed attempts
        user_data["failed_attempts"] = 0
        # Decrement expiry counter
        user_data["logins_remaining"] -= 1

        return f"✅ Login Successful. You have {user_data['logins_remaining']} logins remaining."

    else:
        # --- FAILURE ---
        user_data["failed_attempts"] += 1

        # Check if this failure triggers a lockout
        if user_data["failed_attempts"] >= MAX_FAILED_ATTEMPTS:
            user_data["is_locked"] = True
            return "Login Failed: Invalid password. Your account is now locked."

        attempts_left = MAX_FAILED_ATTEMPTS - user_data['failed_attempts']
        return f"Login Failed: Invalid password. You have {attempts_left} attempt(s) left."

def generate_random_strong_password():
    """
    Generates a new, random password that is guaranteed
    to pass the password strength policy.
    """
    global attacker_dictionary

    print("Generating a strong password...")

    while True:
        # Generate a 16-character password with letters, digits, and symbols
        alphabet = secrets.token_urlsafe(16) # A good mix of chars
        password = ''.join(secrets.choice(alphabet) for i in range(16))

        # Verify its strength
        guess_count = calculate_password_complexity(password, attacker_dictionary)
        strength = get_strength_rating(guess_count)

        if strength in ["Strong", "Very Strong"]:
            print("...Success! Found a strong password.")
            return password
        # If not strong, the loop continues and tries again

# --- Main Execution Block (for Colab) ---

if __name__ == '__main__':

    # 1. Upload the Dataset
    print("Please upload your password dataset file (e.g., production.tsv):")
    uploaded = files.upload()

    if not uploaded:
        print("\n--- ERROR ---")
        print("No file was uploaded. Please re-run the cell and select the file.")
    else:
        actual_filename = list(uploaded.keys())[0]
        print(f"\nUser uploaded file '{actual_filename}' with length {len(uploaded[actual_filename])} bytes")

        # 2. Load the Attacker's Dictionary (Globally)
        attacker_dictionary = load_ranked_dictionary(actual_filename, MAX_DICT_SIZE)

        # --- 3. DEMONSTRATION OF LOGIN SYSTEM ---

        print("\n--- 1. Password Generator Demo ---")
        # Generate a strong password that will be accepted
        generated_pass = generate_random_strong_password()
        print(f"Generated Password: {generated_pass}")

        print("\n--- 2. Sign-Up Demo (Enforcing Policy) ---")
        # Try to sign up with a weak password
        print(sign_up("user_weak", "password"))
        # Try to sign up with the strong, generated password
        print(sign_up("user_strong", generated_pass))

        print("\n--- 3. Login Lockout Demo (user_strong) ---")
        # Try to log in with the wrong password 3 times
        print(f"Attempt 1: {login('user_strong', 'wrongpassword')}")
        print(f"Attempt 2: {login('user_strong', 'wrongpassword')}")
        print(f"Attempt 3: {login('user_strong', 'wrongpassword')}")

        print("\n...Account should now be locked...")

        # Try to log in with the *correct* password
        print(f"Attempt 4: {login('user_strong', generated_pass)}")

        print("\n--- 4. Password Expiry Demo ---")
        # Create a new user for this demo
        print(sign_up("user_expiry", "MyPasswordIsMedium123!"))

        # Log in 10 times to show the counter decreasing
        for i in range(PASSWORD_EXPIRY_COUNT):
            print(f"Login {i+1}: {login('user_expiry', 'MyPasswordIsMedium123!')}")

        # Try to log in one more time (the 11th time)
        print(f"Login 11: {login('user_expiry', 'MyPasswordIsMedium123!')}")

Please upload your password dataset file (e.g., production.tsv):


Saving production.tsv to production (5).tsv

User uploaded file 'production (5).tsv' with length 11818456 bytes
Loading ranked dictionary from production (5).tsv...
Loaded 50000 unique passwords into ranked dictionary.

--- 1. Password Generator Demo ---
Generating a strong password...
...Success! Found a strong password.
Generated Password: UQmgIQBNbQQzNUzm

--- 2. Sign-Up Demo (Enforcing Policy) ---
🚨 Error: Password is too weak (Very Weak). Please choose a stronger password.
✅ Success: Account 'user_strong' created. (Strength: Very Strong)

--- 3. Login Lockout Demo (user_strong) ---
Attempt 1: Login Failed: Invalid password. You have 2 attempt(s) left.
Attempt 2: Login Failed: Invalid password. You have 1 attempt(s) left.
Attempt 3: Login Failed: Invalid password. Your account is now locked.

...Account should now be locked...
Attempt 4: Login Failed: Account is locked due to too many failed attempts.

--- 4. Password Expiry Demo ---
✅ Success: Account 'user_expiry' created. (Stren