In [None]:
import pandas as pd
import requests
from io import StringIO
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

In [None]:
# PWLDS source CSVs hosted on GitHub, listed from weakest to strongest tier.
# `files` pairs each URL with its integer class label (position in the tuple).

_pwlds_urls = (
    "https://raw.githubusercontent.com/Infinitode/PWLDS/main/pwlds_very_weak.csv",
    "https://raw.githubusercontent.com/Infinitode/PWLDS/main/pwlds_weak.csv",
    "https://raw.githubusercontent.com/Infinitode/PWLDS/main/pwlds_average.csv",
    "https://raw.githubusercontent.com/Infinitode/PWLDS/main/pwlds_strong.csv",
    "https://raw.githubusercontent.com/Infinitode/PWLDS/main/pwlds_very_strong.csv",
)

files = [(url, label) for label, url in enumerate(_pwlds_urls)]

def load_pwlds(files, limit_per_file=None, timeout=30):
    """Download labelled password CSVs and flatten them into parallel lists.

    Parameters
    ----------
    files : iterable of (str, int)
        ``(url, label)`` pairs. Every password read from ``url`` is tagged
        with ``label``.
    limit_per_file : int, optional
        When truthy, only the first ``limit_per_file`` rows of each file are
        kept (``DataFrame.head`` semantics). ``None`` or ``0`` keeps all rows.
    timeout : float, optional
        Seconds to wait on each HTTP request, so a stalled connection raises
        instead of hanging the notebook forever.

    Returns
    -------
    X : list of str
        Passwords, in file order.
    y : list
        The label of the file each password came from; same length as ``X``.

    Raises
    ------
    requests.RequestException
        On timeout, connection failure, or a non-2xx response
        (via ``raise_for_status``).
    KeyError
        If a downloaded CSV has no ``Password`` column.
    """
    X, y = [], []
    for url, label in files:
        print(f"Downloading {url.split('/')[-1]}...")
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        # Passwords must be read verbatim as text. With pandas' defaults,
        # entries such as "null", "NA" or "nan" are parsed as float NaN and
        # type inference may yield non-string values, both of which break the
        # text vectorizer downstream.
        df = pd.read_csv(
            StringIO(response.text),
            dtype={'Password': str},
            keep_default_na=False,
        )
        if limit_per_file:
            df = df.head(limit_per_file)
        passwords = df['Password'].tolist()
        X.extend(passwords)
        y.extend([label] * len(passwords))
    return X, y

# Fetch at most 20,000 passwords from each strength tier, then report the total.
print("Loading PWLDS passwords from GitHub...")
X, y = load_pwlds(files=files, limit_per_file=20_000)
print(f"Dataset size: {len(X)}")

In [None]:
# train/test splits
# Stratify on the label so each strength class keeps the same share in the
# train and test sets; the per-class rows of the report are then comparable.
# (Stratification needs at least two samples of every class.)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=True, stratify=y
)

# vectorize data
# Character 1- to 3-grams, capped at the 5000 most frequent.
# lowercase=False matters: CountVectorizer lowercases text by default, which
# would erase the upper/lower-case mix that is a core password-strength signal
# ("Password" and "password" would be indistinguishable).
vectorizer = CountVectorizer(
    analyzer='char', ngram_range=(1, 3), max_features=5000, lowercase=False
)
# Fit the vocabulary on the training split only so no test data leaks in.
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# train model
model = LogisticRegression(max_iter=1000)
model.fit(X_train_vec, y_train)

# evaluate model
y_pred = model.predict(X_test_vec)
print("\nModel Evaluation:")
print(classification_report(y_test, y_pred))

In [None]:
# prediction (strength, classification, confidence)

def password_strength(password):
    """Classify one password with the trained model.

    Relies on the module-level ``vectorizer`` and ``model`` fitted earlier in
    the notebook.

    Parameters
    ----------
    password : str
        The password to score.

    Returns
    -------
    (str, float)
        The human-readable strength name and the model's probability for that
        class, expressed as a percentage (0-100).
    """
    classes = ['Very Weak', 'Weak', 'Average', 'Strong', 'Very Strong']
    vec = vectorizer.transform([password])
    probs = model.predict_proba(vec)[0]
    # predict_proba columns are ordered like model.classes_, not by raw label
    # value, so pick the winning column first and then translate it to a label.
    # Indexing probs by the label itself is only right when classes_ is
    # exactly 0..n-1.
    best = int(probs.argmax())
    pred = int(model.classes_[best])
    confidence = float(probs[best]) * 100
    return classes[pred], confidence

In [None]:
# interactive password strength prediction
# Reads passwords until an empty line is entered. Note that input() echoes
# what is typed and the password is printed back, so do not enter real secrets.

print("-"*60)
print(" "*18 + "WEAK PASSWORD PREDICTOR")
print("-"*60)

while True:
    print()
    try:
        pwd = input("Enter a password (ENTER to exit): ")
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the kernel was interrupted while waiting for
        # input: treat it like pressing ENTER instead of dumping a traceback.
        break
    if pwd == "":
        break
    label, conf = password_strength(pwd)
    print()
    print(f"Password: \t{pwd}")
    print(f"Label: \t\t{label}")
    print(f"Confidence: \t{conf:.2f}%")