In [48]:
import pandas as pd
import numpy as np
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier

In [49]:
pass_file = './dataset/passwd_data.csv'
data = pd.read_csv(pass_file, sep =',', on_bad_lines='skip', header=0)
data = data.dropna()
passwords = data.values.tolist()

print(data.columns)
print(data['strength'].unique())
print(data.head())

Index(['password', 'strength'], dtype='object')
[1 2 0]
      password  strength
0     kzde5577         1
1     kino3434         1
2    visi7k1yr         1
3     megzy123         1
4  lamborghin1         1


In [50]:

random.shuffle(passwords)
y = data.iloc[:, 1].values
allpasswd = data.iloc[:,0].values

In [51]:
vectorizer = TfidfVectorizer(token_pattern=r'.')
X = vectorizer.fit_transform(allpasswd)

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [52]:
lgs = OneVsRestClassifier(LogisticRegression(penalty='l2'))
lgs.fit(X_train, y_train)
print(lgs.score(X_test, y_test))

0.8132728032972941


In [53]:
X_predict_raw = [
    'john_doe123', 'john_doe!@#', 'johnDoe2024', 'doeJohn_789',
    'securepass99', 'Pa$$w0rd123!', 'qwerty!@#$%', 'random_9823*',
    'hackerproof_42', 'p@ssW0rd9876', 'letmein_567', 'strongP@ss2025',
    'abcdefg123!', 'ghijklmnop!@#$', 'user2024_secure', 'myP@ssw0rd!',
    'super_secure_999', 'dontguessme_123', 'qazwsxedc2024!', 'pass_!@#_safe', '123456', 'abcdef'
]

X_predict = vectorizer.transform(X_predict_raw)
y_Predict = lgs.predict(X_predict)

# 라벨 매핑 (숫자 → 문자열)
label_mapping = {0: 'easy', 1: 'medium', 2: 'strong'}
y_Predict_mapped = [label_mapping[pred] for pred in y_Predict]  # 변환 적용

# 출력
for password, prediction in zip(X_predict_raw, y_Predict_mapped):
    print(f"🔹 Password: {password} → Predicted Strength: {prediction}")


🔹 Password: john_doe123 → Predicted Strength: strong
🔹 Password: john_doe!@# → Predicted Strength: strong
🔹 Password: johnDoe2024 → Predicted Strength: medium
🔹 Password: doeJohn_789 → Predicted Strength: strong
🔹 Password: securepass99 → Predicted Strength: medium
🔹 Password: Pa$$w0rd123! → Predicted Strength: strong
🔹 Password: qwerty!@#$% → Predicted Strength: strong
🔹 Password: random_9823* → Predicted Strength: strong
🔹 Password: hackerproof_42 → Predicted Strength: strong
🔹 Password: p@ssW0rd9876 → Predicted Strength: strong
🔹 Password: letmein_567 → Predicted Strength: strong
🔹 Password: strongP@ss2025 → Predicted Strength: strong
🔹 Password: abcdefg123! → Predicted Strength: strong
🔹 Password: ghijklmnop!@#$ → Predicted Strength: strong
🔹 Password: user2024_secure → Predicted Strength: medium
🔹 Password: myP@ssw0rd! → Predicted Strength: strong
🔹 Password: super_secure_999 → Predicted Strength: medium
🔹 Password: dontguessme_123 → Predicted Strength: strong
🔹 Password: qazwsxed