In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# 1. LOAD THE DATA
df = pd.read_csv('labeled_resumes.csv')

# 2. AUTOMATIC COLUMN CHECK (Fixes the KeyError)
# This looks for 'Label', 'label', or 'quality_score' automatically
if 'Label' in df.columns:
    target_col = 'Label'
elif 'label' in df.columns:
    target_col = 'label'
else:
    # If neither exists, we use the quality score to re-create them
    print("Warning: Label column not found. Re-creating labels from scores...")
    df['label'] = df['quality_score'].apply(lambda x: "Good" if x >= 75 else "Average" if x >= 45 else "Poor")
    target_col = 'label'

print(f"Using '{target_col}' as the target column for AI training.")

# 3. TEXT TO NUMBERS (TF-IDF)
tfidf = TfidfVectorizer(max_features=1000, stop_words='english')
X = tfidf.fit_transform(df['Resume_str']) 
y = df[target_col] # Using the correct column name found above

# 4. SPLIT THE DATA (80% Train, 20% Test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5. TRAIN THE MODEL (Random Forest)
model = RandomForestClassifier(n_estimators=100, random_state=42)
print("Training the AI model... this may take a moment.")
model.fit(X_train, y_train)

# 6. EVALUATE
y_pred = model.predict(X_test)

print("\n--- DAY 9 SUCCESS: MODEL PERFORMANCE ---")
print(f"Accuracy Score: {accuracy_score(y_test, y_pred) * 100:.2f}%")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Using 'label' as the target column for AI training.
Training the AI model... this may take a moment.

--- DAY 9 SUCCESS: MODEL PERFORMANCE ---
Accuracy Score: 83.50%

Classification Report:
              precision    recall  f1-score   support

     Average       0.83      0.06      0.11        86
        Poor       0.84      1.00      0.91       411

    accuracy                           0.84       497
   macro avg       0.83      0.53      0.51       497
weighted avg       0.83      0.84      0.77       497

