In [7]:
#pip install streamlit scikit-learn sqlalchemy pandas pymupdf python-docx


In [11]:
%%writefile predictor.py
import streamlit as st
import pandas as pd
import sqlite3
import fitz  # PyMuPDF
import re
from docx import Document
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sqlalchemy import create_engine, text
import os

# ------------------ Train Sample Model ------------------
@st.cache_resource
def train_model():
    """Fit and return the demo resume-to-role text classifier.

    Five hard-coded sample resumes, each paired with a role label, are
    vectorised with TF-IDF and used to fit a logistic-regression classifier.
    The function is decorated with ``st.cache_resource``.

    Returns:
        The fitted ``Pipeline`` (steps ``'tfidf'`` then ``'clf'``).
    """
    # NOTE(review): the finance sample is labelled 'Human Resources' and the
    # civil-engineering sample 'Data Scientist' -- confirm these labels are
    # intended before relying on the predictions.
    samples = [
        (
            "Experienced in developing Python applications using Django and Flask frameworks. Skilled in machine learning and TensorFlow.",
            'ML Engineer',
        ),
        (
            "Skilled in preparing financial statements, managing accounts, Excel, and auditing with strong financial knowledge.",
            'Human Resources',
        ),
        (
            "Expert in digital marketing, social media strategy, SEO, and Google Ads.",
            'Marketing Specialist',
        ),
        (
            "Handled civil engineering projects including AutoCAD, project estimates, structural analysis, and site supervision.",
            'Data Scientist',
        ),
        (
            "Led teams in customer service, communication, sales, and CRM tools.",
            'Customer Support',
        ),
    ]
    training_frame = pd.DataFrame(samples, columns=['resume_text', 'role'])

    classifier = Pipeline(steps=[
        ('tfidf', TfidfVectorizer()),
        ('clf', LogisticRegression()),
    ])
    classifier.fit(training_frame['resume_text'], training_frame['role'])
    return classifier
# ------------------ Extract Text From Resume ------------------
def extract_text(file):
    """Return the plain text of an uploaded PDF or DOCX resume.

    Args:
        file: File-like object exposing a ``name`` attribute and ``read()``
            (this script passes the object returned by ``st.file_uploader``).

    Returns:
        The page texts (PDF) or paragraph texts (DOCX) joined with newlines,
        or an empty string when the file name has neither extension.
    """
    # Compare case-insensitively so an upper- or mixed-case extension such as
    # "CV.PDF" is still recognised instead of silently yielding "".
    filename = file.name.lower()

    if filename.endswith('.pdf'):
        # The context manager closes the PyMuPDF document once the text has
        # been collected, rather than leaving it open.
        with fitz.open(stream=file.read(), filetype="pdf") as doc:
            return "\n".join(page.get_text() for page in doc)

    if filename.endswith('.docx'):
        doc = Document(file)
        return "\n".join(para.text for para in doc.paragraphs)

    return ""

# ------------------ Extract Candidate Name ------------------
def extract_name(text):
    """Guess the candidate's name from the first line that contains letters.

    Each line is reduced to letters and whitespace; the first line that still
    has content after that clean-up is returned.

    Args:
        text: Full resume text.

    Returns:
        The cleaned line with surrounding and repeated whitespace collapsed,
        or ``"Unknown"`` when no line contains any letters.
    """
    for line in text.splitlines():
        # Drop punctuation, digits and underscores but keep every Unicode
        # letter, so accented names such as "José" are not truncated.
        letters_only = re.sub(r'[^\w\s]|[\d_]', '', line)
        name = ' '.join(letters_only.split())
        # A line made only of digits/symbols (phone number, date, ...) cleans
        # to "", so keep scanning instead of returning an empty name.
        if name:
            return name
    return "Unknown"

# ------------------ Extract All Skills ------------------
def extract_skills(text):
    """List the known skill keywords that occur in ``text``.

    Matching is done on the lower-cased text against a fixed keyword list,
    using word boundaries on both sides of each keyword.

    Args:
        text: Resume text to scan.

    Returns:
        The distinct matches, title-cased, sorted and joined with ``", "``;
        an empty string when nothing matches.
    """
    lowered = text.lower()
    matches = re.findall(r'\b(?:python|django|flask|ml|machine learning|data analysis|excel|accounting|seo|marketing|social media|autocad|finance|google ads|sales|communication|crm|tensorflow|pandas|numpy|keras|sql|power bi|auditing|leadership|teamwork|analytics|problem solving|customer service|fast learning)\b', lowered)
    # A set removes repeated mentions of the same skill before sorting.
    unique_titles = {match.title() for match in matches}
    return ', '.join(sorted(unique_titles))

# ------------------ Setup Database ------------------
def setup_database():
    """Ensure the predictions table exists and return an engine for it.

    The table is created with ``CREATE TABLE IF NOT EXISTS`` on every call.
    That statement is already a no-op when the table is present, and running
    it unconditionally also covers a database file that exists but does not
    contain the table yet.

    Returns:
        A SQLAlchemy engine bound to the SQLite file
        ``resume_predictions.db`` (path relative to the working directory).
    """
    db_path = "resume_predictions.db"

    conn = sqlite3.connect(db_path)
    try:
        conn.execute('''
            CREATE TABLE IF NOT EXISTS resume_predictions (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                candidate_name TEXT,
                predicted_role TEXT,
                extracted_skills TEXT,
                status TEXT
            )
        ''')
        conn.commit()
    finally:
        # Close the connection even if the DDL statement raises.
        conn.close()

    return create_engine(f"sqlite:///{db_path}", echo=False)

# ------------------ Save Prediction ------------------
def save_prediction(engine, candidate_name, role, skills, status):
    """Insert one prediction row into ``resume_predictions``.

    Args:
        engine: SQLAlchemy engine used to open the transaction.
        candidate_name: Value stored in ``candidate_name``.
        role: Value stored in ``predicted_role``.
        skills: Value stored in ``extracted_skills``.
        status: Value stored in ``status``.
    """
    row_values = {
        "name": candidate_name,
        "role": role,
        "skills": skills,
        "status": status,
    }
    # Values are passed as bound parameters rather than formatted into the SQL.
    insert_stmt = text('''
            INSERT INTO resume_predictions (candidate_name, predicted_role, extracted_skills, status)
            VALUES (:name, :role, :skills, :status)
        ''')
    with engine.begin() as connection:
        connection.execute(insert_stmt, row_values)

# ------------------ Streamlit App ------------------
# Page setup, model/database initialisation and handling of the uploaded resume.
st.set_page_config(page_title="📄 Resume Job Role Predictor", layout="centered")
st.title("📄 Resume Job Role Predictor")

model = train_model()
engine = setup_database()

file = st.file_uploader("📁 Upload a resume (PDF or DOCX)", type=["pdf", "docx"])

if file:
    resume_text_full = extract_text(file)
    candidate_name = extract_name(resume_text_full)
    extracted_skills = extract_skills(resume_text_full)
    predicted_role = model.predict([resume_text_full])[0]

    st.subheader("📄 Resume Preview")
    st.text_area("Full Resume Content", resume_text_full, height=300)

    st.subheader("🧠 Extracted Skills")
    st.write(extracted_skills if extracted_skills else "No clear skills found.")

    st.subheader("🔮 Predicted Job Role")
    st.write(f"**{predicted_role}**")

    # Match logic and status message: a resume counts as a match when at
    # least one known skill keyword was found in it.
    if extracted_skills:
        status = "Match"
        st.success(f"✅ Congratulations {candidate_name}, you are predicted for the role of **{predicted_role}** based on your resume.\n\n🎯 All the best for your career!")
    else:
        status = "Not Match"
        st.error(f"❌ Unfortunately, {candidate_name}, your profile is not applicable for this role. Thank you for your interest.")

    # Save prediction to DB -- once per upload. Streamlit re-executes the whole
    # script on every widget interaction, so without this guard the same
    # resume would be inserted again on each rerun (including the rerun that
    # follows deleting a record).
    upload_key = (file.name, file.size)
    if st.session_state.get("saved_upload_key") != upload_key:
        save_prediction(engine, candidate_name, predicted_role, extracted_skills, status)
        st.session_state["saved_upload_key"] = upload_key
elif "saved_upload_key" in st.session_state:
    # The uploader was cleared; forget the key so the same file can be saved
    # again if it is uploaded anew.
    del st.session_state["saved_upload_key"]

# ------------------ Display Recent Predictions ------------------
# Show the ten newest rows of resume_predictions and offer deletion by ID.
st.subheader("📝 Recent Predictions")
try:
    with engine.begin() as conn:
        result = conn.execute(text('''
            SELECT id, candidate_name, predicted_role, extracted_skills, status
            FROM resume_predictions
            ORDER BY id DESC LIMIT 10
        '''))
        rows = result.fetchall()

    if rows:
        df = pd.DataFrame(rows, columns=["ID", "Candidate", "Predicted Role", "Skills", "Status"])
        st.dataframe(df)

        delete_id = st.number_input("Enter ID to delete:", min_value=1, step=1)
        if st.button("Delete Record"):
            with engine.begin() as conn:
                deleted = conn.execute(
                    text("DELETE FROM resume_predictions WHERE id = :id"),
                    {"id": delete_id},
                )
                deleted_count = deleted.rowcount

            if deleted_count:
                st.success(f"🗑️ Record with ID {delete_id} deleted successfully!")
                # Prefer st.rerun(); st.experimental_rerun() is deprecated and
                # absent from newer Streamlit releases. The `or` only touches
                # the legacy name when st.rerun does not exist.
                rerun = getattr(st, "rerun", None) or st.experimental_rerun
                rerun()
            else:
                # Nothing matched, so do not claim success; no rerun is needed
                # and the warning stays visible.
                st.warning(f"No record found with ID {delete_id}.")
    else:
        st.info("No predictions found.")
except Exception as e:
    st.error(f"Failed to load predictions: {e}")


Overwriting predictor.py
