# In [2]:
# Step 1: Import Necessary Libraries
import requests
from bs4 import BeautifulSoup
import joblib

# Step 2: Restore the persisted classifier and its matching TF-IDF vectorizer.
# Both paths are relative, so they resolve against the current working directory.
# NOTE(review): joblib files are pickle-based; only load artifacts you trust.
model = joblib.load(filename='random_forest_model.pkl')
vectorizer = joblib.load(filename='tfidf_vectorizer.pkl')

# Step 3: Function to Extract Job Details from a URL
def _element_text(soup, name, attrs=None):
    """Return the text of the first tag matching *name*/*attrs*, or '' if absent."""
    tag = soup.find(name, attrs or {})
    if tag is None:
        return ''
    # A space separator keeps text from adjacent child nodes from being glued
    # into a single token (e.g. "EngineerRemote"), which would distort TF-IDF.
    return tag.get_text(separator=' ', strip=True)


def extract_job_details_from_url(job_url, timeout=10):
    """
    Extracts job details (title, description, requirements, benefits) from a job posting URL.

    The page is expected to expose the title in the first <h1> and the other
    sections in <div> elements with the classes "description", "requirements"
    and "benefits". Sections that are missing are skipped.

    Parameters:
        job_url (str): URL of the job posting.
        timeout (float): Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the caller indefinitely.

    Returns:
        str: The sections that were found, joined by single spaces. An empty
        string if the request failed or none of the sections were present.
    """
    try:
        # Make a request to the job URL
        response = requests.get(job_url, timeout=timeout)
        response.raise_for_status()  # Raise an error for failed requests
    except requests.RequestException as e:
        # Covers malformed URLs, connection problems, timeouts and HTTP errors.
        print(f"Error extracting job details: {e}")
        return ''

    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract job details (each element is looked up only once)
    sections = (
        _element_text(soup, 'h1'),
        _element_text(soup, 'div', {'class': 'description'}),
        _element_text(soup, 'div', {'class': 'requirements'}),
        _element_text(soup, 'div', {'class': 'benefits'}),
    )

    # Join only the non-empty sections so that a page with no matching elements
    # yields '' (falsy) rather than a whitespace-only string.
    return ' '.join(part for part in sections if part)

# Step 4: Function to Predict Job Authenticity
def predict_job_from_url(job_url):
    """
    Predicts if a job post from the given URL is fake or real.

    Parameters:
        job_url (str): URL of the job posting.

    Returns:
        str: "Fake Job" or "Real Job", or an explanatory message when no job
        text could be extracted from the page.
    """
    # Extract job details
    job_text = extract_job_details_from_url(job_url)

    # Treat whitespace-only text as "nothing extracted": classifying an empty
    # document would produce a meaningless prediction.
    if not (job_text and job_text.strip()):
        return "Could not extract job details. Please check the URL or website structure."

    # Preprocess the extracted text using TF-IDF
    job_text_tfidf = vectorizer.transform([job_text])

    # Predict using the trained model
    prediction = model.predict(job_text_tfidf)

    # Return the prediction result.
    # NOTE(review): assumes the model's label for a fraudulent posting is truthy
    # (e.g. 1) and the label for a genuine one is falsy (e.g. 0) — confirm
    # against the training code.
    return "Fake Job" if prediction[0] else "Real Job"

# Step 5: Test the Function with a Job URL
# Strip surrounding whitespace so a pasted link with a stray space or newline
# is not sent to the server as part of the URL.
job_url = input("Enter the job posting link: ").strip()
result = predict_job_from_url(job_url)
print("\nPrediction for the job post:", result)



# Example output:
# Prediction for the job post: Real Job
