Implementation of a linear-kernel SVM with these key parameters:
- `kernel='linear'`: uses a linear decision boundary.
- `C=1.0`: regularization parameter (controls the trade-off between margin width and classification errors on the training data).


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Input columns for the model: numeric measurements plus explicit scam-indicator flags.
features = [
    'telecommuting',
    'has_company_logo',
    'has_questions',
    'has_links_without_https',
    'Identity_details_asked',
    'payment_asked',
    'average_days_between_interview_and_job_offer',
    'no_of_interview_rounds',
    'avg_no_of_time_sensitive_details',
    'avg_no_of_baity_phrases',
]

# Label column the classifier is trained to predict.
target = 'is_scam'

# Read the cleaned dataset; the relative path is resolved against the
# notebook's current working directory.
df = pd.read_csv('cleaned_dataset (1).csv')

# Make every feature column numeric with no missing values.
#
# Non-numeric columns may hold yes/no style text. Mapping only the exact string
# 'No' would leave values such as 'Yes', 'no' or ' No' to be coerced to NaN and
# then zero-filled, which makes a positive indicator indistinguishable from a
# negative one. Recognized boolean words are therefore mapped explicitly
# (case-insensitive, surrounding whitespace ignored), and any value that still
# cannot be parsed is reported before it is replaced by 0.
# NOTE(review): 'Yes' -> 1 is assumed to be the counterpart of 'No' -> 0 — confirm against the dataset.
BOOLEAN_TEXT = {'no': 0, 'yes': 1, 'false': 0, 'true': 1}


def _normalize_boolean_text(value):
    """Return 0/1 for a recognized yes/no string; return any other value unchanged."""
    if isinstance(value, str):
        return BOOLEAN_TEXT.get(value.strip().lower(), value)
    return value


for feature in features:
    column = df[feature]

    # Covers object columns as well as pandas string dtypes.
    if not pd.api.types.is_numeric_dtype(column):
        parsed = pd.to_numeric(column.map(_normalize_boolean_text), errors='coerce')

        # Values that were present in the raw column but could not be parsed.
        unparsed = int((parsed.isna() & column.notna()).sum())
        if unparsed:
            print(f"Warning: {unparsed} unparseable value(s) in '{feature}' set to 0")
        column = parsed

    # Remaining NaN (originally missing or unparseable) is filled with 0.
    df[feature] = column.fillna(0)

# Feature matrix (selected columns only) and the label series.
X, y = df[features], df[target]

# Sanity check: each feature column should report a numeric dtype.
print(X.dtypes)

# Hold out 20% of the rows for testing. The target classes are heavily
# imbalanced, so the split is stratified on y to keep the class ratio the same
# in the training and test partitions; random_state fixes the shuffle so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize the features. The scaler is fitted on the training partition only
# and then applied unchanged to the test partition, so no test-set statistics
# influence training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build a linear-kernel support vector classifier and fit it on the
# standardized training data (fit returns the estimator itself).
svm_model = SVC(
    kernel='linear',
    C=1.0,
    random_state=42,
).fit(X_train_scaled, y_train)

# Predict labels for the held-out, standardized test rows.
y_pred = svm_model.predict(X_test_scaled)

# Compute the test-set metrics first, then print them.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("\nModel Evaluation:")
print("Accuracy:", accuracy)
print("\nClassification Report:")
print(report)

telecommuting                                     int64
has_company_logo                                  int64
has_questions                                     int64
has_links_without_https                         float64
Identity_details_asked                          float64
payment_asked                                   float64
average_days_between_interview_and_job_offer    float64
no_of_interview_rounds                          float64
avg_no_of_time_sensitive_details                float64
avg_no_of_baity_phrases                         float64
dtype: object

Model Evaluation:
Accuracy: 0.9650447427293065

Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98      3335
           1       0.93      0.52      0.67       241

    accuracy                           0.97      3576
   macro avg       0.95      0.76      0.82      3576
weighted avg       0.96      0.97      0.96      3576



In [None]:
import joblib
import os

# Objects to persist, keyed by destination path (written in this order).
artifacts = {
    'backend/model/svm_scam_detector.joblib': svm_model,
    'backend/model/scaler.joblib': scaler,
}

# Ensure the output folder is present; no error if it already exists.
os.makedirs('backend/model', exist_ok=True)

# Serialize the fitted classifier and the fitted scaler with joblib.
for path, artifact in artifacts.items():
    joblib.dump(artifact, path)

print("Files saved successfully in backend/model/")

Files saved successfully in backend/model/


In [None]:
# Sanity check: list the contents of the model output directory.
import os

saved_files = os.listdir('backend/model')
print("Files in backend/model:", saved_files)

Files in backend/model: ['svm_scam_detector.joblib', 'scaler.joblib']
