In [1]:
import re  # For regular expressions
import pandas as pd  # For handling dataframes
import numpy as np  # For numerical operations
import pickle  # For saving/loading models

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import VotingClassifier, RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report


In [None]:
# Import Standard Libraries
import re  # Regular expressions for text cleaning
import joblib  # To load trained models
import numpy as np  # For handling numerical data (CDR input)
import nltk  # Natural Language Toolkit for text processing
from nltk.corpus import stopwords  # Stopwords for text cleaning
from sklearn.feature_extraction.text import TfidfVectorizer  # TF-IDF for text vectorization

# Fetch the NLTK stopword corpus only when it is not already installed.
# Calling nltk.download() unconditionally contacts the download server on
# every run; checking the local data path first makes the download a
# genuine one-time step.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

# English stopwords kept as a set so membership tests in clean_text are O(1)
stop_words = set(stopwords.words('english'))

def clean_text(text):
    """Normalize raw text before it is passed to the TF-IDF vectorizer.

    Steps, in order:
      1. lowercase the input;
      2. replace every run of non-word characters (anything outside
         letters, digits and underscore) with a single space;
      3. split on whitespace and drop tokens found in ``stop_words``.

    Returns the surviving tokens joined by single spaces.
    """
    lowered = text.lower()
    tokens = re.sub(r'\W+', ' ', lowered).split()
    return ' '.join(token for token in tokens if token not in stop_words)

# Locations of the serialized artifacts (resolved against the working directory)
cdr_model_path = 'cdr_fraud_model.pkl'
spam_model_path = 'spam_text_model.pkl'
vectorizer_path = 'tfidf_vectorizer.pkl'

# Load the trained CDR fraud detection model.
# NOTE: joblib.load unpickles the file, which can run arbitrary code; only
# load artifacts that come from a trusted source.
try:
    cdr_model = joblib.load(cdr_model_path)
except FileNotFoundError:
    # The `exit` builtin is injected by the `site` module for interactive use
    # and is not guaranteed to exist in every interpreter. Raising SystemExit
    # with a string writes the message to stderr and exits with status 1.
    raise SystemExit(f'Error: {cdr_model_path} file not found.')

# Load the fitted text vectorizer used to turn cleaned text into features.
# NOTE: joblib.load unpickles the file, which can run arbitrary code; only
# load artifacts that come from a trusted source.
try:
    vectorizer = joblib.load(vectorizer_path)
except FileNotFoundError:
    # The `exit` builtin is injected by the `site` module for interactive use
    # and is not guaranteed to exist in every interpreter. Raising SystemExit
    # with a string writes the message to stderr and exits with status 1.
    raise SystemExit(f'Error: {vectorizer_path} file not found.')

def predict_cdr_fraud(input_data):
    """Return the CDR model's prediction for a single call record.

    ``input_data`` is any array-like of feature values. It is converted to a
    NumPy array and flattened into one row of shape ``(1, n_values)``, the
    single-sample layout passed to ``cdr_model.predict``. The reshape itself
    does not check the number of values against what the model expects.

    Returns the first (and only) element of the model's prediction output.
    """
    single_row = np.reshape(np.array(input_data), (1, -1))
    predictions = cdr_model.predict(single_row)
    return predictions[0]

# Load the trained spam text detection model.
# NOTE: joblib.load unpickles the file, which can run arbitrary code; only
# load artifacts that come from a trusted source.
try:
    spam_model = joblib.load(spam_model_path)
except FileNotFoundError:
    # The `exit` builtin is injected by the `site` module for interactive use
    # and is not guaranteed to exist in every interpreter. Raising SystemExit
    # with a string writes the message to stderr and exits with status 1.
    raise SystemExit(f'Error: {spam_model_path} file not found.')

def predict_spam_text(input_text):
    """Return the spam model's prediction for one piece of call text.

    The text is normalized with ``clean_text``, transformed by the loaded
    ``vectorizer`` as a one-document batch, converted with ``.toarray()``,
    and passed to ``spam_model.predict``.

    Returns the first (and only) element of the model's prediction output.
    """
    cleaned = clean_text(input_text)
    doc_features = vectorizer.transform([cleaned])
    # NOTE(review): presumably the spam model was trained on dense input,
    # hence the explicit conversion -- confirm against the training code.
    dense_features = doc_features.toarray()
    return spam_model.predict(dense_features)[0]

def predict_call_fraud(cdr_input, call_text):
    """Combine the CDR-based and text-based detectors into one verdict.

    Both detectors are always run (CDR first, then text). The call is
    reported as fraudulent/spam when either prediction equals ``1``.
    NOTE(review): this assumes both models emit the label 1 for the
    positive class -- confirm against the training code.

    Returns ``'Fraud/Spam Call Detected'`` or ``'Safe Call'``.
    """
    cdr_flag = predict_cdr_fraud(cdr_input)
    text_flag = predict_spam_text(call_text)
    flagged = cdr_flag == 1 or text_flag == 1
    return 'Fraud/Spam Call Detected' if flagged else 'Safe Call'

# Demo inputs: a call transcript and a single CDR record of nine values,
# laid out as one row (presumably the feature count the CDR model expects).
sample_text = 'Hello, I am from your bank. Please provide your OTP.'
sample_cdr = np.array([10, 5, 15, 8, 20, 10, 5, 2, 0]).reshape(1, -1)

# Classify the demo call and print the combined verdict
print(predict_call_fraud(cdr_input=sample_cdr, call_text=sample_text))
