In [3]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from skimage import io
from pywt import dwt2
from sklearn.preprocessing import StandardScaler

def get_center_coefficients(matrix, block_size=10):
    """
    Extract a central square block from a 2D matrix and return it flattened.

    The block's top-left corner is placed at
    (rows // 2 - block_size // 2, cols // 2 - block_size // 2), clamped to 0.
    When the matrix cannot supply a full block, the missing rows/columns are
    zero-padded at the bottom/right, so for a non-negative block_size the
    result always has block_size * block_size elements.

    Parameters:
    - matrix (np.ndarray): The input 2D matrix.
    - block_size (int): The size of the central block (block_size x block_size).

    Returns:
    - np.ndarray: Flattened (row-major) central coefficients.
    """
    half_size = block_size // 2

    # The same offset applies to even and odd block sizes, so no branching is
    # needed. Clamp at 0 so an oversized block starts at the matrix origin
    # instead of wrapping around through negative indices.
    start_x = max(matrix.shape[0] // 2 - half_size, 0)
    start_y = max(matrix.shape[1] // 2 - half_size, 0)

    # Slicing past the end of the matrix is safe: NumPy truncates the slice.
    central_block = matrix[start_x:start_x + block_size,
                           start_y:start_y + block_size]

    # If the extracted block is smaller than requested, pad with zeros.
    missing_rows = block_size - central_block.shape[0]
    missing_cols = block_size - central_block.shape[1]
    if missing_rows or missing_cols:
        central_block = np.pad(central_block,
                               ((0, max(missing_rows, 0)),
                                (0, max(missing_cols, 0))),
                               mode='constant', constant_values=0)

    return central_block.flatten()

def extract_features(image):
    """
    Build a fixed-length frequency-domain feature vector for one image.

    The vector is the concatenation of three 100-element groups:
    a 10x10 central block of the DCT of the grayscale image, the first 100
    Haar wavelet coefficients, and a 10x10 central block of the shifted FFT
    log-magnitude spectrum.

    Parameters:
    - image (np.ndarray): A BGR colour image (as returned by cv2.imread), or
      an already-grayscale 2D array.

    Returns:
    - np.ndarray of length 300, or None if any feature is NaN or infinite.
    """
    # Convert to grayscale (a 2D input is taken to be grayscale already)
    gray = image if image.ndim == 2 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # DCT
    # cv2.dct only supports even-sized arrays, so replicate the last row and/or
    # column when a dimension is odd. Even-sized images are passed unchanged.
    dct_input = np.float32(gray) / 255.0
    pad_rows, pad_cols = gray.shape[0] % 2, gray.shape[1] % 2
    if pad_rows or pad_cols:
        dct_input = np.pad(dct_input, ((0, pad_rows), (0, pad_cols)), mode='edge')
    dct_matrix = cv2.dct(dct_input)
    dct_features = get_center_coefficients(dct_matrix, block_size=10)  # 10x10 block -> 100 coefficients

    # Wavelet Transform
    cA, (cH, cV, cD) = dwt2(gray, 'haar')
    # NOTE(review): cA comes first in the concatenation, so whenever it holds
    # at least 100 values the slice below contains approximation coefficients
    # only and cH/cV/cD never contribute. Kept as-is to preserve the existing
    # feature layout.
    wavelet_features = np.concatenate([cA.flatten(), cH.flatten(), cV.flatten(), cD.flatten()])[:100]
    # Very small images yield fewer than 100 coefficients; zero-pad so every
    # image produces a vector of the same length.
    if wavelet_features.size < 100:
        wavelet_features = np.pad(wavelet_features, (0, 100 - wavelet_features.size), mode='constant')

    # FFT (zero frequency shifted to the centre before taking the central block)
    fft_matrix = np.fft.fftshift(np.fft.fft2(gray))
    magnitude_spectrum = 20 * np.log(np.abs(fft_matrix) + 1e-10)  # To avoid log(0)
    fft_features = get_center_coefficients(magnitude_spectrum, block_size=10)  # 10x10 block -> 100 coefficients

    # Combine features
    features = np.concatenate([dct_features, wavelet_features, fft_features])

    # Reject vectors containing NaN or infinite values
    if not np.all(np.isfinite(features)):
        return None

    return features

# Load dataset
def load_data(base_path):
    """
    Load every image under base_path/real and base_path/fake and extract features.

    Files that cv2.imread cannot decode, and images whose feature vector is
    rejected by extract_features, are skipped with a printed warning.

    Parameters:
    - base_path (str): Directory containing the 'real' and 'fake' sub-folders.

    Returns:
    - tuple (np.ndarray, np.ndarray): The stacked feature vectors and the
      matching labels (0 for real, 1 for fake).
    """
    features, labels = [], []
    for class_id, label in enumerate(['real', 'fake']):  # 0 for real, 1 for fake
        folder_path = os.path.join(base_path, label)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore any seeded split) reproducible.
        for image_name in sorted(os.listdir(folder_path)):
            image_path = os.path.join(folder_path, image_name)
            image = cv2.imread(image_path)

            # Ensure the image was read correctly
            if image is None:
                print(f"Warning: Unable to read image {image_path}. Skipping.")
                continue

            feature_vector = extract_features(image)
            if feature_vector is None:
                print(f"Warning: Invalid features for image {image_path}. Skipping.")
                continue

            # Only valid feature vectors reach this point
            features.append(feature_vector)
            labels.append(class_id)
    return np.array(features), np.array(labels)

# Prepare data
# The dataset directory can be overridden through the ML_TRAIN_DATA_DIR
# environment variable; the original hard-coded location stays the default.
base_path = os.environ.get("ML_TRAIN_DATA_DIR", r"C:\Desktop\ML_Implementation\data(Final_ML)\train")
X, y = load_data(base_path)

# Fail early with a clear message rather than an IndexError on X.shape[1] below.
if X.ndim != 2 or X.shape[0] == 0:
    raise RuntimeError(f"No usable feature vectors were loaded from {base_path!r}")

print(f"Total samples: {X.shape[0]}")
print(f"Feature vector size: {X.shape[1]}")

# Split into train and test sets (stratified on the real/fake label)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Scale the features; the scaler is fitted on the training split only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Example: Train a K-Nearest Neighbors classifier with GridSearchCV
# NOTE(review): X_train was standardized before the grid search, so each CV
# fold's validation part has already influenced the scaling statistics. Wrap
# the scaler and KNN in a Pipeline if unbiased CV scores are required.
param_grid = {'n_neighbors': list(range(1, 31))}
knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best estimator (refit on the whole training split by GridSearchCV)
best_knn = grid_search.best_estimator_
print(f"Best KNN Parameters: {grid_search.best_params_}")

# Predictions
y_pred = best_knn.predict(X_test)

# Evaluation
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Total samples: 5365
Feature vector size: 300
Best KNN Parameters: {'n_neighbors': 9}
Confusion Matrix:
[[486  86]
 [337 164]]

Classification Report:
              precision    recall  f1-score   support

           0       0.59      0.85      0.70       572
           1       0.66      0.33      0.44       501

    accuracy                           0.61      1073
   macro avg       0.62      0.59      0.57      1073
weighted avg       0.62      0.61      0.58      1073



In [4]:
# Perceptron implementation
class Perceptron:
    """
    Single-layer perceptron for binary (0/1) classification.

    Uses a step activation (output 1 when the linear score is >= 0, else 0)
    and the classic per-sample update rule
    w += lr * (target - prediction) * x, b += lr * (target - prediction).

    Parameters:
    - learning_rate (float): Step size applied to every update.
    - n_iters (int): Maximum number of passes (epochs) over the training data.
    """

    def __init__(self, learning_rate=0.01, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.weights = None  # set by fit(): one weight per feature
        self.bias = None     # set by fit(): scalar offset

    def fit(self, X, y):
        """
        Learn weights and bias from the training data.

        Parameters:
        - X (array-like): Training samples, shape (n_samples, n_features).
        - y (array-like): Target labels (0 or 1), one per sample.

        Raises:
        - ValueError: If X and y do not contain the same number of samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent lengths: {X.shape[0]} != {y.shape[0]}"
            )

        # Initialize weights and bias
        self.weights = np.zeros(X.shape[1])
        self.bias = 0.0

        # Training
        for _ in range(self.n_iters):
            updated = False
            for x_i, target in zip(X, y):
                linear_output = np.dot(x_i, self.weights) + self.bias
                y_predicted = self._activation_function(linear_output)
                error = target - y_predicted
                if error == 0:
                    # Correctly classified: the update would be zero.
                    continue
                # Update rule
                update = self.lr * error
                self.weights += update * x_i
                self.bias += update
                updated = True
            # An epoch without any update means every sample is classified
            # correctly; further epochs would leave the model unchanged.
            if not updated:
                break

    def predict(self, X):
        """
        Predict 0/1 labels for the given samples.

        Parameters:
        - X (array-like): Samples, shape (n_samples, n_features).

        Returns:
        - np.ndarray: Predicted labels (0 or 1), one per sample.
        """
        linear_output = np.dot(X, self.weights) + self.bias
        return self._activation_function(linear_output)

    def _activation_function(self, x):
        """Step activation: 1 where x >= 0, otherwise 0."""
        return np.where(x >= 0, 1, 0)

# Build the perceptron and fit it on the scaled training split
perceptron = Perceptron(
    learning_rate=0.01,
    n_iters=1000,
)
perceptron.fit(X_train, y_train)

# Score the held-out test split
y_pred = perceptron.predict(X_test)

# Report: confusion matrix first, then the per-class metrics
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

Confusion Matrix:
[[394 178]
 [264 237]]

Classification Report:
              precision    recall  f1-score   support

           0       0.60      0.69      0.64       572
           1       0.57      0.47      0.52       501

    accuracy                           0.59      1073
   macro avg       0.58      0.58      0.58      1073
weighted avg       0.59      0.59      0.58      1073



In [5]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
# Decision tree with a fixed seed so tie-breaking between splits is repeatable
decision_tree = DecisionTreeClassifier(random_state=42)

# 5-fold cross-validation on the training split (the estimator is cloned per
# fold, so decision_tree itself is still unfitted afterwards)
cv_scores = cross_val_score(
    decision_tree,
    X_train,
    y_train,
    cv=5,
)

# Per-fold scores followed by their average
print(f"Cross-validation scores: {cv_scores}")
print(f"Mean CV score: {np.mean(cv_scores)}")

# Fit on the full training split, then predict the held-out test split
decision_tree.fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)

# Performance metrics: per-class report first, confusion matrix second
print(classification_report(y_test, y_pred), confusion_matrix(y_test, y_pred), sep="\n")

Cross-validation scores: [0.64144354 0.63213038 0.63053613 0.63636364 0.6025641 ]
Mean CV score: 0.6286075585260684
              precision    recall  f1-score   support

           0       0.65      0.69      0.67       572
           1       0.62      0.58      0.60       501

    accuracy                           0.64      1073
   macro avg       0.63      0.63      0.63      1073
weighted avg       0.64      0.64      0.64      1073

[[394 178]
 [212 289]]


In [6]:
from sklearn.ensemble import RandomForestClassifier
# Random forest: 200 trees, each limited to depth 20, seeded for repeatability
random_forest = RandomForestClassifier(
    n_estimators=200,
    max_depth=20,
    random_state=42,
)

# Train on the training split
random_forest.fit(X_train, y_train)

# Predict the held-out test split
y_pred = random_forest.predict(X_test)

# Report: confusion matrix first, then the per-class metrics
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

Confusion Matrix:
[[452 120]
 [133 368]]

Classification Report:
              precision    recall  f1-score   support

           0       0.77      0.79      0.78       572
           1       0.75      0.73      0.74       501

    accuracy                           0.76      1073
   macro avg       0.76      0.76      0.76      1073
weighted avg       0.76      0.76      0.76      1073



In [7]:
from catboost import CatBoostClassifier
# CatBoost gradient boosting; verbose=0 silences the per-iteration training log
catboost_model = CatBoostClassifier(
    iterations=1000,
    depth=6,
    learning_rate=0.1,
    random_seed=42,
    verbose=0,
)

# Train on the training split
catboost_model.fit(X_train, y_train)

# Predict the held-out test split
y_pred = catboost_model.predict(X_test)

# Report: confusion matrix first, then the per-class metrics
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

Confusion Matrix:
[[467 105]
 [141 360]]

Classification Report:
              precision    recall  f1-score   support

           0       0.77      0.82      0.79       572
           1       0.77      0.72      0.75       501

    accuracy                           0.77      1073
   macro avg       0.77      0.77      0.77      1073
weighted avg       0.77      0.77      0.77      1073



In [8]:
from sklearn.ensemble import AdaBoostClassifier
# Weak learner for AdaBoost: a depth-1 decision tree
base_estimator = DecisionTreeClassifier(max_depth=1)

# Initialize the AdaBoost Classifier.
# The weak learner is passed positionally because its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4; the first positional slot is accepted by both.
adaboost_model = AdaBoostClassifier(
    base_estimator,
    n_estimators=100,
    learning_rate=1.0,
    random_state=42,
)

# Fit the model to the training data
adaboost_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = adaboost_model.predict(X_test)

# Evaluation
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))



Confusion Matrix:
[[453 119]
 [167 334]]

Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.79      0.76       572
           1       0.74      0.67      0.70       501

    accuracy                           0.73      1073
   macro avg       0.73      0.73      0.73      1073
weighted avg       0.73      0.73      0.73      1073



In [9]:
from xgboost import XGBClassifier
# XGBoost gradient-boosted trees: 200 rounds, depth-5 trees, seeded
xgb_model = XGBClassifier(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=5,
    random_state=42,
)

# Train on the training split
xgb_model.fit(X_train, y_train)

# Predict the held-out test split
y_pred = xgb_model.predict(X_test)

# Report: confusion matrix first, then the per-class metrics
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

Confusion Matrix:
[[468 104]
 [147 354]]

Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.82      0.79       572
           1       0.77      0.71      0.74       501

    accuracy                           0.77      1073
   macro avg       0.77      0.76      0.76      1073
weighted avg       0.77      0.77      0.77      1073



In [10]:
from sklearn.naive_bayes import GaussianNB
# Gaussian naive Bayes with library defaults
naive_bayes_model = GaussianNB()

# Train on the training split
naive_bayes_model.fit(X_train, y_train)

# Predict the held-out test split
y_pred = naive_bayes_model.predict(X_test)

# Report: confusion matrix first, then the per-class metrics
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

Confusion Matrix:
[[369 203]
 [166 335]]

Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.65      0.67       572
           1       0.62      0.67      0.64       501

    accuracy                           0.66      1073
   macro avg       0.66      0.66      0.66      1073
weighted avg       0.66      0.66      0.66      1073



In [11]:
from sklearn.svm import SVC
# Support vector classifier with a linear kernel
svm_classifier = SVC(
    kernel='linear',
    random_state=42,
)

# Train on the full training split
svm_classifier.fit(X_train, y_train)

# Predict the held-out test split
y_pred = svm_classifier.predict(X_test)

# Report: confusion matrix first, then the per-class metrics
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

Confusion Matrix:
[[414 158]
 [231 270]]

Classification Report:
              precision    recall  f1-score   support

           0       0.64      0.72      0.68       572
           1       0.63      0.54      0.58       501

    accuracy                           0.64      1073
   macro avg       0.64      0.63      0.63      1073
weighted avg       0.64      0.64      0.63      1073

