In [3]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from skimage import io
from pywt import dwt2

def extract_features(image):
    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # DCT
    dct_features = cv2.dct(np.float32(gray) / 255.0)
    dct_features = dct_features.flatten()[:100]  # Take the top 100 coefficients
    
    # Wavelet Transform
    coeffs2 = dwt2(gray, 'haar')
    cA, (cH, cV, cD) = coeffs2
    wavelet_features = np.concatenate([cA.flatten(), cH.flatten(), cV.flatten(), cD.flatten()])[:100]
    
    # FFT
    f = np.fft.fft2(gray)
    fshift = np.fft.fftshift(f)
    epsilon = 1e-10  # To avoid log(0)
    magnitude_spectrum = 20 * np.log(np.abs(fshift) + epsilon)
    fft_features = magnitude_spectrum.flatten()[:100]  # Take the top 100 coefficients
    
    # Combine features
    features = np.concatenate([dct_features, wavelet_features, fft_features])
    
    # Check for NaN or infinite values
    if np.any(np.isnan(features)) or np.any(np.isinf(features)):
        return None  # Return None for invalid features
    
    return features

# Load dataset
def load_data(base_path):
    features, labels = [], []
    for label in ['real', 'fake']:
        folder_path = os.path.join(base_path, label)
        for image_name in os.listdir(folder_path):
            image_path = os.path.join(folder_path, image_name)
            image = cv2.imread(image_path)
            feature_vector = extract_features(image)
            if feature_vector is not None:  # Only append valid feature vectors
                features.append(feature_vector)
                labels.append(0 if label == 'real' else 1)  # 0 for real, 1 for fake
    return np.array(features), np.array(labels)

# Prepare data
base_path = r"C:\Desktop\ML_Implementation\data(Final_ML)\train"
X, y = load_data(base_path)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Set up grid search for kNN
param_grid = {'n_neighbors': range(1, 21)}  # Testing k values from 1 to 20
knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy')  # 5-fold cross-validation

# Fit the model
grid_search.fit(X_train, y_train)

# Get the best k value
best_k = grid_search.best_params_['n_neighbors']
print(f"Best k value: {best_k}")

# Evaluate the best model
best_knn = grid_search.best_estimator_
y_pred = best_knn.predict(X_test)

# Performance Metrics
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))


Best k value: 14
              precision    recall  f1-score   support

           0       0.71      0.60      0.65       573
           1       0.61      0.72      0.66       500

    accuracy                           0.66      1073
   macro avg       0.66      0.66      0.66      1073
weighted avg       0.67      0.66      0.66      1073

[[345 228]
 [138 362]]


In [4]:
class Perceptron:
    def __init__(self, learning_rate=0.01, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        # Initialize weights and bias
        self.weights = np.zeros(n_features)
        self.bias = 0

        # Training
        for _ in range(self.n_iters):
            for idx, x_i in enumerate(X):
                linear_output = np.dot(x_i, self.weights) + self.bias
                y_predicted = self._activation_function(linear_output)
                # Update rule
                update = self.lr * (y[idx] - y_predicted)
                self.weights += update * x_i
                self.bias += update

    def predict(self, X):
        linear_output = np.dot(X, self.weights) + self.bias
        y_predicted = self._activation_function(linear_output)
        return y_predicted

    def _activation_function(self, x):
        return np.where(x >= 0, 1, 0)  # Step activation function

# Initialize and train the Perceptron
perceptron = Perceptron(learning_rate=0.01, n_iters=1000)
perceptron.fit(X_train, y_train)

# Make predictions
y_pred = perceptron.predict(X_test)

# Performance Metrics
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.59      0.89      0.71       573
           1       0.69      0.29      0.40       500

    accuracy                           0.61      1073
   macro avg       0.64      0.59      0.56      1073
weighted avg       0.64      0.61      0.57      1073

[[509  64]
 [357 143]]


In [5]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
# Initialize the Decision Tree Classifier
decision_tree = DecisionTreeClassifier(random_state=42)

# Perform cross-validation
cv_scores = cross_val_score(decision_tree, X_train, y_train, cv=5)  # 5-fold cross-validation

# Print the cross-validation scores
print(f"Cross-validation scores: {cv_scores}")
print(f"Mean CV score: {np.mean(cv_scores)}")

# Fit the model on the training data
decision_tree.fit(X_train, y_train)

# Make predictions
y_pred = decision_tree.predict(X_test)

# Performance Metrics
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

Cross-validation scores: [0.70779977 0.70314319 0.66200466 0.67715618 0.64568765]
Mean CV score: 0.6791582883550288
              precision    recall  f1-score   support

           0       0.72      0.70      0.71       573
           1       0.67      0.69      0.68       500

    accuracy                           0.69      1073
   macro avg       0.69      0.69      0.69      1073
weighted avg       0.70      0.69      0.69      1073

[[399 174]
 [154 346]]


In [6]:
from sklearn.ensemble import RandomForestClassifier
# Initialize the Random Forest Classifier
random_forest = RandomForestClassifier(random_state=42)

# Set up the parameter grid for cross-validation
param_grid = {
    'n_estimators': [50, 100, 150, 200],  # Different values for the number of estimators
    'max_depth': [None, 10, 20, 30],      # Optional: Adding max depth to the grid
}

# Initialize GridSearchCV for finding the optimal parameters
grid_search = GridSearchCV(estimator=random_forest, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)

# Fit the model using grid search
grid_search.fit(X_train, y_train)

# Best parameters found by GridSearchCV
print(f"Best parameters found: {grid_search.best_params_}")

# Use the best estimator found
best_model = grid_search.best_estimator_

# Make predictions on the test set
y_pred = best_model.predict(X_test)

# Performance Metrics
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

Fitting 5 folds for each of 16 candidates, totalling 80 fits
Best parameters found: {'max_depth': 20, 'n_estimators': 200}
              precision    recall  f1-score   support

           0       0.80      0.83      0.81       573
           1       0.80      0.76      0.78       500

    accuracy                           0.80      1073
   macro avg       0.80      0.79      0.79      1073
weighted avg       0.80      0.80      0.80      1073

[[476  97]
 [122 378]]


In [7]:
from xgboost import XGBClassifier
import xgboost as xgb
from sklearn.datasets import load_breast_cancer
# Initialize XGBoost classifier
xgb_clf = xgb.XGBClassifier()

# Define hyperparameters to tune
param_grid = {
    'max_depth': [3, 4, 5],
    'learning_rate': [0.01, 0.1, 0.2],
    'n_estimators': [100, 200],
    'colsample_bytree': [0.7, 1.0]
}

# Perform Grid Search with Cross Validation
grid_search = GridSearchCV(
    estimator=xgb_clf,
    param_grid=param_grid,
    cv=5,                  # 5-fold cross-validation
    scoring='accuracy',     # Evaluation metric
    verbose=1,
    n_jobs=-1              # Use all CPU cores
)

# Fit model
grid_search.fit(X, y)

# Print best parameters and best score
print("Best Parameters: ", grid_search.best_params_)
print("Best Cross-Validation Score: ", grid_search.best_score_)
# Use the best model to predict on the test set
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Print classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

Fitting 5 folds for each of 36 candidates, totalling 180 fits
Best Parameters:  {'colsample_bytree': 1.0, 'learning_rate': 0.2, 'max_depth': 5, 'n_estimators': 200}
Best Cross-Validation Score:  0.8301957129543336

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       573
           1       1.00      1.00      1.00       500

    accuracy                           1.00      1073
   macro avg       1.00      1.00      1.00      1073
weighted avg       1.00      1.00      1.00      1073

[[573   0]
 [  0 500]]


In [None]:
from sklearn.svm import SVC
# Initialize SVM classifier
svm_classifier = SVC(kernel='linear', random_state=42)

# Cross-validation
cv_scores = cross_val_score(svm_classifier, X_train, y_train, cv=5)

# Train the model on the full training set
svm_classifier.fit(X_train, y_train)

# Make predictions
y_pred = svm_classifier.predict(X_test)

# Cross-validation results
print("Cross-validation scores (5-fold):", cv_scores)
print("Mean cross-validation score:", np.mean(cv_scores))

# Performance metrics
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

