In [1]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Load features and labels
vgg_features = np.load("featuresets/vgg_features.npy")
manual_features = np.load("featuresets/manual_features.npy")
labels = np.load("featuresets/vgg_labels.npy")

# Confirm shapes
print("VGG features:", vgg_features.shape)
print("Manual features:", manual_features.shape)
print("Labels:", labels.shape)

# Concatenate VGG and manual features
combined_features = np.hstack((vgg_features, manual_features))
print("Combined features shape:", combined_features.shape)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    combined_features, labels, test_size=0.2, random_state=42, stratify=labels
)

# Train Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Predict
y_pred = rf.predict(X_test)

# Evaluate
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("RF Accuracy:", accuracy_score(y_test, y_pred))


VGG features: (7000, 25088)
Manual features: (7000, 18)
Labels: (7000,)
Combined features shape: (7000, 25106)

Classification Report:
              precision    recall  f1-score   support

           0       0.47      0.56      0.51       200
           1       0.70      0.85      0.77       200
           2       0.38      0.33      0.35       200
           3       0.67      0.78      0.72       200
           4       0.80      0.66      0.72       200
           5       0.41      0.47      0.44       200
           6       0.51      0.33      0.40       200

    accuracy                           0.56      1400
   macro avg       0.56      0.57      0.56      1400
weighted avg       0.56      0.56      0.56      1400

RF Accuracy: 0.565


In [8]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.impute import SimpleImputer

vgg_features = np.load("featuresets/vgg_features.npy")
manual_features = np.load("featuresets/manual_features.npy")
labels = np.load("featuresets/vgg_labels.npy")

# Check dimensions
print("VGG features:", vgg_features.shape)
print("Manual features:", manual_features.shape)
print("Labels:", labels.shape)

# Concatenate features
combined_features = np.hstack((vgg_features, manual_features))
print("Combined feature shape:", combined_features.shape)

# Handle missing values using imputation
imputer = SimpleImputer(strategy='mean')
combined_features_imputed = imputer.fit_transform(combined_features)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(combined_features_imputed, labels, test_size=0.2, random_state=42)

# SVM classifier
model = SVC(kernel='rbf', random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluation
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("SVM Accuracy:", accuracy_score(y_test, y_pred))

VGG features: (7000, 25088)
Manual features: (7000, 18)
Labels: (7000,)
Combined feature shape: (7000, 25106)

Classification Report:
              precision    recall  f1-score   support

           0       0.47      0.57      0.52       203
           1       0.85      0.80      0.82       224
           2       0.39      0.38      0.38       178
           3       0.73      0.71      0.72       215
           4       0.72      0.81      0.77       183
           5       0.46      0.46      0.46       211
           6       0.40      0.32      0.35       186

    accuracy                           0.59      1400
   macro avg       0.58      0.58      0.58      1400
weighted avg       0.58      0.59      0.58      1400

SVM Accuracy: 0.5864285714285714


In [12]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report

# Load features and labels
vgg_features = np.load("featuresets/vgg_features.npy")
manual_features = np.load("featuresets/manual_features.npy")
labels = np.load("featuresets/vgg_labels.npy")

# Check dimensions
print("VGG features:", vgg_features.shape)
print("Manual features:", manual_features.shape)
print("Labels:", labels.shape)

# Concatenate features
combined_features = np.hstack((vgg_features, manual_features))
print("Combined feature shape:", combined_features.shape)

# Handle missing values
imputer = SimpleImputer(strategy='mean')
combined_features = imputer.fit_transform(combined_features)

# Scale features
scaler = StandardScaler()
combined_features = scaler.fit_transform(combined_features)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(combined_features, labels, test_size=0.2, random_state=42, stratify=labels)

# Train model
model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
# Evaluation
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("Logostic Regression Accuracy:", accuracy_score(y_test, y_pred))

VGG features: (7000, 25088)
Manual features: (7000, 18)
Labels: (7000,)
Combined feature shape: (7000, 25106)

Classification Report:
              precision    recall  f1-score   support

           0       0.58      0.51      0.54       200
           1       0.83      0.90      0.86       200
           2       0.46      0.41      0.43       200
           3       0.77      0.78      0.77       200
           4       0.78      0.80      0.79       200
           5       0.51      0.57      0.54       200
           6       0.52      0.51      0.52       200

    accuracy                           0.64      1400
   macro avg       0.63      0.64      0.64      1400
weighted avg       0.63      0.64      0.64      1400

Logostic Regression Accuracy: 0.6385714285714286


In [14]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, accuracy_score

# Load features and labels
vgg_features = np.load("featuresets/vgg_features.npy")
manual_features = np.load("featuresets/manual_features.npy")
labels = np.load("featuresets/vgg_labels.npy")

# Check dimensions
print("VGG features:", vgg_features.shape)
print("Manual features:", manual_features.shape)
print("Labels:", labels.shape)

# Concatenate features
combined_features = np.hstack((vgg_features, manual_features))
print("Combined feature shape:", combined_features.shape)

# Handle missing values
imputer = SimpleImputer(strategy='mean')
combined_features = imputer.fit_transform(combined_features)

# Scale features
scaler = StandardScaler()
combined_features = scaler.fit_transform(combined_features)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(combined_features, labels, test_size=0.2, random_state=42, stratify=labels)

# Train Decision Tree
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict and Evaluate
y_pred = model.predict(X_test)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred))


VGG features: (7000, 25088)
Manual features: (7000, 18)
Labels: (7000,)
Combined feature shape: (7000, 25106)

Classification Report:
              precision    recall  f1-score   support

           0       0.43      0.43      0.43       200
           1       0.61      0.63      0.62       200
           2       0.34      0.32      0.32       200
           3       0.57      0.56      0.56       200
           4       0.61      0.60      0.61       200
           5       0.34      0.29      0.31       200
           6       0.29      0.34      0.31       200

    accuracy                           0.45      1400
   macro avg       0.45      0.45      0.45      1400
weighted avg       0.45      0.45      0.45      1400

Decision Tree Accuracy: 0.45285714285714285


In [16]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, accuracy_score

# Load features and labels
vgg_features = np.load("featuresets/vgg_features.npy")
manual_features = np.load("featuresets/manual_features.npy")
labels = np.load("featuresets/vgg_labels.npy")

# Check dimensions
print("VGG features:", vgg_features.shape)
print("Manual features:", manual_features.shape)
print("Labels:", labels.shape)

# Concatenate features
combined_features = np.hstack((vgg_features, manual_features))
print("Combined feature shape:", combined_features.shape)

# Handle missing values
imputer = SimpleImputer(strategy='mean')
combined_features = imputer.fit_transform(combined_features)

# Scale features
scaler = StandardScaler()
combined_features = scaler.fit_transform(combined_features)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(combined_features, labels, test_size=0.2, random_state=42, stratify=labels)

# Train KNN
model = KNeighborsClassifier()
model.fit(X_train, y_train)

# Predict and Evaluate
y_pred = model.predict(X_test)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("KNN Accuracy:", accuracy_score(y_test, y_pred))


VGG features: (7000, 25088)
Manual features: (7000, 18)
Labels: (7000,)
Combined feature shape: (7000, 25106)


found 0 physical cores < 1
  File "D:\Anaconda\Lib\site-packages\joblib\externals\loky\backend\context.py", line 282, in _count_physical_cores
    raise ValueError(f"found {cpu_count_physical} physical cores < 1")



Classification Report:
              precision    recall  f1-score   support

           0       0.26      0.27      0.26       200
           1       0.66      0.44      0.53       200
           2       0.31      0.20      0.24       200
           3       0.70      0.12      0.20       200
           4       0.32      0.83      0.46       200
           5       0.33      0.12      0.17       200
           6       0.23      0.35      0.28       200

    accuracy                           0.33      1400
   macro avg       0.40      0.33      0.31      1400
weighted avg       0.40      0.33      0.31      1400

KNN Accuracy: 0.3314285714285714


In [18]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, accuracy_score

# Load features and labels
vgg_features = np.load("featuresets/vgg_features.npy")
manual_features = np.load("featuresets/manual_features.npy")
labels = np.load("featuresets/vgg_labels.npy")

# Check dimensions
print("VGG features:", vgg_features.shape)
print("Manual features:", manual_features.shape)
print("Labels:", labels.shape)

# Concatenate features
combined_features = np.hstack((vgg_features, manual_features))
print("Combined feature shape:", combined_features.shape)

# Handle missing values
imputer = SimpleImputer(strategy='mean')
combined_features = imputer.fit_transform(combined_features)

# Scale features
scaler = StandardScaler()
combined_features = scaler.fit_transform(combined_features)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(combined_features, labels, test_size=0.2, random_state=42, stratify=labels)

# Train Naive Bayes
model = GaussianNB()
model.fit(X_train, y_train)

# Predict and Evaluate
y_pred = model.predict(X_test)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("Naive Bayes Accuracy:", accuracy_score(y_test, y_pred))


VGG features: (7000, 25088)
Manual features: (7000, 18)
Labels: (7000,)
Combined feature shape: (7000, 25106)

Classification Report:
              precision    recall  f1-score   support

           0       0.36      0.35      0.35       200
           1       0.49      0.45      0.47       200
           2       0.31      0.32      0.31       200
           3       0.40      0.30      0.35       200
           4       0.59      0.28      0.38       200
           5       0.32      0.49      0.39       200
           6       0.22      0.29      0.25       200

    accuracy                           0.36      1400
   macro avg       0.38      0.36      0.36      1400
weighted avg       0.38      0.36      0.36      1400

Naive Bayes Accuracy: 0.3557142857142857


In [21]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.impute import SimpleImputer
from xgboost import XGBClassifier

# Load features and labels
vgg_features = np.load("featuresets/vgg_features.npy")
manual_features = np.load("featuresets/manual_features.npy")
labels = np.load("featuresets/vgg_labels.npy")

# Check dimensions
print("VGG features:", vgg_features.shape)
print("Manual features:", manual_features.shape)
print("Labels:", labels.shape)

# Concatenate features
combined_features = np.hstack((vgg_features, manual_features))
print("Combined feature shape:", combined_features.shape)

# Handle missing values
imputer = SimpleImputer(strategy='mean')
combined_features = imputer.fit_transform(combined_features)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(combined_features, labels, test_size=0.2, random_state=42)

# Train XGBoost classifier
model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("XGBoost Accuracy:", accuracy_score(y_test, y_pred))


VGG features: (7000, 25088)
Manual features: (7000, 18)
Labels: (7000,)
Combined feature shape: (7000, 25106)


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


XGBoostError: [23:28:12] C:\actions-runner\_work\xgboost\xgboost\src\common\io.h:389: bad_malloc: Failed to allocate 4709570688 bytes.