# This notebook loads the extracted HoG features after PCA has been applied to them, trains several models on them for testing, and stores the trained models in .pkl files


### Loading PCA transformed data

In [1]:
# Restore the PCA model that was persisted with joblib
import joblib
pca_loaded = joblib.load('pca_model.joblib')

# Restore the three transformed HoG feature sets (32, 64 and 128) in a single
# tuple assignment; the files are read in the same order as listed.
transformed_data_32, transformed_data_64, transformed_data_128 = (
    joblib.load('HoG_transformed_data_32.joblib'),
    joblib.load('HoG_transformed_data_64.joblib'),
    joblib.load('HoG_transformed_data_128.joblib'),
)

# Restore the labels; the model cells below pass `y` as the target alongside these feature sets
y = joblib.load('labels.joblib')

### Random Forest Model

In [2]:
from sklearn.ensemble import RandomForestClassifier
import joblib
import time

# Training-set accuracy of the Random Forest, keyed by the number of PCA components used
accuracies_RF = {}

# Initialize the Random Forest model
# n_estimators, max_depth and max_features can be adjusted to balance performance against training time
rf_model = RandomForestClassifier(n_estimators=10, max_depth = 20 , max_features='sqrt', random_state=42, n_jobs=-1)

# Train the Random Forest model on the 128-component features
# (use transformed_data_32 or transformed_data_64 instead to train on fewer PCA components).
# perf_counter() is a monotonic clock, so the measured duration cannot be skewed
# by system clock adjustments that happen while the (multi-minute) fit is running.
start_time = time.perf_counter()
rf_model.fit(transformed_data_128, y)
end_time = time.perf_counter()
elapsed_time = end_time - start_time
print("Time taken to fit model :" ,elapsed_time)

# Save the trained Random Forest model
joblib.dump(rf_model, 'HoG_random_forest_model_128_features.pkl')

# Record (not print) the accuracy on the training dataset. This is measured on the
# same data the model was fit on, so it is not a held-out estimate.
accuracies_RF['128'] = rf_model.score(transformed_data_128, y)

Time taken to fit model : 225.38625168800354


In [3]:
# Training-set accuracy of the Random Forest fit on transformed_data_128
print(accuracies_RF['128'])

0.3667346784553767


### XGBoost Model

In [4]:
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import joblib
import time
# transformed_data_32, transformed_data_64 and transformed_data_128 are the feature sets
# and 'y' is the target variable with class labels (both loaded in the first cell)

# Encode the class labels in 'y' as integers for XGBoost
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Initialize a dictionary to store accuracies for different feature sets
accuracies_XGB = {}

# Set n_jobs to -1 to use all available cores
xgb_model_128 = xgb.XGBClassifier(n_estimators=20, max_depth=20, random_state=42, n_jobs=-1)

# perf_counter() is a monotonic clock, so the measured duration cannot be skewed
# by system clock adjustments that happen while the (multi-minute) fit is running.
start_time = time.perf_counter()
xgb_model_128.fit(transformed_data_128, y_encoded)
end_time = time.perf_counter()
elapsed_time = end_time - start_time
print("Time taken to fit model :" ,elapsed_time)

# Save the trained model together with the label encoder: the model predicts the
# encoded integer classes, so the encoder is needed to map predictions back to the
# original labels when the model is loaded elsewhere.
joblib.dump(xgb_model_128, 'HoG_xgb_model_128_features.pkl')
joblib.dump(label_encoder, 'HoG_xgb_label_encoder_128_features.pkl')

# Record the accuracy on the training dataset (same data the model was fit on,
# so this is not a held-out estimate).
predictions_128 = xgb_model_128.predict(transformed_data_128)
accuracies_XGB['128'] = accuracy_score(y_encoded, predictions_128)

Time taken to fit model : 554.7408919334412


In [5]:
# Training-set accuracy of the XGBoost model fit on transformed_data_128
print(accuracies_XGB['128'])

0.230559963727046


### Logistic Regression Model

In [6]:
from sklearn.linear_model import LogisticRegression
import joblib
import time

# Training-set accuracy of the logistic regression, keyed by the number of PCA components used
accuracies = {}

# Train logistic regression for 128 PCA components.
# The 'saga' solver shuffles the data, so random_state is fixed (to the same seed the
# other seeded models in this notebook use) to make the fit reproducible across runs.
clf_128 = LogisticRegression(solver='saga', penalty='l2', max_iter=1000, tol=0.01, n_jobs=-1, random_state=42)

# perf_counter() is a monotonic clock, so the measured duration cannot be skewed
# by system clock adjustments that happen while the (long-running) fit is in progress.
start_time = time.perf_counter()
clf_128.fit(transformed_data_128, y)
end_time = time.perf_counter()
elapsed_time = end_time - start_time
print("Time taken to fit model :" ,elapsed_time)

# Save the trained logistic regression model
joblib.dump(clf_128, 'HoG_logistic_regression_128_features.pkl')

# Record the accuracy on the training dataset (same data the model was fit on,
# so this is not a held-out estimate).
accuracies['128'] = clf_128.score(transformed_data_128, y)

Time taken to fit model : 3149.9349443912506


In [7]:
# Training-set accuracy of the logistic regression fit on transformed_data_128
print(accuracies['128'])

0.966976498148568


### LinearSVC Model

In [8]:
from sklearn.svm import LinearSVC
import joblib
import time

# Training-set accuracy of the LinearSVC, keyed by the number of PCA components used
accuracies_SVM = {}

# Initialize the LinearSVC
linear_svm_model = LinearSVC(C=1.0, random_state=42, max_iter=1000)

# Start timing. perf_counter() is a monotonic clock, so the measured duration cannot be
# skewed by system clock adjustments that happen while the (long-running) fit is in progress.
start_time = time.perf_counter()

# Train the LinearSVC model on the dataset with 128 PCA components
linear_svm_model.fit(transformed_data_128, y)

# Calculate and print the elapsed training time
end_time = time.perf_counter()
elapsed_time = end_time - start_time
print("Time taken to fit model :" ,elapsed_time)

# Save the trained SVM model
joblib.dump(linear_svm_model, 'HoG_linear_svc_128_features.pkl')

# Record (not print) the accuracy on the training dataset. This is measured on the
# same data the model was fit on, so it is not a held-out estimate.
accuracies_SVM['128'] = linear_svm_model.score(transformed_data_128, y)




Time taken to fit model : 2170.88875412941


In [9]:
# Training-set accuracy of the LinearSVC fit on transformed_data_128
print(accuracies_SVM['128'])

0.965313987757878


### KNN Model

In [10]:
from sklearn.neighbors import KNeighborsClassifier
import joblib

# Training-set accuracy of the KNN model, keyed by number of PCA components
accuracies_KNN = dict()

# 5-nearest-neighbour classifier; 'n_neighbors' and the other parameters can be
# tuned to trade performance against run time
knn_model = KNeighborsClassifier(n_jobs=-1, n_neighbors=5)

# Fit on the 128-component feature set
# (transformed_data_32 or transformed_data_64 can be substituted as needed)
knn_model.fit(X=transformed_data_128, y=y)

# Persist the fitted KNN model
joblib.dump(knn_model, 'HoG_knn_model_128_features.pkl')

# Score against the same data the model was fit on and file the result under '128'
accuracies_KNN.update({'128': knn_model.score(X=transformed_data_128, y=y)})

In [11]:
# Training-set accuracy of the KNN model fit on transformed_data_128
print(accuracies_KNN['128'])

0.27189601753192777
