In [None]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
import numpy as np

# Feature extraction: run every image under DATA_DIR through a frozen VGG16
# convolutional base followed by global average pooling, then persist the
# resulting feature matrix together with the integer class label of each image.

# Per the Keras Applications documentation, VGG16's ImageNet weights expect
# inputs passed through `vgg16.preprocess_input` rather than a plain 1/255 rescale.
from tensorflow.keras.applications.vgg16 import preprocess_input

# Root folder containing one sub-directory per class.  Update this path
DATA_DIR = 'E:\\College\\SEMESTER 4\\MACHINE LEARNING\\Archive'
IMAGE_SIZE = (150, 150)
BATCH_SIZE = 32

# Load VGG16 pre-trained on ImageNet without the top classification layer
base_model = VGG16(weights='imagenet', include_top=False, input_shape=IMAGE_SIZE + (3,))

# Freeze the layers of the base_model (it is only used for inference here)
for layer in base_model.layers:
    layer.trainable = False

# Collapse each spatial feature map to a single value per channel
gap_layer = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
model = Model(inputs=base_model.input, outputs=gap_layer)

# Prepare the data generator
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
generator = datagen.flow_from_directory(
    DATA_DIR,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode=None,  # yield images only; labels are read from generator.classes below
    shuffle=False)    # keep directory order so features line up with generator.classes

# `steps` must be an integer; np.ceil returns a float, so cast explicitly.
n_steps = int(np.ceil(generator.samples / generator.batch_size))

# Use model to predict and extract features
features = model.predict(generator, steps=n_steps)

# Get labels associated with each feature
labels = generator.classes

# Every feature row must have exactly one label before anything is written to disk.
assert len(features) == len(labels), (
    f"feature/label count mismatch: {len(features)} features vs {len(labels)} labels"
)

# Save the extracted features and labels
np.save('extracted_features.npy', features)
np.save('labels.npy', labels)

# Example to load the extracted features and labels
loaded_features = np.load('extracted_features.npy')
loaded_labels = np.load('labels.npy')

# You can now use `loaded_features` and `loaded_labels` for further analysis or machine learning tasks.


In [None]:
import numpy as np

# Read the saved feature matrix back from disk and report its dimensions
# as a quick sanity check.
loaded_features = np.load('extracted_features.npy')

feature_shape = loaded_features.shape
print("Shape of loaded features:", feature_shape)


In [None]:
import numpy as np
from sklearn.decomposition import PCA

# Reduce the saved features to 5 principal components and group the reduced
# vectors by class name in `reduced_features_by_class` (name -> list of vectors).

# Load the extracted features and labels
loaded_features = np.load('extracted_features.npy')
loaded_labels = np.load('labels.npy')

# Perform PCA on the loaded features to reduce to 5 features.
# random_state pins the result when scikit-learn selects a randomized SVD solver.
pca = PCA(n_components=5, random_state=42)
reduced_features = pca.fit_transform(loaded_features)

# Manually defined mapping from class indices to class names
class_indices_to_names = {
    0: 'Academic Art',
    1: 'Art Nouveau',
    2: 'Baroque',
    3: 'Expressionism',
    4: 'Japanese Art',
    5: 'Neoclassism',
    6: 'Primitivism',
    7: 'Realism',
    8: 'Renaissance',
    9: 'Rococo',
    10: 'Romanticism',
    11: 'Symbolism',
    12: 'Western Medieval'
}

# Fail early, with a readable message, if the label file contains an index
# that the manual mapping above does not cover.
unmapped_labels = set(np.unique(loaded_labels).tolist()) - set(class_indices_to_names)
if unmapped_labels:
    raise KeyError(f"labels without a class name: {sorted(unmapped_labels)}")

# Initialize a dictionary to hold reduced features by class names
reduced_features_by_class = {name: [] for name in class_indices_to_names.values()}

# Populate the dictionary with reduced features
for feature_vector, label in zip(reduced_features, loaded_labels):
    reduced_features_by_class[class_indices_to_names[int(label)]].append(feature_vector)

# Print a short preview per class instead of dumping every vector.
# The loop variable is deliberately not called `features`, so the feature
# matrix produced by the extraction cell is not overwritten.
PREVIEW_ROWS = 5
for class_name, class_vectors in reduced_features_by_class.items():
    print(f"Class: {class_name}, Number of Feature Vectors: {len(class_vectors)}")
    for feature_vector in class_vectors[:PREVIEW_ROWS]:
        print(feature_vector)
    if len(class_vectors) > PREVIEW_ROWS:
        print(f"... ({len(class_vectors) - PREVIEW_ROWS} more not shown)")
    print("\n")  # Adds a newline for readability between classes


In [None]:
import numpy as np
from numpy.linalg import norm

# Per-class summary statistics of the PCA-reduced vectors, keyed by class name:
#   class_centroids -> mean vector of the class
#   class_spreads   -> per-component standard deviation of the class
class_centroids = {}
class_spreads = {}

for style_name, style_vectors in reduced_features_by_class.items():
    # Stack the list of vectors into one 2-D array (rows = samples)
    stacked = np.array(style_vectors)
    class_centroids[style_name] = stacked.mean(axis=0)
    class_spreads[style_name] = stacked.std(axis=0)

# Report both statistics for every class
for style_name, centroid in class_centroids.items():
    print(f"Class: {style_name}")
    print(f"  Centroid (Mean): {centroid}")
    print(f"  Spread (Standard Deviation): {class_spreads[style_name]}\n")

# Euclidean distance between the centroids of two chosen classes
centroid_gap = class_centroids['Academic Art'] - class_centroids['Art Nouveau']
distance = norm(centroid_gap)
print(f"Distance between centroids of 'Academic Art' and 'Art Nouveau': {distance}")


In [None]:
import matplotlib.pyplot as plt

# Project the PCA-reduced features onto 2 components, purely for plotting
pca_2d = PCA(n_components=2)
reduced_features_2d = pca_2d.fit_transform(reduced_features)

# 2-D centroid of each class: project the class members, then average them
centroids_2d = {
    name: pca_2d.transform(np.array(vectors)).mean(axis=0)
    for name, vectors in reduced_features_by_class.items()
}

# One marker per class centroid (all classes are drawn, not only two)
plt.figure(figsize=(8, 6))
for name, (x_coord, y_coord) in centroids_2d.items():
    plt.scatter(x_coord, y_coord, label=name)

# Dashed connector between the 'Academic Art' and 'Art Nouveau' centroids
line_start = centroids_2d['Academic Art']
line_end = centroids_2d['Art Nouveau']
plt.plot([line_start[0], line_end[0]],
         [line_start[1], line_end[1]],
         'k--', linewidth=1)

plt.title('Centroids of Classes in 2D PCA-Reduced Feature Space')
plt.xlabel('PCA Feature 1')
plt.ylabel('PCA Feature 2')
plt.legend()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Histogram of the first PCA component across all samples, annotated with
# its mean and one standard deviation on either side.

# Select the first feature for all samples
selected_feature = reduced_features[:, 0]

# Calculate mean, variance and standard deviation once
mean = np.mean(selected_feature)
variance = np.var(selected_feature)
std_dev = np.sqrt(variance)

# Plot the histogram; plt.hist already returns the counts and bin edges, so
# no separate np.histogram pass is needed.
plt.figure(figsize=(10, 6))
hist_data, bin_edges, hist_patches = plt.hist(
    selected_feature, bins='auto', alpha=0.7, color='blue', edgecolor='black')
plt.title('Histogram of the Selected Feature')
plt.xlabel('Feature Value')
plt.ylabel('Frequency')

# Annotate mean and standard deviation on the plot, labels placed at 90% of the y-axis height
label_height = plt.ylim()[1] * 0.9
plt.axvline(mean, color='r', linestyle='dashed', linewidth=1)
plt.text(mean, label_height, f'Mean: {mean:.2f}', color='red')
plt.axvline(mean - std_dev, color='g', linestyle='dashed', linewidth=1)
plt.axvline(mean + std_dev, color='g', linestyle='dashed', linewidth=1)
plt.text(mean + std_dev, label_height, f'Std Dev: {std_dev:.2f}', color='green')

plt.show()

# Print the mean and variance
print(f"Mean of the selected feature: {mean}")
print(f"Variance of the selected feature: {variance}")


In [None]:
import numpy as np
from scipy.spatial.distance import minkowski
import matplotlib.pyplot as plt

# Compare the first two PCA-reduced samples under Minkowski distances of
# increasing order r and plot how the distance changes with r.
feature_vector_1, feature_vector_2 = reduced_features[0], reduced_features[1]

# Orders r = 1 .. 10
r_values = range(1, 11)
distances = [
    minkowski(feature_vector_1, feature_vector_2, p=order)
    for order in r_values
]

# Distance as a function of r
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(r_values, distances, marker='o', linestyle='-', color='b')
ax.set_title('Minkowski Distance Between Two Feature Vectors')
ax.set_xlabel('r value')
ax.set_ylabel('Minkowski Distance')
ax.set_xticks(r_values)
ax.grid(True)
plt.show()


In [None]:
from sklearn.model_selection import train_test_split

# Build a two-class dataset from the full feature matrix and split it 70/30.

# Class indices to keep.  Example: 0 for 'Academic Art', 1 for 'Art Nouveau'
selected_class_indices = [0, 1]

# Boolean mask of the samples that belong to either selected class
in_selection = np.isin(loaded_labels, selected_class_indices)
selected_features = loaded_features[in_selection]
selected_labels = loaded_labels[in_selection]

# Re-encode as binary: 0 for the first selected class, 1 for the other
selected_labels = np.where(selected_labels == selected_class_indices[0], 0, 1)

# Hold out 30% of the samples for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    selected_features,
    selected_labels,
    test_size=0.3,
    random_state=42,
)

print("Training set size:", X_train.shape)
print("Testing set size:", X_test.shape)


In [None]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# Build a k-nearest-neighbours classifier with k = 3
N_NEIGHBORS = 3
neigh = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)

# Fit on the training split; the call is left as the cell's last expression
# so the fitted estimator is also displayed.
neigh.fit(X=X_train, y=y_train)


In [None]:
# Mean accuracy of the fitted classifier on the held-out test split
accuracy = neigh.score(X=X_test, y=y_test)
print(accuracy)


In [None]:
# Predict the class of the first test sample and report its class name.
# `predicted_class` was never defined in the notebook, so it is computed here.
predicted_class = neigh.predict(X_test[:1])

# The classifier was trained on binary labels (0/1) that index into
# `selected_class_indices`, so map back to the original class index before
# looking up the name in `class_indices_to_names`.
original_class_index = selected_class_indices[int(predicted_class[0])]
predicted_class_name = class_indices_to_names[original_class_index]
print(predicted_class_name)


In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# Sweep k = 1 .. 11 and record the test-set accuracy of a kNN classifier for
# each value, then plot accuracy against k.
k_values = range(1, 12)
accuracies = []

for k in k_values:
    # Use loop-local names so the sweep does not overwrite `neigh` (the
    # classifier fitted earlier) or `accuracy` (its test score); otherwise
    # later cells would silently evaluate the k=11 model from the last iteration.
    knn_k = KNeighborsClassifier(n_neighbors=k)
    knn_k.fit(X_train, y_train)
    # Accuracy of this k on the test set
    accuracy_k = accuracy_score(y_test, knn_k.predict(X_test))
    accuracies.append(accuracy_k)

# Plot the accuracy for different values of k
plt.figure(figsize=(10, 6))
plt.plot(k_values, accuracies, marker='o', linestyle='-', color='b')
plt.title('k-NN Classifier Accuracy')
plt.xlabel('k')
plt.ylabel('Accuracy')
plt.xticks(k_values)
plt.grid(True)
plt.show()


In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Evaluate the current `neigh` classifier on both splits: confusion matrices
# first, then precision / recall / F1 reports.

# Predictions for the held-out and the training samples
y_pred_test = neigh.predict(X_test)
y_pred_train = neigh.predict(X_train)

# Confusion matrices (rows = true class, columns = predicted class)
conf_matrix_test = confusion_matrix(y_true=y_test, y_pred=y_pred_test)
conf_matrix_train = confusion_matrix(y_true=y_train, y_pred=y_pred_train)

print("Confusion Matrix (Test Data):")
print(conf_matrix_test)

print("\nConfusion Matrix (Training Data):")
print(conf_matrix_train)

# Per-class precision, recall and F1-score for each split
print("\nClassification Report (Test Data):")
print(classification_report(y_true=y_test, y_pred=y_pred_test))

print("Classification Report (Training Data):")
print(classification_report(y_true=y_train, y_pred=y_pred_train))
