In [None]:
import pandas as pd

# Sanity check: read the BEiT feature table and print its first five rows.
data = pd.read_csv(
    filepath_or_buffer='../2_Feature_Extraction/BEiT/feature_extract_BEiT_features.csv'
)

print(data.head(n=5))

In [None]:
import pandas as pd
import umap.umap_ as umap
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.manifold import TSNE
import warnings

In [None]:
# Silence FutureWarning messages for every cell that runs after this one.
warnings.filterwarnings(action="ignore", category=FutureWarning)

# Read the BEiT feature table from disk.
file_path = '../2_Feature_Extraction/BEiT/feature_extract_BEiT_features.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

# Split the table into plot labels and the feature matrix.
# The split is positional: everything except the trailing three columns is
# treated as a feature. NOTE(review): the original comment names those three
# columns as label, class and filename - confirm against the CSV header.
labels = data['class']
features = data.iloc[:, slice(None, -3)]

In [None]:
# Project the feature matrix to 2-D with UMAP and save the scatter plot.
#
# UMAP is stochastic. Without a seed every run yields a different embedding
# (and a different saved figure), so the seed is fixed here, using the same
# value as the t-SNE cell. Seeding makes umap-learn run single-threaded.
reducer = umap.UMAP(random_state=42)
umap_embedding = reducer.fit_transform(features)

# Collect the embedding in a DataFrame for seaborn.
umap_df = pd.DataFrame(umap_embedding, columns=['UMAP1', 'UMAP2'])
# Assign positionally: the embedding rows are in the same order as `labels`,
# but a plain Series assignment aligns on index and would silently insert
# NaN / wrong classes if `data` ever carried a non-default index.
umap_df['class'] = labels.to_numpy()

# Set the font before any artist is created so every text element uses it.
plt.rcParams['font.family'] = 'Times New Roman'

# Plot the UMAP representation on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(12, 8))
sns.scatterplot(
    data=umap_df,
    x='UMAP1',
    y='UMAP2',
    hue='class',
    palette='tab10',  # categorical palette with well-separated colors
    s=100,            # large markers for print readability
    legend='full',
    ax=ax,
)
ax.set_title('UMAP Representation of BEiT', fontsize=28, pad=20)
ax.set_xlabel('UMAP1', fontsize=22)
ax.set_ylabel('UMAP2', fontsize=22)
ax.tick_params(axis='both', labelsize=18)

# Add border (spines)
ax.set_facecolor('white')
for spine in ax.spines.values():
    spine.set_visible(True)
    spine.set_linewidth(1.5)
    spine.set_edgecolor('black')

# Legend with white background. Re-creating the legend applies the larger
# font/marker scale to the entries seaborn generated.
legend = ax.legend(loc='best', fontsize=18, markerscale=2)
legend_frame = legend.get_frame()
legend_frame.set_facecolor('white')
legend_frame.set_edgecolor('black')
legend_frame.set_linewidth(1.0)

# Save as raster (PNG) and vector (PDF).
# NOTE(review): transparent=True makes the figure and axes patches transparent
# in the saved files, so the white axes face set above only shows in the
# inline display. Drop transparent=True if a white background is wanted on disk.
for output_name in ('UMAP_representation_BEiT_v1.png', 'UMAP_representation_BEiT_v1.pdf'):
    fig.savefig(output_name, dpi=1000, bbox_inches='tight', transparent=True)
plt.show()
plt.close(fig)

In [None]:
# Project the feature matrix to 2-D with t-SNE and save the scatter plot.
#
# The iteration count is left at its default of 1000 instead of passing
# n_iter=1000: scikit-learn 1.5 renamed `n_iter` to `max_iter` and later
# releases removed the old name, so passing either keyword explicitly breaks on
# some versions. Relying on the default gives the same 1000 iterations on all.
tsne = TSNE(n_components=2, perplexity=30, random_state=42)
tsne_embedding = tsne.fit_transform(features)

# Collect the embedding in a DataFrame for seaborn.
tsne_df = pd.DataFrame(tsne_embedding, columns=['tSNE1', 'tSNE2'])
# Assign positionally: the embedding rows are in the same order as `labels`,
# but a plain Series assignment aligns on index and would silently insert
# NaN / wrong classes if `data` ever carried a non-default index.
tsne_df['class'] = labels.to_numpy()

# Set the font before any artist is created so every text element uses it.
plt.rcParams['font.family'] = 'Times New Roman'

# Plot the t-SNE representation on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(12, 8))
sns.scatterplot(
    data=tsne_df,
    x='tSNE1',
    y='tSNE2',
    hue='class',
    palette='tab10',
    s=100,
    legend='full',
    ax=ax,
)
ax.set_title('t-SNE Representation of BEiT', fontsize=28, pad=20)
ax.set_xlabel('tSNE1', fontsize=22)
ax.set_ylabel('tSNE2', fontsize=22)
ax.tick_params(axis='both', labelsize=18)

# Add border (spines)
ax.set_facecolor('white')
for spine in ax.spines.values():
    spine.set_visible(True)
    spine.set_linewidth(1.5)
    spine.set_edgecolor('black')

# Legend with white background. Re-creating the legend applies the larger
# font/marker scale to the entries seaborn generated.
legend = ax.legend(loc='best', fontsize=18, markerscale=2)
legend_frame = legend.get_frame()
legend_frame.set_facecolor('white')
legend_frame.set_edgecolor('black')
legend_frame.set_linewidth(1.0)

# Save as raster (PNG) and vector (PDF).
# NOTE(review): transparent=True makes the figure and axes patches transparent
# in the saved files, so the white axes face set above only shows in the
# inline display. Drop transparent=True if a white background is wanted on disk.
for output_name in ('tSNE_representation_BEiT_v1.png', 'tSNE_representation_BEiT_v1.pdf'):
    fig.savefig(output_name, dpi=1000, bbox_inches='tight', transparent=True)
plt.show()
plt.close(fig)