<a href="https://colab.research.google.com/github/potat10125/Comp6271/blob/main/project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from PIL import Image
from torchvision import transforms
from sklearn.semi_supervised import SelfTrainingClassifier

# Root folder name plus one training directory per scene class.
# Keep the class order stable: the loading loop enumerates this dict, so a
# class's position here is the integer label assigned to its images.
data_folder = 'content'
data_paths = {
    scene: 'data/train/' + scene
    for scene in ('aquarium', 'hospital', 'beach', 'restaurant', 'volcano')
}

# Preprocessing pipeline applied to each image before it is flattened
transform = transforms.Compose(
    [
        # Bring every image to a fixed 64x64 size
        transforms.Resize(size=(64, 64)),
        # PIL image -> tensor
        transforms.ToTensor(),
        # Per-channel normalisation; mean and std were calculated by stats.py
        transforms.Normalize(
            mean=[0.43092114, 0.43950402, 0.43928512],
            std=[0.21397243, 0.21081277, 0.22242009],
        ),
    ]
)

# Load every training image as a flattened, normalised feature vector.
# `data` collects one 1-D numpy array per image; `labels` collects the integer
# class index (the position of the class in `data_paths`) for the same image.
data = []
labels = []
for label, class_dir in enumerate(data_paths.values()):
    # os.listdir returns entries in arbitrary, filesystem-dependent order;
    # sorting keeps the sample order (and therefore the seeded split further
    # down) reproducible across machines.
    for filename in sorted(os.listdir(class_dir)):
        # Skip hidden files such as .DS_Store
        if filename.startswith('.'):
            continue
        # Context manager closes the underlying file handle once the pixels
        # have been converted, instead of leaking one handle per image.
        with Image.open(os.path.join(class_dir, filename)) as img:
            # Force 3 channels: the Normalize step is configured with
            # 3-channel statistics and raises on grayscale/RGBA input.
            tensor = transform(img.convert('RGB'))
        data.append(tensor.numpy().flatten())
        labels.append(label)

# Convert labels to numpy array
labels = np.array(labels)

# Split into training and testing sets BEFORE hiding any labels, so that
# y_test keeps the true class of every test sample. (Masking first would put
# -1 placeholders into y_test; the classifier never predicts -1, so those
# samples would always count as errors and distort the accuracy.)
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, random_state=42
)

# Mark roughly half of the TRAINING labels as unlabeled (-1), which is the
# marker SelfTrainingClassifier uses for unlabeled samples. Work on a copy so
# the full `labels` array keeps the ground truth.
y_train = y_train.copy()
rng = np.random.RandomState(42)
random_unlabeled_points = rng.rand(len(y_train)) < 0.5
y_train[random_unlabeled_points] = -1

# Base estimator: a shallow decision tree. Tune these hyperparameters to adjust
# model accuracy. random_state is fixed (42, like the rest of this script) so
# that ties between equally good splits are broken the same way on every run.
tree_model = DecisionTreeClassifier(
    max_depth=5,
    min_samples_split=10,
    min_samples_leaf=5,
    random_state=42,
)

# Wrap the tree in a self-training classifier; training samples whose label
# is -1 are treated as unlabeled.
self_training_clf = SelfTrainingClassifier(tree_model)

# Train the classifier
self_training_clf.fit(X_train, y_train)

# Predict on test data
y_pred = self_training_clf.predict(X_test)

# Print accuracy. NOTE: this figure is only meaningful when y_test holds real
# class labels, i.e. contains no -1 placeholders.
print("Semi-supervised model accuracy:", metrics.accuracy_score(y_test, y_pred))


In [1]:
pwd

'/content'