In [None]:
import pandas as pd 
import os 
from skimage.transform import resize 
from skimage.io import imread 
import numpy as np 
import matplotlib.pyplot as plt 
from sklearn import svm 
from sklearn.model_selection import GridSearchCV 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report
from joblib import dump, load
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [None]:
Categories = ['happy', 'sad']
flat_data_arr = []  # input array: one flattened image per entry
target_arr = []  # output array: class index (position in Categories) per image
datadir = 'train/'
# Each category's images live in '<datadir>/<category>_train'.
for label, category in enumerate(Categories):
    print(f'loading... category : {category}')
    path = os.path.join(datadir, f'{category}_train')
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order stable between runs so the seeded train/test split is reproducible.
    for img in sorted(os.listdir(path)):
        img_array = imread(os.path.join(path, img))
        # Bring every image to a common 48x48x3 shape so all flattened
        # feature vectors have the same length.
        img_resized = resize(img_array, (48, 48, 3))
        flat_data_arr.append(img_resized.flatten())
        target_arr.append(label)
    print(f'loaded category:{category} successfully')

flat_data = np.array(flat_data_arr)
target = np.array(target_arr)


In [None]:
# Tabular view of the dataset: one row per image, one column per flattened
# pixel value, with the class index appended as the final 'Target' column.
df = pd.DataFrame(flat_data).assign(Target=target)
df.shape


In [None]:
# Features: every column except the trailing 'Target' label column
x = df.drop(columns='Target')
# Labels: the 'Target' column itself
y = df['Target']


In [None]:
# Hold out 20% of the samples for testing. The split is seeded for
# repeatability and stratified on y so both parts keep the class proportions.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=123,
    stratify=y,
)


In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Candidate hyper-parameter values the randomized search samples from.
param_dist = {'C': [0.1, 1, 10, 100],
              'gamma': [0.0001, 0.001, 0.1, 1],
              'kernel': ['rbf', 'poly']}

# probability=True enables predict_proba on the fitted model. The probability
# calibration shuffles the data internally, so seed the estimator as well;
# otherwise predicted probabilities can differ from run to run.
model = svm.SVC(probability=True, random_state=123)
# Try 5 sampled parameter combinations, each scored with 5-fold cross-validation.
random_search = RandomizedSearchCV(model, param_distributions=param_dist, n_iter=5, cv=5, random_state=123)
random_search.fit(x_train, y_train)
print("Best Parameters: ", random_search.best_params_)

# Access the best model from the randomized search
best_model_randomized = random_search.best_estimator_

In [None]:
# Where the tuned classifier is persisted
model_dir = 'svm'
model_path = 'svm/model.svm_model'

# Create the target folder on first use; a pre-existing folder is fine
os.makedirs(model_dir, exist_ok=True)

# Serialize the best estimator found by the randomized search
dump(best_model_randomized, model_path)


In [None]:
# Restore the persisted classifier from disk.
# NOTE(review): joblib files are pickle-based, so only load model files that
# come from a trusted source.
saved_model_file = 'svm/model.svm_model'
model = load(saved_model_file)

In [None]:
# Predict labels for the held-out split with the restored model
y_pred = model.predict(x_test)

# Fraction of held-out samples whose predicted label matches the true label
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Test Accuracy: {accuracy}')

In [None]:
import matplotlib.pyplot as plt
from sklearn.base import clone
from sklearn.metrics import accuracy_score

# Learning curve: fit the tuned SVM configuration on growing prefixes of the
# training split and record accuracy on the full training and test splits.
subset_sizes = []
train_accuracy_history = []
test_accuracy_history = []

# Train the model for different subsets of the training data
for subset_size in range(10, len(x_train), 10):
    # Use a subset of the training data (first `subset_size` rows, positional)
    subset_x_train = x_train.iloc[:subset_size]
    subset_y_train = y_train.iloc[:subset_size]

    # An SVC cannot be fitted when only one class is present; skip such
    # prefixes instead of aborting the whole curve.
    if subset_y_train.nunique() < 2:
        continue

    # Fit an unfitted copy with the same hyper-parameters. Refitting `model`
    # itself would overwrite the tuned, loaded classifier that later cells
    # rely on with one trained on a truncated subset.
    subset_model = clone(model)
    subset_model.fit(subset_x_train, subset_y_train)
    subset_sizes.append(subset_size)

    # Evaluate on the training set. This is the full training split, so it
    # also contains rows the subset model has not seen.
    y_train_pred = subset_model.predict(x_train)
    train_accuracy = accuracy_score(y_train, y_train_pred)
    train_accuracy_history.append(train_accuracy)

    # Evaluate on the testing set
    y_test_pred = subset_model.predict(x_test)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    test_accuracy_history.append(test_accuracy)

# Plot the learning curve against the subset sizes that were actually fitted
plt.figure(figsize=(10, 6))
plt.plot(subset_sizes, train_accuracy_history, label='Training Accuracy')
plt.plot(subset_sizes, test_accuracy_history, label='Testing Accuracy')
plt.xlabel('Training Set Size')
plt.ylabel('Accuracy')
plt.title('Learning Curve')
plt.legend()
plt.show()

print(len(x_train))
print(len(x_test))


In [None]:


def predict_on_test_dataset(model, Categories, test_dir='test/', show_images=True):
    """Classify every image under ``test_dir`` and report the results.

    For each category, images are read from ``<test_dir>/<category>_test``,
    resized to 48x48x3, flattened and passed to ``model``. Per-image class
    probabilities and the predicted category are printed, followed by a
    confusion-matrix heatmap, a classification report and the overall accuracy.

    Parameters
    ----------
    model : fitted classifier exposing ``predict`` and ``predict_proba``.
        Predicted labels are used as indices into ``Categories``.
    Categories : list of str
        Category names; a category's position in the list is its class index.
    test_dir : str
        Folder containing one ``<category>_test`` sub-folder per category.
    show_images : bool
        When True (the default), display each image before its prediction.

    Raises
    ------
    ValueError
        If no test images are found under ``test_dir``.
    """
    # Fixed label set so the confusion matrix and report always have one
    # row/column per category, even if a class never occurs in the results.
    class_indices = list(range(len(Categories)))
    true_labels = []
    predicted_labels = []

    for true_index, category in enumerate(Categories):
        category_path = os.path.join(test_dir, f'{category}_test')
        # Sorted so images are processed in a stable order between runs.
        for img_name in sorted(os.listdir(category_path)):
            img_path = os.path.join(category_path, img_name)

            # Read the image
            img = imread(img_path)

            # Display the image
            if show_images:
                plt.imshow(img)
                plt.show()

            # Resize and flatten exactly as the training data was prepared
            img_resize = resize(img, (48, 48, 3))
            img_flat = img_resize.flatten()

            # Make a single prediction and reuse it below
            sample = [img_flat]
            prediction = model.predict(sample)[0]
            probability = model.predict_proba(sample)

            # Display the prediction probabilities.
            # NOTE(review): assumes the probability columns follow the order of
            # Categories (i.e. the model was trained on labels 0..n-1) - confirm.
            for ind, val in enumerate(Categories):
                print(f'{val} = {probability[0][ind] * 100}%')

            # Append true and predicted labels
            true_labels.append(true_index)
            predicted_labels.append(prediction)

            # Display the predicted image category
            print("The predicted image is: " + Categories[prediction])

    if not true_labels:
        raise ValueError(f'no test images found under {test_dir!r}')

    # Calculate confusion matrix
    cm = confusion_matrix(true_labels, predicted_labels, labels=class_indices)

    # Calculate accuracy
    accuracy = accuracy_score(true_labels, predicted_labels)

    # Display confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='g', cmap='Blues', xticklabels=Categories, yticklabels=Categories)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

    # Display classification report
    report = classification_report(true_labels, predicted_labels,
                                   labels=class_indices, target_names=Categories)
    print('Classification Report:\n', report)

    print(f'Accuracy: {accuracy * 100:.2f}%')
                
# Evaluate the loaded classifier on the images in the default test folder
predict_on_test_dataset(model=model, Categories=Categories)
