## Abstract: This notebook is used for the Final capstone project


In [129]:
import os
import io
import shutil

import pandas as pd
import numpy as np
from scipy.stats import mode

from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler


import base64

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from PIL import Image

import seaborn as sns


import time
import warnings


from skimage import feature

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score



# Utility functions


In [130]:
class FileWriter:
    """Context manager that appends lines of text to a file.

    The file is opened in append mode when the ``with`` block is entered and
    closed when it is left, so successive ``with FileWriter(path)`` blocks
    keep adding to the same file instead of overwriting it.
    """

    def __init__(self, filename):
        # Only the path is remembered here; the handle is created in __enter__.
        self.filename = filename

    def __enter__(self):
        handle = open(self.filename, 'a')
        self.file = handle
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None, so an exception raised inside the block propagates.
        self.file.close()

    def append_to_file(self, text):
        """Write ``text`` followed by a newline to the open file."""
        line = text + '\n'
        self.file.write(line)

    # def Creat_file(self,text)
    #      with open(output_file, "w") as outfile:

In [131]:
def Write_Readme_file(file1="README_Org.md", file2="output.md",
                      file3="Recomdation.md", output_file="README.md"):
    """Build the README by concatenating three markdown files.

    The output consists of the content of ``file1``, a newline, the content
    of ``file2`` and then the content of ``file3`` (no separator is inserted
    between the last two, matching the layout this function has always
    produced).

    Args:
        file1: Path of the first input file.
        file2: Path of the second input file.
        file3: Path of the third input file.
        output_file: Path of the file to (over)write.

    Raises:
        OSError: If an input cannot be read or the output cannot be written.
            All inputs are read before the output is opened, so a missing
            input leaves an existing ``output_file`` untouched.
    """
    # Read everything first: opening the output in "w" mode truncates it, and
    # that must not happen until every input is known to be readable.
    contents = []
    for path in (file1, file2, file3):
        with open(path, "r") as infile:
            contents.append(infile.read())

    with open(output_file, "w") as outfile:
        outfile.write(contents[0])
        # A newline separates the first file from the second one.
        outfile.write("\n")
        outfile.write(contents[1])
        outfile.write(contents[2])
    


A balanced dataset for image categorization contains roughly equal numbers of images for each class (category). You can check how balanced a dataset is by counting the number of instances of each class.




### Examine the dataset and balance the dataset.

In [132]:
class BlanceDataste:
    """Balance the classes of a DataFrame and write the result to a CSV file.

    For every target column the majority class is randomly under-sampled and
    the minority class is then randomly over-sampled. The balanced feature
    and target frames are concatenated column-wise and written once.

    Args:
        df: Source DataFrame holding feature and target columns.
        output_csv: Path of the CSV file to write.
        featuresList: Names of the target column(s) to balance on. Every other
            column of ``df`` is treated as a feature. With more than one
            target the per-target results are placed side by side, so a
            single target column is the practical use case.
        random_state: Optional seed forwarded to both samplers so the
            resampling is reproducible. ``None`` keeps it random.
    """

    def __init__(self, df, output_csv, featuresList, random_state=None):
        self.df = df
        self.output_csv = output_csv
        self.featuresList = featuresList
        self.random_state = random_state

    def process_Data(self):
        """Resample the data and write ``output_csv``.

        Returns:
            True when the CSV was written, False when there was no target
            column to balance on or the file could not be written.
        """
        # Separate features and target variables.
        features = self.df.drop(self.featuresList, axis=1)
        targets = self.df[self.featuresList]

        # Perform balancing for each target variable separately.
        balanced_features = []
        balanced_targets = []
        for column in targets.columns:
            target = targets[column]

            # Shrink the majority class first ...
            undersampler = RandomUnderSampler(sampling_strategy='majority',
                                              random_state=self.random_state)
            features_undersampled, target_undersampled = undersampler.fit_resample(features, target)

            # ... then grow the minority class on the reduced data.
            oversampler = RandomOverSampler(sampling_strategy='minority',
                                            random_state=self.random_state)
            features_balanced, target_balanced = oversampler.fit_resample(features_undersampled, target_undersampled)

            balanced_features.append(features_balanced)
            balanced_targets.append(target_balanced)

        if not balanced_features:
            print(f"No target column to balance on; {self.output_csv} was not written")
            return False

        # Build and write the frame once, after every target was processed
        # (it used to be rebuilt and rewritten on every loop iteration).
        balanced_df = pd.concat(
            [pd.DataFrame(feature) for feature in balanced_features]
            + [pd.DataFrame(target) for target in balanced_targets],
            axis=1,
        )

        try:
            balanced_df.to_csv(self.output_csv, index=False)
        except IOError:
            print(f"Error writing balanced CSV {self.output_csv}")
            return False
        return True
    





In [133]:
import base64

class DataCleaner:
    """Helpers for reporting on and cleaning the columns of a DataFrame.

    All cleaning methods modify ``self.df`` (the DataFrame passed to the
    constructor); recommendations are appended to the markdown report file.
    """

    def __init__(self, df):
        self.df = df
        self.scaler = StandardScaler()
        self.encoder = LabelEncoder()
        self.outfutfile = "output.md"

    def report_and_recommend(self):
        """Append per-column cleaning recommendations to the report file."""
        title = ("\n Recommendations:")
        with FileWriter(self.outfutfile) as writer:
            writer.append_to_file(title)
            for column in self.df.columns:
                series = self.df[column]
                if series.isnull().sum() > 0:
                    writer.append_to_file(f" \n  Column '{column}' has missing values. Consider using dropna() or fillna().")

                if series.dtype == 'object':
                    writer.append_to_file(f" \n  Column '{column}' is categorical. Consider encoding it using label encoding or one-hot encoding.")
                elif series.dtype in ['int64', 'float64']:
                    writer.append_to_file(f" \n  Column '{column}' is numerical. Consider scaling it using standard scaling or min-max scaling.")

            writer.append_to_file("\n Recommendations - END -:")

    def drop_missing(self):
        """Drop every row that contains a missing value (in place)."""
        self.df.dropna(inplace=True)

    def fill_missing(self, column, value):
        """Replace the missing values of ``column`` with ``value``."""
        # Assign the result back instead of calling fillna(inplace=True) on
        # the selected column: the chained in-place form acts on a temporary
        # object and is not guaranteed to update the DataFrame.
        self.df[column] = self.df[column].fillna(value)

    def encode_categorical(self, column):
        """Replace ``column`` with its label-encoded integer codes."""
        self.df[column] = self.encoder.fit_transform(self.df[column])

    def scale_numerical(self, column):
        """Replace ``column`` with its standardized values."""
        scaled = self.scaler.fit_transform(self.df[column].values.reshape(-1, 1))
        # fit_transform is given an (n, 1) column; flatten the result before
        # storing it back as a single column.
        self.df[column] = np.asarray(scaled).ravel()

    def fill_missing_numeric_With_mean(self, column1, column2):
        """Fill missing ``column1`` values with the mean of their ``column2`` group.

        Rows whose group has no known value, or whose ``column2`` value is
        itself missing, are left missing.
        """
        # Mean of 'column1' within each 'column2' category, aligned row by row.
        group_means = self.df.groupby(column2)[column1].transform('mean')
        self.df[column1] = self.df[column1].fillna(group_means)

    def fill_missing_Category_values(self, column1, column2):
        """Fill missing ``column1`` values with the mode of their ``column2`` group.

        Rows whose group has no known value, or whose ``column2`` value is
        itself missing, are left unchanged.
        """
        def fill_with_group_mode(values):
            modes = values.mode()
            if modes.empty:
                # The whole group is missing: there is nothing to fill with.
                return values
            return values.fillna(modes.iloc[0])

        filled = self.df.groupby(column2)[column1].transform(fill_with_group_mode)
        # Only take the group result where the original value is missing, so
        # rows that were not part of any group keep their existing value.
        self.df[column1] = self.df[column1].fillna(filled)
        
        
       

       


In [134]:
class BlanceImages:
    """Resize the images listed in a DataFrame and save them to a directory.

    Args:
        Image_fiel_path: Directory containing the source ``.jpg`` files.
        df: DataFrame with an ``image_name`` column (file name without
            extension).
        FinalImage_dir: Directory the processed images are written to.
        output_csv: Path of the CSV file that ``df`` is written to.
        size: Size passed to ``Image.resize`` for every image.
    """

    def __init__(self, Image_fiel_path, df, FinalImage_dir, output_csv,  size=(128, 128) ):
        self.df = df
        self.FinalImage_dir = FinalImage_dir
        self.output_csv = output_csv
        self.Image_fiel_path = Image_fiel_path
        self.size = size

    def process_image(self, file_path, newfile):
        """Resize one image and save it as ``newfile``.

        Returns:
            True on success, False when the image could not be read or
            written (a message is printed in that case).
        """
        try:
            # The context manager closes the source file handle.
            with Image.open(file_path) as source:
                resized = source.resize(self.size)  # Resize image

            # Convert image to numpy array
            img_array = np.array(resized)

            # Normalize to [0,1]
            img_normalized = img_array / 255.
            # Convert back to image. Round before casting: a plain cast
            # truncates, so a float round-trip error would lower a pixel by one.
            restored = np.rint(img_normalized * 255).astype(np.uint8)
            Image.fromarray(restored).save(newfile)

        except IOError:
            print(f"Error processing image {file_path}")
            return False
        return True

    def process_images(self):
        """Process every image named in ``df`` and write ``df`` to ``output_csv``."""
        if self.FinalImage_dir:
            # Saving fails when the target directory does not exist yet.
            os.makedirs(self.FinalImage_dir, exist_ok=True)

        for index, row in self.df.iterrows():
            file_name = row['image_name'] +'.jpg'
            row_image_file = os.path.join(self.Image_fiel_path, file_name)
            newfile =  os.path.join(self.FinalImage_dir, file_name)

            self.process_image(row_image_file, newfile)

        self.df.to_csv(self.output_csv,index=False)

In [135]:
class removeCorruptImage:
    """Move unreadable images aside and drop their rows from the DataFrame.

    Args:
        Image_fiel_path: Directory containing the ``.jpg`` files.
        df: DataFrame with an ``image_name`` column (file name without
            extension). Rows of unreadable images are removed from it in place.
        corrupt_dir: Directory that unreadable files are moved into.
        output_csv: Path of the CSV file that receives the remaining rows.
    """

    def __init__(self, Image_fiel_path, df, corrupt_dir, output_csv ):
        self.df = df
        self.corrupt_dir = corrupt_dir
        self.output_csv = output_csv
        self.Image_fiel_path = Image_fiel_path
        self.outfutfile = "output.md"
        # Kept for backward compatibility only; the CSV is written to
        # ``output_csv``.
        self.nonCorruptoutput_csv = 'nonCorruptoutput'

    def is_corrupt(self, file_path):
        """Return True when ``file_path`` cannot be opened and verified as an image."""
        try:
            # Image.open() is lazy, so ask Pillow to verify the file as well.
            # The context manager closes the file handle.
            with Image.open(file_path) as img:
                img.verify()
        except (IOError, SyntaxError):
            return True
        return False

    def process_images(self):
        """Check every image, move the bad ones, write the CSV and the report."""
        count = 0
        corrupt_indices = []
        for index, row in self.df.iterrows():
            count = count + 1
            file_name = row['image_name'] +'.jpg'
            row_image_file = os.path.join(self.Image_fiel_path, file_name)
            if self.is_corrupt(row_image_file):
                corrupt_indices.append(index)
                # A missing file is reported as corrupt too, but there is
                # nothing to move in that case.
                if os.path.exists(row_image_file):
                    if self.corrupt_dir:
                        os.makedirs(self.corrupt_dir, exist_ok=True)
                    shutil.move(row_image_file, self.corrupt_dir)  # Move corrupt files to a separate directory

        ccount = len(corrupt_indices)
        # Remove the rows after the loop so the frame is not modified while
        # it is being iterated over.
        self.df.drop(index=corrupt_indices, inplace=True)

        self.df.to_csv(self.output_csv,index=False)

        with FileWriter(self.outfutfile ) as writer:
            writer.append_to_file("\n ------------  -----------------  --------------   --------------\n")
            Message1 = f" ### proces directery to see if there is any currept imges"
            writer.append_to_file(Message1)
            Message = f" \n Total Image file count is {count} \n Total currept imge files count is {ccount}\n"
            writer.append_to_file(Message)

        
    

In [136]:
class DF_Comparison:
    """Write a before/after comparison of two DataFrames to the markdown report.

    Args:
        df1: DataFrame before processing.
        df2: DataFrame after processing.

    Attributes set here:
        outfutfile: path of the markdown report that every method appends to.
        ImageDF: one row per visualized column holding the paths of the
            "before" and "after" plot images.
    """

    def __init__(self, df1, df2):
        self.df1 = df1
        self.df2 = df2
        self.outfutfile = "output.md"
        headers = ['BeforImg', 'AfterImg']
        # Starts empty; visualize() adds one row per plotted column.
        self.ImageDF = pd.DataFrame(columns=headers)

    def _write_section(self, title, body):
        """Append a title, a body and a blank separator to the report file."""
        with FileWriter(self.outfutfile) as writer:
            writer.append_to_file(title)
            writer.append_to_file(body)
            writer.append_to_file('\n')

    def _save_plot(self, df, column, label, numeric):
        """Plot ``column`` of ``df`` on its own figure, save it, return the path."""
        if numeric:
            fname = f"media//Histogram_of_{label}_{column}.png"
            title = f"Histogram of _{label}_{column}"
        else:
            fname = f"media//Count_plot_of_{label}_{column}.png"
            title = f"Count plot of _{label}_{column}"

        # savefig fails when the target directory does not exist.
        os.makedirs(os.path.dirname(fname), exist_ok=True)

        # A fresh figure per plot: drawing on the current axes would put the
        # "after" data on top of the "before" plot.
        fig, ax = plt.subplots()
        if numeric:
            sns.histplot(df[column], ax=ax)
        else:
            sns.countplot(x=column, data=df, ax=ax)
        ax.set_title(title)
        ax.tick_params(axis='x', labelrotation=90)
        ax.set_xlabel(column)

        # Adjust the plot limits and aspect ratio
        fig.tight_layout()
        fig.savefig(fname)
        plt.close(fig)
        return fname

    def report(self):
        """Append all tabular comparisons to the report file."""
        self.print_info(self.df1, "\n ## DataFrame info befor process :")
        self.print_Missing_values(self.df1, "\n ## Missing values befor & aftre  process :")
        self.print_unique_values("\n ## Unique values befor & aftre  process :")
        self.print_value_count("\n ## Value counts befor & aftre  process :")

        # Descriptions of both DataFrames, placed side by side.
        combined_desc = pd.concat([self.df1.describe(), self.df2.describe()], axis=1)
        self._write_section("\n ## Descriptive statistics befor and after the process:",
                            combined_desc.to_markdown())

    def print_info(self, df, title):
        """Append a per-column summary of ``df`` (name, non-null count, dtype)."""
        # Built from the frame itself rather than by parsing the text printed
        # by df.info(), which breaks on column names that contain spaces.
        info_df = pd.DataFrame({
            'index': range(len(df.columns)),
            'Name': list(df.columns),
            'Count': df.notna().sum().to_numpy(),
            'Non-Null': 'non-null',
            'Dtype': df.dtypes.astype(str).to_numpy(),
        })
        self._write_section(title, info_df.to_markdown())

    def print_Missing_values(self, df, title):
        """Append the per-column null counts of both frames.

        ``df`` is accepted for backward compatibility and is not used; the
        counts always come from ``df1`` and ``df2``.
        """
        null_counts = pd.DataFrame({'Befor': self.df1.isnull().sum(),
                                    'After': self.df2.isnull().sum()})
        self._write_section(title, null_counts.to_markdown())

    def print_unique_values(self, title):
        """Append the per-column number of unique values of both frames."""
        unique_counts = pd.DataFrame({'Befor': self.df1.nunique(),
                                      'After': self.df2.nunique()})
        self._write_section(title, unique_counts.to_markdown())

    def print_value_count(self, title):
        """Append value counts of every ``df1`` column with at most 8 unique values."""
        ordered = ['Column', 'Value', 'Count Befor', 'Count After']
        frames = []

        for column in self.df1.columns:
            if self.df1[column].nunique() > 8:
                continue

            df1_counts = self.df1[column].value_counts()
            if column in self.df2.columns:
                df2_counts = self.df2[column].value_counts()
            else:
                # The column does not exist after processing: no counts.
                df2_counts = pd.Series(dtype='int64')

            # Combine the two Series into one frame, one row per value.
            temp_df = pd.concat([df1_counts, df2_counts], axis=1,
                                keys=['Count Befor', 'Count After'])
            temp_df = temp_df.rename_axis('Value').reset_index()
            temp_df.insert(0, 'Column', column)
            frames.append(temp_df)

        if frames:
            result = pd.concat(frames, ignore_index=True)[ordered]
        else:
            # No column qualified: write an empty table instead of failing.
            result = pd.DataFrame(columns=ordered)

        self._write_section(title, result.to_markdown())

    def visualize(self, column):
        """Save a "before" and an "after" plot of ``column`` and record their paths.

        A histogram is drawn when the ``df1`` column is int64/float64, a
        count plot otherwise; the same kind is used for both frames.
        """
        label1 = 'BfeorImg'
        label2 = 'AfterImg'

        numeric = self.df1[column].dtype in ['int64', 'float64']
        fname1 = self._save_plot(self.df1, column, label1, numeric)
        fname2 = self._save_plot(self.df2, column, label2, numeric)

        # Record the pair of image paths as a new row.
        self.ImageDF.loc[len(self.ImageDF)] = [fname1, fname2]

    def writeImages(self):
        """Append a markdown table with one row per recorded before/after image pair."""
        # Ignore rows that hold no paths at all; self.ImageDF itself is left
        # untouched so the method can be called more than once.
        rows = self.ImageDF.dropna(how='all')

        # Leading empty entry: the table must be preceded by a blank line.
        lines = ['', '| Befor | Aftre |', '|:---:|:---:|']
        for before_img, after_img in zip(rows['BeforImg'], rows['AfterImg']):
            lines.append(f'| ![Befor]({before_img}) | ![After]({after_img}) |')
        markup_code = '\n'.join(lines) + '\n'

        self._write_section("Bfeor and after ", markup_code)
        

    


Apply LabelEncoder

In [137]:

class DF_apply_LabelEncoder:
    """Add a label-encoded copy of one column to a DataFrame.

    Args:
        df: DataFrame to extend (it is modified in place).
        colName: Name of the column to encode.
    """

    def __init__(self, df, colName):
        self.df, self.colName = df, colName
        self.outfutfile = "output.md"

    def encode(self):
        """Store the codes in ``<colName>_encoded`` and return the DataFrame."""
        encoder = LabelEncoder()
        encoded_name = "_".join((self.colName, "encoded"))
        codes = encoder.fit_transform(self.df[self.colName])
        self.df[encoded_name] = codes
        return self.df


In [138]:
class DF_apply_StandardScaler:
    """Add a standardized copy of one numeric column to a DataFrame.

    Args:
        df: DataFrame to extend (it is modified in place).
        colName: Name of the column to scale.
    """

    def __init__(self, df, colName):
        self.df = df
        self.colName = colName
        self.outfutfile = "output.md"

    def encode(self):
        """Store the scaled values in ``<colName>_encoded`` and return the DataFrame."""
        scaler = StandardScaler()
        Label = self.colName + "_" + "encoded"
        # The scaler needs a 2-D input, so select the column as a one-column
        # frame (a plain Series is 1-D and is rejected) and flatten the result.
        scaled = scaler.fit_transform(self.df[[self.colName]])
        self.df[Label] = np.asarray(scaled).ravel()
        # Return the transformed DataFrame
        return self.df

In [139]:
# Define the raw CSV file names and paths

# Current directory
current_dir = os.getcwd()
# Parent directory
parent_dir = os.path.dirname(current_dir)

Data_path_Name= 'melanoma-classification_data/'
Train_File_name_pre = 'train_pre.csv'
Train_Uncurept_Image_File_Name = 'train_UImage.csv'
Test_File_name_pre = 'test_pre.csv'

# NOTE(review): the second component starts with a path separator on Windows,
# where os.path.join then drops the directory part of parent_dir. Left
# unchanged because the intended location is not visible here; confirm.
Image_File_Path_pre_train = os.path.join(parent_dir, '\\Train')
# Built from components rather than a 'a\b\c' literal: '\j' and '\T' are
# invalid escape sequences and the literal only forms a path on Windows.
Image_File_Path_post_train = os.path.join(current_dir, 'melanoma-classification_data', 'jpeg', 'Train')

Org_csv_file = os.path.join(Data_path_Name, Train_File_name_pre)
Post_scv_file = os.path.join(Data_path_Name, Train_Uncurept_Image_File_Name)
md_file = os.path.join(current_dir, 'output.md')

UCorrupt_csv_file_name = 'nonCorruptoutput.csv'
UCorrupt_csv_file = os.path.join(Data_path_Name,UCorrupt_csv_file_name)

# Start from an empty report; it is fine if there is no previous one.
try:
    os.remove(md_file)
except IOError:
    pass


Blanced_csv_file_name = 'Blanced.csv'
Blanced_csv_file = os.path.join(Data_path_Name, Blanced_csv_file_name)

df_org  = pd.read_csv(Org_csv_file)

# Rename the long column name anatom_site_general_challenge to anatomy_sites
df_org = df_org.rename(columns={"anatom_site_general_challenge": "anatomy_sites"})

# If there are any corrupt image files, remove them and create a new CSV file
processor = removeCorruptImage(Image_File_Path_pre_train , df_org , 'corrupt_files', UCorrupt_csv_file)
processor.process_images()

df_UImage = pd.read_csv(UCorrupt_csv_file)
cleanData = DataCleaner(df_UImage)
cleanData.report_and_recommend()
# Fill in missing values
cleanData.fill_missing_numeric_With_mean('age_approx', 'diagnosis')
cleanData.fill_missing_Category_values('anatomy_sites', 'diagnosis')
cleanData.fill_missing_Category_values('sex', 'diagnosis')

df_UImage = cleanData.df

# Create a balanced dataset around the benign_malignant feature

featuresList = ['benign_malignant']
Blancer = BlanceDataste(df_UImage,Blanced_csv_file,featuresList)
# process_Data() returns False when the CSV could not be written; stop here
# rather than reading a missing or stale file below.
if not Blancer.process_Data():
    raise RuntimeError(f"Balanced dataset was not written to {Blanced_csv_file}")

df_Blanced  = pd.read_csv(Blanced_csv_file)

DF_ComparisonX = DF_Comparison(df_UImage, df_Blanced)
DF_ComparisonX.report()
for column in ['sex', 'age_approx', 'anatomy_sites', 'target', 'benign_malignant', 'diagnosis']:
    DF_ComparisonX.visualize(column)

DF_ComparisonX.writeImages()



Based on the course material, my limited knowledge, and extensive research, I have chosen to compare the results of models built on the following kinds of image features: Deep Learning-based Features, Gradient-based Features, Color Histograms, Local Binary Patterns (LBP), and Texture Descriptors.

In [140]:

# Add a "<column>_encoded" column for each categorical column, one after the other.
for categorical_column in ('diagnosis', 'anatomy_sites', 'sex', 'benign_malignant'):
    df_Blanced = DF_apply_LabelEncoder(df_Blanced, categorical_column).encode()

# Store the approximate age as integers.
df_Blanced['age_approx'] = df_Blanced['age_approx'].astype(int)



In [141]:

# NOTE(review): this rebinds df_Blanced to the CSV at Post_scv_file, so the
# frame that df_Blanced referred to before this cell (including any columns
# added to it) is no longer used by the cells below. Confirm this is intended.
df_Blanced  = pd.read_csv(Post_scv_file)
# df_Blanced.info()


In [142]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

class plot_Socres:
    """Fit a classifier on several random splits, then plot and report its scores.

    Args:
        clf: Estimator offering ``fit(X, y)`` and ``predict(X)``.
        X: Feature data.
        y: Labels.
        num_iterations: Number of train/test splits to evaluate.
        filelabel: Label used in the plot file name and in the report.
    """

    def __init__(self, clf,X,y,num_iterations,filelabel):
        self.clf, self.X, self.y = clf, X, y
        self.num_iterations = num_iterations
        self.filelabel = filelabel
        self.outfutfile = "output.md"

    def PlotScoer(self):
        """Run the evaluation, save the score plot and append the averages to the report."""
        timings, accuracies, precisions, recalls, f1_values = [], [], [], [], []

        for seed in range(self.num_iterations):
            began = time.time()

            # The iteration number doubles as the seed of the 80/20 split.
            X_train, X_test, y_train, y_test = train_test_split(
                self.X, self.y, test_size=0.2, random_state=seed)

            # Fit the model and make predictions
            self.clf.fit(X_train, y_train)
            y_pred = self.clf.predict(X_test)

            # Calculate performance metrics
            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred)
            recall = recall_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred)

            # The recorded time covers the split, the fit and the scoring.
            timings.append(time.time() - began)
            accuracies.append(accuracy)
            precisions.append(precision)
            recalls.append(recall)
            f1_values.append(f1)

        # Plot the results: one panel per series on a 2x2 grid.
        iterations = np.arange(1, self.num_iterations+1)
        panels = [
            (timings, 'Execution Time (s)', 'Model Execution Time'),
            (accuracies, 'Accuracy', 'Accuracy Score'),
            (precisions, 'Precision', 'Precision Score'),
            (recalls, 'Recall', 'Recall Score'),
        ]

        plt.figure(figsize=(10, 6))
        for position, (series, y_label, heading) in enumerate(panels, start=1):
            plt.subplot(2, 2, position)
            plt.plot(iterations, series, marker='o')
            plt.xlabel('Iteration')
            plt.ylabel(y_label)
            plt.title(heading)

        plt.tight_layout()
        fname2 = f"media//Score_plot_of_{self.filelabel}.png"
        plt.savefig(fname2)

        plt.close()

        # Average scores over all iterations
        avg_accuracy_score = np.mean(accuracies)
        avg_precision_score = np.mean(precisions)
        avg_recall_score = np.mean(recalls)
        avg_f1_score = np.mean(f1_values)

        # Every entry becomes one line of the report; empty entries are blank lines.
        report_lines = [
            '',
            '',
            f'| ![filelabel]({fname2}) |',
            '',
            f'avg_accuracy_score : {avg_accuracy_score}',
            '',
            f'avg_precision_score : {avg_precision_score}',
            '',
            f'avg_recall_score : {avg_recall_score}',
            '',
            f'avg_f1_score : {avg_f1_score}',
            '',
        ]
        markup_code = ''.join(entry + '\n' for entry in report_lines)

        with FileWriter(self.outfutfile ) as writer:
            for chunk in (self.filelabel, markup_code, '\n'):
                writer.append_to_file(chunk)

    def writeImages(self):
        """Append an image reference built from ``filelabel`` to the report."""
        markup_code = ''.join(['\n', '\n', f'| ![filelabel]({self.filelabel}) ||\n'])

        with FileWriter(self.outfutfile ) as writer:
            for chunk in ("Bfeor and after ", markup_code, '\n'):
                writer.append_to_file(chunk)
        



store results for visualization, create an empty array

In [143]:


# Imported under an alias so the standard-library ``io`` module imported at
# the top of the notebook is not replaced by skimage's module of the same name.
from skimage import io as skimage_io

# Load the dataset: X holds the images read as grayscale, y the labels.
X = np.array([skimage_io.imread(os.path.join(Image_File_Path_post_train, path + '.jpg'), as_gray=True) for path in df_Blanced['image_name']])
y = np.array(df_Blanced['target'])




Color Histograms: model

In [144]:


import warnings
from sklearn.exceptions import UndefinedMetricWarning

# Suppress the warning
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)


def gray_level_histogram(image, bins=256):
    """Return the intensity histogram of ``image`` as an array of bin counts.

    Floating-point images are binned over [0, 1] and integer images over
    [0, 256). Binning a float image over [0, 256) would put every pixel in
    the first bin and make the feature vector useless.
    NOTE(review): assumes float images are scaled to [0, 1], as produced by
    skimage's ``imread(..., as_gray=True)`` for colour input. Confirm.
    """
    upper = 1.0 if np.issubdtype(image.dtype, np.floating) else 256
    return np.histogram(image.ravel(), bins=bins, range=(0, upper))[0]


# Extract intensity histograms as features
X_features = np.array([gray_level_histogram(image) for image in X])

# Train the classifier (assuming SVC)
svm_model = SVC()

plotScore = plot_Socres(svm_model,X_features,y,5,'Color_Histograms')
plotScore.PlotScoer()



Texture Descriptor Model

In [145]:


# Suppress the UserWarning raised by skimage's texture module.
# (The former ``with open(os.devnull)`` / ``warnings.catch_warnings()`` block
# was removed: it contained no work, so it suppressed nothing.)
warnings.filterwarnings("ignore", category=UserWarning, module="skimage.feature.texture")

# Extract texture descriptors (e.g., LBP) as features
X_features = np.array([feature.local_binary_pattern(image, P=8, R=1).flatten() for image in X])

# Use a fresh classifier for this model instead of relying on the instance
# left over from an earlier cell.
svm_model = SVC()

plotScore = plot_Socres(svm_model,X_features,y,5,'Texture_Descriptor')
plotScore.PlotScoer()




Gradient-based Features Model

In [146]:

# One HOG (histogram of oriented gradients) descriptor per image, stacked
# into the feature matrix.
hog_vectors = [feature.hog(image) for image in X]
X_features = np.array(hog_vectors)

# A fresh SVC is scored on these features.
svm_model = SVC()
plotScore = plot_Socres(svm_model, X_features, y, 5, 'Gradient-based_Features')
plotScore.PlotScoer()

 

Deep Learning-based Features:

I was not able to install TensorFlow on my computer, so I did not evaluate this model.

In [147]:
# import numpy as np
# from skimage import io

# from tensorflow.keras.applications import VGG16
# from tensorflow.keras.applications.vgg16 import preprocess_input
# from tensorflow.keras.models import Model
# from sklearn.model_selection import train_test_split
# from sklearn.svm import SVC
# from sklearn.metrics import accuracy_score


# # Preprocess the images (assuming resizing and normalization)
# # X_preprocessed = np.array([preprocess_image(image) for image in X])  # Replace 'preprocess_image' with your preprocessing function
# X_preprocessed = preprocess_input(X)
# # Load pre-trained VGG16 model without the top (fully connected) layers
# base_model = VGG16(weights='imagenet', include_top=False)
# model = Model(inputs=base_model.input, outputs=base_model.output)

# # Extract deep learning-based features using VGG16
# X_features = model.predict(X_preprocessed)

# # Flatten the features
# X_features = X_features.reshape(X_features.shape[0], -1)

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X_features, y, test_size=0.2, random_state=42)

# # Train the classifier (assuming SVC)
# svm_model = SVC()
# svm_model.fit(X_train, y_train)

# # Make predictions on the test set
# y_pred = svm_model.predict(X_test)

# # Evaluate the model
# accuracy = accuracy_score(y_test, y_pred)
# print(f"Accuracy: {accuracy}")


Local Binary Patterns (LBP) Model

In [148]:
from sklearn.metrics import f1_score
from sklearn.metrics import roc_curve, auc

# Silence the UserWarning raised by skimage's texture module.
warnings.filterwarnings("ignore", category=UserWarning, module="skimage.feature.texture")

# One flattened LBP map (8 neighbours, radius 1) per image, stacked into the
# feature matrix.
lbp_vectors = [feature.local_binary_pattern(image, P=8, R=1).flatten() for image in X]
X_features = np.array(lbp_vectors)

# A fresh SVC is scored on these features.
svm_model = SVC()
plotScore = plot_Socres(svm_model, X_features, y, 5, 'Local_Binary_atterns')
plotScore.PlotScoer()




In [149]:
# print(Execution_Times)
# print(accuracys)
# print(precisions)
# print(recalls)

In [150]:
# Final step: assemble README.md from the original README, the generated
# report (output.md) and the recommendations file.
Write_Readme_file()