In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [2]:
# PCA transformation function
def pca_transform(X, n_components):
    pca = PCA(n_components=n_components)
    X_pca = pca.fit_transform(X)
    return X_pca, pca  # Returning the PCA object for later use

# Data scaling function
def scale_data(indep_X):
    sc = StandardScaler()
    scaled_X = sc.fit_transform(indep_X)
    return scaled_X



# Confusion matrix and prediction function
def cm_prediction(classifier, X_test, y_test):
    y_pred = classifier.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    return accuracy, report, cm

In [3]:
# Logistic Regression function
def logistic(X_train, y_train, X_test, y_test):       
    classifier = LogisticRegression(random_state=0)
    classifier.fit(X_train, y_train)
    accuracy, report, cm = cm_prediction(classifier, X_test, y_test)
    return classifier, accuracy, report, cm      

# SVM Linear function
def svm_linear(X_train, y_train, X_test, y_test):
    classifier = SVC(kernel='linear', random_state=0)
    classifier.fit(X_train, y_train)
    accuracy, report, cm = cm_prediction(classifier, X_test, y_test)
    return classifier, accuracy, report, cm

# SVM Non-linear function
def svm_NL(X_train, y_train, X_test, y_test):
    classifier = SVC(kernel='rbf', random_state=0)
    classifier.fit(X_train, y_train)
    accuracy, report, cm = cm_prediction(classifier, X_test, y_test)
    return classifier, accuracy, report, cm

# Naive Bayes function
def Navie(X_train, y_train, X_test, y_test):       
    classifier = GaussianNB()
    classifier.fit(X_train, y_train)
    accuracy, report, cm = cm_prediction(classifier, X_test, y_test)
    return classifier, accuracy, report, cm         

# KNN function
def knn(X_train, y_train, X_test, y_test):
    classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
    classifier.fit(X_train, y_train)
    accuracy, report, cm = cm_prediction(classifier, X_test, y_test)
    return classifier, accuracy, report, cm

# Decision Tree function
def Decision(X_train, y_train, X_test, y_test):
    classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
    classifier.fit(X_train, y_train)
    accuracy, report, cm = cm_prediction(classifier, X_test, y_test)
    return classifier, accuracy, report, cm      

# Random Forest function
def random(X_train, y_train, X_test, y_test):
    classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)
    classifier.fit(X_train, y_train)
    accuracy, report, cm = cm_prediction(classifier, X_test, y_test)
    return classifier, accuracy, report, cm

# Function to select classification results
def selectk_Classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf): 
    dataframe = pd.DataFrame(index=['ChiSquare'], columns=['Logistic', 'SVMl', 'SVMnl', 'KNN', 'Navie', 'Decision', 'Random'])
    for number, idex in enumerate(dataframe.index):      
        dataframe['Logistic'][idex] = acclog[number]       
        dataframe['SVMl'][idex] = accsvml[number]
        dataframe['SVMnl'][idex] = accsvmnl[number]
        dataframe['KNN'][idex] = accknn[number]
        dataframe['Navie'][idex] = accnav[number]
        dataframe['Decision'][idex] = accdes[number]
        dataframe['Random'][idex] = accrf[number]
    return dataframe

In [4]:
# Load dataset
dataset1 = pd.read_csv('Wine.csv')

# One-hot encode categorical variables, if any
df2 = pd.get_dummies(dataset1, drop_first=True)

# Feature set
indep_X = df2.iloc[:, 0:13].values  
dep_Y = df2.iloc[:, 13].values 

In [17]:
# Scale the data
scaled_X = scale_data(indep_X)

# Apply PCA transformation
X_pca, pca = pca_transform(scaled_X, 7)

# Split the PCA-transformed data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_pca, dep_Y, test_size=0.2, random_state=42)

# Initialize accuracy logs
acclog = []
accsvml = []
accsvmnl = []
accknn = []
accnav = []
accdes = []
accrf = []


In [18]:
# Logistic Regression
classifier, Accuracy, report, cm = logistic(X_train, y_train, X_test, y_test)
acclog.append(Accuracy)

# SVM Linear
classifier, Accuracy, report, cm = svm_linear(X_train, y_train, X_test, y_test)  
accsvml.append(Accuracy)

# SVM Non-linear
classifier, Accuracy, report, cm = svm_NL(X_train, y_train, X_test, y_test)  
accsvmnl.append(Accuracy)

# KNN
classifier, Accuracy, report, cm = knn(X_train, y_train, X_test, y_test)  
accknn.append(Accuracy)

# Naive Bayes
classifier, Accuracy, report, cm = Navie(X_train, y_train, X_test, y_test)  
accnav.append(Accuracy)

# Decision Tree
classifier, Accuracy, report, cm = Decision(X_train, y_train, X_test, y_test)  
accdes.append(Accuracy)

# Random Forest
classifier, Accuracy, report, cm = random(X_train, y_train, X_test, y_test)  
accrf.append(Accuracy)

# Compile results
result = selectk_Classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf)


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  dataframe['Logistic'][idex] = acclog[number]
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFra

In [None]:
df2

In [13]:
result
#5

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
ChiSquare,1.0,1.0,1.0,0.944444,1.0,1.0,1.0


In [10]:
result
#6

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
ChiSquare,1.0,1.0,1.0,0.972222,1.0,0.972222,1.0


In [16]:
result
#7

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
ChiSquare,1.0,1.0,1.0,0.972222,1.0,0.972222,1.0


In [19]:
result
#8

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
ChiSquare,1.0,1.0,1.0,0.972222,1.0,0.972222,1.0
