In [None]:
import pandas as pd  # Importing the pandas library, which is used for handling and processing structured data in the form of dataframes.
from sklearn.linear_model import LogisticRegression  # Importing the LogisticRegression model from scikit-learn to perform classification.
from sklearn.metrics import accuracy_score  # Importing accuracy_score to evaluate how well the model performs on test data.
from sklearn.preprocessing import LabelEncoder, StandardScaler, PolynomialFeatures  # Importing preprocessing tools: LabelEncoder for categorical labels, StandardScaler for feature scaling, and PolynomialFeatures to generate polynomial terms.

# ---------------------------------------------------------------------------
# 1. Load the training and testing splits (stored as separate CSV files).
# ---------------------------------------------------------------------------
train_df = pd.read_csv('/content/colour_Histogram_Training.csv')
test_df = pd.read_csv('/content/colour_Histogram_Testing.csv')

# ---------------------------------------------------------------------------
# 2. Separate model inputs from the target.
#    'class' is the label to predict; 'filename' is dropped together with it
#    so that only the remaining columns are fed to the model.
# ---------------------------------------------------------------------------
X_train = train_df.drop(['filename', 'class'], axis='columns')
y_train = train_df['class']

X_test = test_df.drop(['filename', 'class'], axis='columns')
y_test = test_df['class']

# ---------------------------------------------------------------------------
# 3. Map class labels to integer codes.
#    The encoder learns its label set from the training labels only; the test
#    labels are converted with that same mapping.
# ---------------------------------------------------------------------------
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# ---------------------------------------------------------------------------
# 4. Standardize the features.
#    Scaling statistics are estimated on the training features alone and then
#    reused unchanged for the test features, so nothing from the test split
#    influences preprocessing.
# ---------------------------------------------------------------------------
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ---------------------------------------------------------------------------
# 5. Expand the feature space with degree-2 interaction terms.
#    NOTE: interaction_only=True adds products of distinct features only;
#    squared terms (x_i ** 2) are NOT generated.
# ---------------------------------------------------------------------------
poly = PolynomialFeatures(degree=2, interaction_only=True)
poly.fit(X_train_scaled)
X_train_poly = poly.transform(X_train_scaled)
X_test_poly = poly.transform(X_test_scaled)

# ---------------------------------------------------------------------------
# 6. Fit logistic regression on the expanded features and score it.
#    max_iter=1000 sets the solver's iteration limit.
# ---------------------------------------------------------------------------
model = LogisticRegression(max_iter=1000)
model.fit(X=X_train_poly, y=y_train_encoded)
y_pred_poly = model.predict(X_test_poly)

# Fraction of test samples whose predicted label matches the true label.
accuracy_poly = accuracy_score(y_true=y_test_encoded, y_pred=y_pred_poly)
print("Accuracy with Polynomial Features:", accuracy_poly)