In [38]:
import os
import cv2
import numpy as np

# Path to the folder containing the images
folder_path = 'train'

# Accumulators: `images` receives one flattened feature vector per image.
# `labels` is never filled in this cell (the labels are read from a separate
# file further down); the name is kept so existing references still resolve.
images = []
labels = []

# The 3x3 rectangular closing kernel is identical for every image, so it is
# built once here instead of once per loop iteration.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

# os.listdir() returns entries in arbitrary, filesystem-dependent order.
# Sorting makes the row order of X_train reproducible across machines.
# NOTE(review): row i of X_train is later paired with row i of the label
# table purely by position -- confirm the label file follows the same
# sorted-filename order.
for filename in sorted(os.listdir(folder_path)):
    # Construct the full path to the image file
    file_path = os.path.join(folder_path, filename)

    # Sub-directories (for example Jupyter's .ipynb_checkpoints) are not images.
    if not os.path.isfile(file_path):
        continue

    # cv2.imread() reports failure by returning None instead of raising.
    # Fail here with the offending path rather than deep inside cvtColor;
    # silently skipping the file would shift every later row against its label.
    image = cv2.imread(file_path)
    if image is None:
        raise ValueError(f"Could not read image file: {file_path!r}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Divide the grayscale image by a heavily blurred copy of itself
    # (sigma 33 on both axes; the (0, 0) kernel size lets OpenCV derive the
    # size from sigma), rescaled to the 0-255 range.
    blur = cv2.GaussianBlur(gray, (0, 0), sigmaX=33, sigmaY=33)
    divide = cv2.divide(gray, blur, scale=255)

    # Binarise with Otsu's automatically chosen threshold.
    thresh = cv2.threshold(divide, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    # Morphological closing with the shared 3x3 kernel.
    morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

    # Crop: discard the right-most 1/15 of the width, then keep only the last
    # quarter of what remains. This is the same region as the original two
    # successive slices, expressed as a single slice.
    full_width = morph.shape[1]
    right_edge = 14 * full_width // 15
    left_edge = 3 * right_edge // 4
    image = morph[:, left_edge:right_edge]

    # Convert the cropped image to a 1-D feature vector and store it.
    flattened_image = image.flatten()
    images.append(flattened_image)

    # Optional: Extract the label from the filename or any other method

# Stack the per-image vectors into one (n_images, n_features) array.
X_train = np.array(images)



In [39]:
# Sanity check: report the dimensions of the stacked feature array.
print(np.shape(X_train))

(2000, 11700)


In [40]:
import pandas as pd

In [41]:
# Load the tab-separated label table.
data = pd.read_csv('labels.txt', sep='\t')

In [42]:
# Display the label table as loaded (its 'labels' column holds ODD / EVEN strings).
data

Unnamed: 0,labels
0,ODD
1,ODD
2,ODD
3,EVEN
4,EVEN
...,...
1995,ODD
1996,EVEN
1997,EVEN
1998,ODD


In [43]:
# Recode the 'ODD' class as the string '1'.
data['labels'] = data['labels'].replace({'ODD': '1'})

In [44]:
# Recode the 'EVEN' class as the string '0'.
data['labels'] = data['labels'].replace(to_replace='EVEN', value='0')

In [45]:
# Display the label table again to check the recoding ('ODD' -> '1', 'EVEN' -> '0').
data

Unnamed: 0,labels
0,1
1,1
2,1
3,0
4,0
...,...
1995,1
1996,0
1997,0
1998,1


In [46]:
# Pull the label column out as a NumPy array (the values are the strings '0' / '1').
Y_train = data.loc[:, 'labels'].to_numpy()

In [47]:
# Sanity check: the number of labels should equal the number of feature rows.
print(np.shape(Y_train))

(2000,)


In [36]:
from sklearn.model_selection import train_test_split

In [48]:
# Hold out 30% of the samples for evaluation; the fixed seed makes the split
# repeatable.
# NOTE(review): X_train is both the input and an output of this call, so
# running the cell a second time feeds the already-reduced X_train back in
# alongside the full-length Y_train. Re-run from the loading cell instead.
X_train, X_test, y_train, y_test = train_test_split(
    X_train,
    Y_train,
    test_size=0.3,
    random_state=42,
)

In [49]:
# Report the dimensions of each piece of the split, in the order
# train features, train labels, test features, test labels.
for split_part in (X_train, y_train, X_test, y_test):
    print(split_part.shape)

(1400, 11700)
(1400,)
(600, 11700)
(600,)


In [50]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [51]:
# Decision-tree baseline. scikit-learn permutes the candidate features at
# every split, so without a fixed random_state the fitted tree (and the
# accuracy reported below) can change from run to run. The seed matches the
# one used for the train/test split.
clf = DecisionTreeClassifier(random_state=42)

In [52]:
# Train the current classifier on the training split.
clf.fit(X=X_train, y=y_train)

In [53]:
# Predict labels for the held-out samples.
y_pred = clf.predict(X=X_test)

In [54]:
# Fraction of held-out samples whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9733333333333334


In [55]:
from sklearn.linear_model import LogisticRegression

# Logistic-regression baseline. With the default iteration cap the solver
# stopped before converging on these unscaled 0/255 pixel features (see the
# ConvergenceWarning recorded for the fit cell), so the cap is raised.
# NOTE(review): scaling the features is the other remedy the warning suggests.
clf = LogisticRegression(max_iter=1000)

In [56]:
# Train the current classifier on the training split.
clf.fit(X=X_train, y=y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [57]:
# Evaluate whichever estimator `clf` currently holds on the held-out split.
y_pred = clf.predict(X=X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [58]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours baseline; n_neighbors=5 is the library default,
# spelled out here for the reader.
clf = KNeighborsClassifier(n_neighbors=5)

In [59]:
# Train the current classifier on the training split.
clf.fit(X=X_train, y=y_train)

In [60]:
# Evaluate whichever estimator `clf` currently holds on the held-out split.
y_pred = clf.predict(X=X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [60]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes baseline, with the library defaults written out.
clf = GaussianNB(priors=None, var_smoothing=1e-09)

In [61]:
# Train the current classifier on the training split.
clf.fit(X=X_train, y=y_train)

In [62]:
# Evaluate whichever estimator `clf` currently holds on the held-out split.
# NOTE(review): the accuracy recorded under this cell (0.4675) is not a
# multiple of 1/600, so it appears to come from a run with a different test
# split size -- re-run the notebook top to bottom before relying on it.
y_pred = clf.predict(X=X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.4675
