# **CLASSIFICATION USING LOGISTIC REGRESSION**

In [126]:
# importing libraries needed for the classification problem
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from PIL import Image
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
import cv2


# **Dataset reading**

In [127]:
# Initiating Raveling list and Non_raveling list to sort the corresponding images data
Raveling_image_dataset = []
Non_raveling_image_dataset = []
# iterating through each image in Raveling dataset and storing it in Ravelin_data list as an numpy array
for image_file in os.listdir("/kaggle/input/raveling-detection-ce784a-2023/mod_ravelling_dataset/train/Non_raveling"):
        image = cv2.imread("/kaggle/input/raveling-detection-ce784a-2023/mod_ravelling_dataset/train/Non_raveling/"+image_file)
        Non_raveling_image_dataset.append(image)
for image_file in os.listdir("/kaggle/input/raveling-detection-ce784a-2023/mod_ravelling_dataset/train/Raveling"):
        image = cv2.imread("/kaggle/input/raveling-detection-ce784a-2023/mod_ravelling_dataset/train/Raveling/"+image_file)
        Raveling_image_dataset.append(image)

# **Feature generation**

In [128]:
# feature generation for Raveling data using SIFT
Raveling_dataset=[]
Raveling_lable=[]
for image in Raveling_image_dataset:
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Create a SIFT object
    sift = cv2.SIFT_create()
# Detect keypoints and compute descriptors
    keypoints, descriptors = sift.detectAndCompute(gray, None)
# Draw the keypoints on the image
    img_with_keypoints = cv2.drawKeypoints(image, keypoints, None)
# Reshaping the array into a 1D vector
    img_array = img_with_keypoints.reshape(-1)
    Raveling_dataset.append(img_array)
    Raveling_lable.append('Raveling')

In [129]:
 # feature generation for Non_raveling data usingSIFT
Non_raveling_dataset=[]
Non_raveling_lable=[]
for image in Non_raveling_image_dataset:
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Create a SIFT object
    sift = cv2.SIFT_create()
# Detect keypoints and compute descriptors
    keypoints, descriptors = sift.detectAndCompute(gray, None)
# Draw the keypoints on the image
    img_with_keypoints = cv2.drawKeypoints(image, keypoints, None)
# Reshaping the array into a 1D vector
    img_array = img_with_keypoints.reshape(-1)
    Non_raveling_dataset.append(img_array)
    Non_raveling_lable.append('Non_raveling')

In [130]:
# combining the both Raveling and Non_raveling feature data to a single dataset
# correspondingly combining the Lable data too
Input_data=Raveling_dataset+Non_raveling_dataset
Target_data=Raveling_lable+Non_raveling_lable
# print(Input_data)

In [131]:
# Splitting the data sets into training and testing datasets
X_train,X_test,y_train,y_test=train_test_split(Input_data, Target_data,test_size=0.2,shuffle=True,random_state=42)

In [132]:
# Normalizing the data
X_train=[x/255 for x in X_train]
X_train=np.array(X_train)
X_test=[x/255 for x in X_test]
X_test=np.array(X_test)

# ** Model selection and training**

In [133]:
# training the data using Logistic Regression model
model = LogisticRegression(C=0.01,max_iter=1000)
model.fit(X_train, y_train)

LogisticRegression(C=0.01, max_iter=1000)

In [134]:
# Predicting the target values for the normalized test data
y_predicted = model.predict(X_test)

# **Accuracy**

In [135]:
# Printing the classification report
score = accuracy_score(y_test, y_predicted)
print(score)

0.6714285714285714


# ** Class prediction for test images**

In [136]:
# extracting each image of the test data
New_Dataset=[]
image_name=[]
for image_file in os.listdir("/kaggle/input/raveling-detection-ce784a-2023/mod_ravelling_dataset/test"):
    with Image.open("/kaggle/input/raveling-detection-ce784a-2023/mod_ravelling_dataset/test/" + image_file) as img:
        # Converting the image into a numpy array
        img_array = np.array(img)
        # Reshaping the array into a 1D vector
        img_array = img_array.reshape(-1)
        New_Dataset.append(img_array)
        image_name.append(image_file)
# Normalizing the data
New_X_value=[x/255 for x in New_Dataset]
# converting the New_data list into array
New_X_value=np.array(New_Dataset)
# predicting the target lables of the test data
Predicted_y=model.predict(New_X_value)

In [137]:
# converting the perdicted data into a excel file
results = pd.DataFrame({'filename': image_name ,'class': Predicted_y})
results['filename']=results['filename'].str.replace('.jpg','').astype(int)
results=results.sort_values('filename')
results['filename']=results['filename'].astype(str)
results['filename']=results['filename']+'.jpg'
results.to_csv('output.csv', index=False)

  This is separate from the ipykernel package so we can avoid doing imports until
