In [58]:
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as img
import os
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score

In [59]:
#Function to:
# 1. Reduce the Resolution of Image from 1200 x 900 to 64 x 48
# 2. Convert the Image from colored to Black & White (Removing the Third Dimension of the Image)
# 3. Saving the new Images in a separate folder which can be accessed independently later on &
# 4. Saving the reduced quality images in a single variable for easier Analysis

def modify_image(folder):
    images = []
    new_folder = r"C:/Users/rhainej.williams/Downloads/Final Project Files/Images_new" #This is the file path for the new images to be stored
    
    for filename in os.listdir(folder):
        image = Image.open(os.path.join(folder,filename))  #Opening the images from the original folder
        new_image = image.convert("L")                     #Converting from Color to B&W Images
        neww_image = new_image.resize((45, 34))            #Reducing the Resolution of Image from 1200 x 900 to 64 x 48
        neww_image.save(os.path.join(new_folder,filename)) #Saving the edited images in the new folder
        if img is not None:
            images.append(neww_image)                      #Placing the new images in the variable for analysis
    return images


In [62]:
pictures = modify_image(os.chdir("/Users/Downloads/Final Project Files/Capital Letters")) #Calling the above function and saving the new images 

FileNotFoundError: [Errno 2] No such file or directory: '/Users/Downloads/Final Project Files/Capital Letters'

In [63]:
os.chdir("/Downloads/Final Project Files/Capital Letters")

with open("amaranth-stirfry.txt","r") as txtfile: #"r" indicates that we are reading the textfile and not writing to it
    recipe=txtfile.read() #.read() retrieves raw text information from the file we opened
    
print(recipe)

FileNotFoundError: [Errno 2] No such file or directory: '/Downloads/Final Project Files/Capital Letters'

In [None]:
len(pictures)

## Preparing the Features

In [None]:
#Checking the data type of the images after being uploaded with the function above
type(pictures[0])

In [None]:
#Converting the Pictures from PIL Images to Arrays for easier Data Analysis

#Converting from PIL Image to List
images1 = []
for i in range(len(pictures)):
    pix = np.array(pictures[i])
    images1.append(pix)

In [None]:
images1

In [None]:
#Converting from List to Array
image_array = np.array(images1)
image_array

In [None]:
len(image_array[0].flatten())

In [None]:
#Creating a FLATTENED array for all samples where each column will be represented by the pixels (features) of the images 

#Creating List
pixels=[]
for j in range(len(image_array)):
    all_pix = image_array[j].flatten()
    pixels.append(all_pix)

In [None]:
#Converting from List to Array
final_features = np.array(pixels)
final_features

In [None]:
#Checking the shape of the array
final_features.shape  

This shape implies that there are 3072 (64x48) features, and 3410 samples. 

## Preparing the Data Labels 

In [None]:
data = np.array(pd.read_csv(r"C:\Users\rhainej.williams\Downloads\Final Project Files\capitallabels.csv"))
len(data)

In [None]:
data_labels = data.tolist()

In [None]:
count_values=pd.Series(data_labels).value_counts()
print(*count_values)

In [None]:
final_labels=data.squeeze()
final_labels.shape

## Data Preprocessing

In [None]:
X_train, X_test, y_train, y_test = train_test_split(final_features, final_labels, stratify = final_labels, test_size=0.2, random_state=42)

In [None]:
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

## Fitting the Logistic Regression Algorithm

In [None]:
log = LogisticRegression(multi_class='multinomial',C =1e-2, max_iter=1e6, penalty='l2', class_weight = 'balanced', random_state=42).fit(X_train, y_train)

**Testing Accuracy**

In [None]:
logistic_predictions_test = log.predict(X_test)
print(*logistic_predictions_test)

In [None]:
print(*y_test)

In [None]:
logistic_accuracy_test = accuracy_score(y_test, logistic_predictions_test)
logistic_accuracy_test

**Training Accuracy**

In [None]:
logistic_predictions_train = log.predict(X_train)
print(*logistic_predictions_train)

In [None]:
print(*y_train)

In [None]:
logistic_accuracy_train = accuracy_score(y_train, logistic_predictions_train)
logistic_accuracy_train

## Fitting the Random Forest Algorithm

In [None]:
from sklearn.ensemble import RandomForestClassifier

random_for = RandomForestClassifier(random_state=42,criterion='gini',max_features='log2',class_weight='balanced').fit(X_train, y_train)

**Testing Accuracy:**

In [None]:
random_predictions_test = random_for.predict(X_test)
print(*random_predictions_test)

In [None]:
print(*y_test)

In [None]:
random_accuracy_test = accuracy_score(y_test, random_predictions_test)
random_accuracy_test

**Training Accuracy:**

In [None]:
random_predictions_train = random_for.predict(X_train)
print(*random_predictions_train)

In [None]:
print(*y_train)

In [None]:
random_accuracy_train = accuracy_score(y_train, random_predictions_train)
random_accuracy_train

## Fitting the KNN Algorithm

In [None]:
from sklearn.neighbors import KNeighborsClassifier

KNN = KNeighborsClassifier().fit(X_train, y_train)

**Testing Accuracy:**

In [None]:
KNN_predictions_test = KNN.predict(X_test)
print(*KNN_predictions_test)

In [None]:
KNN_accuracy_test = accuracy_score(y_test, KNN_predictions_test)
KNN_accuracy_test

**Training Accuracy:**

In [None]:
KNN_predictions_train = KNN.predict(X_train)
print(*KNN_predictions_train)

In [None]:
KNN_accuracy_train = accuracy_score(y_train, KNN_predictions_train)
KNN_accuracy_train

### Logistic Regression Optimization:

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
param_grid = [{'penalty':['l1','l2','None','elasticnet'],'C':[1 50 100],'solver':[‘lbfgs’, ‘liblinear’, ‘newton-cg’, ‘newton-cholesky’, ‘sag’, ‘saga’],
              'max_iter':[750],'multi_class':'multinomial','tol':[1e-4]}]

In [None]:
log_opt = LogisticRegression(n_jobs = -1, random_state =42)

In [None]:
log_GridSearchCV = GridSearchCV(log_opt, param_grid, scoring = 'accuracy')
log_GridSearchCV.fit(X_train,y_train)

In [None]:
log_opt_pred = log_GridSearchCV.predict(X_test)
log_opt_pred

In [None]:
accuracy_score(y_test,log_opt_pred)