In [1]:

import os
import itertools
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import extract_v3
import categorize
import cv2

# Per-sample feature columns, filled in while the label list is parsed.
# Generator unpacking gives every name its own independent empty list.
(
    X_baseline_angle,
    X_top_margin,
    X_letter_size,
    X_line_spacing,
    X_word_spacing,
    X_pen_pressure,
    X_slant_angle,
) = ([] for _ in range(7))

# One label column per trait (t1..t8).
(
    y_t1,
    y_t2,
    y_t3,
    y_t4,
    y_t5,
    y_t6,
    y_t7,
    y_t8,
) = ([] for _ in range(8))

# Page identifier recorded alongside each sample.
page_ids = list()

# NOTE: absolute, machine-specific location of the training data file.
label_list_path = r"C:\Users\parth\OneDrive\Desktop\IBM_Work\Graphology\Handwriting_Analysis\label_list"

# Load the labelled samples, train one SVC per personality trait, then
# interactively classify handwriting images entered by the user.
if os.path.isfile(label_list_path):
    print("Info: label_list found.")
    with open(label_list_path, "r") as labels:
        lines = labels.readlines()

    # Each sample occupies two consecutive lines: 8 feature fields
    # (7 numeric features + page id) followed by 8 numeric trait labels.
    # Stopping at len(lines) - 1 keeps a dangling last line (odd line count)
    # from raising IndexError on lines[i+1].
    if len(lines) % 2:
        print(f"Skipping trailing line without a matching label line: {lines[-1].strip()}")

    for i in range(0, len(lines) - 1, 2):
        feature_line = lines[i].strip().split()
        label_line = lines[i+1].strip().split()

        if len(feature_line) != 8 or len(label_line) != 8:
            print(f"Skipping line pair due to incorrect format: {lines[i].strip()} and {lines[i+1].strip()}")
            continue

        # Convert everything before appending so that a non-numeric field
        # cannot leave the feature/label lists with different lengths.
        try:
            feature_values = [float(field) for field in feature_line[:7]]
            label_values = [float(field) for field in label_line]
        except ValueError:
            print(f"Skipping line pair due to incorrect format: {lines[i].strip()} and {lines[i+1].strip()}")
            continue

        X_baseline_angle.append(feature_values[0])
        X_top_margin.append(feature_values[1])
        X_letter_size.append(feature_values[2])
        X_line_spacing.append(feature_values[3])
        X_word_spacing.append(feature_values[4])
        X_pen_pressure.append(feature_values[5])
        X_slant_angle.append(feature_values[6])
        page_ids.append(feature_line[7])

        y_t1.append(label_values[0])
        y_t2.append(label_values[1])
        y_t3.append(label_values[2])
        y_t4.append(label_values[3])
        y_t5.append(label_values[4])
        y_t6.append(label_values[5])
        y_t7.append(label_values[6])
        y_t8.append(label_values[7])

    # Create datasets for each trait: every trait is predicted from two features
    X_t1 = list(zip(X_baseline_angle, X_slant_angle))
    X_t2 = list(zip(X_letter_size, X_pen_pressure))
    X_t3 = list(zip(X_letter_size, X_top_margin))
    X_t4 = list(zip(X_line_spacing, X_word_spacing))
    X_t5 = list(zip(X_slant_angle, X_top_margin))
    X_t6 = list(zip(X_letter_size, X_line_spacing))
    X_t7 = list(zip(X_letter_size, X_word_spacing))
    X_t8 = list(zip(X_line_spacing, X_word_spacing))

    classifiers = []
    datasets = [
        (X_t1, y_t1),
        (X_t2, y_t2),
        (X_t3, y_t3),
        (X_t4, y_t4),
        (X_t5, y_t5),
        (X_t6, y_t6),
        (X_t7, y_t7),
        (X_t8, y_t8),
    ]
    # One fixed split seed per trait so the reported accuracies are repeatable
    random_states = [8, 16, 32, 64, 42, 52, 21, 73]

    for i, (X, y) in enumerate(datasets):
        trainable = False
        # train_test_split raises ValueError when the split would leave the
        # training set empty, so only split when at least two samples exist.
        if len(y) >= 2:
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=random_states[i])
            trainable = len(set(y_train)) > 1  # Ensure there is more than one class

        if trainable:
            clf = SVC(kernel='rbf')
            clf.fit(X_train, y_train)
            # accuracy_score is documented as (y_true, y_pred)
            accuracy = accuracy_score(y_test, clf.predict(X_test))
            print(f"Classifier {i+1} accuracy: {accuracy}")
            classifiers.append(clf)
        else:
            print(f"Classifier {i+1} was not trained due to insufficient class diversity in training data.")
            classifiers.append(None)  # Append None to maintain the correct index

    # NOTE: absolute, machine-specific folder the images are looked up in
    image_dir = r'C:\Users\parth\OneDrive\Desktop\IBM_Work\Graphology\Handwriting_Analysis\images\images'

    # Prediction loop
    while True:
        file_name = input("Enter file name to predict or 'z' to exit: ")
        if file_name == 'z':
            break

        # Construct the full path to the image file
        image_path = os.path.join(image_dir, file_name)
        image = cv2.imread(image_path)

        # cv2.imread returns None instead of raising when the file cannot be read
        if image is None:
            print(f"Error: Image file '{file_name}' not found or could not be loaded.")
            continue

        raw_features = extract_v3.start(image_path)  # Use the correct path

        # Each categorize.determine_* call returns (value, comment); the value
        # is fed to the classifiers and the comment is shown to the user.
        raw_baseline_angle = raw_features[0]
        baseline_angle, comment = categorize.determine_baseline_angle(raw_baseline_angle)
        print("Baseline Angle: " + comment)

        raw_top_margin = raw_features[1]
        top_margin, comment = categorize.determine_top_margin(raw_top_margin)
        print("Top Margin: " + comment)

        raw_letter_size = raw_features[2]
        letter_size, comment = categorize.determine_letter_size(raw_letter_size)
        print("Letter Size: " + comment)

        raw_line_spacing = raw_features[3]
        line_spacing, comment = categorize.determine_line_spacing(raw_line_spacing)
        print("Line Spacing: " + comment)

        raw_word_spacing = raw_features[4]
        word_spacing, comment = categorize.determine_word_spacing(raw_word_spacing)
        print("Word Spacing: " + comment)

        raw_pen_pressure = raw_features[5]
        pen_pressure, comment = categorize.determine_pen_pressure(raw_pen_pressure)
        print("Pen Pressure: " + comment)

        raw_slant_angle = raw_features[6]
        slant_angle, comment = categorize.determine_slant_angle(raw_slant_angle)
        print("Slant: " + comment)

        # (trait title, feature pair) in the same order as `datasets`, so that
        # entry k is evaluated by classifiers[k].
        trait_inputs = [
            ("Emotional Stability", [baseline_angle, slant_angle]),
            ("Mental Energy or Will Power", [letter_size, pen_pressure]),
            ("Modesty", [letter_size, top_margin]),
            ("Personal Harmony and Flexibility", [line_spacing, word_spacing]),
            ("Lack of Discipline", [slant_angle, top_margin]),
            ("Poor Concentration", [letter_size, line_spacing]),
            ("Non Communicativeness", [letter_size, word_spacing]),
            ("Social Isolation", [line_spacing, word_spacing]),
        ]

        for trait_clf, (trait_title, trait_features) in zip(classifiers, trait_inputs):
            # Untrained traits are stored as None; test for that explicitly
            # rather than relying on the truthiness of an estimator object.
            if trait_clf is not None:
                print(f"{trait_title}: ", trait_clf.predict([trait_features]))

        print("---------------------------------------------------")
else:
    print("Error: label_list file not found.")
 

Info: label_list found.
Classifier 1 was not trained due to insufficient class diversity in training data.
Classifier 2 accuracy: 1.0
Classifier 3 accuracy: 1.0
Classifier 4 accuracy: 1.0
Classifier 5 accuracy: 1.0
Classifier 6 accuracy: 1.0
Classifier 7 accuracy: 1.0
Classifier 8 accuracy: 1.0


Enter file name to predict or 'z' to exit:  Canvas.png


Baseline Angle: DESCENDING
Top Margin: MEDIUM OR BIGGER
Letter Size: SMALL
Line Spacing: BIG
Word Spacing: BIG
Pen Pressure: HEAVY
Slant: EXTREMELY RECLINED
Mental Energy or Will Power:  [1.]
Modesty:  [1.]
Personal Harmony and Flexibility:  [0.]
Lack of Discipline:  [0.]
Poor Concentration:  [0.]
Non Communicativeness:  [1.]
Social Isolation:  [1.]
---------------------------------------------------


Enter file name to predict or 'z' to exit:  z


In [None]:
import os
import itertools
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import extract_v3
import categorize
import cv2

# Fresh, independent containers for the feature columns; they are populated
# while the label list is parsed.
(
    X_baseline_angle,
    X_top_margin,
    X_letter_size,
    X_line_spacing,
    X_word_spacing,
    X_pen_pressure,
    X_slant_angle,
) = ([] for _ in range(7))

# Label columns, one per trait (t1..t8).
(
    y_t1,
    y_t2,
    y_t3,
    y_t4,
    y_t5,
    y_t6,
    y_t7,
    y_t8,
) = ([] for _ in range(8))

# Page identifier recorded alongside each sample.
page_ids = list()

# NOTE: absolute, machine-specific location of the training data file.
label_list_path = r"C:\Users\parth\OneDrive\Desktop\IBM_Work\Graphology\Handwriting_Analysis\label_list"

# Load the labelled samples, train seven different model families per
# personality trait, then interactively classify handwriting images and print
# every model's prediction side by side.
if os.path.isfile(label_list_path):
    print("Info: label_list found.")
    with open(label_list_path, "r") as labels:
        lines = labels.readlines()

    # Each sample occupies two consecutive lines: 8 feature fields
    # (7 numeric features + page id) followed by 8 numeric trait labels.
    # Stopping at len(lines) - 1 keeps a dangling last line (odd line count)
    # from raising IndexError on lines[i+1].
    if len(lines) % 2:
        print(f"Skipping trailing line without a matching label line: {lines[-1].strip()}")

    for i in range(0, len(lines) - 1, 2):
        feature_line = lines[i].strip().split()
        label_line = lines[i+1].strip().split()

        if len(feature_line) != 8 or len(label_line) != 8:
            print(f"Skipping line pair due to incorrect format: {lines[i].strip()} and {lines[i+1].strip()}")
            continue

        # Convert everything before appending so that a non-numeric field
        # cannot leave the feature/label lists with different lengths.
        try:
            feature_values = [float(field) for field in feature_line[:7]]
            label_values = [float(field) for field in label_line]
        except ValueError:
            print(f"Skipping line pair due to incorrect format: {lines[i].strip()} and {lines[i+1].strip()}")
            continue

        X_baseline_angle.append(feature_values[0])
        X_top_margin.append(feature_values[1])
        X_letter_size.append(feature_values[2])
        X_line_spacing.append(feature_values[3])
        X_word_spacing.append(feature_values[4])
        X_pen_pressure.append(feature_values[5])
        X_slant_angle.append(feature_values[6])
        page_ids.append(feature_line[7])

        y_t1.append(label_values[0])
        y_t2.append(label_values[1])
        y_t3.append(label_values[2])
        y_t4.append(label_values[3])
        y_t5.append(label_values[4])
        y_t6.append(label_values[5])
        y_t7.append(label_values[6])
        y_t8.append(label_values[7])

    # Create datasets for each trait: every trait is predicted from two features
    X_t1 = list(zip(X_baseline_angle, X_slant_angle))
    X_t2 = list(zip(X_letter_size, X_pen_pressure))
    X_t3 = list(zip(X_letter_size, X_top_margin))
    X_t4 = list(zip(X_line_spacing, X_word_spacing))
    X_t5 = list(zip(X_slant_angle, X_top_margin))
    X_t6 = list(zip(X_letter_size, X_line_spacing))
    X_t7 = list(zip(X_letter_size, X_word_spacing))
    X_t8 = list(zip(X_line_spacing, X_word_spacing))

    classifiers = []
    datasets = [
        (X_t1, y_t1),
        (X_t2, y_t2),
        (X_t3, y_t3),
        (X_t4, y_t4),
        (X_t5, y_t5),
        (X_t6, y_t6),
        (X_t7, y_t7),
        (X_t8, y_t8),
    ]
    # One fixed split seed per trait so the reported accuracies are repeatable
    random_states = [8, 16, 32, 64, 42, 52, 21, 73]

    # (short tag used in prediction output, estimator class, constructor kwargs).
    # The order fixes the position of each model inside every classifiers[k] tuple.
    model_specs = [
        ("RF", RandomForestClassifier, {"n_estimators": 100, "random_state": 42}),
        ("DT", DecisionTreeClassifier, {"random_state": 42}),
        ("KNN", KNeighborsClassifier, {"n_neighbors": 3}),
        ("LR", LogisticRegression, {"max_iter": 1000, "random_state": 42}),
        ("GB", GradientBoostingClassifier, {"n_estimators": 100, "random_state": 42}),
        ("AB", AdaBoostClassifier, {"n_estimators": 100, "random_state": 42}),
        ("NB", GaussianNB, {}),
    ]

    for i, (X, y) in enumerate(datasets):
        trainable = False
        # train_test_split raises ValueError when the split would leave the
        # training set empty, so only split when at least two samples exist.
        if len(y) >= 2:
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=random_states[i])
            trainable = len(set(y_train)) > 1  # Ensure there is more than one class

        if trainable:
            fitted_models = []
            for _tag, estimator_class, estimator_kwargs in model_specs:
                model = estimator_class(**estimator_kwargs)
                model.fit(X_train, y_train)
                # accuracy_score is documented as (y_true, y_pred)
                accuracy = accuracy_score(y_test, model.predict(X_test))
                print(f"{estimator_class.__name__} {i+1} accuracy: {accuracy}")
                fitted_models.append(model)
            classifiers.append(tuple(fitted_models))
        else:
            print(f"Classifier {i+1} was not trained due to insufficient class diversity in training data.")
            # Append a tuple of None placeholders to maintain the correct index
            classifiers.append((None,) * len(model_specs))

    # NOTE: absolute, machine-specific folder the images are looked up in
    image_dir = r'C:\Users\parth\Downloads\images\images'

    # Prediction loop
    while True:
        file_name = input("Enter file name to predict or 'z' to exit: ")
        if file_name == 'z':
            break

        # Construct the full path to the image file
        image_path = os.path.join(image_dir, file_name)
        image = cv2.imread(image_path)

        # cv2.imread returns None instead of raising when the file cannot be read
        if image is None:
            print(f"Error: Image file '{file_name}' not found or could not be loaded.")
            continue

        raw_features = extract_v3.start(image_path)  # Use the correct path

        # Each categorize.determine_* call returns (value, comment); the value
        # is fed to the classifiers and the comment is shown to the user.
        raw_baseline_angle = raw_features[0]
        baseline_angle, comment = categorize.determine_baseline_angle(raw_baseline_angle)
        print("Baseline Angle: " + comment)

        raw_top_margin = raw_features[1]
        top_margin, comment = categorize.determine_top_margin(raw_top_margin)
        print("Top Margin: " + comment)

        raw_letter_size = raw_features[2]
        letter_size, comment = categorize.determine_letter_size(raw_letter_size)
        print("Letter Size: " + comment)

        raw_line_spacing = raw_features[3]
        line_spacing, comment = categorize.determine_line_spacing(raw_line_spacing)
        print("Line Spacing: " + comment)

        raw_word_spacing = raw_features[4]
        word_spacing, comment = categorize.determine_word_spacing(raw_word_spacing)
        print("Word Spacing: " + comment)

        raw_pen_pressure = raw_features[5]
        pen_pressure, comment = categorize.determine_pen_pressure(raw_pen_pressure)
        print("Pen Pressure: " + comment)

        raw_slant_angle = raw_features[6]
        slant_angle, comment = categorize.determine_slant_angle(raw_slant_angle)
        print("Slant: " + comment)

        # (trait title, feature pair) in the same order as `datasets`, so that
        # entry k is evaluated by the models in classifiers[k].
        trait_inputs = [
            ("Emotional Stability", [baseline_angle, slant_angle]),
            ("Mental Energy or Will Power", [letter_size, pen_pressure]),
            ("Modesty", [letter_size, top_margin]),
            ("Personal Harmony and Flexibility", [line_spacing, word_spacing]),
            ("Lack of Discipline", [slant_angle, top_margin]),
            ("Poor Concentration", [letter_size, line_spacing]),
            ("Non Communicativeness", [letter_size, word_spacing]),
            ("Social Isolation", [line_spacing, word_spacing]),
        ]

        for trait_models, (trait_title, trait_features) in zip(classifiers, trait_inputs):
            for (model_tag, _cls, _kwargs), trait_model in zip(model_specs, trait_models):
                # Untrained slots hold None. Compare against None explicitly:
                # ensemble estimators define __len__, so plain truthiness would
                # depend on their estimator count rather than on presence.
                if trait_model is not None:
                    print(f"{trait_title} ({model_tag}): ", trait_model.predict([trait_features]))

        print("---------------------------------------------------")
else:
    print("Error: label_list file not found.")


Info: label_list found.
Classifier 1 was not trained due to insufficient class diversity in training data.
RandomForestClassifier 2 accuracy: 1.0
DecisionTreeClassifier 2 accuracy: 1.0
KNeighborsClassifier 2 accuracy: 1.0
LogisticRegression 2 accuracy: 0.9313725490196079
GradientBoostingClassifier 2 accuracy: 1.0
AdaBoostClassifier 2 accuracy: 1.0
GaussianNB 2 accuracy: 1.0
RandomForestClassifier 3 accuracy: 1.0
DecisionTreeClassifier 3 accuracy: 1.0
KNeighborsClassifier 3 accuracy: 1.0
LogisticRegression 3 accuracy: 1.0
GradientBoostingClassifier 3 accuracy: 1.0
AdaBoostClassifier 3 accuracy: 1.0
GaussianNB 3 accuracy: 1.0
RandomForestClassifier 4 accuracy: 1.0
DecisionTreeClassifier 4 accuracy: 1.0
KNeighborsClassifier 4 accuracy: 1.0
LogisticRegression 4 accuracy: 0.9901960784313726
GradientBoostingClassifier 4 accuracy: 1.0
AdaBoostClassifier 4 accuracy: 1.0
GaussianNB 4 accuracy: 1.0
RandomForestClassifier 5 accuracy: 1.0
DecisionTreeClassifier 5 accuracy: 1.0
KNeighborsClassifier

Enter file name to predict or 'z' to exit:  000-19.png



************************************************
Slant determined to be straight.
