# Glaucoma disease prediction

In [36]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [37]:
# Load the dataset; the path is relative to the kernel's working directory.
df = pd.read_csv('glaucoma.csv')

In [38]:
import warnings
# Suppress every warning category for the rest of the kernel session.
# NOTE(review): this also hides library warnings raised by later cells — consider a narrower filter.
warnings.filterwarnings('ignore')

In [39]:
# Number of rows that repeat an earlier row exactly.
df.duplicated().sum()

0

In [40]:
# Missing-value count for each column.
df.isnull().sum()

Patient ID                                       0
Age                                              0
Gender                                           0
Visual Acuity Measurements                       0
Intraocular Pressure (IOP)                       0
Cup-to-Disc Ratio (CDR)                          0
Family History                                   0
Medical History                               2547
Medication Usage                              1231
Visual Field Test Results                        0
Optical Coherence Tomography (OCT) Results       0
Pachymetry                                       0
Cataract Status                                  0
Angle Closure Status                             0
Visual Symptoms                                  0
Diagnosis                                        0
Glaucoma Type                                    0
dtype: int64

In [41]:
# The identifier column is not used as a feature; remove it.
df = df.drop(columns=['Patient ID'])

In [42]:
# Collect column labels by dtype family first, then report both groups.
object_columns = df.select_dtypes(include=['object', 'bool']).columns
numerical_columns = df.select_dtypes(include=['int64', 'float64']).columns

print("Object type columns:")
print(object_columns)
print("\nNumerical type columns:")
print(numerical_columns)

Object type columns:
Index(['Gender', 'Visual Acuity Measurements', 'Family History',
       'Medical History', 'Medication Usage', 'Visual Field Test Results',
       'Optical Coherence Tomography (OCT) Results', 'Cataract Status',
       'Angle Closure Status', 'Visual Symptoms', 'Diagnosis',
       'Glaucoma Type'],
      dtype='object')

Numerical type columns:
Index(['Age', 'Intraocular Pressure (IOP)', 'Cup-to-Disc Ratio (CDR)',
       'Pachymetry'],
      dtype='object')


In [7]:
# Preview the first rows of the frame.
# NOTE(review): the saved output still lists 'Patient ID', which an earlier cell drops —
# it looks stale (out-of-order execution); re-run to confirm.
df.head()

Unnamed: 0,Patient ID,Age,Gender,Visual Acuity Measurements,Intraocular Pressure (IOP),Cup-to-Disc Ratio (CDR),Family History,Visual Field Test Results,Optical Coherence Tomography (OCT) Results,Pachymetry,Cataract Status,Angle Closure Status,Visual Symptoms,Diagnosis,Glaucoma Type
0,62431,69,Male,LogMAR 0.1,19.46,0.42,No,"Sensitivity: 0.54, Specificity: 0.75","RNFL Thickness: 86.48 µm, GCC Thickness: 64.14...",541.51,Present,Open,"Tunnel vision, Eye pain, Nausea",No Glaucoma,Primary Open-Angle Glaucoma
1,68125,69,Female,LogMAR 0.1,18.39,0.72,No,"Sensitivity: 0.72, Specificity: 0.88","RNFL Thickness: 96.88 µm, GCC Thickness: 56.48...",552.77,Absent,Open,"Redness in the eye, Vision loss, Tunnel vision",No Glaucoma,Juvenile Glaucoma
2,63329,67,Female,20/40,23.65,0.72,No,"Sensitivity: 0.56, Specificity: 0.8","RNFL Thickness: 89.81 µm, GCC Thickness: 59.05...",573.65,Absent,Closed,"Halos around lights, Vision loss, Redness in t...",No Glaucoma,Juvenile Glaucoma
3,47174,23,Male,LogMAR 0.0,18.04,0.61,No,"Sensitivity: 0.6, Specificity: 0.93","RNFL Thickness: 87.25 µm, GCC Thickness: 63.98...",590.67,Absent,Closed,"Nausea, Nausea, Halos around lights",No Glaucoma,Congenital Glaucoma
4,67361,21,Male,LogMAR 0.1,15.87,0.3,No,"Sensitivity: 0.82, Specificity: 0.9","RNFL Thickness: 82.61 µm, GCC Thickness: 66.01...",588.41,Absent,Closed,"Eye pain, Eye pain, Tunnel vision",No Glaucoma,Primary Open-Angle Glaucoma


In [9]:
# Show the current column labels.
# NOTE(review): the saved output still lists 'Patient ID', which an earlier cell drops —
# it looks stale (out-of-order execution); re-run to confirm.
df.columns


Index(['Patient ID', 'Age', 'Gender', 'Visual Acuity Measurements',
       'Intraocular Pressure (IOP)', 'Cup-to-Disc Ratio (CDR)',
       'Family History', 'Visual Field Test Results',
       'Optical Coherence Tomography (OCT) Results', 'Pachymetry',
       'Cataract Status', 'Angle Closure Status', 'Visual Symptoms',
       'Diagnosis', 'Glaucoma Type'],
      dtype='object')

In [43]:
def classify_features(df, max_unique=30):
    """Bucket the columns of ``df`` by dtype family and cardinality.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are inspected. It is not modified.
    max_unique : int, default 30
        A column with fewer than ``max_unique`` distinct non-null values
        counts as low-cardinality.

    Returns
    -------
    tuple of four lists of column labels, each in column order
        ``(categorical, non_categorical, discrete, continuous)``:

        * categorical     -- text / bool / category columns, low cardinality
        * non_categorical -- text / bool / category columns, high cardinality
        * discrete        -- numeric columns, low cardinality
        * continuous      -- numeric columns, high cardinality

        Columns of any other dtype (e.g. datetimes) appear in none of the lists.
    """
    # Local import so the function does not depend on a module-level alias.
    from pandas.api import types as pdt

    categorical_features = []
    non_categorical_features = []
    discrete_features = []
    continuous_features = []

    for column in df.columns:
        series = df[column]
        dtype = series.dtype

        # Bool is tested here, before the numeric branch, because pandas
        # reports bool as a numeric dtype.
        is_label_like = (
            pdt.is_bool_dtype(dtype)
            or pdt.is_object_dtype(dtype)
            or pdt.is_string_dtype(dtype)
            or isinstance(dtype, pdt.CategoricalDtype)
        )

        if is_label_like:
            low, high = categorical_features, non_categorical_features
        elif pdt.is_numeric_dtype(dtype):
            # Covers every integer/float width, not only int64/float64.
            low, high = discrete_features, continuous_features
        else:
            continue

        (low if series.nunique() < max_unique else high).append(column)

    return categorical_features, non_categorical_features, discrete_features, continuous_features

In [44]:
# Group the current columns into the four feature buckets returned by classify_features.
categorical, non_categorical, discrete, continuous = classify_features(df)

In [45]:
# Report each feature bucket on its own line.
for label, features in (
    ("Categorical Features:", categorical),
    ("Non-Categorical Features:", non_categorical),
    ("Discrete Features:", discrete),
    ("Continuous Features:", continuous),
):
    print(label, features)

Categorical Features: ['Gender', 'Visual Acuity Measurements', 'Family History', 'Medical History', 'Cataract Status', 'Angle Closure Status', 'Diagnosis', 'Glaucoma Type']
Non-Categorical Features: ['Medication Usage', 'Visual Field Test Results', 'Optical Coherence Tomography (OCT) Results', 'Visual Symptoms']
Discrete Features: []
Continuous Features: ['Age', 'Intraocular Pressure (IOP)', 'Cup-to-Disc Ratio (CDR)', 'Pachymetry']


In [46]:
# Remove the text columns that classify_features reported as non-categorical.
df = df.drop(
    columns=[
        'Medication Usage',
        'Visual Field Test Results',
        'Optical Coherence Tomography (OCT) Results',
        'Visual Symptoms',
    ]
)

In [47]:
# 'Medical History' has missing values (see the null counts above in the notebook); drop the column.
df = df.drop(columns=['Medical History'])

In [48]:
# Re-run the grouping now that columns have been dropped.
categorical, non_categorical, discrete, continuous = classify_features(df)

In [49]:
# Report each feature bucket on its own line.
for label, features in (
    ("Categorical Features:", categorical),
    ("Non-Categorical Features:", non_categorical),
    ("Discrete Features:", discrete),
    ("Continuous Features:", continuous),
):
    print(label, features)

Categorical Features: ['Gender', 'Visual Acuity Measurements', 'Family History', 'Cataract Status', 'Angle Closure Status', 'Diagnosis', 'Glaucoma Type']
Non-Categorical Features: []
Discrete Features: []
Continuous Features: ['Age', 'Intraocular Pressure (IOP)', 'Cup-to-Disc Ratio (CDR)', 'Pachymetry']


In [50]:
# Columns to expand into indicator columns; 'Glaucoma Type' is not listed and stays as-is.
categorical_cols = [
    'Gender', 'Visual Acuity Measurements', 'Family History',
    'Cataract Status', 'Angle Closure Status', 'Diagnosis',
]

# One-hot encode the listed columns, keeping every level (drop_first=False).
df = pd.get_dummies(data=df, columns=categorical_cols, drop_first=False)


In [51]:
# Display the encoded frame (the rendering is truncated by pandas).
# NOTE(review): df.head() would keep the saved output smaller.
df

Unnamed: 0,Age,Intraocular Pressure (IOP),Cup-to-Disc Ratio (CDR),Pachymetry,Glaucoma Type,Gender_Female,Gender_Male,Visual Acuity Measurements_20/20,Visual Acuity Measurements_20/40,Visual Acuity Measurements_LogMAR 0.0,Visual Acuity Measurements_LogMAR 0.1,Family History_No,Family History_Yes,Cataract Status_Absent,Cataract Status_Present,Angle Closure Status_Closed,Angle Closure Status_Open,Diagnosis_Glaucoma,Diagnosis_No Glaucoma
0,69,19.46,0.42,541.51,Primary Open-Angle Glaucoma,False,True,False,False,False,True,True,False,False,True,False,True,False,True
1,69,18.39,0.72,552.77,Juvenile Glaucoma,True,False,False,False,False,True,True,False,True,False,False,True,False,True
2,67,23.65,0.72,573.65,Juvenile Glaucoma,True,False,False,True,False,False,True,False,True,False,True,False,False,True
3,23,18.04,0.61,590.67,Congenital Glaucoma,False,True,False,False,True,False,True,False,True,False,True,False,False,True
4,21,15.87,0.30,588.41,Primary Open-Angle Glaucoma,False,True,False,False,False,True,True,False,True,False,True,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,81,22.83,0.49,576.53,Normal-Tension Glaucoma,True,False,False,False,False,True,True,False,True,False,False,True,True,False
9996,65,11.72,0.54,548.10,Normal-Tension Glaucoma,False,True,False,False,True,False,True,False,True,False,False,True,False,True
9997,56,10.67,0.56,513.74,Secondary Glaucoma,True,False,False,True,False,False,False,True,False,True,False,True,False,True
9998,26,23.37,0.74,585.03,Normal-Tension Glaucoma,False,True,False,False,False,True,True,False,True,False,True,False,True,False


In [52]:
# Row count per label value in the target column.
df['Glaucoma Type'].value_counts()

Glaucoma Type
Juvenile Glaucoma              1734
Normal-Tension Glaucoma        1699
Primary Open-Angle Glaucoma    1668
Secondary Glaucoma             1657
Congenital Glaucoma            1641
Angle-Closure Glaucoma         1601
Name: count, dtype: int64

In [53]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from imblearn.over_sampling import RandomOverSampler

# Separate the label column from the predictors.
y = df['Glaucoma Type']
X = df.drop(columns='Glaucoma Type')

# Hold out 20% of the rows for testing, stratified on the label.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

print("Before Oversampling:", y_train.value_counts())

Before Oversampling: Glaucoma Type
Juvenile Glaucoma              1387
Normal-Tension Glaucoma        1359
Primary Open-Angle Glaucoma    1334
Secondary Glaucoma             1326
Congenital Glaucoma            1313
Angle-Closure Glaucoma         1281
Name: count, dtype: int64


In [54]:
# Resample the training split only; the test split is left untouched.
ros = RandomOverSampler(random_state=42)
X_train_res, y_train_res = ros.fit_resample(X=X_train, y=y_train)

print("After Oversampling:", y_train_res.value_counts())

After Oversampling: Glaucoma Type
Juvenile Glaucoma              1387
Primary Open-Angle Glaucoma    1387
Secondary Glaucoma             1387
Angle-Closure Glaucoma         1387
Congenital Glaucoma            1387
Normal-Tension Glaucoma        1387
Name: count, dtype: int64


In [57]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import LabelEncoder

In [59]:
# Fit the encoder on the label column, then overwrite that column with integer codes.
le = LabelEncoder().fit(df['Glaucoma Type'])
df['Glaucoma Type'] = le.transform(df['Glaucoma Type'])

In [60]:
# Rebuild the label vector and predictor matrix from the re-encoded frame.
y = df['Glaucoma Type']
X = pd.get_dummies(df.drop(columns='Glaucoma Type'), drop_first=True)

In [61]:
# Stratified 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

In [62]:
# Resample the training split only; the test split is left untouched.
ros = RandomOverSampler(random_state=42)
X_train_res, y_train_res = ros.fit_resample(X=X_train, y=y_train)

In [63]:
# Logistic regression trained on the resampled training split.
log_reg_model = LogisticRegression(solver='liblinear', max_iter=1000)
log_reg_model.fit(X=X_train_res, y=y_train_res)

In [65]:
# Predict labels for the held-out test split with the logistic-regression model.
y_pred = log_reg_model.predict(X_test)

In [66]:
# Report test-set metrics for the current y_pred.
# NOTE(review): the saved run scored about 0.15 accuracy, which is roughly chance for six classes.
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_true=y_test, y_pred=y_pred))
print("\nClassification Report:\n", classification_report(y_true=y_test, y_pred=y_pred))

Accuracy: 0.154

Confusion Matrix:
 [[51 48 15 79 62 65]
 [64 46 15 82 67 54]
 [60 59 27 78 64 59]
 [55 60 26 64 74 61]
 [46 64 21 75 67 61]
 [67 44 30 71 66 53]]

Classification Report:
               precision    recall  f1-score   support

           0       0.15      0.16      0.15       320
           1       0.14      0.14      0.14       328
           2       0.20      0.08      0.11       347
           3       0.14      0.19      0.16       340
           4       0.17      0.20      0.18       334
           5       0.15      0.16      0.15       331

    accuracy                           0.15      2000
   macro avg       0.16      0.15      0.15      2000
weighted avg       0.16      0.15      0.15      2000



# Using a support vector classifier (SVC)

In [67]:
from sklearn import svm


In [68]:
# Support vector classifier with a linear kernel; all other settings are library defaults.
svc = svm.SVC(kernel = 'linear')

In [69]:
# Fit on the original (non-resampled) training split.
# NOTE(review): the logistic-regression cell trains on X_train_res / y_train_res instead —
# confirm the difference is intentional.
svc.fit(X_train, y_train)

In [70]:
# Predict labels for the held-out test split; this overwrites the earlier y_pred.
y_pred = svc.predict(X_test)

In [71]:
# Report test-set metrics for the current y_pred.
# NOTE(review): the saved run scored about 0.14 accuracy, which is roughly chance for six classes.
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_true=y_test, y_pred=y_pred))
print("\nClassification Report:\n", classification_report(y_true=y_test, y_pred=y_pred))

Accuracy: 0.1435

Confusion Matrix:
 [[  8  28  79  86  56  63]
 [ 13  33  70  95  56  61]
 [ 11  44  72 101  65  54]
 [ 10  50  79  72  72  57]
 [ 11  46  85 100  51  41]
 [  7  45  92  78  58  51]]

Classification Report:
               precision    recall  f1-score   support

           0       0.13      0.03      0.04       320
           1       0.13      0.10      0.11       328
           2       0.15      0.21      0.17       347
           3       0.14      0.21      0.17       340
           4       0.14      0.15      0.15       334
           5       0.16      0.15      0.16       331

    accuracy                           0.14      2000
   macro avg       0.14      0.14      0.13      2000
weighted avg       0.14      0.14      0.13      2000

