In [52]:
from keras import Sequential
from keras.layers import Dense
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, classification_report

# Encode text values to dummy variables(i.e. [1,0,0],[0,1,0],[0,0,1] for red,green,blue)
def encode_text_dummy(df, name):
    dummies = pd.get_dummies(df[name])
    for x in dummies.columns:
        dummy_name = "{}-{}".format(name, x)
        df[dummy_name] = dummies[x]
    df.drop(name, axis=1, inplace=True)

# Encode text values to indexes(i.e. [1],[2],[3] for red,green,blue).
def encode_text_index(df, name):
    le = preprocessing.LabelEncoder()
    df[name] = le.fit_transform(df[name])
    return le.classes_

# Convert a Pandas dataframe to the x,y inputs that TensorFlow needs
import collections
def to_xy(df, target):
    result = []
    for x in df.columns:
        if x != target:
            result.append(x)
    # find out the type of the target column. 
    target_type = df[target].dtypes
    target_type = target_type[0] if isinstance(target_type, collections.abc.Sequence) else target_type
    # Encode to int for classification, float otherwise. TensorFlow likes 32 bits.
    if target_type in (np.int64, np.int32):
        # Classification
        dummies = pd.get_dummies(df[target])
        return df[result].values.astype(np.float32), dummies.values.astype(np.float32)
    else:
        # Regression
        return df[result].values.astype(np.float32), df[target].values.astype(np.float32)

In [53]:
#Loading the dataset
data_df = pd.read_csv('data/admission_data_set.csv',header='infer')

In [54]:
#Dropping unecessary Columns
data_df = data_df.drop('SOP', axis=1)
data_df = data_df.drop('LOR', axis=1)
data_df = data_df.drop('CGPA', axis=1)

In [55]:
data_df

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,Research,Chance of Admit
0,1,337,118,4,1,0.92
1,2,324,107,4,1,0.76
2,3,316,104,3,1,0.72
3,4,322,110,3,1,0.80
4,5,314,103,2,0,0.65
...,...,...,...,...,...,...
495,496,332,108,5,1,0.87
496,497,337,117,5,1,0.96
497,498,330,120,5,1,0.93
498,499,312,103,4,0,0.73


In [56]:
#Loading the test data
testdata=data_df[0:100]

testdata

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,Research,Chance of Admit
0,1,337,118,4,1,0.92
1,2,324,107,4,1,0.76
2,3,316,104,3,1,0.72
3,4,322,110,3,1,0.80
4,5,314,103,2,0,0.65
...,...,...,...,...,...,...
95,96,304,100,4,0,0.42
96,97,306,100,2,0,0.48
97,98,331,120,3,1,0.86
98,99,332,119,4,1,0.90


In [57]:
#Encoding the data based on the Research Column
encode_text_index(testdata,'Research')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[name] = le.fit_transform(df[name])


array([0, 1], dtype=int64)

In [58]:
#Splitting the data by the Research Column
X,Y = to_xy(data_df,'Research')
testX,testY=to_xy(data_df,'Research')

In [59]:
#Generating the ANN model
model = Sequential()
model.add(Dense(12, input_dim = X.shape[1], activation='relu'))
model.add(Dense(6, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='adam')
model.fit(X,Y,verbose=2, epochs=100)

Epoch 1/100


16/16 - 1s - loss: 29.8918 - 1s/epoch - 65ms/step
Epoch 2/100
16/16 - 0s - loss: 17.3643 - 36ms/epoch - 2ms/step
Epoch 3/100
16/16 - 0s - loss: 9.3011 - 43ms/epoch - 3ms/step
Epoch 4/100
16/16 - 0s - loss: 4.2254 - 34ms/epoch - 2ms/step
Epoch 5/100
16/16 - 0s - loss: 1.7390 - 33ms/epoch - 2ms/step
Epoch 6/100
16/16 - 0s - loss: 0.8819 - 33ms/epoch - 2ms/step
Epoch 7/100
16/16 - 0s - loss: 0.7283 - 35ms/epoch - 2ms/step
Epoch 8/100
16/16 - 0s - loss: 0.6925 - 36ms/epoch - 2ms/step
Epoch 9/100
16/16 - 0s - loss: 0.6842 - 35ms/epoch - 2ms/step
Epoch 10/100
16/16 - 0s - loss: 0.6842 - 36ms/epoch - 2ms/step
Epoch 11/100
16/16 - 0s - loss: 0.6854 - 34ms/epoch - 2ms/step
Epoch 12/100
16/16 - 0s - loss: 0.6854 - 35ms/epoch - 2ms/step
Epoch 13/100
16/16 - 0s - loss: 0.6843 - 80ms/epoch - 5ms/step
Epoch 14/100
16/16 - 0s - loss: 0.6837 - 41ms/epoch - 3ms/step
Epoch 15/100
16/16 - 0s - loss: 0.6853 - 41ms/epoch - 3ms/step
Epoch 16/100
16/16 - 0s - loss: 0.6859 - 32ms/epoch - 2ms/step
Epoch 17/100

<keras.src.callbacks.History at 0x1efb14e4a50>

In [60]:
pred = model.predict(testX)
print(pred[0])

[0.3582261  0.64177394]


In [61]:
pred = np.argmax(pred, axis=1)

In [62]:
true = np.argmax(testY, axis=1)

In [63]:
print('Accuracy on test data is %.2f' % (accuracy_score(true, pred)))

Accuracy on test data is 0.57


In [64]:
print(classification_report(true,pred))

              precision    recall  f1-score   support

           0       0.67      0.05      0.09       220
           1       0.57      0.98      0.72       280

    accuracy                           0.57       500
   macro avg       0.62      0.51      0.40       500
weighted avg       0.61      0.57      0.44       500

