In [37]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler


In [38]:
# Column names for magic04.data; passing `names=` makes pandas read the file
# without a header row. The last column is the class label.
cols = [
    "fLength", "fWidth", "fSize", "fConc", "fConc1",
    "fAsym", "fM3Long", "fM3Trans", "fAlpha", "fDist", "class",
]
df = pd.read_csv("magic04.data", names=cols)

# Encode the label as an integer: 1 where the class is "g", 0 otherwise.
df["class"] = (df["class"] == "g").astype(int)

# Shuffle once with a fixed seed so every cell below sees the same split on
# each run, then cut 60% / 20% / 20% with positional slices. Slicing with
# .iloc avoids np.split on a DataFrame, which goes through the deprecated
# DataFrame.swapaxes and emits a FutureWarning.
SEED = 42
shuffled = df.sample(frac=1, random_state=SEED)
train_end = int(0.6 * len(df))
valid_end = int(0.8 * len(df))

train = shuffled.iloc[:train_end]
valid = shuffled.iloc[train_end:valid_end]
test = shuffled.iloc[valid_end:]


  return bound(*args, **kwds)


In [39]:
def scale_dataset(dataframe, oversample=False, scaler=None, fit=True):
    """Standardise the feature columns of ``dataframe`` and optionally oversample.

    Every column except the last is treated as a feature; the last column is
    the label.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Features in all columns but the last, label in the last column.
    oversample : bool, default False
        When True, resample with ``RandomOverSampler`` after scaling.
    scaler : object, optional
        Scaler exposing ``fit_transform`` / ``transform``. When omitted, a new
        ``StandardScaler`` is created and fitted on ``dataframe`` (the original
        behaviour). Pass one shared scaler to standardise several splits with
        the same statistics: fit it on the training split, then reuse it with
        ``fit=False`` for validation and test.
    fit : bool, default True
        When True the scaler is fitted on ``dataframe`` before transforming;
        when False an already-fitted ``scaler`` is only applied.

    Returns
    -------
    tuple
        ``(data, x, y)`` where ``x`` is the scaled feature matrix, ``y`` the
        label vector and ``data`` is ``x`` with ``y`` appended as last column.

    Raises
    ------
    ValueError
        If ``fit`` is False but no ``scaler`` was supplied.
    """
    if scaler is None:
        if not fit:
            raise ValueError("fit=False requires an already-fitted scaler")
        scaler = StandardScaler()

    x = dataframe[dataframe.columns[:-1]].values
    y = dataframe[dataframe.columns[-1]].values

    # Fitting is skipped only when the caller hands in a pre-fitted scaler.
    x = scaler.fit_transform(x) if fit else scaler.transform(x)

    if oversample:
        ros = RandomOverSampler()
        x, y = ros.fit_resample(x, y)

    # Re-attach the labels as a trailing column of the feature matrix.
    data = np.hstack((x, np.reshape(y, (-1, 1))))

    return data, x, y

In [40]:
# Only the training split is oversampled; validation and test rely on the
# oversample=False default. Each call rebinds train/valid/test from the input
# DataFrame to the stacked array that scale_dataset returns.
train, x_train, y_train = scale_dataset(dataframe=train, oversample=True)
valid, x_valid, y_valid = scale_dataset(dataframe=valid)
test, x_test, y_test = scale_dataset(dataframe=test)

In [41]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

# k-nearest neighbours with k=10: fit on the training split (fit() returns the
# estimator itself), then report per-class metrics on the test split.
knn_model = KNeighborsClassifier(n_neighbors=10).fit(x_train, y_train)
y_pred = knn_model.predict(x_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.72      0.77      0.75      1352
           1       0.87      0.84      0.85      2452

    accuracy                           0.81      3804
   macro avg       0.80      0.80      0.80      3804
weighted avg       0.82      0.81      0.81      3804



#Naive Bayes

In [42]:
from sklearn.naive_bayes import  GaussianNB

# Gaussian naive Bayes: fit on the training split and report on the test split.
nb_model = GaussianNB().fit(x_train, y_train)
# fit() returns the estimator itself, so both names refer to the same fitted
# model; `ng_model` is kept because the original cell defined it.
ng_model = nb_model
y_pred = nb_model.predict(x_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.69      0.41      0.52      1352
           1       0.73      0.90      0.81      2452

    accuracy                           0.72      3804
   macro avg       0.71      0.65      0.66      3804
weighted avg       0.72      0.72      0.70      3804



In [43]:
#Logistic Regression

In [44]:
from sklearn.linear_model import LogisticRegression

In [45]:
# Logistic regression with default settings, fitted on the training split
# (fit() returns the fitted estimator, so construction and fit can be chained).
lg_model = LogisticRegression().fit(x_train, y_train)


In [46]:
# Compare the logistic-regression predictions with the test labels.
y_pred = lg_model.predict(x_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.68      0.71      0.69      1352
           1       0.84      0.81      0.82      2452

    accuracy                           0.78      3804
   macro avg       0.76      0.76      0.76      3804
weighted avg       0.78      0.78      0.78      3804



#Support Vector Machines (SVM)

In [47]:
from sklearn.svm import SVC

In [33]:
# Support vector classifier with default settings, fitted on the training
# split (fit() returns the fitted estimator).
svm_model = SVC().fit(x_train, y_train)


In [34]:
# Compare the SVM predictions with the test labels.
y_pred = svm_model.predict(x_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.81      0.79      0.80      1331
           1       0.89      0.90      0.90      2473

    accuracy                           0.86      3804
   macro avg       0.85      0.85      0.85      3804
weighted avg       0.86      0.86      0.86      3804



#Neural Network
#TensorFlow

In [64]:
def plot_history(history):
    """Draw training and validation curves for loss and accuracy side by side.

    ``history`` must expose a ``history`` mapping with the keys 'loss',
    'val_loss', 'accuracy' and 'val_accuracy'. The left panel shows the two
    loss curves and the right panel the two accuracy curves; the figure is
    displayed with ``plt.show()`` and nothing is returned.
    """
    fig, axes = plt.subplots(1, 2, figsize=(10, 4))

    # (metric key, y-axis label) for the left and right panel respectively.
    panels = (
        ('loss', 'Binary Crossentropy'),
        ('accuracy', 'Accuracy'),
    )
    for axis, (metric, y_label) in zip(axes, panels):
        # Training curve first, then its validation counterpart.
        for key in (metric, 'val_' + metric):
            axis.plot(history.history[key], label=key)
        axis.set_xlabel('Epoch')
        axis.set_ylabel(y_label)
        axis.legend()
        axis.grid(True)

    plt.show()
    

In [65]:
import tensorflow as tf

def train_model(x_train, y_train, num_nodes, dropout_prob, lr, batch_size, epochs,
                validation_data=None):
    """Build, compile and fit a small binary classifier.

    The network has two hidden Dense layers of ``num_nodes`` ReLU units, each
    followed by Dropout, and a single sigmoid output unit. It is compiled with
    Adam, binary cross-entropy loss and an accuracy metric, and fitted silently
    (``verbose=0``).

    Parameters
    ----------
    x_train, y_train : array-like
        Training features (2-D, samples x features) and labels.
    num_nodes : int
        Units in each hidden layer.
    dropout_prob : float
        Dropout rate applied after each hidden layer.
    lr : float
        Learning rate handed to the Adam optimizer.
    batch_size, epochs : int
        Passed straight to ``fit``.
    validation_data : tuple, optional
        ``(x_val, y_val)`` to validate on. When omitted, the last 20% of
        ``x_train`` / ``y_train`` is held out via ``validation_split=0.2``.
        Keras takes that slice from the end of the arrays before shuffling, so
        pass explicit validation data when the row order is not random.

    Returns
    -------
    tuple
        ``(nn_model, history)``: the fitted model and the object returned by
        ``fit``.
    """
    # Size the input layer from the data instead of hard-coding 10 features.
    n_features = np.shape(x_train)[1]

    nn_model = tf.keras.Sequential([
        tf.keras.Input(shape=(n_features,)),
        tf.keras.layers.Dense(num_nodes, activation='relu'),
        tf.keras.layers.Dropout(dropout_prob),
        tf.keras.layers.Dense(num_nodes, activation='relu'),
        tf.keras.layers.Dropout(dropout_prob),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    nn_model.compile(
        optimizer=tf.keras.optimizers.Adam(lr),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    # Use the caller's validation set when given, else the original 20% split.
    if validation_data is None:
        validation_kwargs = {'validation_split': 0.2}
    else:
        validation_kwargs = {'validation_data': validation_data}

    history = nn_model.fit(
        x_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        verbose=0,
        **validation_kwargs,
    )

    return nn_model, history

In [None]:
# Exhaustive search over layer width, dropout rate, learning rate and batch
# size. The model with the lowest loss on the validation split is kept.
epochs = 100
least_val_loss = float('inf')
least_loss_model = None

# Flattened grid; the combinations come out in the same order as four nested
# loops (num_nodes outermost, batch_size innermost).
param_grid = [
    (num_nodes, dropout_prob, lr, batch_size)
    for num_nodes in [16, 32, 64]
    for dropout_prob in [0, 0.2]
    for lr in [0.1, 0.005, 0.001]
    for batch_size in [32, 64, 128]
]

for num_nodes, dropout_prob, lr, batch_size in param_grid:
    print(f"{num_nodes} nodes, dropout {dropout_prob}, lr {lr}, batch_size {batch_size}")

    model, history = train_model(x_train, y_train, num_nodes, dropout_prob, lr, batch_size, epochs)
    plot_history(history)

    # evaluate() returns the loss first, followed by the compiled metrics.
    val_loss = model.evaluate(x_valid, y_valid)[0]

    if val_loss < least_val_loss:
        least_val_loss = val_loss
        least_loss_model = model
                     


#Linear Regression