In [1]:
from sklearn.naive_bayes import BernoulliNB, GaussianNB
import numpy as np
import pandas as pd
import my_globals
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, SpatialDropout1D, BatchNormalization
from tensorflow.keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.regularizers import L1L2
import tensorflow as tf
from keras.losses import SparseCategoricalCrossentropy
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from keras import layers

In [3]:
data = pd.read_csv('feature_space_1.csv', encoding='latin1')
y = data['target']
data = data.drop('target', axis=1)

In [9]:
x_train, x_test, y_train, y_test = train_test_split(data, y, test_size=0.2)

In [17]:
rfc=RandomForestClassifier()

param_grid = { 
    'n_estimators': [400, 600, 800],
    'max_features': ['sqrt', 'log2'],
    'max_depth' : [50, 150, 250]
}

CV_rfc = GridSearchCV(estimator=rfc, param_grid=param_grid, cv=3, verbose=4)
CV_rfc.fit(x_train, y_train)

Fitting 3 folds for each of 18 candidates, totalling 54 fits
[CV 1/3] END max_depth=50, max_features=sqrt, n_estimators=400;, score=0.735 total time= 4.8min
[CV 2/3] END max_depth=50, max_features=sqrt, n_estimators=400;, score=0.736 total time= 5.2min
[CV 3/3] END max_depth=50, max_features=sqrt, n_estimators=400;, score=0.737 total time= 4.8min
[CV 1/3] END max_depth=50, max_features=sqrt, n_estimators=600;, score=0.735 total time= 7.1min
[CV 2/3] END max_depth=50, max_features=sqrt, n_estimators=600;, score=0.733 total time= 7.7min
[CV 3/3] END max_depth=50, max_features=sqrt, n_estimators=600;, score=0.737 total time= 7.8min
[CV 1/3] END max_depth=50, max_features=sqrt, n_estimators=800;, score=0.736 total time= 9.5min
[CV 2/3] END max_depth=50, max_features=sqrt, n_estimators=800;, score=0.734 total time= 9.5min
[CV 3/3] END max_depth=50, max_features=sqrt, n_estimators=800;, score=0.737 total time= 9.9min
[CV 1/3] END max_depth=50, max_features=log2, n_estimators=400;, score=0.75

In [18]:
rfc = CV_rfc.best_estimator_
print(rfc)
rfc.fit(x_train, y_train)
y_pred = rfc.predict(x_test)
print(accuracy_score(y_test, y_pred))

RandomForestClassifier(max_depth=250, max_features='log2', n_estimators=800)
0.77345


In [4]:
X_train, X_test, y_train, y_test = train_test_split(data, y, test_size=0.2)

X_train_3d = np.reshape(X_train.values, (X_train.shape[0], X_train.shape[1], 1))
X_test_3d = np.reshape(X_test.values, (X_test.shape[0], X_test.shape[1], 1))

encoder = LabelEncoder()
y_train_encoded = encoder.fit_transform(y_train)
y_test_encoded = encoder.transform(y_test)
y_train_full01 = to_categorical(y_train_encoded)
y_test01 = to_categorical(y_test_encoded)

In [13]:
network=Sequential()

act='relu'
# Input layer
network.add(layers.Dense(1280,activation=act,input_shape=(5010,)))
network.add(layers.Dense(640,activation=act))
network.add(layers.Dense(320,activation=act))
# Output layer
network.add(layers.Dense(2,activation='softmax',))
network.compile(optimizer=Adam(),loss='categorical_crossentropy',metrics=['accuracy'])

#Adding an early stopping
es = EarlyStopping(monitor='val_accuracy', 
                   mode='max', 
                   patience=5,
                   restore_best_weights=True)

history=network.fit(X_train, y_train_full01, epochs=30, batch_size=100, callbacks =[es], validation_data=(X_test, y_test01))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30


In [5]:
model = Sequential()

model.add(LSTM(16, input_shape=(5010, 1), activation='relu', dropout=0.1, recurrent_dropout=0.1, return_sequences=True))
model.add(SpatialDropout1D(0.1))

model.add(LSTM(16, activation='relu', dropout=0.1, recurrent_dropout=0.1, return_sequences=True))

model.add(LSTM(16, activation='relu', dropout=0.1, recurrent_dropout=0.1))

model.add(Dense(2, activation='softmax'))

# Print model summary
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 5010, 16)          1152      
                                                                 
 spatial_dropout1d (SpatialD  (None, 5010, 16)         0         
 ropout1D)                                                       
                                                                 
 lstm_1 (LSTM)               (None, 5010, 16)          2112      
                                                                 
 lstm_2 (LSTM)               (None, 16)                2112      
                                                                 
 dense (Dense)               (None, 2)                 34        
                                                                 
Total params: 5,410
Trainable params: 5,410
Non-trainable params: 0
______________________________________________________

In [None]:
# Compile the model
model.compile(optimizer=Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Define early stopping callback
es = EarlyStopping(monitor='val_loss', mode='max', patience=3, restore_best_weights=True)

# Train the model with reduced batch size
model.fit(X_train_3d, y_train_full01, batch_size=1000, epochs=10, callbacks=[es], validation_data=(X_test_3d, y_test01))

Epoch 1/10
 1/80 [..............................] - ETA: 5:21:42 - loss: 0.6933 - accuracy: 0.4970