In [3]:
import numpy as np
import pandas as pd
import sklearn

from numpy import genfromtxt
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, confusion_matrix, f1_score, precision_score, recall_score)
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor


# Load the dataset. The CSV has one header row; column 0 holds the class
# label and columns 1..N_FEATURES hold the integer features.
N_FEATURES = 1000

# Parse the file once and slice, instead of reading the whole CSV twice.
data = genfromtxt('file_less.csv', delimiter=',', usecols=range(N_FEATURES + 1), dtype=int, skip_header=1)
target = data[:, 0]
feature = data[:, 1:]

print("Feature shape:", feature.shape)
print("Target shape:", target.shape)

# Map the raw class labels onto contiguous integers 0..K-1.
labels = LabelEncoder().fit_transform(target)

# Split BEFORE scaling: the scaler must learn its mean/variance from the
# training rows only, otherwise test-set statistics leak into training.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    feature, labels, test_size=0.25, random_state=0
)

scaler = StandardScaler().fit(x_train_raw)
x_train = scaler.transform(x_train_raw)
x_test = scaler.transform(x_test_raw)

# Kept so that any cell still referring to `feature_std` keeps working; it is
# now scaled with the training-set statistics.
feature_std = scaler.transform(feature)

print("Training set shape:", x_train.shape)
print("Testing set shape:", x_test.shape)

def print_stats_metrics(y_test, y_pred, average='binary'):
    """Print a classification report for one set of predictions.

    Prints, in order: accuracy, the confusion matrix, a pandas crosstab of
    true vs. predicted labels (with margins), precision, recall and F1.

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_pred : array-like
        Predicted labels, same length as ``y_test``.
    average : str, default 'binary'
        Averaging mode forwarded to ``precision_score``, ``recall_score`` and
        ``f1_score``. The default reproduces the previous behaviour; pass
        e.g. ``'macro'`` or ``'weighted'`` when there are more than two
        classes, since ``'binary'`` is rejected by scikit-learn in that case.

    Returns
    -------
    None
    """
    print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
    confmat = confusion_matrix(y_true=y_test, y_pred=y_pred)
    print("Confusion matrix:")
    print(confmat)
    print("\nCrosstab:")
    print(pd.crosstab(y_test, y_pred, rownames=['True'], colnames=['Predicted'], margins=True))
    # All three scores share the same averaging mode so they stay comparable.
    print('Precision: %.3f' % precision_score(y_true=y_test, y_pred=y_pred, average=average))
    print('Recall: %.3f' % recall_score(y_true=y_test, y_pred=y_pred, average=average))
    print('F1-measure: %.3f' % f1_score(y_true=y_test, y_pred=y_pred, average=average))

# Baseline classifiers: fit each on the training split and report test metrics.
# The stochastic models are seeded so that a re-run reproduces the same numbers.
clfLog = LogisticRegression()
clfRandForest = RandomForestClassifier(random_state=0)
# A classifier (not a regressor) is used so predictions are class labels
# directly and no rounding of continuous outputs is needed.
clfDT = DecisionTreeClassifier(random_state=0)
clfNB = GaussianNB()

baseline_models = [
    ("Logistic Regression", clfLog),
    ("Random Forest", clfRandForest),
    ("Decision Tree", clfDT),
    ("Naive Bayes", clfNB),
]

for name, model in baseline_models:
    print(f"\n======================== {name} ========================")
    model.fit(x_train, y_train)
    predictions = model.predict(x_test)
    print_stats_metrics(y_test, predictions)

import tensorflow as tf

def layer(input, weight_shape, bias_shape, activation=tf.nn.relu):
    """Build one fully connected layer in the current variable scope.

    Creates the variables ``W`` and ``b`` via ``tf.get_variable`` and returns
    ``activation(input @ W + b)``.

    Parameters
    ----------
    input : tensor
        Layer input; it is right-multiplied by ``W``.
    weight_shape : sequence of two ints
        Shape of ``W``; ``weight_shape[0]`` is used to scale the initialiser.
    bias_shape : sequence of one int
        Shape of ``b``.
    activation : callable or None, default ``tf.nn.relu``
        Applied to the affine output. ``None`` returns the raw affine output,
        which is what a logits layer needs.

    Returns
    -------
    tensor
        The layer output.
    """
    # Normal initialiser with stddev = sqrt(2 / weight_shape[0]) (He-style scaling).
    weight_stddev = (2.0 / weight_shape[0]) ** 0.5
    w_init = tf.random_normal_initializer(stddev=weight_stddev)
    bias_init = tf.constant_initializer(value=0)
    W = tf.get_variable("W", weight_shape, initializer=w_init)
    b = tf.get_variable("b", bias_shape, initializer=bias_init)
    pre_activation = tf.matmul(input, W) + b
    if activation is None:
        return pre_activation
    return activation(pre_activation)


def inference_deep_layers(x_tf, n_features, n_columns):
    """Build the feed-forward network and return its logits.

    Four ReLU hidden layers (30, 25, 10 and 5 units) are followed by a linear
    output layer with ``n_columns`` units.

    Parameters
    ----------
    x_tf : tensor
        Network input with ``n_features`` columns.
    n_features : int
        Number of input features.
    n_columns : int
        Number of output units (one per class).

    Returns
    -------
    tensor
        Unnormalised class scores (logits).
    """
    with tf.variable_scope("hidden_1"):
        hidden_1 = layer(x_tf, [n_features, 30], [30])
    with tf.variable_scope("hidden_2"):
        hidden_2 = layer(hidden_1, [30, 25], [25])
    with tf.variable_scope("hidden_3"):
        hidden_3 = layer(hidden_2, [25, 10], [10])
    with tf.variable_scope("hidden_4"):
        hidden_4 = layer(hidden_3, [10, 5], [5])
    with tf.variable_scope("output"):
        # No ReLU here: the softmax cross-entropy loss expects unscaled logits,
        # and clamping them at zero would prevent negative class scores.
        output = layer(hidden_4, [5, n_columns], [n_columns], activation=None)
    return output

def loss_deep(output, y_tf):
    """Return the mean softmax cross-entropy of ``output`` against ``y_tf``.

    ``output`` is passed as the logits and ``y_tf`` as the labels; the
    per-example losses are averaged into a single scalar tensor.
    """
    per_example = tf.nn.softmax_cross_entropy_with_logits(labels=y_tf, logits=output)
    return tf.reduce_mean(per_example)

def training(cost, learning_rate):
    """Return the op that minimises ``cost`` with plain gradient descent.

    A ``GradientDescentOptimizer`` is created with the given ``learning_rate``
    and its ``minimize`` op for ``cost`` is returned.
    """
    return tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

def evaluate(output, y_tf):
    """Return the accuracy tensor for ``output`` against ``y_tf``.

    The arg-max along axis 1 of both tensors is compared element-wise and the
    matches, cast to float, are averaged.
    """
    predicted_class = tf.argmax(output, 1)
    true_class = tf.argmax(y_tf, 1)
    hits = tf.equal(predicted_class, true_class)
    return tf.reduce_mean(tf.cast(hits, "float"))

# Convert labels to one-hot encoding
from sklearn.preprocessing import OneHotEncoder

# The encoder is fitted on every encoded label (not just the training split)
# so that one-hot column k always corresponds to class k, even if a class is
# missing from y_train. `.toarray()` densifies the default sparse result and
# avoids the `sparse=` / `sparse_output=` keyword, which differs between
# scikit-learn versions.
onehot_encoder = OneHotEncoder()
onehot_encoder.fit(labels.reshape(-1, 1))
y_train_onehot = onehot_encoder.transform(y_train.reshape(-1, 1)).toarray()
y_test_onehot = onehot_encoder.transform(y_test.reshape(-1, 1)).toarray()

# Set parameters
learning_rate = 0.001
n_epochs = 5000
batch_size = 32

# Seed the graph before any variable is created so weight initialisation is
# reproducible across runs.
tf.set_random_seed(0)

# Create placeholders
x_tf = tf.placeholder("float", [None, x_train.shape[1]])
y_tf = tf.placeholder("float", [None, y_train_onehot.shape[1]])

# Build the graph
output = inference_deep_layers(x_tf, x_train.shape[1], y_train_onehot.shape[1])
cost = loss_deep(output, y_tf)
train_op = training(cost, learning_rate)
eval_op = evaluate(output, y_tf)
# Built once here rather than inside the session, so no op is added to the
# graph at prediction time.
predict_op = tf.argmax(output, 1)

# Initialize variables
init = tf.global_variables_initializer()

# Batch bookkeeping is loop-invariant, so compute it once. Ceiling division
# keeps the final, possibly smaller, batch instead of dropping it.
n_samples = len(x_train)
total_batch = (n_samples + batch_size - 1) // batch_size

# Run the graph
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(n_epochs):
        avg_cost = 0.

        for start in range(0, n_samples, batch_size):
            batch_x = x_train[start:start + batch_size]
            batch_y = y_train_onehot[start:start + batch_size]
            _, c = sess.run([train_op, cost], feed_dict={x_tf: batch_x, y_tf: batch_y})
            # Weight each batch by its size so a short last batch does not
            # distort the epoch's mean cost.
            avg_cost += c * len(batch_x) / n_samples

        if epoch % 500 == 0:
            train_acc = sess.run(eval_op, feed_dict={x_tf: x_train, y_tf: y_train_onehot})
            print(f"Epoch {epoch+1}/{n_epochs}, Cost: {avg_cost:.5f}, Train Acc: {train_acc:.3f}")

    print("Optimization Finished!")

    # Test the model
    test_acc = sess.run(eval_op, feed_dict={x_tf: x_test, y_tf: y_test_onehot})
    print(f"Test Accuracy: {test_acc:.3f}")

    # Get predictions
    predictions = sess.run(predict_op, feed_dict={x_tf: x_test})
    print_stats_metrics(y_test, predictions)
    



FileNotFoundError: file_less.csv not found.

In [1]:
def print_stats_metrics(y_test, y_pred, average='binary'):
    """Print a classification report for one set of predictions.

    Prints, in order: accuracy, the confusion matrix, a pandas crosstab of
    true vs. predicted labels (with margins), precision, recall and F1.

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_pred : array-like
        Predicted labels, same length as ``y_test``.
    average : str, default 'binary'
        Averaging mode forwarded to ``precision_score``, ``recall_score`` and
        ``f1_score``. The default reproduces the previous behaviour; pass
        e.g. ``'macro'`` or ``'weighted'`` when there are more than two
        classes, since ``'binary'`` is rejected by scikit-learn in that case.

    Returns
    -------
    None
    """
    print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
    confmat = confusion_matrix(y_true=y_test, y_pred=y_pred)
    print("Confusion matrix:")
    print(confmat)
    print("\nCrosstab:")
    print(pd.crosstab(y_test, y_pred, rownames=['True'], colnames=['Predicted'], margins=True))
    # All three scores share the same averaging mode so they stay comparable.
    print('Precision: %.3f' % precision_score(y_true=y_test, y_pred=y_pred, average=average))
    print('Recall: %.3f' % recall_score(y_true=y_test, y_pred=y_pred, average=average))
    print('F1-measure: %.3f' % f1_score(y_true=y_test, y_pred=y_pred, average=average))

In [None]:
import tensorflow as tf