In [54]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split 
import antropy as ant
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report


In [55]:
#Load data set
df = pd.read_csv("synthetic_dataset.csv")

In [56]:
df

Unnamed: 0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,...,feature_92,feature_93,feature_94,feature_95,feature_96,feature_97,feature_98,feature_99,feature_100,target
0,0.496714,-0.138264,0.647689,1.523030,-0.234153,-0.234137,1.579213,0.767435,-0.469474,0.542560,...,0.968645,-0.702053,-0.327662,-0.392108,-1.463515,0.296120,0.261055,0.005113,-0.234587,0
1,-1.415371,-0.420645,-0.342715,-0.802277,-0.161286,0.404051,1.886186,0.174578,0.257550,-0.074446,...,0.856399,0.214094,-1.245739,0.173181,0.385317,-0.883857,0.153725,0.058209,-1.142970,1
2,0.357787,0.560785,1.083051,1.053802,-1.377669,-0.937825,0.515035,0.513786,0.515048,3.852731,...,-0.493001,-0.589365,0.849602,0.357015,-0.692910,0.899600,0.307300,0.812862,0.629629,0
3,-0.828995,-0.560181,0.747294,0.610370,-0.020902,0.117327,1.277665,-0.591571,0.547097,-0.202193,...,0.491919,-1.320233,1.831459,1.179440,-0.469176,-1.713135,1.353872,-0.114540,1.237816,1
4,-1.594428,-0.599375,0.005244,0.046981,-0.450065,0.622850,-1.067620,-0.142379,0.120296,0.514439,...,1.479944,0.077368,-0.861284,1.523124,0.538910,-1.037246,-0.190339,-0.875618,-1.382800,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,1.546130,-0.341922,0.935659,0.524541,-1.962045,1.120674,-0.476735,0.431624,-0.488302,-1.532980,...,0.828913,0.857793,0.308192,0.689349,0.253375,0.580158,-0.057649,-0.738169,-0.059192,1
996,-0.602636,0.311556,-1.054501,0.009414,1.605308,2.025179,0.959833,1.238023,-0.871019,-0.575881,...,0.394662,0.417811,-1.562289,0.052684,0.744491,-0.211470,-0.861818,-0.115021,0.067493,1
997,-1.393845,-0.549546,0.717000,0.399731,0.284135,1.534209,0.150712,0.145437,1.170655,-0.014066,...,0.806683,-2.122863,0.837365,-0.196445,-1.655225,-0.607038,0.979526,0.004064,-0.646857,0
998,0.100546,-0.141531,-1.206888,-1.230257,0.208568,0.630480,-0.406529,0.242025,-0.780645,1.871417,...,1.222265,-1.459383,0.345770,-0.211672,-1.762003,-0.068708,-0.389929,-0.463581,1.605959,1


In [57]:
X = df.drop(columns = ["target"])
y = df["target"]

In [58]:
y

0      0
1      1
2      0
3      1
4      0
      ..
995    1
996    1
997    0
998    1
999    1
Name: target, Length: 1000, dtype: int64

In [59]:
def compute_nonlinear_features(row):
    row = np.asarray(row, dtype=np.float64).flatten()  # Ensure row is a proper 1D array

    if row.ndim != 1 or len(row) < 2:  # Validate shape
        raise ValueError(f"Invalid input to entropy functions: {row.shape}")

    entropy = ant.sample_entropy(row)
    perm_entropy = ant.perm_entropy(row, normalize=True)
    higuchi_fd = ant.higuchi_fd(row)
    petrosian_fd = ant.petrosian_fd(row)  # Corrected function call

    return [entropy, perm_entropy, higuchi_fd, petrosian_fd]

In [60]:
X_nonlinear = np.apply_along_axis(compute_nonlinear_features, axis=1, arr=X.values)


In [61]:
#convert to dataframe
nonlinear_features_df=pd.DataFrame(X_nonlinear,columns=["Entropy","Perm_Entrpy","Higuchi_FD","Petrosian_FD"])

In [62]:
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report 

In [63]:
X_train , X_test, y_train , y_test = train_test_split(nonlinear_features_df, y , test_size= 0.2 , random_state= 42)

In [64]:
X_train = np.nan_to_num(X_train, nan=0.0, posinf=np.max(X_train[np.isfinite(X_train)]), neginf=np.min(X_train[np.isfinite(X_train)]))
X_test = np.nan_to_num(X_test, nan=0.0, posinf=np.max(X_train[np.isfinite(X_train)]), neginf=np.min(X_train[np.isfinite(X_train)]))


In [65]:
print("Checking for NaN values:", np.isnan(X_train).sum())
print("Checking for Inf values:", np.isinf(X_train).sum())

Checking for NaN values: 0
Checking for Inf values: 0


In [75]:
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Step 1: Scaling
    ('svm', SVC(kernel='sigmoid', C=1.0))  # Step 2: SVM classifier    #rbf
])

In [76]:
pipeline.fit(X_train, y_train)


In [77]:
# Make predictions on the test set
y_pred = pipeline.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred, output_dict=True)

print("Model Accuracy:", accuracy)
print("Classification Report:", classification_rep)

Model Accuracy: 0.485
Classification Report: {'0': {'precision': 0.4909090909090909, 'recall': 0.5346534653465347, 'f1-score': 0.5118483412322274, 'support': 101.0}, '1': {'precision': 0.4777777777777778, 'recall': 0.43434343434343436, 'f1-score': 0.455026455026455, 'support': 99.0}, 'accuracy': 0.485, 'macro avg': {'precision': 0.48434343434343435, 'recall': 0.4844984498449845, 'f1-score': 0.4834373981293412, 'support': 200.0}, 'weighted avg': {'precision': 0.4844090909090909, 'recall': 0.485, 'f1-score': 0.4837215075603701, 'support': 200.0}}


In [78]:
import ace_tools as tools

ModuleNotFoundError: No module named 'ace_tools'

In [71]:
#Display result
