In [33]:
from imblearn.over_sampling import RandomOverSampler
import pandas as pd

# Read the CSV file into a DataFrame
df = pd.read_csv('Creditcard_data.csv')

# Separate the feature matrix X and the target variable y
X = df.drop(columns=['Class'])
y = df['Class']

# Apply random oversampling to balance the classes
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X, y)

# Create a new DataFrame with the resampled data
resampled_df = pd.DataFrame(X_resampled, columns=X.columns)
resampled_df['Class'] = y_resampled

# Write the resampled DataFrame to a new CSV file
resampled_df.to_csv('balanced_dataf.csv', index=False)


In [35]:
from sklearn.model_selection import train_test_split
import math
data = pd.read_csv('balanced_dataf.csv')

X1 = data.drop('Class', axis=1)
y1 = data['Class']
N = len(data)
p = 0.5
c = 0.95  # desired confidence level
e = 0.05  # desired margin of error

z = 1.96  # z-score for 95% confidence level
n = math.ceil((z**2 * p * (1-p)) / e**2)

# Set the sample size
sample_size = n  # Set the desired sample size
X_sampled = X1.sample(n=sample_size, random_state=0)
y_sampled = y1[X_sampled.index]  # Match the sampled output variable with the sampled input variables
    
# Combine the sampled input and output variables into a single DataFrame
sampled_df = pd.concat([X_sampled, y_sampled], axis=1)
    
# Save the sampled DataFrame to a CSV file
sampled_df.to_csv(f'simple_random_dataset.csv', index=False)

Required sample size: 384


In [36]:
import pandas as pd
import numpy as np
import math

# Read the CSV file into a DataFrame
df = pd.read_csv('balanced_dataf.csv')

# Separate the feature matrix X and the target variable y
X = df.drop(columns=['Class'])
y = df['Class']

# Determine the number of strata (in this case, we use a binary target variable, so there are two strata)
num_strata = 2

# Initialize an empty list to store the stratified samples
samples = []

# Loop over each stratum
for i in range(num_strata):
    # Subset the data to include only the observations in the current stratum
    stratum_data = df[df['Class'] == i]
    
    # Calculate the sample size for the current stratum
    stratum_size = len(stratum_data)
    population_size = len(df)
    desired_margin_of_error = 0.05
    confidence_level = 0.95
    z_score = 1.96  # for a 95% confidence level
    p = stratum_size / population_size
    q = 1 - p
    n = (z_score**2 * p * q * population_size) / ((z_score**2 * p * q) + (desired_margin_of_error**2 * (population_size-1)))
    n = math.ceil(n)
    
    # If the calculated sample size for the current stratum is greater than the number of observations in the stratum, set the sample size to the number of observations
    if n > stratum_size:
        n = stratum_size
    
    # Randomly select observations from the current stratum to include in the sample
    sample_indices = np.random.choice(stratum_data.index, size=n, replace=False)
    stratum_sample = stratum_data.loc[sample_indices]
    
    # Add the current stratum sample to the list of stratified samples
    samples.append(stratum_sample)

# Combine the stratified samples into a single DataFrame
stratified_sample = pd.concat(samples)

# Write the stratified sample to a new CSV file
stratified_sample.to_csv('stratified_sample.csv', index=False)


In [48]:
import pandas as pd
import numpy as np
import math
from sklearn.cluster import KMeans

# Load the data
data = pd.read_csv('balanced_dataf.csv')

# Define the cluster size
C = 100

# Calculate the number of clusters
n_clusters = math.ceil(len(data)/C)

# Create a KMeans object with the calculated number of clusters
kmeans = KMeans(n_clusters=n_clusters)

# Fit the KMeans object to the data
kmeans.fit(data.drop('Class', axis=1))

# Add the cluster labels to the data
data['Cluster'] = kmeans.labels_

# Calculate the sample size
z = 1.96
p = 0.5
e = 0.05
s = 1
N = len(data)
C = 2
n = math.ceil((z**2 * p * (1-p) * (N/C)) / ((e**2) + ((z**2 * p * (1-p))/(C-1))))

# Create an empty dataframe to hold the sample
sample = pd.DataFrame(columns=data.columns)

# Loop through each cluster
for i in range(n_clusters):
    # Get the data for the current cluster
    cluster_data = data[data['Cluster'] == i]
    cluster_size = len(cluster_data)
    cluster_sample_size = math.ceil((cluster_size/N)*n)
    if cluster_sample_size > cluster_size:
         cluster_sample_size = cluster_size
        
    # Sample from the current cluster
    cluster_sample = cluster_data.sample(n=cluster_sample_size, replace=False)
    
    # Add the cluster sample to the overall sample dataframe
    sample = pd.concat([sample, cluster_sample])

# Remove the cluster column from the sample
sample = sample.drop('Cluster', axis=1)

# Save the sample to a CSV file
sample.to_csv('cluster_sample_dataset.csv', index=False)

    
    # If the cluster sample size is larger than the cluster size, set it to the cluster size



In [49]:
import pandas as pd
import math

# Load the dataset "Creditcard_data" into a Pandas dataframe
df = pd.read_csv('balanced_dataf.csv')

# Calculate the number of rows in the dataset
n = len(df)

# Set the sampling interval "k" as the square root of the number of rows in the dataset
k = int(math.sqrt(n))

# Select every "k" row starting from a random index in the dataset
sample = df.iloc[::k]

# Print the first few rows of the sample
sample.to_csv('systematic_dataset.csv', index=False)


In [120]:
#convinence sampling
import csv
import random

# Open the CSV file for reading
with open('balanced_dataf.csv', 'r') as csv_file:
    reader = csv.reader(csv_file)
    # Skip the header row
    next(reader)
    # Create a list of all the data rows
    data_rows = list(reader)

# Choose a sample size of 100
sample_size = 100

# Use convenience sampling to randomly select the desired number of rows
sample = random.sample(data_rows, sample_size)

s=pd.DataFrame(sample)
s.to_csv('convinence_dataset.csv', header=['Time','V1','V2','V3','V4','V5','V6','V7','V8','V9','V10','V11','V12','V13','V14','V15','V16','V17','V18','V19','V20','V21','V22','V23','V24','V25','V26','V27','V28','Amount','Class'],index=False)



In [125]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# load the CSV file into a pandas dataframe
data = pd.read_csv('simple_random_dataset.csv')
list1=[]
# split the data into training and testing sets
train, test = train_test_split(data, test_size=0.3)

# create a random forest classifier model and fit it to the training data
model = RandomForestClassifier(n_estimators=100)
model.fit(train.drop('Class', axis=1), train['Class'])

# predict the output values for the testing data
predictions = model.predict(test.drop('Class', axis=1))

# evaluate the accuracy of the model
accuracy = accuracy_score(test['Class'], predictions)
print('Accuracy:', accuracy)
list1.append(accuracy)


Accuracy: 0.9913793103448276


In [126]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# load the CSV file into a pandas dataframe
data = pd.read_csv('stratified_sample.csv')
# split the data into training and testing sets
train, test = train_test_split(data, test_size=0.3)

# create a random forest classifier model and fit it to the training data
model = RandomForestClassifier(n_estimators=100)
model.fit(train.drop('Class', axis=1), train['Class'])

# predict the output values for the testing data
predictions = model.predict(test.drop('Class', axis=1))

# evaluate the accuracy of the model
accuracy = accuracy_score(test['Class'], predictions)
print('Accuracy:', accuracy)
list1.append(accuracy)


Accuracy: 0.9891891891891892


In [127]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# load the CSV file into a pandas dataframe
data = pd.read_csv('systematic_dataset.csv')
# split the data into training and testing sets
train, test = train_test_split(data, test_size=0.3)

# create a random forest classifier model and fit it to the training data
model = RandomForestClassifier(n_estimators=100)
model.fit(train.drop('Class', axis=1), train['Class'])

# predict the output values for the testing data
predictions = model.predict(test.drop('Class', axis=1))

# evaluate the accuracy of the model
accuracy = accuracy_score(test['Class'], predictions)
print('Accuracy:', accuracy)
list1.append(accuracy)


Accuracy: 0.9166666666666666


In [147]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# load the CSV file into a pandas dataframe
data = pd.read_csv('cluster_sample_dataset.csv')
# split the data into training and testing sets
train, test = train_test_split(data, test_size=0.3)

# create a random forest classifier model and fit it to the training data
model = RandomForestClassifier(n_estimators=100)
model.fit(train.drop('Class', axis=1), train['Class'])

# predict the output values for the testing data
predictions = model.predict(test.drop('Class', axis=1))

# evaluate the accuracy of the model
accuracy = accuracy_score(test['Class'], predictions)
print('Accuracy:', accuracy)
list1.append(accuracy)


Accuracy: 0.9956709956709957


In [152]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# load the CSV file into a pandas dataframe
data = pd.read_csv('convinence_dataset.csv')
# split the data into training and testing sets
train, test = train_test_split(data, test_size=0.3)

# create a random forest classifier model and fit it to the training data
model = RandomForestClassifier(n_estimators=100)
model.fit(train.drop('Class', axis=1), train['Class'])

# predict the output values for the testing data
predictions = model.predict(test.drop('Class', axis=1))

# evaluate the accuracy of the model
accuracy = accuracy_score(test['Class'], predictions)
print('Accuracy:', accuracy)
list1.append(accuracy)


Accuracy: 0.9666666666666667


In [154]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('simple_random_dataset.csv')
list2=[]
# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a KNN classifier with k=5
knn = KNeighborsClassifier(n_neighbors=5)

# Train the model on the training data
knn.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = knn.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list2.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.948051948051948


In [155]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('stratified_sample.csv')

# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a KNN classifier with k=5
knn = KNeighborsClassifier(n_neighbors=5)

# Train the model on the training data
knn.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = knn.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list2.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.9435483870967742


In [156]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('systematic_dataset.csv')

# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a KNN classifier with k=5
knn = KNeighborsClassifier(n_neighbors=5)

# Train the model on the training data
knn.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = knn.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list2.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.5


In [157]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('cluster_sample_dataset.csv')

# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a KNN classifier with k=5
knn = KNeighborsClassifier(n_neighbors=5)

# Train the model on the training data
knn.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = knn.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list2.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.961038961038961


In [158]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('convinence_dataset.csv')

# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a KNN classifier with k=5
knn = KNeighborsClassifier(n_neighbors=5)

# Train the model on the training data
knn.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = knn.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list2.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.75


In [160]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('simple_random_dataset.csv')
list3=[]
# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an SVM classifier with a linear kernel
svm = SVC(kernel='linear')

# Train the model on the training data
svm.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = svm.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list3.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.935064935064935


In [161]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('stratified_sample.csv')
# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an SVM classifier with a linear kernel
svm = SVC(kernel='linear')

# Train the model on the training data
svm.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = svm.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list3.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.9354838709677419


In [162]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('systematic_dataset.csv')
# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an SVM classifier with a linear kernel
svm = SVC(kernel='linear')

# Train the model on the training data
svm.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = svm.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list3.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.75


In [163]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('cluster_sample_dataset.csv')
# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an SVM classifier with a linear kernel
svm = SVC(kernel='linear')

# Train the model on the training data
svm.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = svm.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list3.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.9090909090909091


In [164]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('convinence_dataset.csv')
# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an SVM classifier with a linear kernel
svm = SVC(kernel='linear')

# Train the model on the training data
svm.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = svm.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list3.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.85


In [165]:
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('simple_random_dataset.csv')
list4=[]
# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Naive Bayes classifier
nb = GaussianNB()

# Train the model on the training data
nb.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = nb.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list4.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.7792207792207793


In [166]:
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('stratified_sample.csv')

# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Naive Bayes classifier
nb = GaussianNB()

# Train the model on the training data
nb.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = nb.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list4.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.6935483870967742


In [167]:
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('systematic_dataset.csv')

# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Naive Bayes classifier
nb = GaussianNB()

# Train the model on the training data
nb.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = nb.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list4.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.875


In [168]:
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('cluster_sample_dataset.csv')

# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Naive Bayes classifier
nb = GaussianNB()

# Train the model on the training data
nb.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = nb.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list4.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.7532467532467533


In [170]:
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('convinence_dataset.csv')

# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Naive Bayes classifier
nb = GaussianNB()

# Train the model on the training data
nb.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = nb.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list4.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.7


In [171]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('simple_random_dataset.csv')
list5=[]
# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Decision Tree classifier
dt = DecisionTreeClassifier()

# Train the model on the training data
dt.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = dt.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list5.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.987012987012987


In [172]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('stratified_sample.csv')

# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Decision Tree classifier
dt = DecisionTreeClassifier()

# Train the model on the training data
dt.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = dt.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list5.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.9919354838709677


In [173]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('systematic_dataset.csv')

# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Decision Tree classifier
dt = DecisionTreeClassifier()

# Train the model on the training data
dt.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = dt.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list5.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.875


In [176]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('cluster_sample_dataset.csv')

# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Decision Tree classifier
dt = DecisionTreeClassifier()

# Train the model on the training data
dt.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = dt.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list5.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.9935064935064936


In [177]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the data from a CSV file into a Pandas dataframe
data = pd.read_csv('convinence_dataset.csv')

# Separate the features (X) from the target variable (y)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Decision Tree classifier
dt = DecisionTreeClassifier()

# Train the model on the training data
dt.fit(X_train, y_train)

# Use the model to make predictions on the testing data
y_pred = dt.predict(X_test)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
list5.append(accuracy)
print("Accuracy:", accuracy)


Accuracy: 0.85


In [191]:
df = pd.DataFrame([list1,list2,list3,list4,list5],columns=['Random sampling','Stratified sampling','Systematic sampling','Cluster sampling','Convenience sampling'],index=['Random forest','K-nearest neighour(KNN)','support vector machine(SVM)','Naive bayes','Decision Tree'])




In [192]:
df



Unnamed: 0,Random sampling,Stratified sampling,Systematic sampling,Cluster sampling,Convenience sampling
Random forest,0.991379,0.989189,0.916667,0.995671,0.966667
K-nearest neighour(KNN),0.948052,0.943548,0.5,0.961039,0.75
support vector machine(SVM),0.935065,0.935484,0.75,0.909091,0.85
Naive bayes,0.779221,0.693548,0.875,0.753247,0.7
Decision Tree,0.987013,0.991935,0.875,0.993506,0.85


In [194]:
def highlight_max(data):
    is_max = data == np.nanmax(df.values)
    return ['background-color: red' if v else '' for v in is_max]

# Apply the custom function to the entire dataframe and display the styled dataframe
df.style.apply(highlight_max)

Unnamed: 0,Random sampling,Stratified sampling,Systematic sampling,Cluster sampling,Convenience sampling
Random forest,0.991379,0.989189,0.916667,0.995671,0.966667
K-nearest neighour(KNN),0.948052,0.943548,0.5,0.961039,0.75
support vector machine(SVM),0.935065,0.935484,0.75,0.909091,0.85
Naive bayes,0.779221,0.693548,0.875,0.753247,0.7
Decision Tree,0.987013,0.991935,0.875,0.993506,0.85


In [195]:
df.to_csv('finaltable.csv',index=False)