In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import tkinter as tk
import joblib
from tkinter import filedialog



ModuleNotFoundError: No module named 'tkinter'

In [None]:
# Ask the user for the input CSV through a native file dialog.
# A Tk root must exist for the dialog to work; it is hidden so that only the
# dialog itself is shown.
root = tk.Tk()
root.withdraw()
try:
    file_path = filedialog.askopenfilename(filetypes=[("CSV Files", "*.csv")])
finally:
    # Always release the hidden root window, even if the dialog raises,
    # so re-running this cell does not pile up Tk instances.
    root.destroy()

# The dialog returns an empty value when it is cancelled; fail with a clear
# message here instead of letting pd.read_csv raise an obscure error.
if not file_path:
    raise FileNotFoundError("No CSV file was selected.")

# Load the selected CSV file into a DataFrame
data = pd.read_csv(file_path)



In [None]:
# Encode the device serial number as integer codes so it can be used as a feature
le = LabelEncoder()
data['deviceSerialNumber'] = le.fit_transform(data['deviceSerialNumber'])

# Split the data into features and target
X = data[['deviceSerialNumber', 'beaconId', 'rssiCh37', 'rssiCh38', 'rssiCh39','seqNo']]
y = data['Block'].astype(int)  # Target must be an integer class label

# Split BEFORE scaling: the scaler must only see training rows, otherwise
# the mean/variance of the test rows leak into the training features.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=72)

# Fit the scaler on the training set only, then apply the same transform to
# the test set. X itself is left as the unscaled feature frame.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)



In [None]:
# Candidate values sampled by the randomized hyper-parameter search.
# 'auto' is intentionally not listed for max_features: it was deprecated in
# scikit-learn 1.1 and removed in 1.3, and for classifiers it was only an
# alias of 'sqrt', so keeping it would make those candidates fail to fit.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 4, 6],
    'min_samples_leaf': [1, 2, 3],
    'max_features': ['sqrt', 'log2']
}

# Base estimator; RandomizedSearchCV fits clones of it, never this object itself
rf = RandomForestClassifier(random_state=42)

# Randomized search: 10 sampled candidates, 5-fold cross-validation, scored by
# accuracy. random_state pins which candidates are drawn so that a
# Restart & Run All reproduces the same best parameters.
random_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_grid,
    n_iter=10,
    cv=5,
    scoring='accuracy',
    random_state=42,
)
random_search.fit(X_train, y_train)


In [None]:

# Pull the winning configuration and its mean cross-validated score
# out of the fitted search object
best_params, best_score = random_search.best_params_, random_search.best_score_

# Report both values, one per line
for label, value in (('Best Parameters:', best_params), ('Best Score:', best_score)):
    print(label, value)



In [None]:
# Train the final model with the best parameters found by the search.
# random_state matches the base estimator used during the search (42) so the
# final forest is reproducible and is the same configuration that was
# cross-validated.
best_rf = RandomForestClassifier(**best_params, random_state=42)
best_rf.fit(X_train, y_train)

# Predict on the held-out test set for evaluation
y_pred = best_rf.predict(X_test)



In [None]:
# Score the held-out predictions; precision, recall and F1 all use
# average='weighted'
weighted = dict(average='weighted')
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, **weighted)
recall = recall_score(y_test, y_pred, **weighted)
f1 = f1_score(y_test, y_pred, **weighted)

# Report every metric as a percentage
for label, value in (
    ('Accuracy:', accuracy),
    ('Precision:', precision),
    ('Recall:', recall),
    ('F1-score:', f1),
):
    print(label, value * 100)


In [None]:
# Save the trained model to a file.
# best_rf is the fitted classifier; the base `rf` object is never fitted
# (the search only trains clones of it), so pickling `rf` would store an
# unusable, untrained model.
# NOTE(review): predictions on new data also need the fitted `scaler` and
# `le` from the preprocessing cell — consider persisting them alongside.
joblib.dump(best_rf, 'randomforest_classifier.pkl')