In [1]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score,precision_score, recall_score, f1_score, auc, roc_curve
import pickle

class RFClassifier:
    """Convenience wrapper around scikit-learn's ``RandomForestClassifier``.

    Bundles fitting, prediction, metric computation and pickle-based
    persistence of one forest, plus a helper that fits several forests with
    different tree counts so they can be compared.

    Attributes:
        models_path: Prefix prepended *verbatim* to the file name in
            ``save_model`` / ``load_model``. No path separator is inserted,
            so a directory path must already end with one.
        rf_classifier: The wrapped classifier instance.
        average: Value forwarded as ``average=`` to precision, recall and F1.
    """

    # Seed shared by every forest this class builds, so runs are repeatable.
    _RANDOM_STATE = 42

    def __init__(self, models_path, estimators_n, jobs_n, average='binary'):
        """Create the wrapper and an unfitted forest.

        Args:
            models_path: Prefix used when saving/loading pickled models.
            estimators_n: Number of trees (``n_estimators``).
            jobs_n: Number of parallel jobs (``n_jobs``).
            average: Averaging mode passed to precision/recall/F1.
        """
        self.models_path = models_path
        self.rf_classifier = RandomForestClassifier(
            n_estimators=estimators_n, n_jobs=jobs_n, random_state=self._RANDOM_STATE
        )
        self.average = average

    def multiple_training(self, estimators_n, jobs_n, x_train, y_train, x_test, y_test):
        """Fit one forest per tree count, print its test scores, return the forests.

        The forests built here are independent of ``self.rf_classifier``; only
        ``self.scores`` (and therefore ``self.average``) is used from the instance.

        Args:
            estimators_n: Iterable of tree counts; each entry is cast with ``int``.
            jobs_n: ``n_jobs`` used for every forest.
            x_train, y_train: Training features and labels.
            x_test, y_test: Test features and labels.

        Returns:
            List of fitted classifiers, in the same order as ``estimators_n``.
        """
        # Materialise once: the counts are iterated twice (fit loop, report
        # loop), which would silently print nothing for a one-shot iterator.
        estimators_n = list(estimators_n)
        # Flattening the labels does not depend on the loop variable.
        y_train_flat = np.ravel(y_train)
        y_test_flat = np.ravel(y_test)

        test_scores = []
        classifiers = []
        for n in estimators_n:
            clf = RandomForestClassifier(
                n_estimators=int(n), n_jobs=jobs_n, random_state=self._RANDOM_STATE
            )
            clf.fit(x_train, y_train_flat)
            y_pred = clf.predict(x_test)
            test_scores.append(self.scores(y_test_flat, np.ravel(y_pred)))
            classifiers.append(clf)

        # Report after all fits so the summary is printed as one contiguous block.
        for n, (accuracy, prec, rec, f1) in zip(estimators_n, test_scores):
            print(f"Estimator = {n}")
            print('Accuracy: {:.4f}, Precision: {:.4f}, Recall: {:.4f}, F1: {:.4f}'.format(accuracy, prec, rec, f1))
        return classifiers

    def train(self, x_train, y_train):
        """Fit the wrapped forest; labels are flattened to 1-D with ``np.ravel``."""
        self.rf_classifier.fit(x_train, np.ravel(y_train))

    def tain(self, x_train, y_train):
        """Backward-compatible alias for ``train`` (original, misspelled name)."""
        return self.train(x_train, y_train)

    def predict(self, x_test):
        """Return the wrapped forest's predictions for ``x_test``."""
        return self.rf_classifier.predict(x_test)

    def scores(self, y_test, y_pred):
        """Compute classification metrics.

        Returns:
            ``[accuracy, precision, recall, f1]``; the last three use
            ``average=self.average``.
        """
        accuracy = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average=self.average)
        rec = recall_score(y_test, y_pred, average=self.average)
        f1 = f1_score(y_test, y_pred, average=self.average)
        return [accuracy, prec, rec, f1]

    def print_scores(self, acc, prec, rec, f1):
        """Print the four metrics, one per line, at full precision."""
        print("Accuracy: {}".format(acc))
        print("Precision: {}".format(prec))
        print("Recall: {}".format(rec))
        print("F1: {}".format(f1))

    def save_model(self, file_name):
        """Pickle the wrapped forest to ``models_path + file_name``."""
        with open(self.models_path + file_name, 'wb') as f:
            pickle.dump(self.rf_classifier, f)

    def load_model(self, file_name):
        """Replace the wrapped forest with the one pickled at ``models_path + file_name``.

        Returns:
            The loaded classifier (also stored on ``self.rf_classifier``).
        """
        # SECURITY: unpickling can execute arbitrary code; only load files
        # that were produced by a trusted ``save_model`` call.
        # The context manager closes the handle even if unpickling raises.
        with open(self.models_path + file_name, 'rb') as f:
            self.rf_classifier = pickle.load(f)
        return self.rf_classifier

## Example

In [2]:
# Colab-only setup: mount Google Drive at /content/drive so the paths used in
# the following cells resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import sys
# Put the thesis folder first on the import path; presumably this is where the
# DataClass module imported in the next cell lives — TODO confirm.
sys.path.insert(0,"/content/drive/MyDrive/ITMO-Master's/Thesis/3rd_semester")

In [4]:
import DataClass 
from DataClass import Data
# Directory holding the parquet files handed to Data below.
parquet_path = "/content/drive/MyDrive/ITMO-Master's/Thesis/3rd_semester/Data/parquet_files/"
# Build the project's Data object from the parquet directory.
# NOTE(review): the meaning of the first three arguments (0, "", "") is
# defined in DataClass and is not visible here — confirm against that module.
data = Data( 0, "", "",parquet_path)

In [5]:
from sklearn.model_selection import train_test_split
# Features: built by Data.return_array_df from the scaled right-stance data.
# NOTE(review): this call references 'new_y' while the labels below use 'y';
# the role of 'new_y' and of the column list is defined in DataClass — confirm.
d=data.return_array_df(data.scaled_right_stances,'new_y',['r_swings_time','max_peak','fwhm_value'])
# Labels: the 'y' column, selected with a list so it stays two-dimensional
# (RFClassifier flattens it with np.ravel before fitting).
y = data.scaled_right_stances[[ 'y']]
# Default train/test proportions; fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(d, y, random_state=42)

In [6]:
# Forest with 200 trees and 2 parallel jobs; models are saved under Models/.
rfc = RFClassifier("/content/drive/MyDrive/ITMO-Master's/Thesis/3rd_semester/Models/",200,2)
# `tain` (sic) is the class's fit method.
rfc.tain(X_train,y_train)
y_pred=rfc.predict(X_test)
# scores is [accuracy, precision, recall, f1].
scores=rfc.scores(y_test, y_pred)

In [7]:
# scores holds exactly [accuracy, precision, recall, f1], in print_scores' argument order.
rfc.print_scores(*scores)

Accuracy: 0.9280559807566149
Precision: 0.920311563810665
Recall: 0.9799043062200957
F1: 0.9491734898810443


In [None]:
# Pickle the fitted forest to <models_path> + "try".
rfc.save_model("try")

In [None]:
# Fresh wrapper pointing at the same Models/ folder. load_model replaces its
# unfitted forest with the unpickled one, so the 200/2 constructor arguments
# only matter until the load.
rfcc = RFClassifier("/content/drive/MyDrive/ITMO-Master's/Thesis/3rd_semester/Models/",200,2)
rfcc.load_model("try")


In [None]:
# Check the reloaded model end to end using only `rfcc`, so this cell does not
# depend on the originally trained `rfc` object still being in the kernel.
y_pred=rfcc.predict(X_test)
scores=rfcc.scores(y_test, y_pred)
rfcc.print_scores(*scores)

Accuracy: 0.9280559807566149
Precision: 0.920311563810665
Recall: 0.9799043062200957
F1: 0.9491734898810443


In [None]:
# Rebinds `rfc` to a wrapper with an empty models_path. The 200 passed here is
# not used by the sweep: multiple_training builds its own forest for each
# tree count in the list (with 2 jobs) and prints the test scores for each.
rfc = RFClassifier("",200,2)
# ccs is the list of fitted classifiers, in the same order as the tree counts.
ccs=rfc.multiple_training([5, 10, 20, 40, 100, 200],2,X_train, y_train, X_test, y_test)

Estimator = 5
Accuracy: 0.8830, Precision: 0.8985, Recall: 0.9349, F1: 0.9164
Estimator = 10
Accuracy: 0.9042, Precision: 0.9207, Recall: 0.9413, F1: 0.9309
Estimator = 20
Accuracy: 0.9204, Precision: 0.9218, Recall: 0.9659, F1: 0.9433
Estimator = 40
Accuracy: 0.9211, Precision: 0.9198, Recall: 0.9694, F1: 0.9439
Estimator = 100
Accuracy: 0.9263, Precision: 0.9189, Recall: 0.9789, F1: 0.9480
Estimator = 200
Accuracy: 0.9281, Precision: 0.9203, Recall: 0.9799, F1: 0.9492
