## 	Association Rule

### a.List all frequent (large) TWO itemsets, together with their support

In [None]:
from itertools import combinations

# Define the transactions from the database
transactions = [
    ['B', 'E'],
    ['E', 'F'],
    ['A', 'B', 'C', 'F'],
    ['B', 'E', 'F'],
    ['C', 'D', 'E', 'F']
]

# Define the minimum support threshold
min_support_threshold = 0.4

# Calculate total number of transactions
total_transactions = len(transactions)

# Generate all possible two-itemsets
itemsets = {}
for transaction in transactions:
    for itemset in combinations(sorted(transaction), 2):
        itemsets[itemset] = itemsets.get(itemset, 0) + 1

# Filter itemsets by minimum support threshold
frequent_itemsets = {itemset: count for itemset, count in itemsets.items() if count/total_transactions >= min_support_threshold}

# Print the frequent itemsets and their support
for itemset, count in frequent_itemsets.items():
    print(f"Itemset: {itemset}, Support: {count/total_transactions:.2f}")

# If you want to see the support as a percentage of transactions, multiply by 100
print("\nFrequent two-itemsets with their support percentage:")
for itemset, count in frequent_itemsets.items():
    print(f"Itemset: {itemset}, Support: {count/total_transactions * 100:.2f}%")

Itemset: ('B', 'E'), Support: 0.40
Itemset: ('E', 'F'), Support: 0.60
Itemset: ('B', 'F'), Support: 0.40
Itemset: ('C', 'F'), Support: 0.40

Frequent two-itemsets with their support percentage:
Itemset: ('B', 'E'), Support: 40.00%
Itemset: ('E', 'F'), Support: 60.00%
Itemset: ('B', 'F'), Support: 40.00%
Itemset: ('C', 'F'), Support: 40.00%


### b. For all frequent itemsets of maximal length, list all corresponding association rules satisfying the requirements on (minimum support and) minimum confidence, together with their confidence.

In [None]:
from itertools import combinations

# Define the minimum confidence threshold
min_confidence_threshold = 0.6

# Find the maximal length of the itemsets
maximal_length = max(len(transaction) for transaction in transactions)

# Find all itemsets of maximal length that are frequent
frequent_maximal_itemsets = []
for transaction in transactions:
    if len(transaction) == maximal_length:
        frequent_maximal_itemsets.append(transaction)

# Generate all non-empty subsets for each maximal itemset
rules = []
for itemset in frequent_maximal_itemsets:
    for i in range(1, len(itemset)):
        for subset in combinations(itemset, i):
            remaining = set(itemset) - set(subset)
            if remaining:
                rules.append((subset, tuple(remaining)))

# Function to calculate support
def support(itemset, transactions):
    count = sum(1 for transaction in transactions if set(itemset).issubset(transaction))
    return count / len(transactions)

# Function to calculate confidence
def confidence(rule, transactions):
    return support(rule[0] + rule[1], transactions) / support(rule[0], transactions)

# Filter rules by minimum confidence threshold and print them
print("Association rules with their confidence:")
for rule in rules:
    conf = confidence(rule, transactions)
    if conf >= min_confidence_threshold:
        print(f"Rule: {rule[0]} -> {rule[1]}, Confidence: {conf:.2f}")

# If you want to see the confidence as a percentage, you can multiply by 100
print("\nAssociation rules with their confidence percentage:")
for rule in rules:
    conf = confidence(rule, transactions)
    if conf >= min_confidence_threshold:
        print(f"Rule: {rule[0]} -> {rule[1]}, Confidence: {conf * 100:.2f}%")


Association rules with their confidence:
Rule: ('A',) -> ('F', 'C', 'B'), Confidence: 1.00
Rule: ('A', 'B') -> ('F', 'C'), Confidence: 1.00
Rule: ('A', 'C') -> ('F', 'B'), Confidence: 1.00
Rule: ('A', 'F') -> ('C', 'B'), Confidence: 1.00
Rule: ('B', 'C') -> ('F', 'A'), Confidence: 1.00
Rule: ('A', 'B', 'C') -> ('F',), Confidence: 1.00
Rule: ('A', 'B', 'F') -> ('C',), Confidence: 1.00
Rule: ('A', 'C', 'F') -> ('B',), Confidence: 1.00
Rule: ('B', 'C', 'F') -> ('A',), Confidence: 1.00
Rule: ('D',) -> ('E', 'F', 'C'), Confidence: 1.00
Rule: ('C', 'D') -> ('E', 'F'), Confidence: 1.00
Rule: ('C', 'E') -> ('D', 'F'), Confidence: 1.00
Rule: ('D', 'E') -> ('F', 'C'), Confidence: 1.00
Rule: ('D', 'F') -> ('E', 'C'), Confidence: 1.00
Rule: ('C', 'D', 'E') -> ('F',), Confidence: 1.00
Rule: ('C', 'D', 'F') -> ('E',), Confidence: 1.00
Rule: ('C', 'E', 'F') -> ('D',), Confidence: 1.00
Rule: ('D', 'E', 'F') -> ('C',), Confidence: 1.00

Association rules with their confidence percentage:
Rule: ('A',) -

### c.	Compute the lift for the association rule  A -> B and C -> F

In [None]:
from itertools import combinations

# Function to calculate support
def support(itemset, transactions):
    count = sum(1 for transaction in transactions if set(itemset).issubset(set(transaction)))
    return count / len(transactions)

# Function to calculate confidence
def confidence(rule, transactions):
    return support(rule[0] + rule[1], transactions) / support(rule[0], transactions)

# Function to calculate lift
def lift(rule, transactions):
    return confidence(rule, transactions) / support(rule[1], transactions)

# Calculate lift for the rule A -> B
rule_ab = (['A'], ['B'])
lift_ab = lift(rule_ab, transactions)
print(f"Lift(A -> B): {lift_ab:.2f}")

# Calculate lift for the rule C -> F
rule_cf = (['C'], ['F'])
lift_cf = lift(rule_cf, transactions)
print(f"Lift(C -> F): {lift_cf:.2f}")


Lift(A -> B): 1.67
Lift(C -> F): 1.25


## Clustering

In [None]:
def manhattan_distance(p1, p2):
    """Calculate the Manhattan distance between two points."""
    return sum(abs(val1-val2) for val1, val2 in zip(p1, p2))

def assign_clusters(data, centroids):
    """Assign each data point to the cluster with the nearest centroid."""
    clusters = {i: [] for i in range(len(centroids))}
    for point in data:
        distances = [manhattan_distance(point, centroid) for centroid in centroids]
        closest_centroid = distances.index(min(distances))
        clusters[closest_centroid].append(point)
    return clusters

# Initial centroids
C1 = (2, 0)
C2 = (5, 3)
C3 = (8, 7)

# Dataset
data_points = [(2, 5), (3, 4), (7, 3), (4, 4), (9, 8), (0, 4), (2, 0), (5, 6), (7, 8), (1, 1), (6, 7)]

# Assign data points to clusters
initial_centroids = [C1, C2, C3]
clusters = assign_clusters(data_points, initial_centroids)

# Print the clusters
for i, cluster in clusters.items():
    print(f"Cluster {i + 1} – C{i + 1}: {cluster}")


Cluster 1 – C1: [(2, 5), (0, 4), (2, 0), (1, 1)]
Cluster 2 – C2: [(3, 4), (7, 3), (4, 4), (5, 6)]
Cluster 3 – C3: [(9, 8), (7, 8), (6, 7)]


The new centroids are calculated by averaging the coordinates of the points in each cluster

In [None]:
def calculate_new_centroid(cluster):
    """Calculate the new centroid for a given cluster."""
    x_sum = sum(point[0] for point in cluster)
    y_sum = sum(point[1] for point in cluster)
    return (x_sum / len(cluster), y_sum / len(cluster))

# Clusters
cluster_1 = [(2, 5), (0, 4), (2, 0), (1, 1)]
cluster_2 = [(3, 4), (7, 3), (4, 4), (5, 6)]
cluster_3 = [(9, 8), (7, 8), (6, 7)]

# Calculate new centroids
new_centroid_1 = calculate_new_centroid(cluster_1)
new_centroid_2 = calculate_new_centroid(cluster_2)
new_centroid_3 = calculate_new_centroid(cluster_3)

print("New Centroid for cluster 1:", new_centroid_1)
print("New Centroid for cluster 2:", new_centroid_2)
print("New Centroid for cluster 3:", new_centroid_3)

New Centroid for cluster 1: (1.25, 2.5)
New Centroid for cluster 2: (4.75, 4.25)
New Centroid for cluster 3: (7.333333333333333, 7.666666666666667)


## Classification

In [None]:
from collections import Counter
import pandas as pd

# Provided data
data = {
    'Color': ['Red', 'Red', 'Red', 'Yellow', 'Yellow', 'Yellow', 'Yellow', 'Yellow', 'Red', 'Red'],
    'Type': ['Sports', 'Sports', 'Sports', 'Sports', 'Sports', 'SUV', 'SUV', 'SUV', 'SUV', 'Sports'],
    'Origin': ['Domestic', 'Domestic', 'Domestic', 'Domestic', 'Imported', 'Imported', 'Imported', 'Domestic', 'Imported', 'Imported'],
    'Stolen': ['Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'No', 'Yes']
}

df = pd.DataFrame(data)

# Calculate prior probabilities
p_stolen_yes = Counter(df['Stolen'])['Yes'] / len(df)
p_stolen_no = Counter(df['Stolen'])['No'] / len(df)

# Calculate likelihoods
# For Stolen = Yes
p_red_given_stolen_yes = len(df[(df['Color'] == 'Red') & (df['Stolen'] == 'Yes')]) / Counter(df['Stolen'])['Yes']
p_suv_given_stolen_yes = len(df[(df['Type'] == 'SUV') & (df['Stolen'] == 'Yes')]) / Counter(df['Stolen'])['Yes']
p_domestic_given_stolen_yes = len(df[(df['Origin'] == 'Domestic') & (df['Stolen'] == 'Yes')]) / Counter(df['Stolen'])['Yes']

# For Stolen = No
p_red_given_stolen_no = len(df[(df['Color'] == 'Red') & (df['Stolen'] == 'No')]) / Counter(df['Stolen'])['No']
p_suv_given_stolen_no = len(df[(df['Type'] == 'SUV') & (df['Stolen'] == 'No')]) / Counter(df['Stolen'])['No']
p_domestic_given_stolen_no = len(df[(df['Origin'] == 'Domestic') & (df['Stolen'] == 'No')]) / Counter(df['Stolen'])['No']

# Calculate Naive Bayes probabilities for a red domestic SUV
# P(Stolen = Yes | Red, SUV, Domestic)
p_stolen_yes_given_red_suv_domestic = p_red_given_stolen_yes * p_suv_given_stolen_yes * p_domestic_given_stolen_yes * p_stolen_yes

# P(Stolen = No | Red, SUV, Domestic)
p_stolen_no_given_red_suv_domestic = p_red_given_stolen_no * p_suv_given_stolen_no * p_domestic_given_stolen_no * p_stolen_no

(p_stolen_yes_given_red_suv_domestic, p_stolen_no_given_red_suv_domestic)

(0.024, 0.072)

Based on the calculations:

* The probability that a red domestic SUV is stolen (P(Stolen = Yes | Red, SUV, Domestic)) is approximately 0.024.
* The probability that it is not stolen (P(Stolen = No | Red, SUV, Domestic)) is approximately 0.072.

Since the probability of it not being stolen (0.072) is higher than the probability of it being stolen (0.024), the Naïve Bayes prediction would be that the red domestic SUV is **NOT STOLEN**.

## Decision Trees

In [None]:
import numpy as np

# Define the dataset
examples = [
    {'X1': 0, 'X2': 0, 'X3': 0, 'Y': '+ve'},
    {'X1': 0, 'X2': 0, 'X3': 1, 'Y': '-ve'},
    {'X1': 0, 'X2': 1, 'X3': 0, 'Y': '-ve'},
    {'X1': 0, 'X2': 1, 'X3': 1, 'Y': '+ve'},
    {'X1': 1, 'X2': 0, 'X3': 0, 'Y': '-ve'},
]

# Calculate the Gini index for a given set of examples
def gini(examples):
    total = len(examples)
    if total == 0:
        return 0
    positive = sum(e['Y'] == '+ve' for e in examples)
    negative = total - positive
    gini_index = 1 - (positive / total) ** 2 - (negative / total) ** 2
    return gini_index

# Calculate the weighted Gini index for a split on a given attribute
def weighted_gini(examples, attribute):
    unique_values = set(e[attribute] for e in examples)
    weighted_gini_index = 0
    for value in unique_values:
        subset = [e for e in examples if e[attribute] == value]
        weight = len(subset) / len(examples)
        weighted_gini_index += weight * gini(subset)
    return weighted_gini_index

# Calculate the overall Gini index before any splits
overall_gini = gini(examples)

# Calculate the information gain for each attribute
gains = {}
attributes = ['X1', 'X2', 'X3']
for attribute in attributes:
    attr_gini = weighted_gini(examples, attribute)
    gains[attribute] = overall_gini - attr_gini

# Output the Gini index for each attribute and the one with the highest gain
for attribute, gain in gains.items():
    print(f"Gini index for {attribute}: {1 - gain:.3f}")
    print(f"Information gain for {attribute}: {gain:.3f}")

# Determine the attribute with the highest information gain
best_attribute = max(gains, key=gains.get)
print(f"The attribute with the highest information gain is: {best_attribute}")

Gini index for X1: 0.920
Information gain for X1: 0.080
Gini index for X2: 0.987
Information gain for X2: 0.013
Gini index for X3: 0.987
Information gain for X3: 0.013
The attribute with the highest information gain is: X1


## Case 1

In [None]:
import pandas as pd

# Load the data from the Excel file
file_path = '/content/Integration of Emergency Events in Scenic Areas.xlsx'  # Replace with your file path
data = pd.read_excel(file_path)

# Display the first few rows of the dataframe
data.head()


  and should_run_async(code)


Unnamed: 0,Type,No.,Describe,Mechanism,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29
0,,,,spread mechanism,,coupling mechanism,,,,,...,coupling,,,,,,derivation,,,
1,,,,occupancy type,transitive type,event-event,event-factor,factor-factor,occurrence,acceleration,...,,,,,,,excessive derivation,,harmful derivation,
2,,,,,,,,,,,...,isolation strategy,insurance strategy,domination strategy,reform strategy,hedging strategy,transition strategy,domination strategy,neutral strategy,eliminating strategy,pre-assessment strategy
3,traffic accidents,1.0,Traffic Accident in U.S. Yosemite National Park,1,0,0,1,0,0,0,...,0,0,1,0,0,0,0,0,0,0
4,,2.0,Tourist Bus Accident in U.S. Yosemite National...,1,0,0,0,1,1,0,...,0,1,0,0,0,1,0,0,0,0


In [None]:
threshold = 0.5 * len(data)
data = data.dropna(thresh=threshold, axis=1)

# Remove rows with NaNs
data = data.dropna()

# Display the cleaned data
data.head()

  and should_run_async(code)


Unnamed: 0,No.,Describe,Mechanism,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,...,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29
3,1.0,Traffic Accident in U.S. Yosemite National Park,1,0,0,1,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
4,2.0,Tourist Bus Accident in U.S. Yosemite National...,1,0,0,0,1,1,0,0,...,0,1,0,0,0,1,0,0,0,0
5,3.0,Traffic Accident in London attractions,1,1,0,1,0,0,0,1,...,1,1,0,0,0,1,0,0,0,0
6,4.0,Major Tourism Traffic Accidents in Tibet,0,1,0,0,1,1,0,1,...,0,0,1,1,0,1,0,0,0,0
7,5.0,"Tourist Bus Riverfall Accident in Jingzhou,China",0,1,0,0,1,1,0,0,...,0,0,1,0,0,0,0,0,0,0


In [None]:
from mlxtend.frequent_patterns import apriori, association_rules

# Select only the relevant columns for Association Rule Mining
# Assuming all unnamed columns are relevant for the analysis
data_for_arm = data.filter(regex='^Unnamed')

# Step 1: Selecting Support and Confidence Thresholds
support_threshold = 0.01  # Adjust this based on your dataset
confidence_threshold = 0.5  # Adjust this as well

# Step 2: Find Frequent Itemsets and Association Rules
frequent_itemsets = apriori(data_for_arm, min_support=support_threshold, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=confidence_threshold)

# Step 3: Select and Report Top 5 Rules
# Sorting by confidence and lift, and selecting top 5
top_5_rules = rules.sort_values(['confidence', 'lift'], ascending=[False, False]).head(5)

# Displaying the top 5 rules
top_5_rules

  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
915,"(Unnamed: 10, Unnamed: 13)",(Unnamed: 12),0.013333,0.013333,0.013333,1.0,75.0,0.013156,inf,1.0
916,(Unnamed: 12),"(Unnamed: 10, Unnamed: 13)",0.013333,0.013333,0.013333,1.0,75.0,0.013156,inf,1.0
1034,"(Unnamed: 18, Unnamed: 13)",(Unnamed: 12),0.013333,0.013333,0.013333,1.0,75.0,0.013156,inf,1.0
1035,(Unnamed: 12),"(Unnamed: 18, Unnamed: 13)",0.013333,0.013333,0.013333,1.0,75.0,0.013156,inf,1.0
1078,"(Unnamed: 18, Unnamed: 23)",(Unnamed: 12),0.013333,0.013333,0.013333,1.0,75.0,0.013156,inf,1.0


In [None]:
# Function to identify rows that match a rule
def get_rows_for_rule(data, data_for_arm, rule_antecedents):
    # Start with a mask of all True
    mask = pd.Series([True] * len(data))

    # For each item in the rule, update the mask
    for item in rule_antecedents:
        mask = mask & data_for_arm[item]

    # Return the rows where mask is True
    return data[mask]

# Extracting and displaying details for each of the top 5 rules
for index, rule in top_5_rules.iterrows():
    rule_antecedents = rule['antecedents']
    rows_for_rule = get_rows_for_rule(data, data_for_arm, rule_antecedents)
    print(f"Rule {index + 1}: {rule_antecedents}")
    print(f"Matching Rows: \n{rows_for_rule[['Describe', 'Mechanism']]}")
    print("\n")

Rule 916: frozenset({'Unnamed: 10', 'Unnamed: 13'})
Matching Rows: 
                           Describe Mechanism
9  Taiwan Tourist Bus Fire Accident         0


Rule 917: frozenset({'Unnamed: 12'})
Matching Rows: 
                           Describe Mechanism
9  Taiwan Tourist Bus Fire Accident         0


Rule 1035: frozenset({'Unnamed: 18', 'Unnamed: 13'})
Matching Rows: 
                           Describe Mechanism
9  Taiwan Tourist Bus Fire Accident         0


Rule 1036: frozenset({'Unnamed: 12'})
Matching Rows: 
                           Describe Mechanism
9  Taiwan Tourist Bus Fire Accident         0


Rule 1079: frozenset({'Unnamed: 18', 'Unnamed: 23'})
Matching Rows: 
                           Describe Mechanism
9  Taiwan Tourist Bus Fire Accident         0




  and should_run_async(code)
  return data[mask]
  return data[mask]
  return data[mask]
  return data[mask]
  return data[mask]


## Case 2

In [None]:
import pandas as pd

# Load the dataset
file_path = '/content/forbes_billionaires.csv'
data = pd.read_csv(file_path)

# Displaying the first few rows of the dataset to understand its structure
data.head()

  and should_run_async(code)


Unnamed: 0,Name,NetWorth,Country,Source,Rank,Age,Residence,Citizenship,Status,Children,Education,Self_made
0,Jeff Bezos,177.0,United States,Amazon,1,57.0,"Seattle, Washington",United States,In Relationship,4.0,"Bachelor of Arts/Science, Princeton University",True
1,Elon Musk,151.0,United States,"Tesla, SpaceX",2,49.0,"Austin, Texas",United States,In Relationship,7.0,"Bachelor of Arts/Science, University of Pennsy...",True
2,Bernard Arnault & family,150.0,France,LVMH,3,72.0,"Paris, France",France,Married,5.0,"Bachelor of Arts/Science, Ecole Polytechnique ...",False
3,Bill Gates,124.0,United States,Microsoft,4,65.0,"Medina, Washington",United States,Divorced,3.0,"Drop Out, Harvard University",True
4,Mark Zuckerberg,97.0,United States,Facebook,5,36.0,"Palo Alto, California",United States,Married,2.0,"Drop Out, Harvard University",True


In [None]:
data.describe()

  and should_run_async(code)


Unnamed: 0,NetWorth,Rank,Age,Children
count,2755.0,2755.0,2630.0,1552.0
mean,4.74922,1345.663521,63.2673,2.978093
std,9.615358,772.669811,13.47916,1.618569
min,1.0,1.0,18.0,1.0
25%,1.5,680.0,54.0,2.0
50%,2.3,1362.0,63.0,3.0
75%,4.2,2035.0,73.0,4.0
max,177.0,2674.0,99.0,23.0


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
file_path = '/content/U76P9_forbes_billionaires.csv'
data = pd.read_csv(file_path)

# Selecting relevant columns and dropping rows with missing values in these columns
data = data[['NetWorth', 'Country', 'Source', 'Rank', 'Age', 'Residence', 'Citizenship', 'Status', 'Children', 'Education', 'Self_made']].dropna()

# Encoding categorical variables
label_encoders = {}
for column in ['Country', 'Source', 'Residence', 'Citizenship', 'Status', 'Education']:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Defining the feature matrix (X) and target vector (y)
X = data.drop('Self_made', axis=1)
y = data['Self_made'].astype(int)  # Ensure it is an integer

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Creating a Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Training the model
model.fit(X_train, y_train)

# Making predictions
y_pred = model.predict(X_test)

# Evaluating the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)


  and should_run_async(code)


Accuracy: 0.7135678391959799
Classification Report:
               precision    recall  f1-score   support

           0       0.56      0.39      0.46        62
           1       0.76      0.86      0.81       137

    accuracy                           0.71       199
   macro avg       0.66      0.62      0.63       199
weighted avg       0.69      0.71      0.70       199



In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV

model = RandomForestClassifier(random_state=42)

# Define a grid of hyperparameters to search
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Setup GridSearchCV
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)

# Fit the grid search to the data
grid_search.fit(X_train, y_train)

# Print the best parameters
print("Best Parameters:", grid_search.best_params_)

# Use the best model to make predictions
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Fitting 5 folds for each of 108 candidates, totalling 540 fits


  and should_run_async(code)


Best Parameters: {'max_depth': None, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 150}
Accuracy: 0.7236180904522613
Classification Report:
               precision    recall  f1-score   support

           0       0.61      0.31      0.41        62
           1       0.74      0.91      0.82       137

    accuracy                           0.72       199
   macro avg       0.68      0.61      0.61       199
weighted avg       0.70      0.72      0.69       199

