In [2]:
# Install the Kaggle CLI into the kernel's own environment (%pip, not !pip).
%pip install -q kaggle
# The CLI reads the API token from ~/.kaggle/kaggle.json; kaggle.json must be
# placed next to the notebook before this cell is run.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download and unpack the dataset. -o overwrites data.csv without prompting,
# so re-running the cell does not stall on unzip's interactive question.
!kaggle datasets download -d uciml/breast-cancer-wisconsin-data
!unzip -o breast-cancer-wisconsin-data.zip

cp: cannot stat 'kaggle.json': No such file or directory
chmod: cannot access '/root/.kaggle/kaggle.json': No such file or directory
Dataset URL: https://www.kaggle.com/datasets/uciml/breast-cancer-wisconsin-data
License(s): CC-BY-NC-SA-4.0
Downloading breast-cancer-wisconsin-data.zip to /content
  0% 0.00/48.6k [00:00<?, ?B/s]
100% 48.6k/48.6k [00:00<00:00, 2.36MB/s]
Archive:  breast-cancer-wisconsin-data.zip
  inflating: data.csv                


In [5]:
import random

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, mean_squared_error, roc_auc_score

# Define the Mamdani rules

def generate_rule(features, output_labels):
    """Build one random rule.

    Three distinct entries of ``features`` are sampled; each is paired with a
    randomly chosen fuzzy level ("low", "medium" or "high"), and a random
    entry of ``output_labels`` becomes the rule's consequent.

    Returns:
        dict with the keys ``"conditions"`` (a list of
        ``{"feature": ..., "fuzzy": ...}`` dicts) and ``"output"``.
    """
    fuzzy_levels = ["low", "medium", "high"]
    chosen = random.sample(features, 3)  # randomly select three features
    antecedent = [
        {"feature": name, "fuzzy": random.choice(fuzzy_levels)}
        for name in chosen
    ]
    return {"conditions": antecedent, "output": random.choice(output_labels)}

# Create the initial population
def initialize_population(pop_size, num_rules, features, output_labels):
    """Return ``pop_size`` individuals, each a list of ``num_rules`` random rules.

    Every rule is produced by ``generate_rule(features, output_labels)``.
    """
    return [
        [generate_rule(features, output_labels) for _rule in range(num_rules)]
        for _individual in range(pop_size)
    ]

# Evaluate rules
def evaluate_rule(rule, data, labels):
    """Score one rule as the fraction of rows it classifies correctly.

    A row counts as correct when every condition of the rule matches the row
    (``data[feature] == fuzzy``) and the rule's ``"output"`` equals the row's
    label.

    Args:
        rule: dict with ``"conditions"`` (list of ``{"feature", "fuzzy"}``
            dicts) and ``"output"``.
        data: DataFrame whose columns hold fuzzy level strings.
        labels: labels paired with the rows of ``data`` by position.

    Returns:
        float: number of correct rows divided by ``len(data)``; ``0.0`` when
        ``data`` is empty.
    """
    n_rows = len(data)
    if n_rows == 0:
        # Avoid dividing by zero on an empty frame.
        return 0.0

    # Rows and labels are paired by position, so the score does not depend on
    # the frame carrying a default 0..n-1 index.
    matches = np.ones(n_rows, dtype=bool)
    for cond in rule["conditions"]:
        matches &= (data[cond["feature"]] == cond["fuzzy"]).to_numpy()
    label_hits = np.asarray(labels) == rule["output"]

    return int(np.count_nonzero(matches & label_hits)) / n_rows

# Evaluate the population
def evaluate_population(population, data, labels):
    """Return one fitness value per individual.

    An individual's fitness is the mean of ``evaluate_rule`` over its rules.
    The returned list is in the same order as ``population``.
    """
    def mean_rule_score(rule_set):
        return np.mean([evaluate_rule(single, data, labels) for single in rule_set])

    return [mean_rule_score(rule_set) for rule_set in population]

# Crossover
def crossover(parent1, parent2):
    """Single-point crossover of two rule lists.

    A cut point is drawn uniformly from ``1 .. len(parent1) - 1`` and the
    tails of the two parents are swapped.

    Returns:
        tuple ``(child1, child2)`` of new lists. The lists are new, but the
        rule objects inside them are the same objects held by the parents.
    """
    if len(parent1) < 2:
        # No interior cut point exists (random.randint(1, 0) would raise);
        # hand back unchanged copies of the parents instead.
        return list(parent1), list(parent2)

    point = random.randint(1, len(parent1) - 1)
    child1 = parent1[:point] + parent2[point:]
    child2 = parent2[:point] + parent1[point:]
    return child1, child2

# Mutation
def mutate(rules, mutation_rate, features, output_labels):
    """Randomly perturb the rules in place and return the same list.

    For each rule, with probability ``mutation_rate`` the fuzzy level of one
    randomly chosen condition is redrawn, and (independently, with the same
    probability) the rule's output is redrawn from ``output_labels``.
    ``features`` is accepted but not used.
    """
    fuzzy_levels = ["low", "medium", "high"]
    for current in rules:
        if random.random() < mutation_rate:
            # The replacement level is drawn before the position, so the
            # sequence of random draws stays the same for a given seed.
            replacement = random.choice(fuzzy_levels)
            conds = current["conditions"]
            position = random.randint(0, len(conds) - 1)
            conds[position]["fuzzy"] = replacement
        if random.random() < mutation_rate:
            current["output"] = random.choice(output_labels)
    return rules

# Genetic algorithm
def genetic_algorithm(data, labels, features, output_labels, pop_size=30, num_rules=20, num_generations=100, mutation_rate=0.4):
    """Evolve a population of rule sets and return the best one seen.

    Each generation the population is scored with ``evaluate_population``,
    ranked by descending fitness, and the best individual is printed.
    Individuals adjacent in the ranking are paired, recombined with
    ``crossover`` and passed through ``mutate``; the offspring replace the
    whole population.

    Args:
        data, labels: forwarded to ``evaluate_population``.
        features, output_labels: forwarded to ``initialize_population`` and
            ``mutate``.
        pop_size: number of individuals (odd sizes are supported).
        num_rules: rules per individual.
        num_generations: number of generations to run.
        mutation_rate: probability forwarded to ``mutate``.

    Returns:
        tuple ``(best_solution, best_fitness)``. ``best_solution`` is None
        only when ``num_generations`` is 0.
    """
    import copy  # local import: keeps this definition self-contained

    population = initialize_population(pop_size, num_rules, features, output_labels)
    best_fitness = 0
    best_solution = None

    for generation in range(num_generations):
        fitness_scores = evaluate_population(population, data, labels)
        sorted_indices = np.argsort(fitness_scores)[::-1]
        top_index = sorted_indices[0]

        # Always record a solution in the first generation, even if its
        # fitness does not exceed the initial best_fitness of 0.
        if best_solution is None or fitness_scores[top_index] > best_fitness:
            best_fitness = fitness_scores[top_index]
            best_solution = population[top_index]

        print(f"Generation {generation + 1}: Best Fitness = {best_fitness}")

        # Show the best chromosome of this generation
        best_rule_set = population[top_index]
        print("Best Rule Set:")
        for rule in best_rule_set:
            conditions = " AND ".join(
                [f"{cond['feature']} IS {cond['fuzzy']}" for cond in rule["conditions"]]
            )
            print(f"IF {conditions}, THEN OUTPUT IS {rule['output']}")

        new_population = []
        for i in range(0, pop_size, 2):
            parent1 = population[sorted_indices[i]]
            # Wrap around so an odd pop_size pairs the last individual with
            # the top-ranked one instead of indexing past the end.
            parent2 = population[sorted_indices[(i + 1) % pop_size]]
            child1, child2 = crossover(parent1, parent2)
            for child in (child1, child2):
                # Mutate a deep copy: the children share rule dicts with
                # their parents, and mutating those in place would silently
                # alter the recorded best_solution.
                new_population.append(
                    mutate(copy.deepcopy(child), mutation_rate, features, output_labels)
                )
        # An odd pop_size produces one surplus child; keep the size constant.
        population = new_population[:pop_size]

    return best_solution, best_fitness

# Load the real data
data = pd.read_csv("data.csv")
# Non-inplace rename: the result is a new frame bound to the same name.
data = data.rename(columns={'concave points_mean': 'concave_points_mean'})

labels = data["diagnosis"].map({"B": "benign", "M": "malignant"})
data = data.drop(columns=["diagnosis"])

# Features used as rule antecedents
features = [
    "radius_mean",
    "texture_mean",
    "perimeter_mean",
    "area_mean",
    "smoothness_mean",
    "compactness_mean",
    "concavity_mean",
    "concave_points_mean",
]
# Explicit copy so that later column assignments write to an independent
# frame rather than to a selection of the loaded one.
data = data[features].copy()

# Generate fuzzy values for the data (sample)
def fuzzify(value):
    """Map a number to a fuzzy level string.

    Values below 0.33 are "low", values below 0.66 are "medium", and
    everything else is "high".
    """
    if value < 0.33:
        return "low"
    return "medium" if value < 0.66 else "high"

# Replace every feature column by its fuzzy level. Values are scaled by the
# column maximum before fuzzification; the maximum is computed once per
# column instead of once per row.
for feature in features:
    feature_max = data[feature].max()
    data[feature] = (data[feature] / feature_max).apply(fuzzify)

# Run the genetic algorithm
random.seed(42)  # fixed seed so a re-run reproduces the same rule base
output_labels = ["benign", "malignant"]
best_solution, best_fitness = genetic_algorithm(data, labels, features, output_labels)

print("Best Solution:")
# best_solution is None when no generation exceeded the initial best fitness
# of 0; print nothing in that case instead of failing.
for rule in best_solution or []:
    conditions = " AND ".join(
        [f"{cond['feature']} IS {cond['fuzzy']}" for cond in rule["conditions"]]
    )
    print(f"IF {conditions}, THEN OUTPUT IS {rule['output']}")
print(f"Best Fitness: {best_fitness}")



# Classify every row with the evolved rule base: the first rule whose
# conditions all match decides the class; rows matched by no rule default to
# "benign".
final_predictions = []
for _, data_point in data.iterrows():
    predicted = "benign"
    for rule in best_solution or []:
        if all(data_point[cond["feature"]] == cond["fuzzy"] for cond in rule["conditions"]):
            predicted = rule["output"]
            break
    final_predictions.append(predicted)

# 0/1 encodings (benign=0, malignant=1) for the metrics that need numbers.
labels_numeric = labels.map({"benign": 0, "malignant": 1})
predictions_numeric = [1 if pred == "malignant" else 0 for pred in final_predictions]

# Calculate Accuracy
accuracy = accuracy_score(labels, final_predictions)
print(f"Accuracy: {accuracy:.4f}")

# Mean Squared Error
mse = mean_squared_error(labels_numeric, predictions_numeric)
print(f"Mean Squared Error (MSE): {mse:.4f}")

# Root Mean Squared Error
rmse = np.sqrt(mse)
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")

# Area Under the Curve (AUC), computed from the hard 0/1 predictions
auc = roc_auc_score(labels_numeric, predictions_numeric)
print(f"Area Under the Curve (AUC): {auc:.4f}")

Generation 1: Best Fitness = 0.06098418277680142
Best Rule Set:
IF radius_mean IS medium AND compactness_mean IS high AND perimeter_mean IS low, THEN OUTPUT IS benign
IF concave_points_mean IS medium AND area_mean IS high AND compactness_mean IS medium, THEN OUTPUT IS benign
IF perimeter_mean IS medium AND concave_points_mean IS high AND area_mean IS medium, THEN OUTPUT IS benign
IF radius_mean IS high AND area_mean IS low AND texture_mean IS medium, THEN OUTPUT IS benign
IF concave_points_mean IS low AND smoothness_mean IS medium AND radius_mean IS medium, THEN OUTPUT IS benign
IF area_mean IS medium AND compactness_mean IS high AND concave_points_mean IS high, THEN OUTPUT IS malignant
IF concave_points_mean IS low AND perimeter_mean IS medium AND radius_mean IS medium, THEN OUTPUT IS benign
IF concavity_mean IS medium AND concave_points_mean IS medium AND radius_mean IS high, THEN OUTPUT IS malignant
IF area_mean IS medium AND perimeter_mean IS low AND texture_mean IS low, THEN OUTPU

In [11]:
import numpy as np
import pandas as pd
import random
from sklearn.metrics import accuracy_score, mean_squared_error, roc_auc_score


def generate_rule(features, output_labels):
    """Build one random rule.

    Three distinct entries of ``features`` are sampled; each is paired with a
    randomly chosen fuzzy level ("low", "medium" or "high"), and a random
    entry of ``output_labels`` becomes the rule's consequent.

    Returns:
        dict with the keys ``"conditions"`` (a list of
        ``{"feature": ..., "fuzzy": ...}`` dicts) and ``"output"``.
    """
    fuzzy_levels = ["low", "medium", "high"]
    chosen = random.sample(features, 3)
    antecedent = [
        {"feature": name, "fuzzy": random.choice(fuzzy_levels)}
        for name in chosen
    ]
    return {"conditions": antecedent, "output": random.choice(output_labels)}

def initialize_population(pop_size, num_rules, features, output_labels):
    """Return ``pop_size`` individuals, each a list of ``num_rules`` random rules.

    Every rule is produced by ``generate_rule(features, output_labels)``.
    """
    return [
        [generate_rule(features, output_labels) for _rule in range(num_rules)]
        for _individual in range(pop_size)
    ]

def evaluate_rule(rule, data, labels):
    """Score one rule as the fraction of rows it classifies correctly.

    A row counts as correct when every condition of the rule matches the row
    (``data[feature] == fuzzy``) and the rule's ``"output"`` equals the row's
    label.

    Args:
        rule: dict with ``"conditions"`` (list of ``{"feature", "fuzzy"}``
            dicts) and ``"output"``.
        data: DataFrame whose columns hold fuzzy level strings.
        labels: labels paired with the rows of ``data`` by position.

    Returns:
        float: number of correct rows divided by ``len(data)``; ``0.0`` when
        ``data`` is empty.
    """
    n_rows = len(data)
    if n_rows == 0:
        # Avoid dividing by zero on an empty frame.
        return 0.0

    # Rows and labels are paired by position, so the score does not depend on
    # the frame carrying a default 0..n-1 index.
    matches = np.ones(n_rows, dtype=bool)
    for cond in rule["conditions"]:
        matches &= (data[cond["feature"]] == cond["fuzzy"]).to_numpy()
    label_hits = np.asarray(labels) == rule["output"]

    return int(np.count_nonzero(matches & label_hits)) / n_rows

def evaluate_population(population, data, labels):
    """Return one fitness value per individual.

    An individual's fitness is the mean of ``evaluate_rule`` over its rules.
    The returned list is in the same order as ``population``.
    """
    def mean_rule_score(rule_set):
        return np.mean([evaluate_rule(single, data, labels) for single in rule_set])

    return [mean_rule_score(rule_set) for rule_set in population]

def crossover(parent1, parent2):
    """Single-point crossover of two rule lists.

    A cut point is drawn uniformly from ``1 .. len(parent1) - 1`` and the
    tails of the two parents are swapped.

    Returns:
        tuple ``(child1, child2)`` of new lists. The lists are new, but the
        rule objects inside them are the same objects held by the parents.
    """
    if len(parent1) < 2:
        # No interior cut point exists (random.randint(1, 0) would raise);
        # hand back unchanged copies of the parents instead.
        return list(parent1), list(parent2)

    point = random.randint(1, len(parent1) - 1)
    child1 = parent1[:point] + parent2[point:]
    child2 = parent2[:point] + parent1[point:]
    return child1, child2

def mutate(rules, mutation_rate, features, output_labels):
    """Randomly perturb the rules in place and return the same list.

    For each rule, with probability ``mutation_rate`` the fuzzy level of one
    randomly chosen condition is redrawn, and (independently, with the same
    probability) the rule's output is redrawn from ``output_labels``.
    ``features`` is accepted but not used.
    """
    fuzzy_levels = ["low", "medium", "high"]
    for current in rules:
        if random.random() < mutation_rate:
            # The replacement level is drawn before the position, so the
            # sequence of random draws stays the same for a given seed.
            replacement = random.choice(fuzzy_levels)
            conds = current["conditions"]
            position = random.randint(0, len(conds) - 1)
            conds[position]["fuzzy"] = replacement
        if random.random() < mutation_rate:
            current["output"] = random.choice(output_labels)
    return rules

def genetic_algorithm(data, labels, features, output_labels, pop_size=20, num_rules=10, num_generations=100, mutation_rate=0.4):
    """Evolve a population of rule sets and return the best one seen.

    Each generation the population is scored with ``evaluate_population``,
    ranked by descending fitness, and the best individual is printed.
    Individuals adjacent in the ranking are paired, recombined with
    ``crossover`` and passed through ``mutate``; the offspring replace the
    whole population.

    Args:
        data, labels: forwarded to ``evaluate_population``.
        features, output_labels: forwarded to ``initialize_population`` and
            ``mutate``.
        pop_size: number of individuals (odd sizes are supported).
        num_rules: rules per individual.
        num_generations: number of generations to run.
        mutation_rate: probability forwarded to ``mutate``.

    Returns:
        tuple ``(best_solution, best_fitness)``. ``best_solution`` is None
        only when ``num_generations`` is 0.
    """
    import copy  # local import: keeps this definition self-contained

    population = initialize_population(pop_size, num_rules, features, output_labels)
    best_fitness = 0
    best_solution = None

    for generation in range(num_generations):
        fitness_scores = evaluate_population(population, data, labels)
        sorted_indices = np.argsort(fitness_scores)[::-1]
        top_index = sorted_indices[0]

        # Always record a solution in the first generation, even if its
        # fitness does not exceed the initial best_fitness of 0.
        if best_solution is None or fitness_scores[top_index] > best_fitness:
            best_fitness = fitness_scores[top_index]
            best_solution = population[top_index]

        print(f"Generation {generation + 1}: Best Fitness = {best_fitness}")

        best_rule_set = population[top_index]
        print("Best Rule Set:")
        for rule in best_rule_set:
            conditions = " AND ".join(
                [f"{cond['feature']} IS {cond['fuzzy']}" for cond in rule["conditions"]]
            )
            print(f"IF {conditions}, THEN OUTPUT IS {rule['output']}")

        new_population = []
        for i in range(0, pop_size, 2):
            parent1 = population[sorted_indices[i]]
            # Wrap around so an odd pop_size pairs the last individual with
            # the top-ranked one instead of indexing past the end.
            parent2 = population[sorted_indices[(i + 1) % pop_size]]
            child1, child2 = crossover(parent1, parent2)
            for child in (child1, child2):
                # Mutate a deep copy: the children share rule dicts with
                # their parents, and mutating those in place would silently
                # alter the recorded best_solution.
                new_population.append(
                    mutate(copy.deepcopy(child), mutation_rate, features, output_labels)
                )
        # An odd pop_size produces one surplus child; keep the size constant.
        population = new_population[:pop_size]

    return best_solution, best_fitness

# Load the dataset extracted from the Kaggle archive.
data = pd.read_csv("data.csv")
# Non-inplace rename: the result is a new frame bound to the same name.
data = data.rename(columns={'concave points_mean': 'concave_points_mean'})

# Class labels as readable strings; the raw column is then dropped.
labels = data["diagnosis"].map({"B": "benign", "M": "malignant"})
data = data.drop(columns=["diagnosis"])

# Features used as rule antecedents
features = [
    "radius_mean",
    "texture_mean",
    "perimeter_mean",
    "area_mean",
    "smoothness_mean",
    "compactness_mean",
    "concavity_mean",
    "concave_points_mean",
]
# Explicit copy so that later column assignments write to an independent
# frame rather than to a selection of the loaded one.
data = data[features].copy()

def fuzzify(value):
    """Map a number to a fuzzy level string.

    Values below 0.33 are "low", values below 0.66 are "medium", and
    everything else is "high".
    """
    if value < 0.33:
        return "low"
    return "medium" if value < 0.66 else "high"

# Replace every feature column by its fuzzy level. Values are scaled by the
# column maximum before fuzzification; the maximum is computed once per
# column instead of once per row.
for feature in features:
    feature_max = data[feature].max()
    data[feature] = (data[feature] / feature_max).apply(fuzzify)

random.seed(42)  # fixed seed so a re-run reproduces the same rule base
output_labels = ["benign", "malignant"]
best_solution, best_fitness = genetic_algorithm(data, labels, features, output_labels)

print("Best Solution:")
# best_solution is None when no generation exceeded the initial best fitness
# of 0; print nothing in that case instead of failing.
for rule in best_solution or []:
    conditions = " AND ".join(
        [f"{cond['feature']} IS {cond['fuzzy']}" for cond in rule["conditions"]]
    )
    print(f"IF {conditions}, THEN OUTPUT IS {rule['output']}")
print(f"Best Fitness: {best_fitness}")

# Classify every row with the evolved rule base: the first rule whose
# conditions all match decides the class ('malignant' only if that rule says
# so); rows matched by no rule default to 'benign'.
final_predictions = []
for _, data_point in data.iterrows():
    prediction = 'benign'
    for rule in best_solution:
        if all(data_point[cond['feature']] == cond['fuzzy'] for cond in rule['conditions']):
            if rule['output'] == 'malignant':
                prediction = 'malignant'
            break
    final_predictions.append(prediction)

# 0/1 encodings (benign=0, malignant=1) for the metrics that need numbers.
labels_numeric = labels.map({'benign': 0, 'malignant': 1})
predictions_numeric = [1 if pred == 'malignant' else 0 for pred in final_predictions]

accuracy = accuracy_score(labels, final_predictions)
print(f"Accuracy: {accuracy:.4f}")

mse = mean_squared_error(labels_numeric, predictions_numeric)
print(f"Mean Squared Error (MSE): {mse:.4f}")

rmse = np.sqrt(mse)
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")

auc = roc_auc_score(labels_numeric, predictions_numeric)
print(f"Area Under the Curve (AUC): {auc:.4f}")


Generation 1: Best Fitness = 0.0725834797891037
Best Rule Set:
IF texture_mean IS medium AND smoothness_mean IS medium AND compactness_mean IS medium, THEN OUTPUT IS malignant
IF smoothness_mean IS high AND compactness_mean IS high AND area_mean IS high, THEN OUTPUT IS malignant
IF compactness_mean IS low AND radius_mean IS low AND area_mean IS medium, THEN OUTPUT IS benign
IF radius_mean IS medium AND perimeter_mean IS medium AND smoothness_mean IS medium, THEN OUTPUT IS benign
IF texture_mean IS high AND concave_points_mean IS low AND radius_mean IS high, THEN OUTPUT IS benign
IF concave_points_mean IS low AND smoothness_mean IS medium AND radius_mean IS low, THEN OUTPUT IS benign
IF perimeter_mean IS low AND concavity_mean IS low AND area_mean IS low, THEN OUTPUT IS benign
IF compactness_mean IS medium AND concavity_mean IS high AND smoothness_mean IS high, THEN OUTPUT IS benign
IF smoothness_mean IS high AND concave_points_mean IS low AND texture_mean IS medium, THEN OUTPUT IS mali