In [None]:
pip install numpy pandas tensorflow scikit-learn deap openpyxl




In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import random
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from deap import base, creator, tools, algorithms


In [None]:
# Attach Google Drive to the Colab filesystem so files stored there can be read
# and written through ordinary paths below the mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# Define the file path (Modify it according to your Google Drive structure)
file_path = "/content/drive/My Drive/ohh.xlsx"

# Load dataset
df = pd.read_excel(file_path)

# Encode categorical (object-dtype) columns as integer codes. The fitted encoders are
# kept per column so the codes can be mapped back to the original strings later.
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col].astype(str))

# Define features and target.
# The target is re-coded to contiguous class ids 0..K-1. If 'Compatibility' was an
# object column it already is (this is then an identity mapping); if it was numeric
# (e.g. labels 1/2) the raw values would otherwise reach the network unchanged, and
# labels outside [0, 1] are what makes a cross-entropy loss go negative.
target_encoder = LabelEncoder()
y = pd.Series(
    target_encoder.fit_transform(df['Compatibility']),
    index=df.index,
    name='Compatibility',
)
X = df.drop(columns=['Compatibility'])

# Split dataset into 70% train / 15% validation / 15% test BEFORE scaling.
# The row assignment depends only on the row count and random_state, so the splits
# contain the same rows as when the split was done on pre-scaled data.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Normalize features.
# The scaler is fit on the training rows only; fitting it on the full dataset leaks
# validation/test statistics into training and makes held-out scores optimistic.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_temp = scaler.transform(X_temp)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
# Keep `X` as a scaled array (as before), now using the train-only statistics.
X = scaler.transform(X)

# DEAP's `creator.create` adds a new class to the `creator` module namespace.
# Re-running this cell would define the same classes again, so each class is only
# created when its name is not already present in that namespace.
_creator_namespace = vars(creator)

if "FitnessMax" not in _creator_namespace:
    # Single-objective fitness; the positive weight means "maximize".
    creator.create("FitnessMax", base.Fitness, weights=(1.0,))

if "Individual" not in _creator_namespace:
    # An individual is a plain list of genes carrying a FitnessMax attribute.
    creator.create("Individual", list, fitness=creator.FitnessMax)

# Define Genetic Algorithm setup
def create_individual():
    """Sample one random hyperparameter set for the genetic algorithm.

    Returns:
        list: ``[learning_rate, hidden_layer_size, l2_strength]`` where
        ``learning_rate`` is a float drawn uniformly from [0.0001, 0.01],
        ``hidden_layer_size`` is an int drawn from 16..256 (inclusive), and
        ``l2_strength`` is a float drawn uniformly from [0.0001, 0.1].
    """
    # The draw order (uniform, randint, uniform) determines which values a given
    # random seed produces, so it is kept fixed.
    learning_rate = random.uniform(0.0001, 0.01)
    hidden_units = random.randint(16, 256)
    l2_strength = random.uniform(0.0001, 0.1)  # strictly positive by construction
    return [learning_rate, hidden_units, l2_strength]

def eval_nn(individual):
    """GA fitness function: train a one-hidden-layer classifier and score it on validation data.

    Reads the module-level ``X_train``, ``y_train``, ``X_val`` and ``y_val`` splits.

    Args:
        individual: Sequence of three genes
            ``[learning_rate, hidden_layer_size, l2_alpha]``. Genes may be arbitrary
            floats after crossover/mutation; they are sanitized before use.

    Returns:
        tuple: One-element tuple ``(val_acc,)`` with the validation accuracy, in the
        shape DEAP expects for a single-objective fitness.
    """
    lr, hidden_layer_size, alpha = individual

    # Blend crossover and Gaussian mutation work on raw floats and can push any gene
    # to zero or below; force every hyperparameter back into a valid range.
    lr = max(1e-5, lr)                                  # Adam needs a positive step size
    hidden_layer_size = max(1, int(hidden_layer_size))  # Dense needs >= 1 unit
    alpha = max(0.0001, alpha)                          # L2 factor must be non-negative

    # Map the labels onto contiguous class ids 0..K-1. The saved training log of this
    # notebook shows a *negative* binary cross-entropy, which can only happen when
    # labels lie outside [0, 1]; a softmax head with sparse categorical cross-entropy
    # handles any number of classes (including exactly two).
    y_train_arr = np.asarray(y_train)
    y_val_arr = np.asarray(y_val)
    class_values = np.unique(np.concatenate([y_train_arr, y_val_arr]))
    y_train_ids = np.searchsorted(class_values, y_train_arr)
    y_val_ids = np.searchsorted(class_values, y_val_arr)

    # This function is called once per individual per generation; drop Keras' global
    # state from previous models so memory use does not grow over the GA run.
    tf.keras.backend.clear_session()

    model = Sequential([
        Dense(hidden_layer_size, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(alpha)),
        Dropout(0.2),
        Dense(len(class_values), activation='softmax')
    ])
    model.compile(optimizer=Adam(learning_rate=lr), loss=SparseCategoricalCrossentropy(), metrics=['accuracy'])

    # Short training budget; restore_best_weights makes the evaluation below use the
    # epoch with the lowest validation loss.
    early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
    model.fit(X_train, y_train_ids, validation_data=(X_val, y_val_ids), epochs=5, batch_size=32, verbose=0, callbacks=[early_stopping])

    _, val_acc = model.evaluate(X_val, y_val_ids, verbose=0)

    return (val_acc,)

# Genetic Algorithm parameters

# Allowed range per gene, in gene order [learning rate, hidden layer size, L2 factor].
# These are the same ranges create_individual() samples from.
GENE_BOUNDS = [(0.0001, 0.01), (16, 256), (0.0001, 0.1)]


def clip_to_bounds(bounds):
    """Build a DEAP operator decorator that clamps every gene of every child.

    Args:
        bounds: Sequence of ``(low, high)`` pairs, one per gene position.

    Returns:
        A decorator for variation operators (mate/mutate) that return an iterable of
        individuals; each returned individual is clamped in place.
    """
    def decorator(operator):
        def wrapper(*args, **kwargs):
            offspring = operator(*args, **kwargs)
            for child in offspring:
                for i, (low, high) in enumerate(bounds):
                    child[i] = min(max(child[i], low), high)
            return offspring
        return wrapper
    return decorator


toolbox = base.Toolbox()
toolbox.register("individual", tools.initIterate, creator.Individual, create_individual)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("mate", tools.cxBlend, alpha=0.5)
# Per-gene mutation scale: the genes span very different magnitudes (about 1e-3 for the
# learning rate vs. about 1e2 for the layer size), so one shared sigma is far too large
# for some genes and negligible for others.
toolbox.register("mutate", tools.mutGaussian, mu=[0.0, 0.0, 0.0], sigma=[0.001, 16.0, 0.01], indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", eval_nn)

# Blend crossover and Gaussian mutation are unbounded; without clamping, offspring can
# leave the search space (even to non-positive learning rates or layer sizes).
toolbox.decorate("mate", clip_to_bounds(GENE_BOUNDS))
toolbox.decorate("mutate", clip_to_bounds(GENE_BOUNDS))

# Run GA
# Seed Python's RNG so the sampled population and the variation steps are repeatable.
# (Network weight initialisation is not seeded here, so fitness values can still vary.)
random.seed(42)
population = toolbox.population(n=20)
# eaSimple has no elitism: the best individual seen can be lost before the final
# generation. The hall of fame keeps the best individual ever evaluated.
hall_of_fame = tools.HallOfFame(1)
algorithms.eaSimple(population, toolbox, cxpb=0.5, mutpb=0.2, ngen=10, halloffame=hall_of_fame, verbose=True)

# Best individual
best_ind = hall_of_fame[0]
print("Best Hyperparameters:", best_ind)

# Train Final Model with Best Hyperparameters
lr, hidden_layer_size, alpha = best_ind
# Genes are raw floats produced by crossover/mutation; make each one a valid setting.
lr = max(1e-5, lr)                                  # Adam needs a positive step size
hidden_layer_size = max(1, int(hidden_layer_size))  # Dense needs >= 1 unit
alpha = max(0.0001, alpha)  # Fix: Ensure L2 value is valid

# Map labels onto contiguous class ids 0..K-1 (identity if they already are).
# The saved training log shows a negative binary cross-entropy, which is only possible
# with labels outside [0, 1]; a softmax head with sparse categorical cross-entropy is
# correct for any number of classes, including two.
class_values = np.unique(np.concatenate([np.asarray(y_train), np.asarray(y_val), np.asarray(y_test)]))
y_train_ids = np.searchsorted(class_values, np.asarray(y_train))
y_val_ids = np.searchsorted(class_values, np.asarray(y_val))
y_test_ids = np.searchsorted(class_values, np.asarray(y_test))

final_model = Sequential([
    Dense(hidden_layer_size, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(alpha)),
    Dropout(0.2),
    Dense(len(class_values), activation='softmax')
])
final_model.compile(optimizer=Adam(learning_rate=lr), loss=SparseCategoricalCrossentropy(), metrics=['accuracy'])
final_model.fit(X_train, y_train_ids, validation_data=(X_val, y_val_ids), epochs=5, batch_size=32, verbose=1)

# Save the model to Google Drive
model_save_path = "/content/drive/My Drive/best_cow_breeding_model.h5"
final_model.save(model_save_path)

# Final Evaluation: loss and accuracy on each split. The test split is not used
# anywhere else in this cell, so its score is the unbiased estimate.
train_loss, train_acc = final_model.evaluate(X_train, y_train_ids, verbose=0)
val_loss, val_acc = final_model.evaluate(X_val, y_val_ids, verbose=0)
test_loss, test_acc = final_model.evaluate(X_test, y_test_ids, verbose=0)

print(f"Training Accuracy: {train_acc:.4f}")
print(f"Validation Accuracy: {val_acc:.4f}")
print(f"Testing Accuracy: {test_acc:.4f}")


Exception ignored in: <function _xla_gc_callback at 0x7ffadb0be980>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/jax/_src/lib/__init__.py", line 96, in _xla_gc_callback
    def _xla_gc_callback(*args):
    
KeyboardInterrupt: 


gen	nevals
0  	20    
1  	13    
2  	8     
3  	9     
4  	18    
5  	12    
6  	10    
7  	11    
8  	15    
9  	12    
10 	11    
Best Hyperparameters: [0.010194541239590162, 172.753684594126, 0.11003190291622626]
Epoch 1/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.4965 - loss: -77.4730 - val_accuracy: 0.5008 - val_loss: -854.1169
Epoch 2/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4939 - loss: -1543.5431 - val_accuracy: 0.5008 - val_loss: -3963.3345
Epoch 3/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4874 - loss: -5507.9556 - val_accuracy: 0.5008 - val_loss: -9180.2979
Epoch 4/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.4964 - loss: -11459.4277 - val_accuracy: 0.5008 - val_loss: -16397.8164
Epoch 5/50
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5136 



Training Accuracy: 0.4989
Validation Accuracy: 0.5008
Testing Accuracy: 0.5042


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Random-forest baseline on the same splits.
# Requires X_train, X_test, y_train and y_test to exist already (created by the
# data-preparation cell). Note that `train_acc` and `test_acc` are re-assigned here.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predicted labels for the training and test rows
y_train_pred, y_test_pred = (rf_model.predict(features) for features in (X_train, X_test))

# Fraction of correctly predicted labels on each split
train_acc = accuracy_score(y_train, y_train_pred)
test_acc = accuracy_score(y_test, y_test_pred)

print(f"Random Forest Training Accuracy: {train_acc:.4f}")
print(f"Random Forest Testing Accuracy: {test_acc:.4f}")


Random Forest Training Accuracy: 1.0000
Random Forest Testing Accuracy: 0.5100
