In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from keras import models, layers
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display

# Load the dataset
df = pd.read_csv('data.csv', delimiter=';')

# Display the first few rows to ensure it's loaded correctly
print(df.head())
print(df.info())

# Identify the target variable
target = df['label']

# Identify the feature variables
features = df.drop(columns=['label'])

# Standardize the feature variables
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(features_scaled, target, test_size=0.2, random_state=42)

# Initialize the OneHotEncoder
encoder = OneHotEncoder(sparse=False)

# Fit and transform the training labels
y_train_encoded = encoder.fit_transform(y_train.values.reshape(-1, 1))

# Transform the testing labels
y_test_encoded = encoder.transform(y_test.values.reshape(-1, 1))

# Define the model
model = models.Sequential()
model.add(layers.Dense(6, activation='relu', input_shape=(x_train.shape[1],)))
model.add(layers.Dense(5, activation='relu'))
model.add(layers.Dense(y_train_encoded.shape[1], activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
val_epochs = 100
history = model.fit(x_train, y_train_encoded, epochs=val_epochs, batch_size=32, validation_split=0.2)

# Evaluate the model
test_loss, test_accuracy = model.evaluate(x_test, y_test_encoded)
print(f"Test accuracy: {test_accuracy}")

# Plotting training history
history_dict = history.history
epochs = range(1, val_epochs + 1)

plt.plot(epochs, history_dict['loss'], 'r', label='Training loss')
plt.plot(epochs, history_dict['val_loss'], 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

plt.plot(epochs, history_dict['accuracy'], 'r', label='Training acc')
plt.plot(epochs, history_dict['val_accuracy'], 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Function to get features from the user using widgets
def get_user_input():
    user_input = []
    input_widgets = []
    for col in features.columns:
        widget = widgets.FloatText(description=col)
        input_widgets.append(widget)
        display(widget)
    
    button = widgets.Button(description="Submit")
    output = widgets.Output()
    display(button, output)
    
    def on_button_clicked(b):
        with output:
            output.clear_output()
            for widget in input_widgets:
                user_input.append(widget.value)
            user_input_array = np.array(user_input).reshape(1, -1)
            predict_user_input(model, scaler, encoder, user_input_array)
    
    button.on_click(on_button_clicked)

# Make predictions based on user input
def predict_user_input(model, scaler, encoder, user_input):
    user_input_scaled = scaler.transform(user_input)
    prediction = model.predict(user_input_scaled)
    predicted_label = encoder.inverse_transform(prediction)
    print(f"Predicted label: {predicted_label[0][0]}")

# Save the model and scaler
model.save('tree_classifier_model.h5')
import joblib
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(encoder, 'encoder.pkl')

# Load the model and scaler (for future use)
# model = models.load_model('tree_classifier_model.h5')
# scaler = joblib.load('scaler.pkl')
# encoder = joblib.load('encoder.pkl')

# Get user input and predict
get_user_input()


   label  Area(cm^2)    W/L  Patiole Length(mm)  Number of Pairs  Ratio
0      1        43.4  0.485                 5.0               22   66.0
1      1        40.0  0.454                 5.0               16   62.5
2      1        35.1  0.439                 5.0               17   60.0
3      1        50.1  0.442                 5.0               18   59.0
4      1        40.3  0.463                 5.0               19   60.0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   label               120 non-null    int64  
 1   Area(cm^2)          120 non-null    float64
 2   W/L                 120 non-null    float64
 3   Patiole Length(mm)  120 non-null    float64
 4   Number of Pairs     120 non-null    int64  
 5   Ratio               120 non-null    float64
dtypes: float64(4), int64(2)
memory usage: 5.8 KB
None


TypeError: OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'