In [1]:
import pandas as pd

# Load the kyphosis dataset into `data`.
import os

# The default is the CSV location used when this notebook was written. Set the
# KYPHOSIS_CSV environment variable to point at your own copy instead of
# editing this cell. (The space before ".csv" is part of the default file name.)
file_path = os.environ.get(
    'KYPHOSIS_CSV',
    r'C:\Users\USER\Desktop\data mining.git\python\data-Kyphosis .csv',
)
data = pd.read_csv(file_path)

# The modelling cell uses 'Kyphosis' as the label and every other column as a
# feature, so fail early with a clear message if the label column is missing.
if 'Kyphosis' not in data.columns:
    raise KeyError(
        f"Expected a 'Kyphosis' column in {file_path!r}; found {list(data.columns)}"
    )

# Quick look at the first rows to confirm the file was parsed as expected.
print(data.head())


  Kyphosis  Age  Number  start
0   absent   71       3      5
1   absent  158       3     14
2  present  128       4      5
3   absent    2       5      1
4   absent    1       4     15


In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
import numpy as np

# Encode 'Kyphosis' as numerical values (absent -> 0, present -> 1).
# Series.map turns every value that is missing from the mapping into NaN, so
# blindly re-running this cell would map the already-encoded 0/1 column to
# all-NaN. Only encode while the column is still non-numeric, and reject
# labels that are not in the mapping instead of silently producing NaN.
label_codes = {'absent': 0, 'present': 1}
if not pd.api.types.is_numeric_dtype(data['Kyphosis']):
    encoded = data['Kyphosis'].map(label_codes)
    if encoded.isna().any():
        unknown = sorted(set(data.loc[encoded.isna(), 'Kyphosis'].astype(str)))
        raise ValueError(f"Unexpected 'Kyphosis' labels: {unknown}")
    data['Kyphosis'] = encoded.astype('int64')

# Split the dataset: the label column is the target, all other columns are features.
X = data.drop('Kyphosis', axis=1)
y = data['Kyphosis']

# One seed shared by the train/test split and the forest keeps the run reproducible.
RANDOM_SEED = 5678
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=RANDOM_SEED
)

# Train the model
rf_model = RandomForestClassifier(n_estimators=100, random_state=RANDOM_SEED)
rf_model.fit(X_train, y_train)

# Predictions on the held-out 30% of the rows
y_pred = rf_model.predict(X_test)

# Model performance
# NOTE(review): read the accuracy together with the confusion matrix; accuracy
# alone can hide poor recall on the less frequent class.
# labels=[0, 1] keeps the matrix 2x2 (rows = actual, columns = predicted) even
# if a split happens to contain only one class.
print('Accuracy:', accuracy_score(y_test, y_pred))
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred, labels=[0, 1]))


Accuracy: 0.8333333333333334
Confusion Matrix:
 [[19  0]
 [ 4  1]]


In [3]:
import plotly.express as px
import plotly.graph_objects as go

# Importance scores from the fitted forest, alongside the feature column names
importances = rf_model.feature_importances_
feature_names = X.columns

# Tabulate feature/score pairs, then rank them from most to least important
feature_importance_df = pd.DataFrame(
    {'Feature': feature_names, 'Importance': importances}
)
feature_importance_df = feature_importance_df.sort_values(
    by='Importance', ascending=False
)

# Bar chart of the ranked importances (one bar per feature)
bar_options = dict(
    x='Feature',
    y='Importance',
    title='Feature Importances from Random Forest Model',
    labels={'Importance': 'Importance Score'},
    template='plotly_white',
)
fig = px.bar(feature_importance_df, **bar_options)

fig.show()


In [4]:
# Build the confusion matrix for the test-set predictions.
# labels=[0, 1] pins the row/column order and guarantees a 2x2 result, so the
# matrix always lines up with the hard-coded tick labels below.
conf_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Plot the matrix as a heatmap: rows (y) are the actual classes, columns (x)
# are the predicted classes. texttemplate prints each count inside its cell so
# the values can be read without hovering.
# NOTE(review): heatmap texttemplate needs a reasonably recent plotly (5.5+, I
# believe) — confirm against the installed version.
fig = go.Figure(data=go.Heatmap(
                z=conf_matrix,
                x=['Predicted 0', 'Predicted 1'],
                y=['Actual 0', 'Actual 1'],
                texttemplate='%{z}',
                colorscale='Viridis'))

fig.update_layout(title='Confusion Matrix', xaxis_title='Predicted', yaxis_title='Actual')
# Plotly places the first y category at the bottom of the plot; reverse the
# axis so 'Actual 0' is the top row, matching the printed matrix layout.
fig.update_yaxes(autorange='reversed')
fig.show()
