In [None]:
# Import all libraries being used
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import ipywidgets as widgets
from IPython.display import display

# Load the penguin measurements from penguins_size.csv into the working dataframe `df`.
df = pd.read_csv('penguins_size.csv')

# Map each original snake_case column name to a more readable label (units kept in the name).
column_labels = {
    'species': 'Species',
    'island': 'Island',
    'culmen_length_mm': 'CulmenLength (mm)',
    'culmen_depth_mm': 'CulmenDepth (mm)',
    'flipper_length_mm': 'FlipperLength (mm)',
    'body_mass_g': 'BodyMass (g)',
    'sex': 'Sex',
}

# errors='raise' makes the rename fail loudly if the CSV lacks any of the expected columns.
df.rename(columns=column_labels, inplace=True, errors='raise')

# Show the dataframe with its renamed columns.
print(df)

In [None]:
# Drop the columns that are not used as model features, then discard records with missing data.
# The drop is done before dropna() so that gaps in 'Island'/'Sex' do not remove usable rows.
# errors='ignore' (and no inplace mutation) keeps this cell re-runnable: once the two columns
# are gone, running the cell again no longer raises KeyError.
df = df.drop(columns=['Island', 'Sex'], errors='ignore').dropna()

# Print to view all data values after the data has been cleaned.
print(df)

In [None]:
# Plot the pairwise relationships in the dataframe, colouring the points by species.
df_plot = sb.pairplot(data=df, hue='Species')

# Give the whole grid a heading; y=1.01 places it slightly above the top of the figure.
df_plot.fig.suptitle(
    'Penguin Pairplot',
    y=1.01,
    fontsize=20,
    fontweight='bold',
)


In [None]:
# Compare the distribution of culmen length between species with a violin plot.
sb.violinplot(data=df, x='Species', y='CulmenLength (mm)')

# Title the current axes; y=1.01 lifts the text slightly above the plot area.
plt.title(
    'Culmen Length By Species',
    y=1.01,
    fontsize=15,
    fontweight='bold',
)

In [None]:
# Separate the four body measurements (features) from the species label (target).
feature_columns = ['CulmenLength (mm)', 'CulmenDepth (mm)', 'FlipperLength (mm)', 'BodyMass (g)']
dfx = df[feature_columns]
dfy = df['Species']

# Hold out 20% of the records for testing; the fixed random_state keeps the split repeatable.
trainDFX, testDFX, trainDFY, testDFY = train_test_split(
    dfx,
    dfy,
    test_size=0.2,
    random_state=40,
)

# Fit a linear-kernel support vector classifier on the training portion (fit returns the model).
svm_model = SVC(kernel='linear').fit(trainDFX, trainDFY)

# Predict the held-out records and print the per-class classification report.
predictDFY = svm_model.predict(testDFX)
print(classification_report(testDFY, predictDFY))

In [None]:
# Build the confusion matrix comparing the actual and predicted species.
# The result gets its own name: rebinding `confusion_matrix` to the returned array would
# shadow the imported sklearn function, so re-running this cell would fail with
# "TypeError: 'numpy.ndarray' object is not callable".
# The class list comes from the fitted model and is passed as labels= so the matrix always has
# one row/column per trained class, in the same order as the headings built below.
species_labels = list(svm_model.classes_)
confusion_counts = confusion_matrix(testDFY, predictDFY, labels=species_labels)

# Store results within a dataframe whose row/column headings are derived from the class list
# (rows are the actual species, columns are the predicted species).
confusionDF = pd.DataFrame(
    confusion_counts,
    index=[f'Actual {label}' for label in species_labels],
    columns=[f'Predicted {label}' for label in species_labels],
)

# Plot the confusion matrix and set all required labels and features.
plt.figure(figsize=(8, 6))
sb.heatmap(confusionDF, annot=True, fmt='d', cmap='Blues')  # fmt='d' shows integer counts
plt.title('Penguin Species Prediction Confusion Matrix', fontsize=15, fontweight='bold')
plt.xlabel('Predicted Species')
plt.ylabel('Actual Species')
plt.show()


In [None]:
# Enable user input to predict the species based on given values.
# The four measurement boxes differ only in their caption, so one helper builds them all.
def _measurement_input(label):
    """Return a FloatText box captioned `label`, starting at 0, with a 150px-wide caption."""
    return widgets.FloatText(description=label, value=0, style={'description_width': '150px'})


# One numeric input per model feature.
culmen_length_input = _measurement_input('Culmen Length (mm):')
culmen_depth_input = _measurement_input('Culmen Depth (mm):')
flipper_length_input = _measurement_input('Flipper Length (mm):')
body_mass_input = _measurement_input('Body Mass (g):')

# Button that triggers the prediction, and the output area that will show the result.
predict_button = widgets.Button(description='Predict')
prediction_output = widgets.Output()

# Render the inputs, the button and the output area, in that order.
display(
    culmen_length_input,
    culmen_depth_input,
    flipper_length_input,
    body_mass_input,
    predict_button,
    prediction_output,
)

# Create the user input prediction function.
def predict_user_input(b):
    """Predict the species for the values currently entered in the input widgets.

    Reads the four measurement widgets, builds a one-row dataframe with the same
    column names the model was trained on, and prints the predicted species into
    `prediction_output`. Each click replaces the previous message.

    Parameters
    ----------
    b : the button that was clicked (supplied by `on_click`; not used here).
    """
    # Pull the values from the user input widgets, keyed by the training column names
    # and in the same column order that was used to fit the model.
    measurements = {
        'CulmenLength (mm)': culmen_length_input.value,
        'CulmenDepth (mm)': culmen_depth_input.value,
        'FlipperLength (mm)': flipper_length_input.value,
        'BodyMass (g)': body_mass_input.value,
    }

    # Replace the previous result instead of stacking a new line on every click.
    # wait=True defers the clear until the new text arrives, which avoids flicker.
    prediction_output.clear_output(wait=True)

    with prediction_output:
        # The inputs default to 0, and a non-positive measurement is not a real penguin;
        # predicting on it would return a confident-looking but meaningless species.
        # `not value > 0` also rejects NaN.
        invalid = [name for name, value in measurements.items() if not value > 0]
        if invalid:
            print(f"Enter a value greater than 0 for: {', '.join(invalid)}")
            return

        # Create the single-row input record for prediction.
        input_record = pd.DataFrame([measurements])

        # Make the prediction using the input record.
        predict_input = svm_model.predict(input_record)

        # Display the prediction to the user. The message is simply printed: the Output
        # widget captures printed text and has no `value` trait to store it in.
        print(f'Predicted species: {predict_input[0]}')

# Set the action for clicking the button to run the function.
predict_button.on_click(predict_user_input)