Load the dataset from a CSV file into a DataFrame.

In [None]:
import pandas as pd
# Read the CSV (path is relative to the current working directory) into `df`,
# the DataFrame that the cells below operate on.
df = pd.read_csv(filepath_or_buffer='KaggleV2-May-2016.csv')

Clean the data by dropping every row that contains a missing value.

In [None]:
# Drop, in place, every row that has at least one missing value
# (axis=0 and how='any' are the pandas defaults, spelled out for clarity).
df.dropna(axis=0, how='any', inplace=True)

Encode the 'Show' feature as integer category codes.

In [None]:
# Convert 'Show' to a pandas categorical, then overwrite the column with the
# integer code assigned to each category.
show_categories = df['Show'].astype('category')
df['Show'] = show_categories.cat.codes

Create a binary encoded feature for 'Male'.

In [None]:
# Binary indicator: 1 where 'Gender' marks the row as male, 0 otherwise.
# Uses a vectorized membership test instead of calling a Python lambda per row.
# Both the spelled-out 'Male' and the one-letter 'M' code are accepted.
# NOTE(review): the public Kaggle file of this name appears to encode gender as
# 'M'/'F', in which case matching only 'Male' would yield all zeros — confirm
# against the actual data.
df['Male'] = df['Gender'].isin(['Male', 'M']).astype(int)

Create age bins for the 'Age' column.

In [None]:
# Bucket 'Age' into five labelled ranges.
# - include_lowest=True keeps age 0 in the '0-18' bucket; with pd.cut's default
#   right-closed intervals the first bin would be (0, 18] and age 0 would
#   become NaN.
# - The top edge is open-ended so the '66+' label covers every age above 65
#   instead of turning ages over 100 into NaN.
# Negative ages still fall outside every bin and come out as NaN.
age_edges = [0, 18, 35, 50, 65, float('inf')]
age_labels = ['0-18', '19-35', '36-50', '51-65', '66+']
df['Age Bin'] = pd.cut(
    df['Age'],
    bins=age_edges,
    labels=age_labels,
    include_lowest=True,
)

Create a feature indicating if a person is both diabetic and hypertensive.

In [None]:
# 1 when both the 'Diabetic' and 'Hypertensive' values of a row are truthy,
# else 0. Computed column-wise (boolean AND of the two columns) rather than
# with a row-by-row df.apply, which runs a Python call per row.
df['Diabetic & Hypertensive'] = (
    df['Diabetic'].astype(bool) & df['Hypertensive'].astype(bool)
).astype(int)

Normalize the 'Wait Days' feature.

In [None]:
# Standardize 'Wait Days' to a z-score: subtract the column mean, then divide
# by the column standard deviation. Both statistics are taken from the
# original values before the column is overwritten.
wait_days = df['Wait Days']
centered = wait_days - wait_days.mean()
df['Wait Days'] = centered / wait_days.std()

Select features and labels for model training.

In [None]:
# Model inputs. .copy() so the encoding below modifies only `features`, not df.
feature_columns = ['Show', 'Male', 'Age Bin', 'Diabetic & Hypertensive', 'Wait Days']
features = df[feature_columns].copy()
# 'Age Bin' holds string-labelled categories ('0-18', '19-35', ...), which a
# scikit-learn forest cannot be fitted on. Replace it with integer category
# codes; rows whose age fell outside every bin (NaN) get the code -1.
features['Age Bin'] = features['Age Bin'].astype('category').cat.codes
# NOTE(review): 'Show' is fed in as a feature while 'Target' is the label —
# confirm they are not the same outcome, otherwise the label leaks into the
# inputs.
labels = df['Target']

Split the dataset into training and testing sets.

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

Train a Random Forest model using the training data.

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Fit a random forest on the training split. The forest is seeded with the
# same random_state as the train/test split so that the fitted model, and the
# accuracy reported from it, are reproducible between runs.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

Evaluate the model's accuracy on the test set.

In [None]:
# Score the fitted model on the held-out test split (for a scikit-learn
# classifier, score() is the mean accuracy of its predictions).
accuracy = model.score(X=X_test, y=y_test)

Visualize the model evaluation results.

In [None]:
import matplotlib.pyplot as plt
# Draw the test accuracy as a single labelled bar, using the Axes interface.
figure, axes = plt.subplots()
axes.bar(['Model Accuracy'], [accuracy])
axes.set_ylabel('Accuracy')
axes.set_title('Model Evaluation')
plt.show()

Perform show-rate analysis: compute the proportion of rows in each 'Show' category.

In [None]:
# Fraction of rows that fall in each distinct 'Show' value (fractions sum to 1).
show_rate = df['Show'].value_counts(normalize=True)
# value_counts() is indexed by the distinct 'Show' values, not by df's row
# labels, so assigning it straight to a column would align on the row index and
# leave almost every row NaN. Look each row's 'Show' value up instead, so every
# row carries the proportion of its own category.
df['Show Rate'] = df['Show'].map(show_rate)