In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier

# Play Tennis dataset, written one observation (day) per row so each record
# can be read at a glance; it is transposed into per-column lists below.
tennis_columns = ['Outlook', 'Temperature', 'Humidity', 'Wind', 'PlayTennis']
tennis_rows = [
    ('Sunny',    'Hot',  'High',   'Weak',   'No'),   # day 1
    ('Sunny',    'Hot',  'High',   'Strong', 'No'),   # day 2
    ('Overcast', 'Hot',  'High',   'Weak',   'Yes'),  # day 3
    ('Rain',     'Mild', 'High',   'Weak',   'Yes'),  # day 4
    ('Rain',     'Cool', 'Normal', 'Weak',   'Yes'),  # day 5
    ('Rain',     'Cool', 'Normal', 'Strong', 'No'),   # day 6
    ('Overcast', 'Cool', 'Normal', 'Strong', 'Yes'),  # day 7
    ('Sunny',    'Mild', 'High',   'Weak',   'No'),   # day 8
    ('Sunny',    'Cool', 'Normal', 'Weak',   'Yes'),  # day 9
    ('Rain',     'Mild', 'Normal', 'Weak',   'Yes'),  # day 10
    ('Sunny',    'Mild', 'Normal', 'Strong', 'Yes'),  # day 11
    ('Overcast', 'Mild', 'High',   'Strong', 'Yes'),  # day 12
    ('Overcast', 'Hot',  'Normal', 'Weak',   'Yes'),  # day 13
    ('Rain',     'Mild', 'High',   'Strong', 'No'),   # day 14
]

# Column-oriented view: {column name -> list of 14 string values}.
data = {
    name: list(values)
    for name, values in zip(tennis_columns, zip(*tennis_rows))
}

# Tabular form used by the rest of the notebook (14 rows x 5 string columns).
df = pd.DataFrame(data)

# Replace every string column (the four features and the target) with integer
# codes, keeping the fitted encoder of each column so that raw strings can be
# encoded later and predictions decoded back to 'Yes'/'No'.
# LabelEncoder assigns codes in sorted class order, e.g. for 'Outlook':
# Overcast -> 0, Rain -> 1, Sunny -> 2.
# NOTE(review): scikit-learn documents LabelEncoder as a target encoder
# (OrdinalEncoder is the feature-side equivalent); it is kept here because
# this per-column dict is what the prediction and persistence steps consume.
label_encoders = {}
for name in df.columns:
    encoder = LabelEncoder()
    df[name] = encoder.fit_transform(df[name])  # overwrites the strings in df
    label_encoders[name] = encoder
# label_encoders now maps each column name to its fitted LabelEncoder:
# {'Outlook': <LabelEncoder>, 'Temperature': <LabelEncoder>, ..., 'PlayTennis': <LabelEncoder>}

# Split the encoded frame into the feature matrix and the target vector.
feature_columns = ['Outlook', 'Temperature', 'Humidity', 'Wind']
target_column = 'PlayTennis'
X = df[feature_columns]
y = df[target_column]

# Fit a small random forest (5 trees); the fixed random_state makes the
# fitted trees, and therefore the predictions, repeatable across runs.
# fit() returns the estimator itself, so `model` is the trained classifier.
model = RandomForestClassifier(n_estimators=5, random_state=42).fit(X, y)

# Sample to classify, in raw string form: Sunny, Mild, High, Weak
# (the same feature values as day 8 of the training data).
raw_sample = {
    'Outlook': 'Sunny',
    'Temperature': 'Mild',
    'Humidity': 'High',
    'Wind': 'Weak',
}

# Encode each value with the encoder fitted on its own column, producing a
# one-row frame whose column names and order match the training features.
new_sample = pd.DataFrame({
    feature: [label_encoders[feature].transform([value])[0]]
    for feature, value in raw_sample.items()
})

# Predict the encoded class, then map it back to the original 'Yes'/'No'.
prediction = model.predict(new_sample)
predicted_label = label_encoders['PlayTennis'].inverse_transform(prediction)

print("Prediction for Sunny, Mild, High, Weak → Play Tennis?:", predicted_label[0])



Prediction for Sunny, Mild, High, Weak → Play Tennis?: No


In [2]:
# Persist the trained classifier together with the per-column encoders; both
# are needed later to encode new inputs and decode predictions.
import joblib

# Files are written relative to the current working directory.
for artifact, filename in (
    (model, 'play_tennis_rf_model.joblib'),
    (label_encoders, 'play_tennis_label_encoders.joblib'),
):
    joblib.dump(artifact, filename)

print("Model and label encoders saved successfully!")

Model and label encoders saved successfully!


In [3]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Worked example of the classification metrics on five hand-written labels.
# These lists are hard-coded here; they are not produced by the model above.
y_true = ['Yes', 'No', 'Yes', 'No', 'Yes']
y_pred = ['Yes', 'Yes', 'Yes', 'No', 'No']

# Binarise: 'Yes' -> 1 (positive class), anything else -> 0.
y_true_bin = [int(label == 'Yes') for label in y_true]
y_pred_bin = [int(label == 'Yes') for label in y_pred]

# Every scorer takes (y_true, y_pred); evaluate them in one pass and unpack
# the results in the same order as the scorers are listed.
accuracy, precision, recall, f1 = (
    scorer(y_true_bin, y_pred_bin)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)
# Rows are the true class and columns the predicted class, ordered 0 then 1.
conf_matrix = confusion_matrix(y_true_bin, y_pred_bin)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print("Confusion Matrix:\n", conf_matrix)


Accuracy: 0.60
Precision: 0.67
Recall: 0.67
F1 Score: 0.67
Confusion Matrix:
 [[1 1]
 [1 2]]
