Title: Popular Classification Algorithms


Decision Trees


Task 1: Predict the loan default risk based on borrower characteristics.

In [None]:

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Task 1 pipeline: build a synthetic borrower table, fit a random forest on it,
# report hold-out metrics, then score one new borrower.
# NOTE(review): the enclosing section is titled "Decision Trees" but this cell
# fits a RandomForestClassifier -- confirm which of the two is intended.

# Seed NumPy's global RNG so the generated table is identical on every run.
np.random.seed(42)

n_borrowers = 1000
feature_columns = ['credit_score', 'income', 'loan_amount']
target_column = 'default'

# The columns are drawn one after another from the seeded RNG, so the order of
# these four draws determines the data; do not reorder them.
synthetic_columns = {}
synthetic_columns['credit_score'] = np.random.randint(500, 800, size=n_borrowers)
synthetic_columns['income'] = np.random.randint(50000, 150000, size=n_borrowers)
synthetic_columns['loan_amount'] = np.random.randint(10000, 50000, size=n_borrowers)
synthetic_columns['default'] = np.random.choice([0, 1], size=n_borrowers, p=[0.7, 0.3])
data = pd.DataFrame(synthetic_columns)

# Features (X) and target (y); y is 0 for "not defaulted" and 1 for "defaulted".
X = data[feature_columns]
y = data[target_column]

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the 100-tree forest and train it in one step (fit() returns the estimator).
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predicted labels for the held-out rows.
y_pred = rf.predict(X_test)

# Hold-out metrics: each heading is followed by its value on the next line.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

# Score a single new borrower; the values follow the order of feature_columns.
new_borrower = pd.DataFrame([[700, 80000, 30000]], columns=feature_columns)
prediction = rf.predict(new_borrower)
print("Loan default risk prediction:", prediction[0])




Task 2: Determine if a patient should be tested for a disease based on symptoms.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Task 2 pipeline: simulate binary symptom flags and a test recommendation,
# fit a random forest, report hold-out metrics, then score one new patient.

# Fix the global NumPy seed so the simulated patients are reproducible.
np.random.seed(42)

# Column name -> [P(value == 0), P(value == 1)]. The dict order is the order in
# which the columns are drawn from the seeded RNG, so it must stay as written.
binary_column_probs = {
    'symptom1': [0.7, 0.3],
    'symptom2': [0.6, 0.4],
    'symptom3': [0.8, 0.2],
    'test_recommendation': [0.7, 0.3],
}
data = pd.DataFrame({
    column: np.random.choice([0, 1], size=1000, p=probs)
    for column, probs in binary_column_probs.items()
})

# Separate the symptom flags (X) from the label (y);
# y is 0 for "not recommended" and 1 for "recommended".
symptom_columns = ['symptom1', 'symptom2', 'symptom3']
X = data[symptom_columns]
y = data['test_recommendation']

# 80/20 train/test partition with a fixed shuffle seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Random forest of 100 trees, trained on the training partition.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Labels predicted for the held-out patients.
y_pred = rf.predict(X_test)

# Report accuracy, the per-class report and the confusion matrix.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

# One new patient showing symptom1 and symptom2 but not symptom3.
new_patient = pd.DataFrame([{'symptom1': 1, 'symptom2': 1, 'symptom3': 0}])
prediction = rf.predict(new_patient)
print("Test recommendation:", prediction[0])




Task 3: Classify types of animals based on features like size, habitat, and diet.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Task 3 pipeline: classify an animal as mammal / bird / reptile from its size,
# habitat and diet using a random forest trained on synthetic data.
#
# Every category <-> integer-code mapping is declared exactly once here and is
# reused for data generation, encoding, report labelling and decoding the final
# prediction, so the encode and decode steps cannot drift out of sync.
SIZE_CODES = {'small': 0, 'medium': 1, 'large': 2}
HABITAT_CODES = {'land': 0, 'water': 1, 'air': 2}
DIET_CODES = {'herbivore': 0, 'carnivore': 1, 'omnivore': 2}
ANIMAL_TYPE_CODES = {'mammal': 0, 'bird': 1, 'reptile': 2}
# Inverse of the target encoding, used to turn a predicted code back into a name.
ANIMAL_TYPE_NAMES = {code: name for name, code in ANIMAL_TYPE_CODES.items()}

# Generate a synthetic dataset
# NOTE: all four columns are drawn independently of one another, so the target
# carries no information about the features; the scores printed below should be
# read as a demonstration of the workflow, not as meaningful model quality.
np.random.seed(42)
data = pd.DataFrame({
    'size': np.random.choice(list(SIZE_CODES), size=1000),
    'habitat': np.random.choice(list(HABITAT_CODES), size=1000),
    'diet': np.random.choice(list(DIET_CODES), size=1000),
    'animal_type': np.random.choice(list(ANIMAL_TYPE_CODES), size=1000),
})

# Convert categorical columns to the integer codes declared above
column_codes = {
    'size': SIZE_CODES,
    'habitat': HABITAT_CODES,
    'diet': DIET_CODES,
    'animal_type': ANIMAL_TYPE_CODES,
}
for column, codes in column_codes.items():
    data[column] = data[column].map(codes)

# Split the dataset into features (X) and target (y)
X = data[['size', 'habitat', 'diet']]  # Features
y = data['animal_type']  # Target (encoded animal type)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest classifier
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
rf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf.predict(X_test)

# Evaluate the model.
# Passing the codes and names explicitly labels the report rows with animal
# names and pins the row/column order of the confusion matrix to the encoding.
class_codes = list(ANIMAL_TYPE_CODES.values())
class_names = list(ANIMAL_TYPE_CODES)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred, labels=class_codes, target_names=class_names))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred, labels=class_codes))

# Predict the type of a new animal: medium-sized, land-dwelling carnivore.
# The sample is encoded through the same mappings used for the training data.
new_animal = pd.DataFrame({
    'size': [SIZE_CODES['medium']],
    'habitat': [HABITAT_CODES['land']],
    'diet': [DIET_CODES['carnivore']],
})
prediction = rf.predict(new_animal)
# int() converts the NumPy integer returned by predict() into a plain dict key.
print("Animal type prediction:", ANIMAL_TYPE_NAMES[int(prediction[0])])

