Title: Popular Classification Algorithms

Random Forest

Task 1: Predict employee attrition based on job satisfaction and salary (logistic regression baseline on synthetic data).

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Generate a synthetic dataset (1000 employees, fixed seed for reproducibility).
# NOTE: 'attrition' is sampled independently of both features, so there is no
# real relationship for the model to learn; this cell demonstrates the workflow,
# not a meaningful predictor.
np.random.seed(42)
data = pd.DataFrame({
    'job_satisfaction': np.random.uniform(0, 1, size=1000),
    'salary': np.random.randint(50000, 150000, size=1000),
    'attrition': np.random.choice([0, 1], size=1000, p=[0.8, 0.2])
})

# Split the dataset into features (X) and target (y)
X = data[['job_satisfaction', 'salary']]  # Features
y = data['attrition']  # Target (0 for not attrited, 1 for attrited)

# Split the dataset into training and testing sets.
# stratify=y keeps the ~80/20 class ratio the same in both partitions, which
# matters because the positive class is the minority.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Create a Logistic Regression classifier.
# The two features live on very different scales (satisfaction in [0, 1],
# salary in the tens of thousands). Standardising them first helps the solver
# converge and makes the regularisation penalty act evenly on both
# coefficients. The scaler is fitted on the training data only, inside the
# pipeline, so no test-set information leaks into training.
logreg = make_pipeline(StandardScaler(), LogisticRegression())

# Train the classifier
logreg.fit(X_train, y_train)

# Make predictions on the test set
y_pred = logreg.predict(X_test)

# Evaluate the model.
# zero_division=0 reports 0 (instead of emitting a warning) for any class the
# model never predicts, which can happen with an imbalanced target.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred, zero_division=0))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Predict employee attrition based on job satisfaction and salary.
# The new row uses the same column names as the training frame; the pipeline
# applies the fitted scaling before the classifier sees it.
new_employee = pd.DataFrame({'job_satisfaction': [0.6], 'salary': [80000]})
prediction = logreg.predict(new_employee)
print("Attrition prediction:", prediction[0])




Task 2: Classify types of wine based on chemical analysis.

In [None]:
# Import necessary libraries
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the Wine dataset
wine = load_wine()

# Split the dataset into features (X) and target (y)
X = wine.data  # Features (chemical analysis)
y = wine.target  # Target (wine type)

# Split the dataset into training and testing sets.
# stratify=y keeps the class proportions equal in both partitions, so the
# small test set is not dominated by (or missing) one wine type.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Create a Random Forest classifier (100 trees, fixed seed for reproducibility)
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
rf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Classify a new wine sample based on chemical analysis.
# Values must be given in the order of wine.feature_names and on the same
# measurement scales as the training data; otherwise the forest is asked to
# extrapolate and the predicted label is meaningless. Expected order:
#   alcohol, malic_acid, ash, alcalinity_of_ash, magnesium, total_phenols,
#   flavanoids, nonflavanoid_phenols, proanthocyanins, color_intensity, hue,
#   od280/od315_of_diluted_wines, proline
# (illustrative values; verify the order against wine.feature_names)
new_wine = [[13.0, 2.0, 2.4, 19.0, 100.0, 2.5, 2.5, 0.3, 1.6, 5.0, 1.0, 3.0, 800.0]]
assert len(new_wine[0]) == X.shape[1], "sample must have one value per feature"
prediction = rf.predict(new_wine)
print("Wine type prediction:", wine.target_names[prediction[0]])




Task 3: Predict housing loan approval based on financial and personal data.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Build a reproducible synthetic loan dataset: 1000 applicants, fixed seed.
# The columns are drawn in this exact order so the random stream is consumed
# identically on every run.
np.random.seed(42)
data = pd.DataFrame(dict(
    income=np.random.randint(50000, 200000, size=1000),
    credit_score=np.random.randint(600, 850, size=1000),
    loan_amount=np.random.randint(100000, 500000, size=1000),
    employment_length=np.random.randint(1, 10, size=1000),
    approval=np.random.choice([0, 1], size=1000, p=[0.4, 0.6]),
))

# Separate predictors from the label (0 = not approved, 1 = approved)
feature_columns = ['income', 'credit_score', 'loan_amount', 'employment_length']
X = data[feature_columns]
y = data['approval']

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build and fit a 100-tree Random Forest in one step (fit returns the estimator)
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows
y_pred = rf.predict(X_test)

# Report accuracy, per-class metrics and the confusion matrix
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")


# Predict approval for a single new applicant, using the same column layout
# as the training features
new_applicant = pd.DataFrame(
    [[80000, 750, 300000, 5]],
    columns=feature_columns,
)
prediction = rf.predict(new_applicant)
print("Loan approval prediction:", prediction[0])


