Title: Popular Classification Algorithms

Logistic Regression


Task 1: Predict the likelihood of a student passing a test based on study hours.

In [None]:
# Import necessary libraries
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Build the toy data: 100 students with integer study hours drawn from 1..9;
# a student is labelled as passing (1) only when they studied more than 5 hours.
np.random.seed(42)
study_hours = np.random.randint(1, 10, size=100)
passed = (study_hours > 5).astype(int)

# scikit-learn estimators expect a 2-D feature matrix, so make it one column
study_hours = study_hours[:, np.newaxis]

# Hold out 20% of the students for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    study_hours,
    passed,
    test_size=0.2,
    random_state=42,
)

# Build the Logistic Regression classifier and fit it on the training split
logreg = LogisticRegression().fit(X_train, y_train)

# Predicted labels for the held-out students
y_pred = logreg.predict(X_test)

# Report how well the predictions match the held-out labels
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

# Estimate the pass probability for a new student who studied 6 hours.
# predict_proba returns one row per sample; column 1 is the "passed" class.
study_hours_new = np.array([[6]])
prediction = logreg.predict_proba(study_hours_new)
print("Likelihood of passing:", prediction[0, 1])




Task 2: Predict customer churn from service usage data using a Random Forest classifier.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Generate a synthetic dataset in which churn actually depends on usage.
# If the labels were drawn independently of the features, the classifier
# would have nothing to learn and the metrics below would be meaningless.
np.random.seed(42)
n_customers = 1000
usage_minutes = np.random.randint(0, 1000, size=n_customers)
data_used_gb = np.random.uniform(0, 10, size=n_customers)

# Logistic link centred on a mid-range customer (500 minutes, 5 GB):
# lighter users are more likely to churn. The intercept is chosen so the
# overall churn rate stays at roughly 30%.
churn_logit = -1.1 - 0.004 * (usage_minutes - 500) - 0.3 * (data_used_gb - 5)
churn_probability = 1 / (1 + np.exp(-churn_logit))
churn = (np.random.uniform(size=n_customers) < churn_probability).astype(int)

data = pd.DataFrame({
    'usage_minutes': usage_minutes,
    'data_used_gb': data_used_gb,
    'churn': churn,
})

# Split the dataset into features (X) and target (y)
X = data[['usage_minutes', 'data_used_gb']]  # Features
y = data['churn']  # Target (0 for not churned, 1 for churned)

# Split the dataset into training and testing sets.
# The classes are imbalanced, so stratify to keep the churn rate the same
# in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Create a Random Forest classifier
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
rf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Predict customer churn based on service usage data.
# The new customer is passed as a DataFrame with the same column names
# the model was trained on.
new_customer = pd.DataFrame({'usage_minutes': [500], 'data_used_gb': [5]})
prediction = rf.predict(new_customer)
print("Customer churn prediction:", prediction[0])




Task 3: Classify whether a review is positive or negative using NLP (TF-IDF features with a Multinomial Naive Bayes classifier).

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Build a small labelled dataset of distinct reviews.
# Each sentence appears exactly once. Repeating the same sentences before a
# random split would put copies of the test reviews into the training set
# (data leakage), so the reported scores would only measure memorisation.
np.random.seed(42)
reviews = np.array([
    "This product is great!",
    "I don't like this product.",
    "The product is amazing!",
    "The product is terrible.",
    "I'm neutral about this product.",
    "This product is fantastic!",
    "I hate this product.",
    "The product is excellent!",
    "The product is awful.",
    "I'm not sure about this product."
])
# 1 for positive, 0 for negative.
# NOTE: the neutral / unsure reviews are labelled 1 in this toy dataset.
labels = np.array([1, 0, 1, 0, 1, 1, 0, 1, 0, 1])

# Split the dataset into training and testing sets.
# Stratify so that both classes are present in the (tiny) test set.
X_train, X_test, y_train, y_test = train_test_split(
    reviews, labels, test_size=0.2, random_state=42, stratify=labels
)

# Create a TF-IDF vectorizer
vectorizer = TfidfVectorizer()

# Fit the vectorizer on the training data only, then apply the learned
# vocabulary to the test data so no test information reaches training
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Create a Multinomial Naive Bayes classifier
clf = MultinomialNB()

# Train the classifier
clf.fit(X_train_vectorized, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test_vectorized)

# Evaluate the model.
# With so few test reviews a class may never be predicted; zero_division=0
# reports its precision as 0 instead of emitting an undefined-metric warning.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred, zero_division=0))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Classify a new review
new_review = ["This product is amazing!"]
new_review_vectorized = vectorizer.transform(new_review)
prediction = clf.predict(new_review_vectorized)
print("Review classification:", prediction[0])


