Given a CSV file with data about customers (features) and their churn status (target), split the data into training and testing sets.


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Assuming you have CSV files named:
# 'customer_churn.csv', 'student_grades.csv', 'housing_prices.csv'
# with features and a target column named 'churn', 'grade', and 'price' respectively.

# ------------------------------------------------------------------------------
# 1. Split data into training and testing sets (Customer Churn)
# ------------------------------------------------------------------------------
def split_customer_data(file_path="customer_churn.csv", target_column="churn", test_size=0.2, random_state=42):
    """Load customer data from a CSV file and split it into train/test sets.

    Args:
        file_path: Path of the CSV file to read.
        target_column: Name of the column holding the churn label.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Forwarded to ``train_test_split`` as ``random_state``.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` on success. When the file does
        not exist or does not contain ``target_column``, an error is printed
        and ``(None, None, None, None)`` is returned.
    """
    # Only the read can raise FileNotFoundError, so keep the try body minimal.
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return None, None, None, None

    # A missing target would otherwise escape as a raw KeyError from ``drop``;
    # report it with the same print-and-return-None convention as a missing file.
    if target_column not in df.columns:
        print(f"Error: Target column '{target_column}' not found in {file_path}")
        return None, None, None, None

    X = df.drop(columns=[target_column])
    y = df[target_column]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
    print("Customer churn data split successfully.")
    print(f"Training set shape: {X_train.shape}, {y_train.shape}")
    print(f"Testing set shape: {X_test.shape}, {y_test.shape}")
    return X_train, X_test, y_train, y_test

# Example usage: split the default churn CSV; all four names are None if the
# file could not be read.
(
    X_train_churn,
    X_test_churn,
    y_train_churn,
    y_test_churn,
) = split_customer_data()


Implement a program that uses Scikit-Learn to train a Decision Tree classifier on a dataset.


In [None]:

# ------------------------------------------------------------------------------
# 2. Train a Decision Tree classifier (Customer Churn Data)
# ------------------------------------------------------------------------------
def train_decision_tree(X_train, y_train, random_state=42):
    """Fit a ``DecisionTreeClassifier`` on the supplied training data.

    Args:
        X_train: Training features, or ``None`` if no data is available.
        y_train: Training labels, or ``None`` if no data is available.
        random_state: Seed handed to the classifier's constructor.

    Returns:
        The fitted classifier, or ``None`` (after printing an error) when
        either input is ``None``.
    """
    have_data = X_train is not None and y_train is not None
    if not have_data:
        print("Error: Training data not available.")
        return None

    tree = DecisionTreeClassifier(random_state=random_state)
    tree.fit(X_train, y_train)
    print("Decision Tree classifier trained.")
    return tree

# Example usage: fit on the churn training split, then score the held-out split.
dt_model = train_decision_tree(X_train_churn, y_train_churn)

# train_decision_tree returns None when no data was available, so test for
# None explicitly instead of relying on the truthiness of the model object.
if dt_model is not None:
    y_pred_dt = dt_model.predict(X_test_churn)
    accuracy_dt = accuracy_score(y_test_churn, y_pred_dt)
    print(f"Decision Tree Accuracy on test set: {accuracy_dt:.4f}")


Write a Python program that uses Scikit-Learn to perform k-fold cross-validation on a dataset.


In [None]:

# ------------------------------------------------------------------------------
# 3. Perform k-fold cross-validation (Customer Churn Data)
# ------------------------------------------------------------------------------
def perform_kfold_cv(X, y, model, k=5, random_state=42):
    """Run shuffled k-fold cross-validation and print per-fold accuracy.

    Args:
        X: Feature table. Rows are selected with ``.iloc``, so a pandas
            DataFrame (or another object exposing ``.iloc``) is required.
        y: Target values, also selected with ``.iloc``.
        model: Estimator exposing ``fit`` and ``predict``. It is refitted in
            place on every fold, so it is left fitted on the last fold's
            training portion.
        k: Number of folds passed to ``KFold`` as ``n_splits``.
        random_state: Seed for the fold shuffling. Defaults to 42, the value
            that was previously hard-coded.

    Returns:
        The mean of the per-fold accuracies, or ``None`` (after printing an
        error) when ``X`` or ``y`` is ``None``.
    """
    if X is None or y is None:
        print("Error: Data not available for cross-validation.")
        return None

    kf = KFold(n_splits=k, shuffle=True, random_state=random_state)
    accuracies = []
    for fold, (train_index, val_index) in enumerate(kf.split(X, y), start=1):
        X_train_fold, X_val_fold = X.iloc[train_index], X.iloc[val_index]
        y_train_fold, y_val_fold = y.iloc[train_index], y.iloc[val_index]
        model.fit(X_train_fold, y_train_fold)
        y_pred_fold = model.predict(X_val_fold)
        accuracy = accuracy_score(y_val_fold, y_pred_fold)
        accuracies.append(accuracy)
        print(f"Fold {fold} Accuracy: {accuracy:.4f}")

    mean_accuracy = sum(accuracies) / len(accuracies)
    print(f"Mean Cross-Validation Accuracy ({k}-fold): {mean_accuracy:.4f}")
    return mean_accuracy

# Example usage (using the Decision Tree model):
# Cross-validate on the complete churn table, re-read from disk, but only
# when the earlier split actually produced training data.
if not (X_train_churn is None or y_train_churn is None):
    df_churn = pd.read_csv("customer_churn.csv")
    X_churn_all, y_churn_all = df_churn.drop(columns=["churn"]), df_churn["churn"]
    perform_kfold_cv(
        X_churn_all,
        y_churn_all,
        DecisionTreeClassifier(random_state=42),
    )


Create a function that takes a Pandas DataFrame and trains a Random Forest classifier on the data.


In [None]:

# ------------------------------------------------------------------------------
# 4. Train a Random Forest classifier (using a DataFrame)
# ------------------------------------------------------------------------------
def train_random_forest(df, target_column="target", random_state=42):
    """Fit a ``RandomForestClassifier`` on every non-target column of ``df``.

    Args:
        df: DataFrame containing the feature columns and the target column.
        target_column: Name of the label column; all other columns are used
            as features.
        random_state: Seed handed to the classifier's constructor.

    Returns:
        The fitted classifier, or ``None`` when a ``KeyError`` is raised while
        preparing or fitting the data (reported as a missing target column).
    """
    try:
        features = df.drop(columns=[target_column])
        labels = df[target_column]
        forest = RandomForestClassifier(random_state=random_state)
        forest.fit(features, labels)
    except KeyError:
        print(f"Error: Target column '{target_column}' not found in DataFrame.")
        return None

    print("Random Forest classifier trained.")
    return forest

# Example usage (assuming you have a DataFrame named 'customer_df'):
# customer_df = pd.read_csv("customer_churn.csv")
# rf_model = train_random_forest(customer_df, target_column="churn")
# if rf_model and X_test_churn is not None and y_test_churn is not None:
#     y_pred_rf = rf_model.predict(X_test_churn)
#     accuracy_rf = accuracy_score(y_test_churn, y_pred_rf)
#     print(f"Random Forest Accuracy on test set: {accuracy_rf:.4f}")


Given a CSV file with data about student scores (features) and their grades (target), split the data into training and testing sets.


In [None]:

# ------------------------------------------------------------------------------
# 5. Split data into training and testing sets (Student Grades)
# ------------------------------------------------------------------------------
def split_student_data(file_path="student_grades.csv", target_column="grade", test_size=0.2, random_state=42):
    """Load student data from a CSV file and split it into train/test sets.

    Args:
        file_path: Path of the CSV file to read.
        target_column: Name of the column holding the grade label.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Forwarded to ``train_test_split`` as ``random_state``.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` on success. When the file does
        not exist or does not contain ``target_column``, an error is printed
        and ``(None, None, None, None)`` is returned.
    """
    # Only the read can raise FileNotFoundError, so keep the try body minimal.
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return None, None, None, None

    # A missing target would otherwise escape as a raw KeyError from ``drop``;
    # report it with the same print-and-return-None convention as a missing file.
    if target_column not in df.columns:
        print(f"Error: Target column '{target_column}' not found in {file_path}")
        return None, None, None, None

    X = df.drop(columns=[target_column])
    y = df[target_column]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
    print("Student grades data split successfully.")
    print(f"Training set shape: {X_train.shape}, {y_train.shape}")
    print(f"Testing set shape: {X_test.shape}, {y_test.shape}")
    return X_train, X_test, y_train, y_test

# Example usage: split the default student-grades CSV; all four names are
# None if the file could not be read.
(
    X_train_student,
    X_test_student,
    y_train_student,
    y_test_student,
) = split_student_data()


Implement a program that uses Scikit-Learn to train a Support Vector Machine (SVM) classifier on a dataset.


In [None]:

# ------------------------------------------------------------------------------
# 6. Train a Support Vector Machine (SVM) classifier (Student Grades Data)
# ------------------------------------------------------------------------------
def train_svm(X_train, y_train, random_state=42):
    """Fit an ``SVC`` with default settings on the supplied training data.

    Args:
        X_train: Training features, or ``None`` if no data is available.
        y_train: Training labels, or ``None`` if no data is available.
        random_state: Seed handed to the ``SVC`` constructor.

    Returns:
        The fitted classifier, or ``None`` (after printing an error) when
        either input is ``None``.
    """
    if any(part is None for part in (X_train, y_train)):
        print("Error: Training data not available.")
        return None

    svc = SVC(random_state=random_state)
    svc.fit(X_train, y_train)
    print("Support Vector Machine (SVM) classifier trained.")
    return svc

# Example usage: fit on the student training split, then score the held-out split.
svm_model = train_svm(X_train_student, y_train_student)

# train_svm returns None when no data was available, so test for None
# explicitly instead of relying on the truthiness of the model object.
if svm_model is not None:
    y_pred_svm = svm_model.predict(X_test_student)
    accuracy_svm = accuracy_score(y_test_student, y_pred_svm)
    print(f"SVM Accuracy on test set: {accuracy_svm:.4f}")


Write a Python program that uses Scikit-Learn to perform hyperparameter tuning using Grid Search on a dataset.


In [None]:

# ------------------------------------------------------------------------------
# 7. Perform hyperparameter tuning using Grid Search (Student Grades Data)
# ------------------------------------------------------------------------------
def perform_grid_search(X_train, y_train, model, param_grid, cv=5, scoring='accuracy'):
    """Tune ``model`` with ``GridSearchCV`` and return the best estimator.

    Args:
        X_train: Training features, or ``None`` if no data is available.
        y_train: Training labels, or ``None`` if no data is available.
        model: Estimator to tune.
        param_grid: Parameter grid handed to ``GridSearchCV``.
        cv: Cross-validation setting handed to ``GridSearchCV``.
        scoring: Scoring setting handed to ``GridSearchCV``.

    Returns:
        ``best_estimator_`` of the fitted search, or ``None`` (after printing
        an error) when either training input is ``None``.
    """
    have_data = X_train is not None and y_train is not None
    if not have_data:
        print("Error: Training data not available for Grid Search.")
        return None

    search = GridSearchCV(model, param_grid, cv=cv, scoring=scoring)
    search.fit(X_train, y_train)

    # Report the winning configuration before handing back the estimator.
    print("Grid Search completed.")
    print(f"Best parameters: {search.best_params_}")
    print(f"Best cross-validation score: {search.best_score_:.4f}")
    return search.best_estimator_

# Example usage (for SVM): search over C and kernel on the student training
# split, then score the best estimator on the held-out split.
if X_train_student is not None and y_train_student is not None:
    param_grid_svm = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']}
    best_svm_model = perform_grid_search(X_train_student, y_train_student, SVC(random_state=42), param_grid_svm)
    # perform_grid_search returns None on failure; compare against None
    # explicitly, matching the other checks in this condition.
    if best_svm_model is not None and X_test_student is not None and y_test_student is not None:
        y_pred_best_svm = best_svm_model.predict(X_test_student)
        accuracy_best_svm = accuracy_score(y_test_student, y_pred_best_svm)
        print(f"Best SVM Accuracy on test set: {accuracy_best_svm:.4f}")


Create a function that takes a Pandas DataFrame and trains a k-nearest neighbors (KNN) classifier on the data.


In [None]:

# ------------------------------------------------------------------------------
# 8. Train a k-nearest neighbors (KNN) classifier (using a DataFrame)
# ------------------------------------------------------------------------------
def train_knn(df, target_column="target", n_neighbors=5):
    """Fit a ``KNeighborsClassifier`` on every non-target column of ``df``.

    Args:
        df: DataFrame containing the feature columns and the target column.
        target_column: Name of the label column; all other columns are used
            as features.
        n_neighbors: Neighbor count handed to the classifier's constructor.

    Returns:
        The fitted classifier, or ``None`` when a ``KeyError`` is raised while
        preparing or fitting the data (reported as a missing target column).
    """
    try:
        features = df.drop(columns=[target_column])
        labels = df[target_column]
        knn = KNeighborsClassifier(n_neighbors=n_neighbors)
        knn.fit(features, labels)
    except KeyError:
        print(f"Error: Target column '{target_column}' not found in DataFrame.")
        return None

    print(f"K-Nearest Neighbors (KNN) classifier trained with n_neighbors={n_neighbors}.")
    return knn

# Example usage (assuming you have a DataFrame named 'student_df'):
# student_df = pd.read_csv("student_grades.csv")
# knn_model = train_knn(student_df, target_column="grade", n_neighbors=3)
# if knn_model and X_test_student is not None and y_test_student is not None:
#     y_pred_knn = knn_model.predict(X_test_student)
#     accuracy_knn = accuracy_score(y_test_student, y_pred_knn)
#     print(f"KNN Accuracy on test set: {accuracy_knn:.4f}")


Given a CSV file with data about housing prices (features) and their labels (target), split the data into training and testing sets.


In [None]:

# ------------------------------------------------------------------------------
# 9. Split data into training and testing sets (Housing Prices)
# ------------------------------------------------------------------------------
def split_housing_data(file_path="housing_prices.csv", target_column="price", test_size=0.2, random_state=42):
    """Load housing data from a CSV file and split it into train/test sets.

    Args:
        file_path: Path of the CSV file to read.
        target_column: Name of the column holding the price target.
        test_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Forwarded to ``train_test_split`` as ``random_state``.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` on success. When the file does
        not exist or does not contain ``target_column``, an error is printed
        and ``(None, None, None, None)`` is returned.
    """
    # Only the read can raise FileNotFoundError, so keep the try body minimal.
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return None, None, None, None

    # A missing target would otherwise escape as a raw KeyError from ``drop``;
    # report it with the same print-and-return-None convention as a missing file.
    if target_column not in df.columns:
        print(f"Error: Target column '{target_column}' not found in {file_path}")
        return None, None, None, None

    X = df.drop(columns=[target_column])
    y = df[target_column]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
    print("Housing prices data split successfully.")
    print(f"Training set shape: {X_train.shape}, {y_train.shape}")
    print(f"Testing set shape: {X_test.shape}, {y_test.shape}")
    return X_train, X_test, y_train, y_test

# Example usage: split the default housing-prices CSV; all four names are
# None if the file could not be read.
(
    X_train_housing,
    X_test_housing,
    y_train_housing,
    y_test_housing,
) = split_housing_data()


Implement a program that uses Scikit-Learn to train a Naive Bayes classifier on a dataset.


In [None]:

# ------------------------------------------------------------------------------
# 10. Train a Naive Bayes classifier (Housing Prices Data - assuming it's a classification task)
#     Note: Housing prices are typically a regression task. For demonstration,
#     we'll assume the 'price' column has been binned or treated as categories.
# ------------------------------------------------------------------------------
def train_naive_bayes(X_train, y_train):
    """Fit a ``GaussianNB`` classifier on the supplied training data.

    Args:
        X_train: Training features, or ``None`` if no data is available.
        y_train: Training labels, or ``None`` if no data is available.

    Returns:
        The fitted classifier, or ``None`` (after printing an error) when
        either input is ``None``.
    """
    if not (X_train is not None and y_train is not None):
        print("Error: Training data not available.")
        return None

    bayes = GaussianNB()
    bayes.fit(X_train, y_train)
    print("Naive Bayes classifier trained.")
    return bayes

# Example usage: fit on the housing training split, then score the held-out split.
nb_model = train_naive_bayes(X_train_housing, y_train_housing)

# train_naive_bayes returns None when no data was available, so test for
# None explicitly instead of relying on the truthiness of the model object.
if nb_model is not None:
    y_pred_nb = nb_model.predict(X_test_housing)
    accuracy_nb = accuracy_score(y_test_housing, y_pred_nb)
    print(f"Naive Bayes Accuracy on test set: {accuracy_nb:.4f}")