<a href="https://colab.research.google.com/github/parsashu/ML-on-Proteins/blob/master/Untitled10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve, validation_curve
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import make_pipeline

# Every split is stored as four CSV shards. Shards are stacked row-wise
# (axis=0), always in the same 1..4 order for features and for targets.
X_train = np.concatenate(
    [
        pd.read_csv(shard).values
        for shard in ('X1_train.csv', 'X2_train.csv', 'X3_train.csv', 'X4_train.csv')
    ],
    axis=0,
)

# Targets are flattened to 1-D before stacking.
y_train = np.concatenate(
    [
        pd.read_csv(shard).values.ravel()
        for shard in ('y1_train.csv', 'y2_train.csv', 'y3_train.csv', 'y4_train.csv')
    ],
    axis=0,
)

X_test = np.concatenate(
    [
        pd.read_csv(shard).values
        for shard in ('X1_test.csv', 'X2_test.csv', 'X3_test.csv', 'X4_test.csv')
    ],
    axis=0,
)

y_test = np.concatenate(
    [
        pd.read_csv(shard).values.ravel()
        for shard in ('y1_test.csv', 'y2_test.csv', 'y3_test.csv', 'y4_test.csv')
    ],
    axis=0,
)

# The curve-plotting helpers below read these two module-level names;
# only the training split is bound to them.
X, y = X_train, y_train

def plot_validation_curve(model, param_name, param_range, model_name):
    """Plot training and validation MSE as one hyperparameter is varied.

    Runs 5-fold ``validation_curve`` on the module-level ``X`` and ``y``,
    averages the scores over the folds and shows both curves in a new figure.

    Args:
        model: Estimator (or pipeline) passed to ``validation_curve``.
        param_name: Name of the estimator parameter to sweep; also used as
            the x-axis label.
        param_range: Values tried for ``param_name``; used as x coordinates.
        model_name: Human-readable name shown in the figure title.
    """
    fit_scores, held_out_scores = validation_curve(
        model,
        X,
        y,
        param_name=param_name,
        param_range=param_range,
        cv=5,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
    )

    # Scores are negated MSE with one column per fold: average across the
    # folds (axis=1) and flip the sign to get a positive MSE per value.
    curves = {
        'Training MSE': -fit_scores.mean(axis=1),
        'Validation MSE': -held_out_scores.mean(axis=1),
    }

    _, axes = plt.subplots(figsize=(8, 6))
    for curve_label, mse_values in curves.items():
        axes.plot(param_range, mse_values, label=curve_label)
    axes.set_xlabel(param_name)
    axes.set_ylabel('MSE')
    axes.set_title(f'Validation Curve - {model_name}')
    axes.legend()
    axes.grid(True)
    plt.show()

def plot_learning_curve(model, model_name):
    """Plot training and validation MSE as the training set grows.

    Runs 5-fold ``learning_curve`` on the module-level ``X`` and ``y`` at ten
    evenly spaced fractions (10% to 100%) of each training fold, averages
    the scores over the folds and shows both curves in a new figure.

    Args:
        model: Estimator (or pipeline) passed to ``learning_curve``.
        model_name: Human-readable name shown in the figure title.
    """
    # NOTE(review): no shuffling is requested here, so the subsets follow
    # row order, and X is built by stacking four files in sequence.
    # Confirm that small training sizes drawn this way are representative.
    sizes, fit_scores, held_out_scores = learning_curve(
        model,
        X,
        y,
        train_sizes=np.linspace(0.1, 1.0, 10),
        cv=5,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
    )

    # Scores are negated MSE with one column per fold: average across the
    # folds (axis=1) and flip the sign to get a positive MSE per size.
    curves = {
        'Training MSE': -fit_scores.mean(axis=1),
        'Validation MSE': -held_out_scores.mean(axis=1),
    }

    _, axes = plt.subplots(figsize=(8, 6))
    for curve_label, mse_values in curves.items():
        axes.plot(sizes, mse_values, label=curve_label)
    axes.set_xlabel('Training Set Size')
    axes.set_ylabel('MSE')
    axes.set_title(f'Learning Curve - {model_name}')
    axes.legend()
    axes.grid(True)
    plt.show()

# Validation-curve sweeps:
# display name -> (estimator, parameter to sweep, candidate values).
models = {
    "Decision Tree": (DecisionTreeRegressor(), "max_depth", range(1, 15)),
    "Random Forest": (RandomForestRegressor(n_estimators=50), "max_depth", range(1, 15)),
    "Gradient Boosting": (GradientBoostingRegressor(), "n_estimators", [10, 50, 100, 200]),
    "SVR": (make_pipeline(StandardScaler(), SVR()), "svr__C", [0.1, 1, 10, 100]),
    "Polynomial Regression": (
        make_pipeline(PolynomialFeatures(), LinearRegression()),
        "polynomialfeatures__degree",
        range(1, 8),
    ),
}

# Fixed-hyperparameter estimators used for the learning curves, keyed by the
# same display names as `models`. A lookup table replaces the former if/elif
# chain, which had no fallback: a model without a branch would silently reuse
# the previous iteration's estimator (or hit NameError on the first one).
final_models = {
    "Decision Tree": DecisionTreeRegressor(max_depth=5),
    "Random Forest": RandomForestRegressor(n_estimators=50, max_depth=5),
    "Gradient Boosting": GradientBoostingRegressor(n_estimators=100),
    "SVR": make_pipeline(StandardScaler(), SVR(C=10)),
    "Polynomial Regression": make_pipeline(PolynomialFeatures(degree=2), LinearRegression()),
}

# Fail before any cross-validation starts if the two tables drift apart.
missing_final_models = sorted(set(models) - set(final_models))
if missing_final_models:
    raise ValueError(
        f"No final model configured for: {', '.join(missing_final_models)}"
    )

for model_name, (model, param_name, param_range) in models.items():
    print(f"Processing {model_name}...")
    plot_validation_curve(model, param_name, param_range, model_name)

    final_model = final_models[model_name]
    plot_learning_curve(final_model, model_name)
