In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Single-feature model comparison (20% hold-out).
# For every non-target column, fit each regressor on that one column alone,
# score it on a held-out split, and save one row per (feature, model) pair.

# Configuration
data_path = "transformed_data.csv"
output_path = "model_comparison_results.csv"
target_col = "Y"
TEST_SIZE = 0.2  # fraction of rows held out for evaluation
SEED = 42        # shared by the split and by every stochastic estimator

# Load dataset
df = pd.read_csv(data_path)

# Every column except the target is evaluated as a lone predictor
features = [col for col in df.columns if col != target_col]

# Define models.
# Tree-based models and the MLP draw random numbers while fitting; seeding
# them makes the saved metrics identical across Restart & Run All.
# LinearRegression, SVR and KNeighborsRegressor take no random_state.
models = {
    "LinearRegression": LinearRegression(),
    "DecisionTree": DecisionTreeRegressor(random_state=SEED),
    "RandomForest": RandomForestRegressor(random_state=SEED),
    "GradientBoosting": GradientBoostingRegressor(random_state=SEED),
    "SVR": SVR(),
    "KNN": KNeighborsRegressor(),
    "MLP": MLPRegressor(max_iter=500, random_state=SEED)
}

# Prepare results storage (one dict per feature/model pair)
results = []

# Iterate through each feature separately
for feature in features:
    # Drop rows where EITHER the feature or the target is missing: the
    # estimators used here raise on NaN targets, so filtering on the feature
    # alone is not enough. Rows and their order are unchanged when the
    # target has no missing values.
    complete_rows = df[[feature, target_col]].dropna()
    X = complete_rows[[feature]]  # 2-D frame, as the estimators expect
    y = complete_rows[target_col]

    # Split dataset
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_SIZE, random_state=SEED
    )

    # Train and evaluate each model; fit() re-initialises the estimator, so
    # reusing the same instance across features carries no state over.
    for model_name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Calculate metrics
        mae = mean_absolute_error(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        r2 = r2_score(y_test, y_pred)

        # Store results
        results.append({
            "Feature": feature,
            "Model": model_name,
            "MAE": mae,
            "MSE": mse,
            "RMSE": rmse,
            "R2": r2
        })

# Convert results to DataFrame and save to CSV
results_df = pd.DataFrame(results)
results_df.to_csv(output_path, index=False)

print(f"Results saved to {output_path}")


Results saved to model_comparison_results.csv


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Single-feature model comparison (10% hold-out).
# For every non-target column, fit each regressor on that one column alone,
# score it on a held-out split, and save one row per (feature, model) pair.

# Configuration
data_path = "transformed_data.csv"
output_path = "model_comparison_results2.csv"
target_col = "Y"
TEST_SIZE = 0.1  # fraction of rows held out for evaluation
SEED = 42        # shared by the split and by every stochastic estimator

# Load dataset
df = pd.read_csv(data_path)

# Every column except the target is evaluated as a lone predictor
features = [col for col in df.columns if col != target_col]

# Define models.
# Tree-based models and the MLP draw random numbers while fitting; seeding
# them makes the saved metrics identical across Restart & Run All.
# LinearRegression, SVR and KNeighborsRegressor take no random_state.
models = {
    "LinearRegression": LinearRegression(),
    "DecisionTree": DecisionTreeRegressor(random_state=SEED),
    "RandomForest": RandomForestRegressor(random_state=SEED),
    "GradientBoosting": GradientBoostingRegressor(random_state=SEED),
    "SVR": SVR(),
    "KNN": KNeighborsRegressor(),
    "MLP": MLPRegressor(max_iter=500, random_state=SEED)
}

# Prepare results storage (one dict per feature/model pair)
results = []

# Iterate through each feature separately
for feature in features:
    # Drop rows where EITHER the feature or the target is missing: the
    # estimators used here raise on NaN targets, so filtering on the feature
    # alone is not enough. Rows and their order are unchanged when the
    # target has no missing values.
    complete_rows = df[[feature, target_col]].dropna()
    X = complete_rows[[feature]]  # 2-D frame, as the estimators expect
    y = complete_rows[target_col]

    # Split dataset
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_SIZE, random_state=SEED
    )

    # Train and evaluate each model; fit() re-initialises the estimator, so
    # reusing the same instance across features carries no state over.
    for model_name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Calculate metrics
        mae = mean_absolute_error(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        r2 = r2_score(y_test, y_pred)

        # Store results
        results.append({
            "Feature": feature,
            "Model": model_name,
            "MAE": mae,
            "MSE": mse,
            "RMSE": rmse,
            "R2": r2
        })

# Convert results to DataFrame and save to CSV
results_df = pd.DataFrame(results)
results_df.to_csv(output_path, index=False)

print(f"Results saved to {output_path}")


Results saved to model_comparison_results2.csv


In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Single-feature model comparison (6% hold-out).
# For every non-target column, fit each regressor on that one column alone,
# score it on a held-out split, and save one row per (feature, model) pair.

# Configuration
data_path = "transformed_data.csv"
output_path = "model_comparison_results3.csv"
target_col = "Y"
TEST_SIZE = 0.06  # fraction of rows held out for evaluation
SEED = 42         # shared by the split and by every stochastic estimator

# Load dataset
df = pd.read_csv(data_path)

# Every column except the target is evaluated as a lone predictor
features = [col for col in df.columns if col != target_col]

# Define models.
# Tree-based models and the MLP draw random numbers while fitting; seeding
# them makes the saved metrics identical across Restart & Run All.
# LinearRegression, SVR and KNeighborsRegressor take no random_state.
models = {
    "LinearRegression": LinearRegression(),
    "DecisionTree": DecisionTreeRegressor(random_state=SEED),
    "RandomForest": RandomForestRegressor(random_state=SEED),
    "GradientBoosting": GradientBoostingRegressor(random_state=SEED),
    "SVR": SVR(),
    "KNN": KNeighborsRegressor(),
    "MLP": MLPRegressor(max_iter=500, random_state=SEED)
}

# Prepare results storage (one dict per feature/model pair)
results = []

# Iterate through each feature separately
for feature in features:
    # Drop rows where EITHER the feature or the target is missing: the
    # estimators used here raise on NaN targets, so filtering on the feature
    # alone is not enough. Rows and their order are unchanged when the
    # target has no missing values.
    complete_rows = df[[feature, target_col]].dropna()
    X = complete_rows[[feature]]  # 2-D frame, as the estimators expect
    y = complete_rows[target_col]

    # Split dataset
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_SIZE, random_state=SEED
    )

    # Train and evaluate each model; fit() re-initialises the estimator, so
    # reusing the same instance across features carries no state over.
    for model_name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Calculate metrics
        mae = mean_absolute_error(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        r2 = r2_score(y_test, y_pred)

        # Store results
        results.append({
            "Feature": feature,
            "Model": model_name,
            "MAE": mae,
            "MSE": mse,
            "RMSE": rmse,
            "R2": r2
        })

# Convert results to DataFrame and save to CSV
results_df = pd.DataFrame(results)
results_df.to_csv(output_path, index=False)

print(f"Results saved to {output_path}")


Results saved to model_comparison_results3.csv
