# In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

# Load and preprocess data
def preprocess_data(filepath):
    """Read a CSV file and return a cleaned, dummy-encoded DataFrame.

    Parameters
    ----------
    filepath
        Location of the CSV file; passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The loaded data with every missing value replaced by 0 and then
        passed through ``pd.get_dummies(..., drop_first=True)``, which
        replaces categorical columns (e.g. 'Category') with indicator
        columns and drops the first level of each one.
    """
    # Missing values are filled in every column *before* encoding, so the
    # encoder never sees NaN.
    filled = pd.read_csv(filepath).fillna(0)
    return pd.get_dummies(filled, drop_first=True)

# Read the e-commerce dataset from disk and run it through the cleaning/encoding step
df = preprocess_data(
    r"C:\Users\admin\Desktop\ecommerce-price-prediction\data\ecommerce_dataset.csv"
)

# 'Price' is the regression target; every remaining column is a predictor
y = df['Price']
X = df.drop(columns=['Price'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both the training and the test features
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train Ridge, Lasso, and ElasticNet models and evaluate them
def train_and_evaluate_models(X_train, X_test, y_train, y_test):
    """Fit Ridge, Lasso and ElasticNet regressors and score them on the test set.

    Each estimator is created with its default hyperparameters, fitted on
    ``(X_train, y_train)`` and evaluated with ``r2_score`` against
    ``(X_test, y_test)``. One line per model is printed as it is scored.

    Parameters
    ----------
    X_train, X_test
        Feature matrices for fitting and for evaluation.
    y_train, y_test
        Target values matching ``X_train`` and ``X_test`` respectively.

    Returns
    -------
    dict
        Maps each model name ('Ridge', 'Lasso', 'ElasticNet') to its
        R-squared score on the test data.
    """
    # All three estimators are instantiated up front, in reporting order.
    estimators = (
        ('Ridge', Ridge()),
        ('Lasso', Lasso()),
        ('ElasticNet', ElasticNet()),
    )

    r2_scores = {}
    for label, estimator in estimators:
        # fit() returns the fitted estimator, so prediction can be chained.
        predictions = estimator.fit(X_train, y_train).predict(X_test)
        score = r2_score(y_test, predictions)
        r2_scores[label] = score
        print(f"{label} R-squared: {score:.4f}")

    return r2_scores

# Fit and score all three regressors on the standardized features
r2_scores = train_and_evaluate_models(X_train_scaled, X_test_scaled, y_train, y_test)

# The winner is the model name whose R-squared score is the largest
best_model = max(r2_scores, key=lambda model_name: r2_scores[model_name])
print(f"\nBest model based on R-squared score: {best_model} with R-squared = {r2_scores[best_model]:.4f}")


# Output:
# Ridge R-squared: 0.7118
# Lasso R-squared: -1.1493
# ElasticNet R-squared: 0.5928
#
# Best model based on R-squared score: Ridge with R-squared = 0.7118
