In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

# Read the mobile-price table from the working directory.
df = pd.read_csv("mobile price classification.csv")

# Target is the 'price_range' column; every other column is a feature.
y = df['price_range']
X = df.drop(columns=['price_range'])

# Hold out 20% of the rows for the final evaluation; the seed pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate regressors, keyed by display name. Each value is a pair of
# (unfitted estimator, hyper-parameter grid handed to GridSearchCV).
# An empty grid means the estimator is evaluated once with its defaults.
models = {
    'Linear Regression': (LinearRegression(), {}),
    'Ridge Regression': (Ridge(), {'alpha': [0.1, 1.0, 10.0]}),
    'Lasso Regression': (Lasso(), {'alpha': [0.1, 1.0, 10.0]}),
    # The tree is seeded with the same value as the train/test split: with
    # max_features='sqrt'/'log2' the features considered at each split are
    # sampled at random, so an unseeded tree would make the selected
    # parameters and the reported errors differ between runs.
    'Decision Tree Regressor': (DecisionTreeRegressor(random_state=42), {
        'max_depth': [None, 5, 10, 20],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': [None, 'sqrt', 'log2']
    })
}

# Tune each candidate with 5-fold cross-validation on the training split,
# report the winning configuration, then score it on the held-out test split.
for model_name in models:
    model, param_grid = models[model_name]
    print(f"Performing GridSearchCV for {model_name}...")

    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        scoring='neg_mean_squared_error',
        cv=5,
    )
    grid_search.fit(X_train, y_train)

    # The scorer is the negated MSE, so best_score_ is <= 0 and values
    # closer to zero are better.
    print(f"Best parameters for {model_name}: {grid_search.best_params_}")
    print(f"Best negative mean squared error for {model_name}: {grid_search.best_score_}")

    # Predict on the test split through the search object and report the
    # plain (positive) MSE.
    y_pred = grid_search.predict(X_test)
    test_mse = mean_squared_error(y_test, y_pred)
    print(f"Test mean squared error for {model_name}: {test_mse}\n")


Performing GridSearchCV for Linear Regression...
Best parameters for Linear Regression: {}
Best negative mean squared error for Linear Regression: -0.10344002119259903
Test mean squared error for Linear Regression: 0.10469290211930678

Performing GridSearchCV for Ridge Regression...
Best parameters for Ridge Regression: {'alpha': 10.0}
Best negative mean squared error for Ridge Regression: -0.1033959796230973
Test mean squared error for Ridge Regression: 0.10466568098230024

Performing GridSearchCV for Lasso Regression...
Best parameters for Lasso Regression: {'alpha': 0.1}
Best negative mean squared error for Lasso Regression: -0.102420907343684
Test mean squared error for Lasso Regression: 0.10489969890495524

Performing GridSearchCV for Decision Tree Regressor...
Best parameters for Decision Tree Regressor: {'max_depth': None, 'max_features': None, 'min_samples_leaf': 4, 'min_samples_split': 2}
Best negative mean squared error for Decision Tree Regressor: -0.13838669217687075
Test m

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# Read the mobile-price table from the working directory.
df = pd.read_csv("mobile price classification.csv")

# 'price_range' is the class label; all remaining columns are features.
y = df['price_range']
X = df.drop(columns=['price_range'])

# Seeded 80/20 train/test split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate classifiers, keyed by display name. Each value is a pair of
# (unfitted estimator, hyper-parameter grid handed to GridSearchCV).
models = {
    # Seeded so that bootstrap sampling and per-split feature sampling are
    # repeatable across runs (same seed as the train/test split).
    'RandomForestClassifier': (RandomForestClassifier(random_state=42), {
        'n_estimators': [100, 200, 300],
        'max_depth': [None, 5, 10],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'bootstrap': [True, False]
    }),
    # Kernel names must match scikit-learn's spelling exactly; a misspelt
    # kernel makes every candidate that uses it fail to fit.
    'SVC': (SVC(), {
        'C': [0.1, 1, 10, 100],
        'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
    }),
    'KNeighborsClassifier': (KNeighborsClassifier(), {
        'n_neighbors': [3, 5, 7, 9],
        'weights': ['uniform', 'distance'],
        'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
    }),
    # Seeded because max_features='sqrt'/'log2' samples features at random.
    'DecisionTreeClassifier': (DecisionTreeClassifier(random_state=42), {
        'criterion': ['gini', 'entropy'],
        'max_depth': [None, 5, 10, 20],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': [None, 'sqrt', 'log2']
    }),
    # NOTE(review): eight parameters with three values each give
    # 3**8 = 6561 candidates, each fitted once per CV fold. Expect this
    # search to dominate the cell's runtime; consider trimming the grid.
    'XGBoost': (XGBClassifier(), {
        'learning_rate': [0.01, 0.05, 0.1],
        'n_estimators': [50, 100, 200],
        'max_depth': [3, 5, 7],
        'min_child_weight': [1, 3, 5],
        'subsample': [0.6, 0.8, 1.0],
        'colsample_bytree': [0.6, 0.8, 1.0],
        'reg_alpha': [0.0, 0.1, 0.5],
        'reg_lambda': [0.0, 0.1, 0.5]
    })
}

# Tune each candidate with 5-fold cross-validation on the training split,
# report the winning configuration, then score it on the held-out test split.
for model_name in models:
    model, param_grid = models[model_name]
    print(f"Performing GridSearchCV for {model_name}...")

    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        scoring='accuracy',
        cv=5,
    )
    grid_search.fit(X_train, y_train)

    # best_score_ is the mean cross-validated accuracy of the best candidate.
    print(f"Best parameters for {model_name}: {grid_search.best_params_}")
    print(f"Best accuracy score for {model_name}: {grid_search.best_score_}")

    # Predict on the test split through the search object and report accuracy.
    y_pred = grid_search.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(f"Test accuracy score for {model_name}: {test_accuracy}\n")


Performing GridSearchCV for RandomForestClassifier...
Best parameters for RandomForestClassifier: {'bootstrap': False, 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 300}
Best accuracy score for RandomForestClassifier: 0.88125
Test accuracy score for RandomForestClassifier: 0.9

Performing GridSearchCV for SVC...
Best parameters for SVC: {'C': 100, 'kernel': 'linear'}
Best accuracy score for SVC: 0.97625
Test accuracy score for SVC: 0.9725

Performing GridSearchCV for KNeighborsClassifier...
Best parameters for KNeighborsClassifier: {'algorithm': 'auto', 'n_neighbors': 9, 'weights': 'distance'}
Best accuracy score for KNeighborsClassifier: 0.9268750000000001
Test accuracy score for KNeighborsClassifier: 0.955

Performing GridSearchCV for DecisionTreeClassifier...
Best parameters for DecisionTreeClassifier: {'criterion': 'entropy', 'max_depth': None, 'max_features': None, 'min_samples_leaf': 2, 'min_samples_split': 5}
Best accuracy score for DecisionTr