In [12]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from xgboost import XGBRegressor
def best_model(X_train, y_train, model_path, random_state=None):
    """Grid-search four regressors and persist the best estimator of each.

    For every candidate (Lasso, random forest, XGBoost, SVR) a 5-fold
    ``GridSearchCV`` scored with negative mean squared error is fitted on the
    training data. The winning parameters are printed and the best estimator
    is written with ``joblib.dump`` to ``<model_path>/<Model_Name>.pkl``,
    where spaces in the model name are replaced by underscores.

    Parameters
    ----------
    X_train, y_train
        Training features and target, passed unchanged to
        ``GridSearchCV.fit``.
    model_path : str or os.PathLike
        Directory that receives the pickled estimators. It is created
        (including missing parents) if it does not exist yet.
    random_state : int or None, default None
        Seed forwarded to the random forest and XGBoost estimators so runs can
        be repeated. ``None`` leaves both estimators unseeded, as before.

    Returns
    -------
    dict
        Maps each model's display name to its fitted best estimator.
    """
    # Create the output directory before any fitting starts, so an unusable
    # path fails immediately instead of after the first full grid search.
    output_dir = Path(model_path)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Define the models and their parameter grids
    param_grids = {
        "Lasso Regression": {
            "model": Lasso(),
            "params": {"alpha": [0.01, 0.1, 1, 10, 100]}
        },
        "Random Forest Regressor": {
            "model": RandomForestRegressor(random_state=random_state),
            "params": {
                "n_estimators": [50, 100, 200],
                "max_depth": [None, 10, 20],
                "min_samples_split": [2, 5, 10]
            }
        },
        "XGBoost": {
            "model": XGBRegressor(random_state=random_state),
            "params": {
                "n_estimators": [50, 100],
                "learning_rate": [0.01, 0.1],
                "max_depth": [3, 5]
            }
        },
        "Support Vector Regression": {
            "model": SVR(),
            "params": {
                "C": [0.1, 1, 10],
                "epsilon": [0.01, 0.1],
                "kernel": ["linear", "rbf"]
            }
        }
    }

    # Perform one grid search per candidate model
    best_models = {}
    for model_name, config in param_grids.items():
        grid_search = GridSearchCV(
            estimator=config["model"],
            param_grid=config["params"],
            scoring="neg_mean_squared_error",
            cv=5,
            n_jobs=-1
        )
        grid_search.fit(X_train, y_train)
        best_models[model_name] = grid_search.best_estimator_
        print(f"Best params for {model_name}: {grid_search.best_params_}")

        # Save the best model with its best params
        joblib.dump(
            grid_search.best_estimator_,
            output_dir / f"{model_name.replace(' ', '_')}.pkl"
        )
    return best_models

In [1]:
from pathlib import Path

import statsmodels.api as sm
import pandas as pd

# Processed-data folder, defined once so both reads share it. The location is
# specific to the author's machine; point DATA_DIR elsewhere to run this cell
# on another one.
DATA_DIR = Path(
    "~/Library/CloudStorage/OneDrive-国立大学法人東海国立大学機構/Weekly_challenges/"
    "Data science and Analytics/Japan_Life_Expectency/data/processed"
).expanduser()

X_train = pd.read_excel(DATA_DIR / "X_train_best.xlsx")
y_train = pd.read_excel(DATA_DIR / "y_train.xlsx")

# Evaluate the model's performance

# Add a constant for intercept
X_train_sm = sm.add_constant(X_train)

# Fit the model using statsmodels
ols_model = sm.OLS(y_train, X_train_sm).fit()

# Summary of the model
print(ols_model.summary())


                            OLS Regression Results                            
Dep. Variable:        Life_expectancy   R-squared:                       0.407
Model:                            OLS   Adj. R-squared:                  0.372
Method:                 Least Squares   F-statistic:                     11.65
Date:                Sun, 29 Dec 2024   Prob (F-statistic):           0.000140
Time:                        00:04:36   Log-Likelihood:                -18.090
No. Observations:                  37   AIC:                             42.18
Df Residuals:                      34   BIC:                             47.01
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const           81.7617      0.789    103.643   

In [None]:
from pathlib import Path

import statsmodels.api as sm
import pandas as pd

# NOTE(review): this cell repeats the OLS fit performed in an earlier cell of
# this notebook; consider keeping only one of the two.

# Processed-data folder, defined once so both reads share it. The location is
# specific to the author's machine; point DATA_DIR elsewhere to run this cell
# on another one.
DATA_DIR = Path(
    "~/Library/CloudStorage/OneDrive-国立大学法人東海国立大学機構/Weekly_challenges/"
    "Data science and Analytics/Japan_Life_Expectency/data/processed"
).expanduser()

X_train = pd.read_excel(DATA_DIR / "X_train_best.xlsx")
y_train = pd.read_excel(DATA_DIR / "y_train.xlsx")

# Evaluate the model's performance

# Add a constant for intercept
X_train_sm = sm.add_constant(X_train)

# Fit the model using statsmodels
ols_model = sm.OLS(y_train, X_train_sm).fit()

# Summary of the model
print(ols_model.summary())
