In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the red wine dataset (the CSV is parsed with ';' as the field separator)
red_wine = pd.read_csv('winequality-red.csv', delimiter=';')

# Prepare the data: 'quality' is the target, every other column is a feature
X_red = red_wine.drop(columns=['quality'])
y_red = red_wine['quality']

# Split the data into training and test sets (80/20, fixed seed so the split is repeatable)
X_train_red, X_test_red, y_train_red, y_test_red = train_test_split(X_red, y_red, test_size=0.2, random_state=42)

# Standardize the data. The scaler is fitted on the training split only and then
# applied to the test split, so test-set statistics never influence the fit.
scaler_red = StandardScaler()
X_train_red = scaler_red.fit_transform(X_train_red)
X_test_red = scaler_red.transform(X_test_red)

# Train the Random Forest regressor
rf_regressor_red = RandomForestRegressor(n_estimators=100, random_state=42)
rf_regressor_red.fit(X_train_red, y_train_red)

# Predict on both splits so train and test scores can be compared
y_pred_train_red = rf_regressor_red.predict(X_train_red)
y_pred_test_red = rf_regressor_red.predict(X_test_red)

# RMSE is taken as the square root of the MSE instead of calling
# mean_squared_error(..., squared=False): the `squared` argument was deprecated
# in scikit-learn 1.4 and removed in 1.6, whereas the explicit root works on
# every release.
print("Red Wine Random Forest Regressor Train RMSE:", mean_squared_error(y_train_red, y_pred_train_red) ** 0.5)
print("Red Wine Random Forest Regressor Test RMSE:", mean_squared_error(y_test_red, y_pred_test_red) ** 0.5)
print("Red Wine Random Forest Regressor Train R2:", r2_score(y_train_red, y_pred_train_red))
print("Red Wine Random Forest Regressor Test R2:", r2_score(y_test_red, y_pred_test_red))


Red Wine Random Forest Regressor Train RMSE: 0.21834135571468008
Red Wine Random Forest Regressor Test RMSE: 0.5483250062690921
Red Wine Random Forest Regressor Train R2: 0.9267238056087482
Red Wine Random Forest Regressor Test R2: 0.5399271357910311


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pickle

# Read both semicolon-separated wine-quality tables in one pass
df_red, df_white = (
    pd.read_csv(csv_name, sep=';')
    for csv_name in ('winequality-red.csv', 'winequality-white.csv')
)

# Function to train and save a model
def train_and_save_model(df, model_filename, *, target='quality', test_size=0.2,
                         n_estimators=100, random_state=42):
    """Train a random-forest regressor on ``df``, print test metrics, and pickle it.

    With the default keyword arguments this behaves exactly as the original
    hard-coded version did.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the feature columns together with the ``target`` column.
    model_filename : str
        Path the fitted model is pickled to; also used as the label in the
        printed report.
    target : str, default 'quality'
        Column to predict. Every other column of ``df`` is used as a feature.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split`` to size the held-out split.
    n_estimators : int, default 100
        Number of trees, forwarded to ``RandomForestRegressor``.
    random_state : int, default 42
        Seed forwarded to both ``train_test_split`` and ``RandomForestRegressor``.

    Returns
    -------
    None
        Results are reported via ``print`` and the model is written to
        ``model_filename``.
    """
    # Split data into features and target
    X = df.drop(target, axis=1)
    y = df[target]

    # Hold out part of the data for evaluation
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Initialize and train the model
    model = RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)
    model.fit(X_train, y_train)

    # Evaluate on the held-out split only
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = mse ** 0.5  # explicit root; does not rely on any RMSE helper/flag
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f'Model for {model_filename}')
    print(f'Mean Squared Error: {mse}')
    print(f'Root Mean Squared Error: {rmse}')
    print(f'Mean Absolute Error: {mae}')
    print(f'R-squared: {r2}')

    # Save the trained model
    with open(model_filename, 'wb') as f:
        pickle.dump(model, f)

# Fit, report on, and pickle one model per wine colour
train_and_save_model(df=df_red, model_filename='model_red.pkl')
train_and_save_model(df=df_white, model_filename='model_white.pkl')


Model for model_red.pkl
Mean Squared Error: 0.30123812499999997
Root Mean Squared Error: 0.5488516420673258
Mean Absolute Error: 0.4224375
R-squared: 0.5390429623873638
Model for model_white.pkl
Mean Squared Error: 0.34775581632653063
Root Mean Squared Error: 0.5897082467852477
Mean Absolute Error: 0.4185204081632653
R-squared: 0.5509775612930288
