In [7]:
import pandas as pd
import numpy as np
from sklearn.ensemble import StackingRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV, RepeatedKFold, cross_val_score
from sklearn.metrics import r2_score
import warnings
warnings.filterwarnings("ignore")
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [19]:
import pandas as pd
import numpy as np
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import cross_val_score, RepeatedStratifiedKFold
from sklearn.pipeline import make_pipeline

# Read the headerless abalone data file, supplying the column labels ourselves
column_names = [
    'Sex',
    'Length', 'Diameter', 'Height',
    'WholeWeight', 'ShuckedWeight', 'VisceraWeight', 'ShellWeight',
    'Rings',
]
file_path = "abalone/abalone.data"
df = pd.read_csv(
    file_path,
    names=column_names,
    header=None,
)

# Bin 'Rings' into classes
def classify_age(rings):
    """Map a ring count to an age bucket.

    Counts up to and including 8 are 'Young', counts above 8 up to and
    including 10 are 'Adult', and anything else is 'Old'.
    """
    if rings <= 8:
        return 'Young'
    return 'Adult' if rings <= 10 else 'Old'

# Attach the categorical age bucket derived from the ring count
df['AgeClass'] = df['Rings'].map(classify_age)

# Expand 'Sex' into indicator columns, dropping the first level
df = pd.get_dummies(df, columns=['Sex'], drop_first=True)

# Integer-encode the age bucket; LabelEncoder assigns codes in sorted class
# order, so Adult=0, Old=1, Young=2
le = LabelEncoder()
df['AgeLabel'] = le.fit_transform(df['AgeClass'])

# Target is the encoded bucket; predictors exclude 'Rings' and both columns
# derived from it
y = df['AgeLabel']
X = df.drop(columns=['Rings', 'AgeClass', 'AgeLabel'])

# Level-0 learners; KNN and logistic regression are wrapped with a StandardScaler
base_models = [
    ('rf', RandomForestClassifier(random_state=1, n_estimators=100)),
    ('knn', make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5))),
    ('logreg', make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))),
]

# Level-1 learner that combines the base predictions
meta_model = LogisticRegression(max_iter=1000)

# Stack them; passthrough=True also feeds the original features to the meta-model
stack_clf = StackingClassifier(
    final_estimator=meta_model,
    estimators=base_models,
    cv=5,
    passthrough=True,
    n_jobs=-1,
)

# Evaluate with 3 repeats of stratified 10-fold cross-validation
cv = RepeatedStratifiedKFold(n_repeats=3, n_splits=10, random_state=1)
scores = cross_val_score(
    stack_clf, X, y,
    cv=cv,
    scoring='accuracy',
    n_jobs=-1,
)

# Print the mean accuracy over all folds and repeats
print("Stacking Classifier Accuracy:", scores.mean())


Stacking Classifier Accuracy: 0.6597202620678576
Stacking Classifier Accuracy: 0.6597202620678576


In [3]:
# Column labels for the headerless abalone data file
column_names = [
    'Sex',
    'Length', 'Diameter', 'Height',
    'WholeWeight', 'ShuckedWeight', 'VisceraWeight', 'ShellWeight',
    'Rings',
]

# Relative path to the data file
file_path = "abalone/abalone.data"

# Read the file into a DataFrame using the labels above
abalone_df = pd.read_csv(
    file_path,
    names=column_names,
    header=None,
)

# Show the frame as the cell output (pandas abbreviates the middle rows of a long frame)
abalone_df

Unnamed: 0,Sex,Length,Diameter,Height,WholeWeight,ShuckedWeight,VisceraWeight,ShellWeight,Rings
0,M,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15
1,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
2,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
3,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
4,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
...,...,...,...,...,...,...,...,...,...
4172,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4173,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4174,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4175,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


In [10]:
# Expand 'Sex' into indicator columns, dropping the first level
df = pd.get_dummies(abalone_df, drop_first=True, columns=['Sex'])

# Target is the raw ring count; every remaining column is a predictor
y = df['Rings']
X = df.drop(columns=['Rings'])

# Level-0 regressors; only KNN is wrapped with a StandardScaler
base_models = [
    ('rf', RandomForestRegressor(random_state=1, n_estimators=100)),
    ('knn', make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=5))),
    ('gbr', GradientBoostingRegressor(random_state=1, n_estimators=100)),
]

# Level-1 regressor that blends the base predictions
meta_model = RidgeCV()

# Stack them; passthrough=True also feeds the original features to the meta-model
stack_model = StackingRegressor(
    final_estimator=meta_model,
    estimators=base_models,
    passthrough=True,
    cv=5,
    n_jobs=-1,
)

# Evaluate with 3 repeats of 10-fold cross-validation, scored by R^2
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
scores = cross_val_score(stack_model, X, y, scoring='r2', cv=cv, n_jobs=-1)

# Report the mean score. scoring='r2' yields the coefficient of determination,
# so label it as R^2 rather than "accuracy" (a classification metric).
print("Cross-validation R^2:", scores.mean())


Cross-validation accuracy: 0.5648222786046015
Cross-validation accuracy: 0.5648222786046015
