In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import ElasticNet, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout
import warnings
warnings.filterwarnings("ignore")

# Read the three competition CSVs in order: training data, test data and the
# sample submission that is later filled in with predictions.
df, test_df, submission_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/Users/MacbookPro/LocalStorage/Developer/ShellAi/dataset/train.csv",
        "/Users/MacbookPro/LocalStorage/Developer/ShellAi/dataset/test.csv",
        "/Users/MacbookPro/LocalStorage/Developer/ShellAi/dataset/sample_solution.csv",
    )
)

# Split the test frame into its feature columns and its 'ID' column; the IDs
# are kept so they can be written back into the submission at the end.
test_df_features = test_df.drop(columns=['ID'])
test_ids = test_df['ID']

# Feature columns used for each target, keyed by target column name.
# Per the original author's note these are the ten most correlated features
# for each BlendProperty, read off a correlation image (not part of this file).
# List order is preserved because it determines the column order fed to the
# models.
top_corr_features = {
    'BlendProperty1': [
        'Component5_fraction', 'Component2_fraction', 'Component3_fraction',
        'Component1_fraction', 'Component2_Property1', 'Component3_Property1',
        'Component5_Property1', 'Component4_Property1', 'Component1_Property1',
        'Component1_Property5',
    ],
    'BlendProperty2': [
        'Component3_fraction', 'Component2_fraction', 'Component4_fraction',
        'Component3_Property2', 'Component5_fraction', 'Component2_Property2',
        'Component1_fraction', 'Component1_Property2', 'Component4_Property2',
        'Component2_Property3',
    ],
    'BlendProperty3': [
        'Component2_fraction', 'Component3_fraction', 'Component3_Property3',
        'Component2_Property3', 'Component4_Property3', 'Component5_fraction',
        'Component1_fraction', 'Component2_Property2', 'Component3_Property2',
        'Component5_Property3',
    ],
    'BlendProperty4': [
        'Component5_fraction', 'Component2_fraction', 'Component1_fraction',
        'Component3_fraction', 'Component4_fraction', 'Component1_Property4',
        'Component3_Property4', 'Component2_Property4', 'Component5_Property4',
        'Component4_Property4',
    ],
    'BlendProperty5': [
        'Component2_fraction', 'Component4_fraction', 'Component3_fraction',
        'Component2_Property5', 'Component3_Property5', 'Component5_Property5',
        'Component1_fraction', 'Component1_Property5', 'Component5_fraction',
        'Component4_Property5',
    ],
    'BlendProperty6': [
        'Component5_fraction', 'Component2_fraction', 'Component3_fraction',
        'Component1_fraction', 'Component4_fraction', 'Component2_Property6',
        'Component5_Property6', 'Component1_Property6', 'Component4_Property6',
        'Component3_Property6',
    ],
    'BlendProperty7': [
        'Component2_fraction', 'Component3_fraction', 'Component5_fraction',
        'Component1_fraction', 'Component4_fraction', 'Component2_Property7',
        'Component3_Property7', 'Component4_Property7', 'Component5_Property7',
        'Component1_Property7',
    ],
    'BlendProperty8': [
        'Component2_fraction', 'Component3_fraction', 'Component4_fraction',
        'Component1_fraction', 'Component5_fraction', 'Component1_Property8',
        'Component3_Property8', 'Component4_Property8', 'Component2_Property8',
        'Component5_Property8',
    ],
    'BlendProperty9': [
        'Component4_fraction', 'Component5_fraction', 'Component2_fraction',
        'Component3_fraction', 'Component1_fraction', 'Component5_Property9',
        'Component3_Property9', 'Component4_Property9', 'Component1_Property9',
        'Component2_Property9',
    ],
    'BlendProperty10': [
        'Component4_fraction', 'Component2_fraction', 'Component5_fraction',
        'Component3_fraction', 'Component1_fraction', 'Component1_Property10',
        'Component2_Property10', 'Component3_Property10', 'Component5_Property10',
        'Component4_Property10',
    ],
}

# Define the best model per BlendProperty (based on image correlation)
from sklearn.exceptions import ConvergenceWarning
import warnings
warnings.filterwarnings("ignore", category=ConvergenceWarning)

def get_best_model_for_property(X, y, prop_name):
    """Fit and return the model assigned to ``prop_name``.

    The model family is selected purely from the property name:

    * BlendProperty1 / 2 / 3 / 10 -> ElasticNet
    * BlendProperty4 / 8          -> RandomForestRegressor
    * BlendProperty5 / 9          -> StandardScaler + RBF-kernel SVR pipeline
    * BlendProperty6              -> StandardScaler + Gaussian-process pipeline
    * BlendProperty7              -> Lasso
    * any other name              -> small dense Keras network

    Args:
        X: Training features, handed unchanged to the model's ``fit``.
        y: Training target, handed unchanged to the model's ``fit``.
        prop_name: Name of the target property; decides which model is built.

    Returns:
        For the scikit-learn branches, whatever ``fit`` returns; for the
        fallback branch, the compiled and trained Keras ``Sequential`` model.
    """
    elastic_net_props = ('BlendProperty1', 'BlendProperty2', 'BlendProperty3', 'BlendProperty10')
    random_forest_props = ('BlendProperty4', 'BlendProperty8')
    svr_props = ('BlendProperty5', 'BlendProperty9')

    if prop_name in elastic_net_props:
        regressor = ElasticNet(alpha=0.1, l1_ratio=0.7, random_state=42)
        return regressor.fit(X, y)

    if prop_name in random_forest_props:
        regressor = RandomForestRegressor(
            n_estimators=200,
            max_depth=12,
            random_state=42,
            n_jobs=-1,
        )
        return regressor.fit(X, y)

    if prop_name in svr_props:
        svr = SVR(kernel='rbf', C=2.0, epsilon=0.1)
        return make_pipeline(StandardScaler(), svr).fit(X, y)

    if prop_name == 'BlendProperty6':
        gp = GaussianProcessRegressor(
            kernel=C(1.0) * RBF(length_scale=1.0),
            n_restarts_optimizer=7,
            random_state=42,
        )
        return make_pipeline(StandardScaler(), gp).fit(X, y)

    if prop_name == 'BlendProperty7':
        regressor = Lasso(alpha=0.05, random_state=42)
        return regressor.fit(X, y)

    # Fallback: dense network for every name not matched above.
    # NOTE(review): the original comment labelled this branch "Neural Network
    # for Property10", but 'BlendProperty10' is part of the ElasticNet group,
    # so this branch is not reached for BlendProperty1..BlendProperty10.
    # Confirm which model was intended for BlendProperty10.
    layer_stack = [
        Dense(128, activation='relu', input_shape=(X.shape[1],)),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dense(1),
    ]
    network = Sequential(layer_stack)
    network.compile(optimizer='adam', loss='mae')
    network.fit(X, y, epochs=200, batch_size=64, verbose=0)
    return network

# Train, predict and submit.
#
# For every target in `top_corr_features` (BlendProperty1..BlendProperty10, in
# insertion order): fit the model assigned to that target on its selected
# training columns, predict on the same columns of the test set, and store the
# predictions in the matching column of the submission frame.
for prop, features in top_corr_features.items():
    # The status emoji was stored mis-encoded (UTF-8 bytes decoded as cp1252);
    # it is restored to the intended character here.
    print(f"\n🔧 Training model for {prop}...")

    # Only the raw top-correlated columns are used (no weighted properties).
    X_train = df[features]
    y_train = df[prop]
    X_test = test_df_features[features]

    model = get_best_model_for_property(X_train, y_train, prop)

    # The original flattened only the Keras model's predictions. np.ravel
    # yields the same 1-D result and leaves already 1-D predictions (as
    # expected from the scikit-learn models here) unchanged, so no type check
    # on the model is needed.
    preds = np.ravel(model.predict(X_test))

    submission_df[prop] = preds

# Restore the test IDs, then write the final CSV without the index column.
submission_df['ID'] = test_ids
submission_df.to_csv('oka.csv', index=False)
print("\n✅ Final submission file 'oka.csv' created successfully!")



🔧 Training model for BlendProperty1...

🔧 Training model for BlendProperty2...

🔧 Training model for BlendProperty3...

🔧 Training model for BlendProperty4...

🔧 Training model for BlendProperty5...

🔧 Training model for BlendProperty6...

🔧 Training model for BlendProperty7...

🔧 Training model for BlendProperty8...

🔧 Training model for BlendProperty9...

🔧 Training model for BlendProperty10...

✅ Final submission file 'oka.csv' created successfully!
