In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# 1. Load Data
# Read the training split first, then the test split, from the working directory.
train_df, test_df = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

# 2. Advanced Feature Engineering
def create_features(df):
    """Return a new frame holding ``df`` plus three engineered columns.

    Columns appended, in this order:

    * ``total_activity_score``: ``hobby_engagement_level`` +
      ``physical_activity_index`` + ``creative_expression_index``.
    * ``support_x_guidance``: ``support_environment_score`` multiplied by
      ``external_guidance_usage``.
    * ``focus_x_consistency``: ``focus_intensity`` multiplied by
      ``consistency_score``.

    The frame passed in is left unmodified; a ``KeyError`` is raised if any
    of the source columns is missing.
    """
    # Combined activity metric (summed left to right).
    activity_total = (
        df['hobby_engagement_level']
        + df['physical_activity_index']
        + df['creative_expression_index']
    )
    # Pairwise interaction terms.
    support_guidance = df['support_environment_score'] * df['external_guidance_usage']
    focus_consistency = df['focus_intensity'] * df['consistency_score']

    # ``assign`` builds a new DataFrame, so the caller's frame is untouched.
    return df.assign(
        total_activity_score=activity_total,
        support_x_guidance=support_guidance,
        focus_x_consistency=focus_consistency,
    )

# Apply feature engineering (same transformation for both splits)
train_fe, test_fe = create_features(train_df), create_features(test_df)

# 3. Prepare Data
# The target is pulled out of the training frame; the identifier column is
# removed from both feature matrices, and the target from the training one.
y = train_fe['personality_cluster']
X = train_fe.drop(columns=['participant_id', 'personality_cluster'])
X_test = test_fe.drop(columns=['participant_id'])

# Encode Target
# `le` is kept so predictions can be mapped back to the original labels.
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)

# 4. Preprocessing
# One column is handled as categorical; every remaining column of X
# (engineered features included) is routed to the numeric branch.
categorical_cols = ['cultural_background']
numerical_cols = X.columns[~X.columns.isin(categorical_cols)].tolist()

preprocessor = ColumnTransformer(
    transformers=[
        # Standardise the numeric columns.
        ('num', StandardScaler(), numerical_cols),
        # One-hot encode; categories unseen during fit are ignored at predict time.
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ]
)

# 5. Model: preprocessing followed by an RBF-kernel SVM
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        (
            'svc',
            SVC(
                kernel='rbf',
                C=5,
                gamma=0.02,
                class_weight='balanced',
                random_state=42,
            ),
        ),
    ]
)

# 6. Fit and Predict
# `fit` returns the fitted pipeline, so training and prediction can be chained.
y_pred_encoded = pipeline.fit(X, y_encoded).predict(X_test)
# Map the integer class codes back to the original cluster labels.
y_pred = le.inverse_transform(y_pred_encoded)

# 7. Create Submission
# The output path is defined once so the file that is written and the path
# reported in the confirmation message cannot drift apart.
submission_path = 'submission_improved_aa_svm1.csv'

# One row per test participant: the identifier and the predicted label.
submission = pd.DataFrame({
    'participant_id': test_df['participant_id'],
    'personality_cluster': y_pred
})
# index=False keeps the DataFrame index out of the CSV.
submission.to_csv(submission_path, index=False)
print(f"Submission saved to {submission_path}")

Submission saved to submission_improved_svm1.csv
