In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily flatten the directory walk into one stream of full file paths,
# then print them in the order os.walk yields them.
input_file_paths = (
    os.path.join(folder, entry)
    for folder, _subfolders, entries in os.walk('/kaggle/input')
    for entry in entries
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/Patient-Recovery-Prediction-Challenge/sample_submission.csv
/kaggle/input/Patient-Recovery-Prediction-Challenge/train.csv
/kaggle/input/Patient-Recovery-Prediction-Challenge/test.csv


In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Fit a Lasso regression on the training set, print in-sample metrics and the
# fitted coefficients, and write test-set predictions to a submission CSV.

# Values used more than once in this cell
DATA_DIR = '/kaggle/input/Patient-Recovery-Prediction-Challenge'
SUBMISSION_PATH = 'lasso_submission.csv'
TARGET_COL = 'Recovery Index'
ACTIVITY_COL = 'Lifestyle Activities'
ACTIVITY_FLAG_COL = 'Lifestyle Activities_Yes'
SEPARATOR = "=" * 50

# Load the data
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')

# Remove Id column from train, save test Ids for submission
train_df = train_df.drop('Id', axis=1)
test_ids = test_df['Id']
test_df = test_df.drop('Id', axis=1)

# Convert Lifestyle Activities to binary (Yes=1, No=0).
# Any value other than the exact string 'Yes' (including a missing value) maps to 0.
train_df[ACTIVITY_FLAG_COL] = (train_df[ACTIVITY_COL] == 'Yes').astype(int)
train_df = train_df.drop(ACTIVITY_COL, axis=1)

test_df[ACTIVITY_FLAG_COL] = (test_df[ACTIVITY_COL] == 'Yes').astype(int)
test_df = test_df.drop(ACTIVITY_COL, axis=1)

# Separate features and target
X_train = train_df.drop(TARGET_COL, axis=1)
y_train = train_df[TARGET_COL]
# Select the test features by name, in the training column order, so the scaler
# and the model see columns in the order they were fitted on. A feature missing
# from the test file fails here with a KeyError instead of further downstream.
X_test = test_df[X_train.columns]

# Standardize features (statistics are learned from the training set only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train Lasso Regression
# NOTE: per the scikit-learn docs, random_state is only consulted when
# selection='random'; it is kept so the call matches the original configuration.
lasso = Lasso(alpha=1.0, random_state=42, max_iter=10000)
lasso.fit(X_train_scaled, y_train)

# Make predictions
y_pred_train = lasso.predict(X_train_scaled)
y_pred_test = lasso.predict(X_test_scaled)

# Evaluate the model on training data.
# These are in-sample scores (the model is scored on the rows it was fitted on),
# so they are an optimistic estimate of performance on unseen data.
print(SEPARATOR)
print("LASSO REGRESSION RESULTS")
print(SEPARATOR)
print(f"\nAlpha (Regularization Parameter): {lasso.alpha}")
print("\nTraining Set Performance:")
print(f"  R² Score: {r2_score(y_train, y_pred_train):.4f}")
print(f"  RMSE: {np.sqrt(mean_squared_error(y_train, y_pred_train)):.4f}")
print(f"  MAE: {mean_absolute_error(y_train, y_pred_train):.4f}")

print("\n" + SEPARATOR)
print("Feature Coefficients:")
print(SEPARATOR)
# Coefficients are on the standardized feature scale; rank by absolute size.
feature_importance = pd.DataFrame({
    'Feature': X_train.columns,
    'Coefficient': lasso.coef_
}).sort_values('Coefficient', key=abs, ascending=False)
print(feature_importance.to_string(index=False))

print(f"\nIntercept: {lasso.intercept_:.4f}")
# Lasso can shrink coefficients exactly to zero; report how many features remain.
print(f"\nNumber of non-zero coefficients: {np.sum(lasso.coef_ != 0)}")

# Create submission file
submission = pd.DataFrame({
    'Id': test_ids,
    TARGET_COL: y_pred_test
})
submission.to_csv(SUBMISSION_PATH, index=False)
print(f"\n✓ Submission file saved as '{SUBMISSION_PATH}'")
print(f"✓ Number of predictions: {len(y_pred_test)}")

LASSO REGRESSION RESULTS

Alpha (Regularization Parameter): 1.0

Training Set Performance:
  R² Score: 0.9804
  RMSE: 2.6891
  MAE: 2.1516

Feature Coefficients:
                 Feature  Coefficient
    Initial Health Score    16.632939
           Therapy Hours     6.405074
     Average Sleep Hours     0.000000
      Follow-Up Sessions     0.000000
Lifestyle Activities_Yes     0.000000

Intercept: 55.3115

Number of non-zero coefficients: 2

✓ Submission file saved as 'lasso_submission.csv'
✓ Number of predictions: 2000


In [3]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Fit a Ridge regression on the training set, print in-sample metrics and the
# fitted coefficients, and write test-set predictions to a submission CSV.
# The data is reloaded from disk so this cell does not depend on frames built
# by any other cell.

# Values used more than once in this cell
DATA_DIR = '/kaggle/input/Patient-Recovery-Prediction-Challenge'
SUBMISSION_PATH = 'ridge_submission.csv'
TARGET_COL = 'Recovery Index'
ACTIVITY_COL = 'Lifestyle Activities'
ACTIVITY_FLAG_COL = 'Lifestyle Activities_Yes'
SEPARATOR = "=" * 50

# Load the data
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')

# Remove Id column from train, save test Ids for submission
train_df = train_df.drop('Id', axis=1)
test_ids = test_df['Id']
test_df = test_df.drop('Id', axis=1)

# Convert Lifestyle Activities to binary (Yes=1, No=0).
# Any value other than the exact string 'Yes' (including a missing value) maps to 0.
train_df[ACTIVITY_FLAG_COL] = (train_df[ACTIVITY_COL] == 'Yes').astype(int)
train_df = train_df.drop(ACTIVITY_COL, axis=1)

test_df[ACTIVITY_FLAG_COL] = (test_df[ACTIVITY_COL] == 'Yes').astype(int)
test_df = test_df.drop(ACTIVITY_COL, axis=1)

# Separate features and target
X_train = train_df.drop(TARGET_COL, axis=1)
y_train = train_df[TARGET_COL]
# Select the test features by name, in the training column order, so the scaler
# and the model see columns in the order they were fitted on. A feature missing
# from the test file fails here with a KeyError instead of further downstream.
X_test = test_df[X_train.columns]

# Standardize features (statistics are learned from the training set only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train Ridge Regression
# NOTE: per the scikit-learn docs, random_state is only used by the 'sag' and
# 'saga' solvers; it is kept so the call matches the original configuration.
ridge = Ridge(alpha=1.0, random_state=42)
ridge.fit(X_train_scaled, y_train)

# Make predictions
y_pred_train = ridge.predict(X_train_scaled)
y_pred_test = ridge.predict(X_test_scaled)

# Evaluate the model on training data.
# These are in-sample scores (the model is scored on the rows it was fitted on),
# so they are an optimistic estimate of performance on unseen data.
print(SEPARATOR)
print("RIDGE REGRESSION RESULTS")
print(SEPARATOR)
print(f"\nAlpha (Regularization Parameter): {ridge.alpha}")
print("\nTraining Set Performance:")
print(f"  R² Score: {r2_score(y_train, y_pred_train):.4f}")
print(f"  RMSE: {np.sqrt(mean_squared_error(y_train, y_pred_train)):.4f}")
print(f"  MAE: {mean_absolute_error(y_train, y_pred_train):.4f}")

print("\n" + SEPARATOR)
print("Feature Coefficients:")
print(SEPARATOR)
# Coefficients are on the standardized feature scale; rank by absolute size.
feature_importance = pd.DataFrame({
    'Feature': X_train.columns,
    'Coefficient': ridge.coef_
}).sort_values('Coefficient', key=abs, ascending=False)
print(feature_importance.to_string(index=False))

print(f"\nIntercept: {ridge.intercept_:.4f}")

# Create submission file
submission = pd.DataFrame({
    'Id': test_ids,
    TARGET_COL: y_pred_test
})
submission.to_csv(SUBMISSION_PATH, index=False)
print(f"\n✓ Submission file saved as '{SUBMISSION_PATH}'")
print(f"✓ Number of predictions: {len(y_pred_test)}")

RIDGE REGRESSION RESULTS

Alpha (Regularization Parameter): 1.0

Training Set Performance:
  R² Score: 0.9887
  RMSE: 2.0420
  MAE: 1.6193

Feature Coefficients:
                 Feature  Coefficient
    Initial Health Score    17.635057
           Therapy Hours     7.400393
     Average Sleep Hours     0.809936
      Follow-Up Sessions     0.548810
Lifestyle Activities_Yes     0.304260

Intercept: 55.3115

✓ Submission file saved as 'ridge_submission.csv'
✓ Number of predictions: 2000
