In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
data = pd.read_csv('/content/Health dataset.csv')

# Function to convert percentage strings to floats
def convert_percentage_to_float(percentage_string):
    if isinstance(percentage_string, str):
        return float(percentage_string.strip('%')) / 100
    return percentage_string

# Apply the conversion function to all columns in the features dataframe
features = data[['Liquidity Ratio', 'Profitability Ratio', 'Solvency Ratio', 'Efficiency Ratio',
                 'Market Growth', 'GDP Growth', 'Competitor Profitability']].applymap(convert_percentage_to_float)

# Define the target
target = data['Financial Health Score']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R2 Score: {r2}')

# Feature importance
importances = model.feature_importances_
feature_names = features.columns
importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': importances})
importance_df = importance_df.sort_values(by='Importance', ascending=False)
print(importance_df)


  'Market Growth', 'GDP Growth', 'Competitor Profitability']].applymap(convert_percentage_to_float)


Mean Squared Error: 0.10048900000000462
R2 Score: nan
                    Feature  Importance
1       Profitability Ratio    0.211538
2            Solvency Ratio    0.173077
5                GDP Growth    0.153846
6  Competitor Profitability    0.153846
0           Liquidity Ratio    0.134615
4             Market Growth    0.096154
3          Efficiency Ratio    0.076923


