In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import joblib

# Read the sample dataset; the relative path is resolved against the
# current working directory.
file_path = "Sample_Investability_Index_Dataset.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

# Weight ("credit") given to each scored parameter. The keys double as the
# dataset column names used further down; the credits add up to 60.
allocated_credits = {
    "Market Growth Potential": 12,
    "Profitability": 10,
    "Competitive Advantage": 10,
    "Management Quality": 8,
    "Innovation and R&D": 8,
    "Regulatory Environment": 6,
    "Financial Stability": 4,
    "Sustainability and ESG": 2,
}

# Compute the Investability Index: the credit-weighted average of the
# parameter scores, i.e. sum(score * credit) / sum(credits), one value per row.
#
# This is done column-wise rather than with a row-wise df.apply(..., axis=1):
# each term is a whole-column multiplication, so no Python-level lambda runs
# per row and the credit total is computed once instead of once per row.
# Terms are accumulated in the order of allocated_credits, matching the order
# a per-row sum over the same dict would use. A frame with zero rows simply
# yields an empty column.
df["Investability Index"] = sum(
    df[param] * credit for param, credit in allocated_credits.items()
) / sum(allocated_credits.values())

# The model inputs are the scored parameters (the keys of allocated_credits);
# the target is the index column derived from them.
features = list(allocated_credits)  # iterating a dict yields its keys in order
y = df["Investability Index"]
X = df[features]

# Hold out 20% of the rows as a test set; random_state pins the shuffle so
# the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest regressor on the training split. fit() hands
# back the estimator itself, so construction and fitting can be chained.
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Report mean squared error on the held-out split.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Model Mean Squared Error: {mse}")

# Persist the fitted estimator to disk with joblib and confirm on stdout.
joblib.dump(value=model, filename="investability_model.pkl")
print("Model saved as investability_model.pkl")


Model Mean Squared Error: 0.17489566666666584
Model saved as investability_model.pkl
