In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import NMF
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, RandomizedSearchCV

In [2]:
data = pd.read_csv('investments.csv')

In [3]:
data.head()

Unnamed: 0,user_id,user_appetite,user_loan_amount,user_average_loan_risk,user_shares_amount,investment_type,investment_amount,investment_date,investment_period,investment_company,investment_sector
0,b99446cf-471b-49b1-a650-560807b2bbe8,0.49,379772,0.59,1111334,index,945357,04/10/2022,long-term,,retail
1,0e082e18-c5bd-4c2a-b522-afc5ab3d64d8,0.41,972524,0.56,700974,real_estate,850658,3/13/2012,medium-term,,real estate
2,1c289baf-b88e-4512-a232-08109c8595f5,0.8,955523,0.45,1369878,forex,65035,02/07/2018,medium-term,,
3,89fdc23f-7128-4fc7-99bf-4d59ff721e85,0.75,164844,0.97,966576,metals,752566,11/15/2016,short-term,,
4,53532f71-41af-4d12-a050-a59ae567bc4e,0.78,349968,0.16,7083979,index,447950,1/27/2014,medium-term,,retail


In [4]:
data = data.drop(['user_id', 'investment_amount', 'investment_date', 'investment_period', 'investment_company', 'investment_sector'], axis=1)

In [5]:
data.head()

Unnamed: 0,user_appetite,user_loan_amount,user_average_loan_risk,user_shares_amount,investment_type
0,0.49,379772,0.59,1111334,index
1,0.41,972524,0.56,700974,real_estate
2,0.8,955523,0.45,1369878,forex
3,0.75,164844,0.97,966576,metals
4,0.78,349968,0.16,7083979,index


In [6]:
data.isnull().sum()

user_appetite             0
user_loan_amount          0
user_average_loan_risk    0
user_shares_amount        0
investment_type           0
dtype: int64

In [7]:
le = LabelEncoder()
data['investment_type'] = le.fit_transform(data['investment_type'])
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102000 entries, 0 to 101999
Data columns (total 5 columns):
 #   Column                  Non-Null Count   Dtype  
---  ------                  --------------   -----  
 0   user_appetite           102000 non-null  float64
 1   user_loan_amount        102000 non-null  int64  
 2   user_average_loan_risk  102000 non-null  float64
 3   user_shares_amount      102000 non-null  int64  
 4   investment_type         102000 non-null  int32  
dtypes: float64(2), int32(1), int64(2)
memory usage: 3.5 MB


In [8]:
encoding_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
encoding_mapping

{'crypto': 0,
 'etf': 1,
 'forex': 2,
 'index': 3,
 'metals': 4,
 'real_estate': 5,
 'stocks': 6}

In [9]:
X = data.drop('investment_type', axis=1).values
y = data['investment_type'].values

In [10]:
# Scale the features using Min-Max scaling to ensure non-negativity
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, _, _ = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [11]:
# hyperparameter tuning for NMF
param_grid = {
    'n_components': [3, 4, 5],  # Adjust the range of components as needed
    'init': ['random', 'nndsvd', 'nndsvda', 'nndsvdar', None],
    'max_iter': [200, 400, 600],
    'alpha_W': [0, 0.01, 0.1, 1],
    'l1_ratio': [0, 0.01, 0.1, 1]
}

nmf = NMF()
rand_search = RandomizedSearchCV(nmf, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
rand_search.fit(X_train)

best_n_components = rand_search.best_params_['n_components']
best_init = rand_search.best_params_['init']



In [12]:
rand_search.best_params_

{'n_components': 4,
 'max_iter': 400,
 'l1_ratio': 0.01,
 'init': 'random',
 'alpha_W': 1}

In [13]:
# Step 3: Model Training
model = NMF(n_components=best_n_components, init=best_init)
W = model.fit_transform(X_train)  # User matrix
H = model.components_  # Investment matrix



In [14]:
predicted_affinities = np.dot(X_test, H.T)
mse = mean_squared_error(X_test, np.dot(predicted_affinities, H))

print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", np.sqrt(mse))

Mean Squared Error: 0.14654805767067172
Root Mean Squared Error: 0.38281595796240225


In [17]:
# Step 4: Prediction
new_user_input = np.array([0.49, 379772,	0.59, 1111334])  # user_appetite, user_loan_amount, user_average_loan_risk, user_shares_amount
scaled_new_user_input = scaler.transform(new_user_input.reshape(1, -1))
predicted_affinities = np.dot(scaled_new_user_input, H.T)[0]

# Rank the investment types based on predicted affinities
top_indices = np.argsort(predicted_affinities)[::-1]  # Sort in descending order

label_to_investment_type = {v: k for k, v in encoding_mapping.items()}

# Get the top 3 recommended investment types and their ranking
top_3_recommendations = [(label_to_investment_type[idx], predicted_affinities[idx]) for idx in top_indices[:3]]

# Print the recommendations and their ranking
print("Top 3 Recommended Investment Types:")
for i, (investment_type, ranking) in enumerate(top_3_recommendations):
    print(f"{i+1}. {investment_type} (Ranking: {ranking:.2f})")

Top 3 Recommended Investment Types:
1. forex (Ranking: 0.73)
2. crypto (Ranking: 0.52)
3. etf (Ranking: 0.39)


In [23]:
import joblib

# Save the model
joblib.dump(model, 'nmf_model.joblib')

['nmf_model.joblib']

In [24]:
# Save the scaler   
joblib.dump(scaler, 'scaler.joblib')

['scaler.joblib']

In [25]:
# Load the model and scaler
model2 = joblib.load('nmf_model.joblib')
scaler2 = joblib.load('scaler.joblib')

# Predict using the loaded model and scaler
scaled_new_user_input = scaler2.transform(new_user_input.reshape(1, -1))

predicted_affinities = np.dot(scaled_new_user_input, model2.components_.T)[0]

top_indices = np.argsort(predicted_affinities)[::-1]  # Sort in descending order

label_to_investment_type = {v: k for k, v in encoding_mapping.items()}

# Get the top 3 recommended investment types and their ranking
top_3_recommendations = [(label_to_investment_type[idx], predicted_affinities[idx]) for idx in top_indices[:3]]

# Print the recommendations and their ranking
print("Top 3 Recommended Investment Types:")
for i, (investment_type, ranking) in enumerate(top_3_recommendations):
    print(f"{i+1}. {investment_type} (Ranking: {ranking:.2f})")

Top 3 Recommended Investment Types:
1. forex (Ranking: 0.73)
2. crypto (Ranking: 0.52)
3. etf (Ranking: 0.39)
