In [1]:
import numpy as np
from scipy.optimize import minimize
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load data
# Folder that holds the LumositySample_user*.csv files. Kept in one place so
# that running the notebook against a different location is a one-line change.
DATA_DIR = '/Users/sriskandhakandimalla/Documents/PSYCH119'

# One CSV per user; each is read into its own DataFrame.
data_user_30222 = pd.read_csv(f'{DATA_DIR}/LumositySample_user30222.csv')
data_user_48567 = pd.read_csv(f'{DATA_DIR}/LumositySample_user48567.csv')
data_user_144155 = pd.read_csv(f'{DATA_DIR}/LumositySample_user144155.csv')


In [3]:
# Tag each per-user frame with its owner's ID so that rows remain attributable
# once the three frames are stacked together.
for frame, uid in (
    (data_user_30222, 30222),
    (data_user_48567, 48567),
    (data_user_144155, 144155),
):
    frame['user_id'] = uid

# Stack all users' rows into a single frame (original row indices are kept).
combined_data = pd.concat([data_user_30222, data_user_48567, data_user_144155])

# Keep only the rows that actually have a score; rows with a NaN 'score' are dropped.
combined_data_clean = combined_data.dropna(subset=['score'])


In [4]:
def shared_param_least_squares(params, user_ids, t, obsy):
    """Mean squared error of a three-user exponential model with a shared rate.

    For each user the predicted score is ``a - (a - u) * exp(-c * t)``: the
    prediction equals ``u`` at ``t == 0`` and moves toward ``a`` as ``c * t``
    grows. ``a`` and ``u`` are fitted per user; ``c`` is shared by all users.

    Parameters
    ----------
    params : sequence of 7 numbers
        ``(a1, u1, a2, u2, a3, u3, c)`` where pair 1 belongs to user 30222,
        pair 2 to user 48567 and pair 3 to user 144155.
    user_ids : array-like
        User ID of each observation; selects which ``(a, u)`` pair applies.
    t : array-like
        Value of the time-like predictor for each observation.
    obsy : array-like
        Observed score for each observation.

    Returns
    -------
    float
        Mean of the squared differences between predicted and observed scores.

    Notes
    -----
    Observations whose user ID is not one of the three IDs above keep a
    predicted score of 0, exactly as before.
    """
    a1, u1, a2, u2, a3, u3, c = params

    # Coerce to arrays so boolean masking also works for lists / pandas Series.
    user_ids = np.asarray(user_ids)
    t = np.asarray(t, dtype=float)
    obsy = np.asarray(obsy, dtype=float)

    # The prediction buffer must be float. np.zeros_like(obsy) would inherit an
    # integer dtype from integer scores and silently truncate every prediction.
    predy = np.zeros(obsy.shape, dtype=float)

    for uid, a, u in ((30222, a1, u1), (48567, a2, u2), (144155, a3, u3)):
        mask = user_ids == uid
        predy[mask] = a - (a - u) * np.exp(-c * t[mask])

    return np.mean((predy - obsy) ** 2)

In [5]:
# Extract the three columns used by the objective function from the cleaned
# frame, each via .values, in the order: user IDs, gameplay counts, scores.
user_ids, gameplays, scores = (
    combined_data_clean[column].values
    for column in ('user_id', 'gameplay', 'score')
)


In [6]:
# Starting point for the optimizer, laid out as a1, u1, a2, u2, a3, u3, c.
#   a (per user): that user's highest observed score plus a buffer of 10
#   u (per user): that user's lowest observed score
#   c (shared):   a small positive value
initial_params = []
for uid in (30222, 48567, 144155):
    user_scores = combined_data_clean.loc[combined_data_clean['user_id'] == uid, 'score']
    initial_params.append(max(user_scores) + 10)  # a for this user
    initial_params.append(min(user_scores))  # u for this user
initial_params.append(0.01)  # c

In [7]:
# Bounds for the seven parameters (a1, u1, a2, u2, a3, u3, c): each one has a
# lower bound of 0 and no upper bound, i.e. all parameters stay non-negative.
param_bounds = [(0, None)] * 7

In [8]:
# Perform the fitting process
# Minimize the shared-c mean squared error over all seven parameters, starting
# from initial_params and keeping every parameter inside param_bounds.
result_shared_params = minimize(
    shared_param_least_squares,
    initial_params,
    args=(user_ids, gameplays, scores),
    bounds=param_bounds,
)

# The optimizer can stop without converging. Surface that here instead of
# silently reporting parameters that came from a failed run.
if not result_shared_params.success:
    print(f'WARNING: optimizer did not converge: {result_shared_params.message}')

fitted_params = result_shared_params.x  # order: a1, u1, a2, u2, a3, u3, c
fitted_mse = result_shared_params.fun  # objective value (MSE) at fitted_params
print(fitted_params, fitted_mse)

# Output recorded from a previous run (a1, u1, a2, u2, a3, u3, c, then the MSE):
#[5.67315020e+01 3.83526429e+01 6.55124952e+01 4.77435277e+01
#4.06096506e+01 1.96825775e+01 6.01064829e-02] 9.667977853532975


[5.67315020e+01 3.83526429e+01 6.55124952e+01 4.77435277e+01
 4.06096506e+01 1.96825775e+01 6.01064829e-02] 9.667977853532975
