# In [None]:
# The purpose of this code is to train a machine learning model to predict the level of immersion that users experience in virtual reality (VR), based on various features collected from VR experiences. Specifically, it employs a logistic regression classifier to make these predictions. The dataset is preprocessed to create suitable features for the classifier, and the performance of the model is evaluated using standard metrics.

# Here is a breakdown of the features used for prediction and their creation:

# 1. **Age Group Binning**: The ages are binned into predefined ranges (0-18, 19-30, 31-40, 41-50, 51-60) and one-hot encoded to create binary features for each age range.

# 2. **Gender**: The 'Gender' column is one-hot encoded, creating separate binary features for each gender category present in the dataset.

# 3. **VR Headset Type**: The type of VR headset used is one-hot encoded, resulting in binary features that indicate the headset type.

# 4. **Duration Binning**: The duration of the VR experience is binned and one-hot encoded to create binary features for each duration range (5-15, 15-25, 25-35, 35-45, 45-55, 55-65).

# 5. **Motion Sickness**: Levels of motion sickness are one-hot encoded, adding binary features that signify the level of motion sickness reported by users.

# The logistic regression model is then trained on these features to predict the 'ImmersionLevel', which likely indicates how engrossed or absorbed users were during their VR experience. The 'ImmersionLevel' is the target variable, and the model attempts to predict it from the binary features created above.

# The code evaluates the model's performance using metrics such as Mean Absolute Error (MAE), Accuracy Score, Confusion Matrix, and a Classification Report (which includes precision, recall, f1-score, and support). These metrics give an indication of the reliability and effectiveness of the model in making predictions about VR experience immersion levels.

# In a practical sense, the insights from this model could help VR designers understand what aspects contribute to higher levels of immersion, informing future VR experience development. However, the code does not specify the exact features in the original data, except those that are explicitly binned and encoded. There may be additional preexisting features in the dataset that are also included in the feature matrix 'X'.




import pandas as pd
import numpy as np

# Importing necessary machine learning libraries
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, mean_absolute_error, confusion_matrix

# Load the dataset from a CSV file into a pandas DataFrame
df = pd.read_csv("virtual-reality-experiences/data.csv")

# Bin ages into labelled groups and one-hot encode them.
# The labels describe right-closed ranges ('19-30' means 19 through 30), so
# the bins are right-closed (pd.cut's default). include_lowest=True keeps an
# age of exactly 0 in the first group. Ages outside 0-60 fall in no bin and
# get all-zero dummy columns.
age_bins = [0, 18, 30, 40, 50, 60]
age_labels = ['0-18', '19-30', '31-40', '41-50', '51-60']
age_groups = pd.cut(df['Age'], bins=age_bins, labels=age_labels, include_lowest=True)
age_group_dummies = pd.get_dummies(age_groups, prefix='Age').astype(int)
df = pd.concat([df, age_group_dummies], axis=1)

# One-hot encode 'Gender' column and add to DataFrame
gender_dummies = pd.get_dummies(df['Gender']).astype(int)
df = pd.concat([df, gender_dummies], axis=1)

# One-hot encode 'VRHeadset' column and add to DataFrame
vrheadset_dummies = pd.get_dummies(df['VRHeadset']).astype(int)
df = pd.concat([df, vrheadset_dummies], axis=1)

# Bin durations into labelled groups and one-hot encode them.
# right=False makes every bin left-closed/right-open: '5-15' is [5, 15),
# '15-25' is [15, 25), and so on, so each boundary value belongs to the bin
# that starts with it. Durations below 5 or at/above 65 fall in no bin.
duration_bins = [5, 15, 25, 35, 45, 55, 65]
duration_labels = ['5-15', '15-25', '25-35', '35-45', '45-55', '55-65']
duration_groups = pd.cut(df['Duration'], bins=duration_bins, labels=duration_labels, right=False)
duration_group_dummies = pd.get_dummies(duration_groups, prefix='Duration').astype(int)
df = pd.concat([df, duration_group_dummies], axis=1)

# One-hot encode 'MotionSickness' column with a prefix and add to DataFrame
ms_dummies = pd.get_dummies(df['MotionSickness'], prefix='MotionSickness_level').astype(int)
df = pd.concat([df, ms_dummies], axis=1)

# Define the target variable and feature matrix.
# The raw source columns are dropped together with the target: each one is
# already represented by its dummy columns, and any non-numeric raw column
# (presumably 'Gender' and 'VRHeadset' hold text categories) would make the
# estimator's fit() fail.
# NOTE(review): every other column in the CSV (e.g. an identifier, if one
# exists) still ends up in X - confirm those are meaningful predictors.
encoded_source_columns = ['Age', 'Gender', 'VRHeadset', 'Duration', 'MotionSickness']
y = df['ImmersionLevel']
X = df.drop(columns=['ImmersionLevel'] + encoded_source_columns)

# Split dataset into training and test sets (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Function to evaluate the model's performance
def model_evaluation(y_pred, y_test=y_test):
    """Print evaluation metrics comparing predictions with the true labels.

    Args:
        y_pred: Labels predicted by the model.
        y_test: Ground-truth labels. The default is the ``y_test`` object
            that exists at the moment this function is defined; pass the
            labels explicitly after re-splitting the data, otherwise the
            earlier split's labels are used.

    Prints the mean absolute error, accuracy score, confusion matrix and
    classification report, each preceded by a separator line. Returns None.
    """
    separator = '*' * 70

    # scikit-learn metrics expect (y_true, y_pred) in that order. MAE and
    # accuracy are symmetric, but swapping the arguments transposes the
    # confusion matrix and exchanges precision with recall in the report.
    print(separator)
    # NOTE(review): MAE is only meaningful if the labels are ordered numbers
    # - presumably 'ImmersionLevel' is a numeric rating; confirm.
    print(f'Mean Absolute Error: {mean_absolute_error(y_test, y_pred)}')
    print(separator)
    print(f'Accuracy Score: {accuracy_score(y_test, y_pred)}')
    print(separator)
    print(f"Confusion Matrix: \n{confusion_matrix(y_test, y_pred)}")
    print(separator)
    print(f"Classification Report: \n{classification_report(y_test, y_pred)}")

# Build a Logistic Regression classifier with default settings and train it
# on the training split; fit() returns the estimator itself, so construction
# and training are chained into one expression.
lr_model = LogisticRegression().fit(X_train, y_train)

# Predict the target for the held-out test features
lr_pred = lr_model.predict(X_test)

# Report the metrics for these predictions via the evaluation helper
# (the true labels come from the helper's default argument)
model_evaluation(lr_pred)