In [4]:
import pandas as pd
import numpy as np
from scipy.stats import mannwhitneyu

# Load dataset
df = pd.read_csv('formresponse.csv')

# Extract relevant columns
app_satisfaction = df['Would you describe the application as easy and simple to use?'].dropna()
chatgpt_satisfaction = df['Do you find the information using ChatGPT more precise and direct?'].dropna()

# Perform Mann-Whitney U Test
stat, p = mannwhitneyu(app_satisfaction, chatgpt_satisfaction)

print(f'Mann-Whitney U Test statistic: {stat}')
print(f'p-value: {p}')

# Interpret the result
alpha = 0.05
if p < alpha:
    print('Significant difference between the groups (reject H0)')
else:
    print('No significant difference between the groups (fail to reject H0)')


Mann-Whitney U Test statistic: 156.0
p-value: 4.009545406358536e-05
Significant difference between the groups (reject H0)


In [11]:
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Load dataset
df = pd.read_csv('formresponse.csv')  # Adjust the path to your dataset

# Example feature columns (explicitly list all the columns you want to use)
feature_columns = [
    'Your Age',
    'Is the information provided by the application brief and easier to understand?',
    'Do the explanations given by the application resonate with you?',
    'Would you describe the application as easy and simple to use?',
    'Does your experience meet your expectations?',
    'Is our application more convenient for learning about Tsinghua?',
    'Did you discover something new about this building from the application?',
    'Do you find the app useful?',
    'Was the web search/app helpful for finding new interesting information?',
    'Did the AR feature assist you in acquiring information about the given building?',
    'Was it challenging to find facts about this building through web search?',
    'Did you encounter difficulty communicating with the persona/building to obtain information?',
    'Do you find the information using ChatGPT more precise and direct?',
    'Have you ever used ChatGPT before?',
    'Was it more challenging to find information through the app compared to using ChatGPT?'
]

# Target variable
target_column = 'Do you find the app useful?'

# Check if all columns exist in the DataFrame
missing_columns = [col for col in feature_columns if col not in df.columns]
if missing_columns:
    print(f"Missing columns in the dataset: {missing_columns}")
else:
    # Combine features and target into a single DataFrame
    combined_df = df[feature_columns + [target_column]].dropna()

    # Separate features and target
    features = combined_df[feature_columns]
    target = combined_df[target_column]

    # Encode categorical variables if necessary
    features = pd.get_dummies(features)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

    # Train classifier
    clf = RandomForestClassifier(random_state=42)
    clf.fit(X_train, y_train)

    # Predict
    y_pred = clf.predict(X_test)

    # Evaluate with F1 Score
    f1 = f1_score(y_test, y_pred, average='weighted')

    print(f'F1 Score: {f1}')


F1 Score: 1.0


In [18]:
import pandas as pd
from scipy.stats import mannwhitneyu

# Sample data loading
data = {
    "Timestamp": ["28/05/2024 20:03:22", "28/05/2024 20:22:15", "28/05/2024 20:32:48", "28/05/2024 21:14:09",
                  "28/05/2024 21:18:08", "28/05/2024 21:40:38", "28/05/2024 21:58:16", "28/05/2024 21:59:49",
                  "28/05/2024 22:20:33", "28/05/2024 22:27:44", "28/05/2024 22:35:32", "28/05/2024 22:37:26",
                  "28/05/2024 22:40:10"],
    "Email Address": ["claudiajovinca@gmail.com", "skolastikafelicias.02@gmail.com", "ellorashania98@gmail.com", 
                      "happyflowery23@gmail.com", "florix20@gmail.com", "ngocnhien.amie@gmail.com", 
                      "cendrawati.cen88@gmail.com", "syt8880@gmail.com", "auliasafiraisma@gmail.com", 
                      "fayza.niz2004@mail.ugm.ac.id", "turiabu2009@gmail.com", "kcadmey@gmail.com", 
                      "sneakysoushi@gmail.com"],
    "What's your name?": ["Jovinca", "Felicia", "Ellora", "Cendrawati", "Kevin", "阮青玉然", "Rudijono", "James", 
                          "Safira Isma Aulia", "Fayza Nizma Safaya Harda", "Turi", "cadmey", "Joceline"],
    "Your Age": ["10 - 20", "20 - 30", "20 - 30", "50 - 60", "20 - 30", "20 - 30", "60 - 70", "40 - 50", 
                 "21 - 30", "11 - 20", "21 - 30", "21 - 30", "11 - 20"],
    "Is the information provided by the application brief and easier to understand?": [4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4],
    "Do the explanations given by the application resonate with you?": [4, 4, 3, 4, 3, 3, 4, 4, 3, 4, 1, 4, 4],
    "Would you describe the application as easy and simple to use?": [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
    "Does your experience meet your expectations?": [4, 4, 3, 4, 4, 4, 4, 4, 3, 4, 3, 4, 4],
    "Is our application more convenient for learning about Tsinghua?": [4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 3, 4, 4],
    "Did you discover something new about this building from the application?": [4, 4, 4, 4, 3, 4, 4, 4, 3, 4, 2, 4, 4],
    "Do you find the app useful?": [4, 3, 3, 4, 3, 4, 4, 4, 4, 4, 3, 4, 4],
    "Do you perceive the app as interactive?": [4, 4, 3, 4, 3, 4, 4, 4, 4, 3, 3, 4, 4],
    "Was the web search/app helpful for finding new interesting information?": [4, 4, 4, 4, 2, 4, 4, None, 4, 1, 3, 4, 1],
    "Did the AR feature assist you in acquiring information about the given building?": [1, 1, 1, 2, 1, 3, 1, 4, 4, 1, 3, 4, 1],
    "Was it challenging to find facts about this building through web search?": [1, 1, 1, 2, 1, 2, 1, 4, 4, 1, 3, 4, 2],
    "Did you encounter difficulty communicating with the persona/building to obtain information?": [2, 2, 3, 2, 3, 2, 2, None, 2, 3, 3, 4, 1],
    "Do you find the information using ChatGPT more precise and direct?": ["Yes", "Yes", "Yes", "No", "Yes", "Yes", "No", "No", "No", "Yes", "Yes", "Yes", "Yes"],
    "Have you ever used ChatGPT before?": [1, 2, 2, 2, 1, 2, 2, 4, 2, 2, 3, 4, 1],
    "Was it more challenging to find information through the app compared to using ChatGPT?": [1, 2, 2, 2, 1, 2, 2, 4, 2, 2, 3, 4, 1],
    "Right, that's the end. Any Comments?": [None, None, None, None, None, None, None, None, "No", "-", None, None, None]
}

# Creating the DataFrame
df = pd.DataFrame(data)

# Drop the 'Timestamp' and 'Right, that's the end. Any Comments?' columns
df.drop(columns=['Timestamp', "Right, that's the end. Any Comments?"], inplace=True)

# Specify the column names for comparison
useful_column = "Do you find the app useful?"
interactive_column = "Do you perceive the app as interactive?"

# Define groups based on interactivity scores
group_high_interactivity = df[df[interactive_column] >= 4][useful_column]
group_low_interactivity = df[df[interactive_column] <= 3][useful_column]

# Check if either group has zero size
if len(group_high_interactivity) == 0 or len(group_low_interactivity) == 0:
    print("One or both groups have zero size. Cannot perform the Mann-Whitney U test.")
else:
    # Perform Mann-Whitney U test
    statistic, p_value = mannwhitneyu(group_high_interactivity, group_low_interactivity)

    # Interpret the results
    alpha = 0.05  # Set your significance level
    if p_value < alpha:
        print("Reject the null hypothesis. There is a significant difference between the groups.")
    else:
        print("Fail to reject the null hypothesis. There is no significant difference between the groups.")
    
    # Print statistic and p-value for additional context
    print(f"Mann-Whitney U test statistic: {statistic}, p-value: {p_value}")



Reject the null hypothesis. There is a significant difference between the groups.
Mann-Whitney U test statistic: 29.5, p-value: 0.03426400773486914
