### Loading the data 

In [1]:
import pandas as pd

# Import csv files
g1 = pd.read_csv('g1.csv', delimiter=";")
g2 = pd.read_csv('g2.csv', delimiter=";")

# NaN = 0

for column in g1.columns:
    if g1[column].dtype in ['float64', 'int64']:
        g1[column] = g1[column].fillna(0)

for column in g2.columns:
    if g2[column].dtype in ['float64', 'int64']:
        g2[column] = g2[column].fillna(0)

# Exclude columns containing "Other" in their names to avoid showing on the new csv that contains only MEANS
g1_filtered = g1[[col for col in g1.columns if "Other" not in col]]
g2_filtered = g2[[col for col in g2.columns if "Other" not in col]]

# Calculate means for numerical columns in the filtered DataFrames
g1_means = g1_filtered.select_dtypes(include=['float', 'int']).mean()
g2_means = g2_filtered.select_dtypes(include=['float', 'int']).mean()

# Convert means to DataFrame for saving
g1_means_df = pd.DataFrame(g1_means, columns=['Mean']).reset_index()
g2_means_df = pd.DataFrame(g2_means, columns=['Mean']).reset_index()

# Save to new CSV files
g1_means_df.to_csv('g1_means.csv', index=False, sep=';')
g2_means_df.to_csv('g2_means.csv', index=False, sep=';')

### Statistical Analysis

In [9]:
# Demographic analysis

# mean 

mean_age_group1 = g1['Please enter your age (enter a valid number):'].mean()
mean_age_group2 = g2['Please enter your age (enter a valid number):'].mean()

print("Mean Age Group 1:", mean_age_group1)
print("Mean Age Group 2:", mean_age_group2)

total_mean_study = (mean_age_group1+mean_age_group2)/2
print("Total Mean Age:", total_mean_study)

# std 

std_age_group1 = g1['Please enter your age (enter a valid number):'].std()
std_age_group2 = g2['Please enter your age (enter a valid number):'].std()

print("Standard Deviation Age Group 1:", std_age_group1)
print("Standard Deviation Age Group 2:", std_age_group2)

total_sd_study = (std_age_group1+std_age_group2)/2
print("Total SD Age:", total_sd_study)

Mean Age Group 1: 19.904761904761905
Mean Age Group 2: 20.666666666666668
Total Mean Age: 20.285714285714285
Standard Deviation Age Group 1: 9.486331018390418
Standard Deviation Age Group 2: 5.57075698027955
Total SD Age: 7.528543999334984


In [3]:
# Highest emotion per fragment - Group 1 (humans)

highest_emotion_g1 = {}

for fragment_number in range(1, 31): 
    # Filter rows for the current fragment number by checking if the row starts with the fragment number and a dash
    fragment_rows = g1_means_df[g1_means_df['index'].astype(str).str.startswith(f"{fragment_number} -")]
    
     # Find the row with the highest mean for this fragment if any rows were found
    if not fragment_rows.empty:
        max_emotion_row = fragment_rows.loc[fragment_rows['Mean'].idxmax()]
        # Extract only the emotion name (after the dash)
        emotion_name = max_emotion_row['index'].split(" - ", 1)[1]
        highest_emotion_g1[f"Fragment {fragment_number}"] = emotion_name

highest_emotion_df1 = pd.DataFrame(list(highest_emotion_g1.items()), columns=['Fragment', 'Highest Emotion'])
highest_emotion_df1.index += 1 
print(highest_emotion_df1)

       Fragment Highest Emotion
1    Fragment 1            JOY 
2    Fragment 2      NOSTALGIA 
3    Fragment 3       NOSTALGIA
4    Fragment 4    PEACEFULNESS
5    Fragment 5       NOSTALGIA
6    Fragment 6      TENDERNESS
7    Fragment 7        TENSION 
8    Fragment 8       NOSTALGIA
9    Fragment 9        SADNESS 
10  Fragment 10            JOY 
11  Fragment 11       NOSTALGIA
12  Fragment 12            JOY 
13  Fragment 13       NOSTALGIA
14  Fragment 14        SADNESS 
15  Fragment 15            JOY 
16  Fragment 16    PEACEFULNESS
17  Fragment 17           POWER
18  Fragment 18       NOSTALGIA
19  Fragment 19        TENSION 
20  Fragment 20         WONDER 
21  Fragment 21        TENSION 
22  Fragment 22        TENSION 
23  Fragment 23       NOSTALGIA
24  Fragment 24    PEACEFULNESS
25  Fragment 25           POWER
26  Fragment 26         WONDER 
27  Fragment 27        TENSION 
28  Fragment 28       NOSTALGIA
29  Fragment 29           POWER
30  Fragment 30         WONDER 


In [4]:
# Highest emotion per fragment - Group 2 (AI)

highest_emotion_g2 = {}

for fragment_number in range(1, 31): 
    # Filter rows for the current fragment number by checking if the row starts with the fragment number and a dash
    fragment_rows = g2_means_df[g2_means_df['index'].astype(str).str.startswith(f"{fragment_number} -")]
    
    # Find the row with the highest mean for this fragment if any rows were found
    if not fragment_rows.empty:
        max_emotion_row = fragment_rows.loc[fragment_rows['Mean'].idxmax()]
        # Extract only the emotion name (after the dash)
        emotion_name = max_emotion_row['index'].split(" - ", 1)[1]
        highest_emotion_g2[f"Fragment {fragment_number}"] = emotion_name

highest_emotion_df2 = pd.DataFrame(list(highest_emotion_g2.items()), columns=['Fragment', 'Highest Emotion'])
highest_emotion_df2.index += 1 
print(highest_emotion_df2)

       Fragment Highest Emotion
1    Fragment 1            JOY 
2    Fragment 2         SADNESS
3    Fragment 3       NOSTALGIA
4    Fragment 4            JOY 
5    Fragment 5        TENSION 
6    Fragment 6      TENDERNESS
7    Fragment 7        TENSION 
8    Fragment 8    PEACEFULNESS
9    Fragment 9    PEACEFULNESS
10  Fragment 10            JOY 
11  Fragment 11    PEACEFULNESS
12  Fragment 12         WONDER 
13  Fragment 13  TRANSCENDENCE 
14  Fragment 14        SADNESS 
15  Fragment 15            JOY 
16  Fragment 16         WONDER 
17  Fragment 17        TENSION 
18  Fragment 18       NOSTALGIA
19  Fragment 19        TENSION 
20  Fragment 20    PEACEFULNESS
21  Fragment 21        TENSION 
22  Fragment 22           POWER
23  Fragment 23         WONDER 
24  Fragment 24    PEACEFULNESS
25  Fragment 25            JOY 
26  Fragment 26            JOY 
27  Fragment 27        TENSION 
28  Fragment 28            JOY 
29  Fragment 29           POWER
30  Fragment 30            JOY 
