In [1]:
import glob
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
from scipy.stats import pearsonr
from sklearn import datasets
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from scipy.stats import kendalltau
from scipy.stats import ttest_ind
import pingouin as pg

The first step I took was to collect and combine the data. I created a function that loads the results data for each Large Language Model: GPT-3.5, GPT-4, Mistral, and Gemma. Then, I combined the data from each LLM into a single dataframe. In total, I generated 25 samples for Asian American participants and 25 samples for European American participants for each model.

In [3]:
def create_dataframe(model):
    """Load and combine every result CSV produced by one language model.

    Files are looked up under ``results/final/{model}/`` using the prefixes
    ``us_affect_study_final_us{model}*`` (European American) and
    ``us_affect_study_final_aa{model}*`` (Asian American).

    Args:
        model: Model name as it appears in the directory and file names.

    Returns:
        A DataFrame holding the European American rows followed by the Asian
        American rows, with a ``group`` column naming the participant group.
        Each group's rows are re-indexed from 0, so index labels repeat
        between the two groups.

    Raises:
        FileNotFoundError: If no file matches the pattern of either group.
    """
    group_frames = [
        _load_group_frames(model, 'us', "European American"),
        _load_group_frames(model, 'aa', "Asian American"),
    ]
    return pd.concat(group_frames, axis=0)


def _load_group_frames(model, group_code, group_label):
    """Read all CSV files of one participant group and label them with ``group_label``."""
    pattern = f'results/final/{model}/us_affect_study_final_{group_code}{model}*'

    # glob returns files in filesystem order; sorting keeps the row order
    # reproducible across machines and runs
    files = sorted(glob.glob(pattern))
    if not files:
        # pd.concat on an empty list fails with an unhelpful
        # "No objects to concatenate"; name the missing pattern instead
        raise FileNotFoundError(f"No result files match {pattern!r}")

    combined = pd.concat([pd.read_csv(file) for file in files], ignore_index=True)
    combined['group'] = group_label
    return combined

Next, I created a function that prepares the data for analysis: it labels each feeling with its affect octant and tags every row with the model that produced it.

In [5]:
def prepare_df(model, df):
    """Label every row with its affect octant and the model that produced it.

    Args:
        model: Model name stored in the new ``model`` column.
        df: DataFrame with a ``feeling`` column. An ``'Unnamed: 0'`` column
            (the index written out by ``to_csv``) is dropped when present.

    Returns:
        A new DataFrame with ``octant`` and ``model`` columns added; the input
        frame is left untouched.

    Raises:
        ValueError: If a value in ``feeling`` has no octant in the mapping.
    """
    # octant -> feelings belonging to it
    feeling_map = {'high-arousal positive': ['enthusiastic', 'excited', 'strong (elated)'],
                   'positive': ['happy', 'satisfied', 'content'],
                   'low arousal': ['quiet', 'still', 'passive'],
                   'low-arousal positive': ['calm', 'at rest', 'relaxed', 'peaceful (serene)'],
                   'low-arousal negative': ['dull', 'sleepy', 'sluggish'],
                   'negative': ['sad', 'lonely', 'unhappy'],
                   'high-arousal negative': ['fearful', 'hostile', 'nervous'],
                   'high arousal': ['aroused', 'surprised', 'astonished']}

    # inverted lookup (feeling -> octant) so the whole column is mapped in one pass
    octant_by_feeling = {feeling: octant
                         for octant, feelings in feeling_map.items()
                         for feeling in feelings}

    # errors='ignore' keeps this usable on frames that never had the helper column
    prepared = df.drop(columns=['Unnamed: 0'], errors='ignore')

    octants = prepared['feeling'].map(octant_by_feeling)

    # positional (numpy) masks/values are used because the combined frame can
    # carry repeated index labels
    unmapped = octants.isna().to_numpy()
    if unmapped.any():
        unknown = prepared.loc[unmapped, 'feeling'].unique().tolist()
        raise ValueError(f"Feelings with no octant mapping: {unknown}")

    prepared['octant'] = octants.to_numpy()

    # adds a new column that labels the data with the appropriate model
    prepared['model'] = model

    return prepared


In [6]:
def calculate_ipsatized_mean(df):
    """Print and return the ideal and actual means averaged across octants.

    Each octant's mean is computed first and the octant means are then
    averaged, so octants with more items do not weigh more. Despite the name,
    no scores are rescaled here; only the two grand means are produced.

    Args:
        df: DataFrame with ``octant``, ``ideal`` and ``actual`` columns.

    Returns:
        ``(overall_ideal, overall_actual)``. Octants absent from ``df`` are
        left out of the average; both values are NaN when none is present.
    """
    octants = ['high-arousal positive', 'positive', 'low-arousal positive', 'low arousal',
               'low-arousal negative', 'negative', 'high-arousal negative', 'high arousal']

    # mean of the 'ideal' and 'actual' columns within each known octant
    octant_means = (
        df.loc[df['octant'].isin(octants)]
        .groupby('octant')[['ideal', 'actual']]
        .mean()
    )

    # true mean of the octant means (floor division would truncate it)
    overall_ideal = octant_means['ideal'].mean()
    overall_actual = octant_means['actual'].mean()

    print(overall_ideal)
    print(overall_actual)
    return overall_ideal, overall_actual
    
            
            

This function creates a dictionary of lists containing a model's ideal and actual averages by octant, reported separately for the Asian American and European American groups.

In [8]:
def calculate_mean(df):
    """Average the ideal and actual ratings per octant for each participant group.

    Args:
        df: DataFrame with ``octant``, ``group``, ``ideal`` and ``actual``
            columns, where ``group`` is 'Asian American' or 'European American'.

    Returns:
        A dictionary with the key ``octants`` (octant names in order of first
        appearance in ``df``) and four lists aligned with it:
        ``ideal_mean_us_list``, ``ideal_mean_aa_list``,
        ``actual_mean_us_list`` and ``actual_mean_aa_list``.
    """
    # order of first appearance is reproducible, unlike iteration over a set
    # of strings, which can change between interpreter sessions
    octant_list = df['octant'].unique().tolist()

    # the group split does not depend on the octant, so do it once
    aa = df.loc[df['group'] == 'Asian American']
    us = df.loc[df['group'] == 'European American']

    def _means_by_octant(group_df, column):
        # one average per octant, in the same order as octant_list
        return [np.average(group_df.loc[group_df['octant'] == octant, column])
                for octant in octant_list]

    # creates a dictionary that puts the data all together
    mean_dictionary = {
        'octants': octant_list,
        'ideal_mean_us_list': _means_by_octant(us, 'ideal'),
        'ideal_mean_aa_list': _means_by_octant(aa, 'ideal'),
        'actual_mean_us_list': _means_by_octant(us, 'actual'),
        'actual_mean_aa_list': _means_by_octant(aa, 'actual'),
    }
    return mean_dictionary


This function creates a line graph that shows the ideal and actual averages of each participant group across the octants for a given model.

In [10]:
def create_line_graph(df, mean_dictionary, model):
    """Draw the per-octant ideal and actual means of both groups as four lines.

    Args:
        df: Not read; the plotted table is built from ``mean_dictionary``.
        mean_dictionary: Dictionary with the keys ``octants``,
            ``ideal_mean_us_list``, ``ideal_mean_aa_list``,
            ``actual_mean_us_list`` and ``actual_mean_aa_list``.
        model: Model name shown in the figure title.
    """
    # octants laid out from high-arousal positive around to high arousal
    octant_order = ['high-arousal positive', 'positive', 'low-arousal positive', 'low arousal',
                    'low-arousal negative', 'negative', 'high-arousal negative', 'high arousal']

    means = pd.DataFrame({
        'octants': mean_dictionary['octants'],
        'ideal_mean_us': mean_dictionary['ideal_mean_us_list'],
        'ideal_mean_aa': mean_dictionary['ideal_mean_aa_list'],
        'actual_mean_us': mean_dictionary['actual_mean_us_list'],
        'actual_mean_aa': mean_dictionary['actual_mean_aa_list'],
    })

    # an ordered categorical makes sort_values follow octant_order
    means['octants'] = pd.Categorical(means['octants'], categories=octant_order, ordered=True)
    means = means.sort_values('octants')

    # (column, legend name, marker symbol) for each line, in drawing order
    line_specs = [
        ('ideal_mean_us', 'Ideal Mean US', 'circle'),
        ('ideal_mean_aa', 'Ideal Mean AA', 'x'),
        ('actual_mean_us', 'Actual Mean US', 'x'),
        ('actual_mean_aa', 'Actual Mean AA', 'circle'),
    ]

    fig = go.Figure()
    for column, legend_name, symbol in line_specs:
        fig.add_trace(go.Scatter(
            x=means['octants'],
            y=means[column],
            mode='lines+markers',
            name=legend_name,
            marker={'symbol': symbol},
        ))

    fig.update_layout(
        title=f'Comparison of Ideal and Actual Means for US and AA Groups with {model}',
        xaxis_title='Octants',
        yaxis_title='Mean Values',
        legend_title='Legend',
        xaxis={'tickangle': 45},
        yaxis={'range': [0, 6]},  # fixed y-axis range
        height=700,
        width=700,
    )

    fig.show()

This function shows how many of each feeling there are in each octant for each model.

In [12]:
def visualize_feeling_distribution(df):
    """Show a count plot of the feelings in ``df``, coloured by octant.

    Args:
        df: DataFrame with ``feeling`` and ``octant`` columns.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.countplot(data=df, x='feeling', hue='octant', palette='coolwarm', ax=ax)
    ax.tick_params(axis='x', labelrotation=45)

    # labels describe what is actually drawn: feelings on the x axis and
    # octants as the colour (hue) legend
    ax.set_title('Distribution of Feelings by Emotional Octant')
    ax.set_xlabel('Feeling')
    ax.set_ylabel('Count')
    ax.legend(title='Octant')

    fig.tight_layout()
    plt.show()

In [13]:
def create_bubble_chart(df, model):
    """Show a bubble chart of the mean of ideal and actual ratings per feeling.

    Args:
        df: DataFrame with ``feeling``, ``group``, ``ideal`` and ``actual``
            columns. It is not modified; the ``average`` column is added to a
            copy used only for plotting.
        model: Model name shown in the figure title.
    """
    # true division keeps half-point averages (floor division would turn
    # e.g. (3 + 4) / 2 into 3)
    chart_df = df.assign(average=(df["ideal"] + df["actual"]) / 2)

    fig = px.scatter(chart_df, x="feeling", y="average",
                     size="average",
                     hover_name="feeling", color="group", log_x=False, size_max=60)

    # Add a title to the graph
    fig.update_layout(title=f"Comparison of Ideal and Actual Averages Across Feelings in {model}")

    fig.show()

In [14]:
def plot_mean_difference(mean_dictionary):
    """Bar-plot the absolute gap in actual-affect means between the two groups.

    Args:
        mean_dictionary: Dictionary with the keys ``octants``,
            ``actual_mean_us_list`` and ``actual_mean_aa_list``.
    """
    octant_labels = mean_dictionary['octants']
    us_actual = np.array(mean_dictionary['actual_mean_us_list'])
    aa_actual = np.array(mean_dictionary['actual_mean_aa_list'])
    gap = np.abs(us_actual - aa_actual)

    plt.figure(figsize=(10, 6))
    ax = sns.barplot(x=octant_labels, y=gap, palette="viridis")
    ax.set_title('mean difference in actual affect between European American and Asian American groups across all octants')
    ax.set_xlabel('octants')
    ax.set_ylabel('absolute mean difference')
    plt.xticks(rotation=45)
    # fixed y range
    ax.set_ylim([0, 0.8])
    plt.tight_layout()
    plt.show()

In [15]:
def create_pie_chart(df, model):
    """Show a pie chart of how the rows of ``df`` are split across octants.

    Args:
        df: DataFrame with an ``octant`` column.
        model: Model name shown in the chart title.
    """
    # number of rows per octant; the index holds the octant names
    octant_counts = df['octant'].value_counts()

    chart = px.pie(
        values=octant_counts,
        names=octant_counts.index,
        title= f'percentages of each octant in {model}',
        hole=0,  # no hole: a solid pie rather than a donut
    )
    chart.update_traces(textposition='inside', textinfo='percent+label')
    chart.show()

In [16]:
def fit_t_test(mean_dictionary):
    """One-sided t-test on ideal means of the three positive octants (US < AA).

    The ideal means of 'high-arousal positive', 'positive' and
    'low-arousal positive' are collected for each group and compared with an
    independent-samples t-test using ``alternative="less"``.

    Args:
        mean_dictionary: Dictionary with the keys ``octants``,
            ``ideal_mean_us_list`` and ``ideal_mean_aa_list``; the two lists
            are aligned with ``octants``.

    Returns:
        The result object returned by ``scipy.stats.ttest_ind``.

    Raises:
        KeyError: If one of the positive octants is missing from ``octants``.
    """
    positive_octants = ['high-arousal positive', 'positive', 'low-arousal positive']

    octants = mean_dictionary['octants']
    # look the means up by octant name; indexing a list with the expression
    # 'octants' == '<name>' evaluates to list[False], i.e. always element 0
    ideal_us_by_octant = dict(zip(octants, mean_dictionary['ideal_mean_us_list']))
    ideal_aa_by_octant = dict(zip(octants, mean_dictionary['ideal_mean_aa_list']))

    missing = [octant for octant in positive_octants if octant not in ideal_us_by_octant]
    if missing:
        raise KeyError(f"Octants missing from mean_dictionary: {missing}")

    positive_octants_us = [ideal_us_by_octant[octant] for octant in positive_octants]
    positive_octants_aa = [ideal_aa_by_octant[octant] for octant in positive_octants]

    ttest = ttest_ind(positive_octants_us, positive_octants_aa, alternative="less")

    return ttest


In [17]:
def create_bar_chart(df):
    """Show grouped bars of the ideal and actual ratings for each feeling.

    Args:
        df: DataFrame with ``feeling``, ``ideal`` and ``actual`` columns.
    """
    # one bar series per rating column, both plotted against the feelings
    bars = [go.Bar(name=column, x=df['feeling'], y=df[column])
            for column in ('ideal', 'actual')]
    fig = go.Figure(data=bars)

    # place the two series side by side instead of stacking them
    fig.update_layout(barmode='group')
    fig.show()

In [18]:
def perform_ttest_on_means(mean_dictionary):
    """Run four two-sided t-tests on the per-octant means and print each verdict.

    The comparisons are, in order: ideal US vs. ideal AA, actual US vs.
    actual AA, ideal US vs. actual US, and ideal AA vs. actual AA. A p-value
    above 0.05 is reported as no significant difference.

    Args:
        mean_dictionary: Dictionary with the keys ``ideal_mean_us_list``,
            ``ideal_mean_aa_list``, ``actual_mean_us_list`` and
            ``actual_mean_aa_list``.
    """
    # pairs of dictionary keys to compare, in reporting order
    comparisons = (
        ('ideal_mean_us_list', 'ideal_mean_aa_list'),
        ('actual_mean_us_list', 'actual_mean_aa_list'),
        ('ideal_mean_us_list', 'actual_mean_us_list'),
        ('ideal_mean_aa_list', 'actual_mean_aa_list'),
    )

    for first_key, second_key in comparisons:
        first_sample = mean_dictionary[first_key]
        second_sample = mean_dictionary[second_key]
        _statistic, p_value = ttest_ind(first_sample, second_sample, alternative='two-sided')

        if p_value > 0.05:
            print(f"Fail to reject the null hypothesis. No significant difference. (p = {p_value})")
        else:
            print(f"Reject the null hypothesis. There is a significant difference. (p = {p_value})")
                  
    

In [19]:
from scipy.stats import ttest_ind

def perform_t_test(df, num, model):
    """Run and print the per-octant t-tests between groups and between affects.

    For every octant (in order of first appearance in ``df``) the following
    independent-samples t-tests are run and reported:

    1. ideal AA vs. ideal US (``alternative='less'``)
    2. actual AA vs. actual US (``alternative='less'``)
    3. ideal vs. actual within AA (two-sided); when p < 0.1 a directional
       follow-up is run: 'greater' if the octant name contains 'positive' or
       'high', otherwise 'less' if it contains 'negative' or 'low'
    4. the same as 3 within US

    Args:
        df: DataFrame with ``group``, ``octant``, ``ideal`` and ``actual``
            columns.
        num: Unused; kept so existing calls keep working.
        model: Model name used in the printed messages.

    Returns:
        Dictionary mapping each octant to its own list of p-values, in the
        order the tests were run for that octant.
    """
    # the group split does not depend on the octant, so do it once
    aa = df[df['group'] == 'Asian American']
    us = df[df['group'] == 'European American']

    p_value_dict = {}

    # first-appearance order is reproducible; iteration over a set is not
    for octant in df['octant'].unique().tolist():
        ideal_aa = aa[aa['octant'] == octant]['ideal']
        ideal_us = us[us['octant'] == octant]['ideal']
        actual_aa = aa[aa['octant'] == octant]['actual']
        actual_us = us[us['octant'] == octant]['actual']

        # a fresh list per octant, so the dictionary values are not all the
        # same shared, ever-growing list
        p_list = []

        # Ideal AA vs. Ideal US
        _, p_value = ttest_ind(ideal_aa, ideal_us, alternative='less')
        p_list.append(p_value)
        print(f"Testing ideal affect states for {octant}: p-value = {p_value}")
        _report_significance("Ideal AA vs. Ideal US", p_value, model)

        # Actual AA vs. Actual US
        _, p_value_two = ttest_ind(actual_aa, actual_us, alternative='less')
        p_list.append(p_value_two)
        print(f"Testing actual affect states for {octant}: p-value = {p_value_two}")
        _report_significance("Actual AA vs. Actual US", p_value_two, model)

        # Ideal vs. Actual in AA, then in US
        p_list.extend(_compare_ideal_to_actual(
            ideal_aa, actual_aa, octant, model, "AA", "Asian Americans"))
        p_list.extend(_compare_ideal_to_actual(
            ideal_us, actual_us, octant, model, "US", "European Americans"))

        print("length of p list")
        print(len(p_list))
        p_value_dict[octant] = p_list

    return p_value_dict


def _report_significance(label, p_value, model):
    """Print the star rating for ``p_value`` (*** <0.001, ** <0.05, * <0.1)."""
    if p_value < 0.001:
        print(f"{label} - ***")
    elif p_value < 0.05:
        print(f"{label} - **")
    elif p_value < 0.1:
        print(f"{label} - *")
    else:
        print(f"{label} - The {model} accepts the null hypothesis with (p = {p_value})")


def _compare_ideal_to_actual(ideal, actual, octant, model, short_name, long_name):
    """Test ideal vs. actual within one group and return the p-values produced."""
    p_values = []

    _, p_two_sided = ttest_ind(ideal, actual, alternative='two-sided')
    p_values.append(p_two_sided)
    print(f"Testing ideal vs. actual affect states in {long_name} for {octant}: p-value = {p_two_sided}")
    _report_significance(f"Ideal vs. Actual in {short_name}", p_two_sided, model)

    # the directional follow-up only runs when the two-sided test hints at a gap
    if p_two_sided < 0.1:
        if 'positive' in octant or 'high' in octant:
            alternative, comparison_word, sign = 'greater', 'more', '>'
        elif 'negative' in octant or 'low' in octant:
            alternative, comparison_word, sign = 'less', 'less', '<'
        else:
            return p_values

        _, p_directional = ttest_ind(ideal, actual, alternative=alternative)
        p_values.append(p_directional)
        print(f"Testing if {short_name} want ideal emotions {comparison_word} than they feel them for {octant}: p-value = {p_directional}")
        _report_significance(f"Ideal {sign} Actual in {short_name}", p_directional, model)

    return p_values
    




In [20]:
def calculate_cronbach_alpha(df, mean_dict):
    """Report Cronbach's alpha of each octant's feelings for Asian American rows.

    The overall ideal/actual Pearson correlation of each group is printed
    once. Then, for every octant found in ``df`` and for both the ``ideal``
    and ``actual`` ratings, the ratings of the octant's feelings (Asian
    American rows only) are placed side by side as items and passed to
    ``pingouin.cronbach_alpha``.

    Args:
        df: DataFrame with ``group``, ``octant``, ``feeling``, ``ideal`` and
            ``actual`` columns.
        mean_dict: Unused; kept so existing calls keep working.

    Returns:
        The summary DataFrame that is also printed, with one row per octant
        and the columns ``octant``, ``feeling`` (the octant's feelings),
        ``ideal alpha`` and ``actual alpha``. An alpha entry holds whatever
        ``pingouin.cronbach_alpha`` returned (documented by pingouin as an
        ``(alpha, confidence interval)`` pair), or None when fewer than two
        rows were available.
    """
    # octant -> feelings (questionnaire items) belonging to it
    feeling_map = {'high-arousal positive': ['enthusiastic', 'excited', 'strong (elated)'],
                   'positive': ['happy', 'satisfied', 'content'],
                   'low arousal': ['quiet', 'still', 'passive'],
                   'low-arousal positive': ['calm', 'at rest', 'relaxed', 'peaceful (serene)'],
                   'low-arousal negative': ['dull', 'sleepy', 'sluggish'],
                   'negative': ['sad', 'lonely', 'unhappy'],
                   'high-arousal negative': ['fearful', 'hostile', 'nervous'],
                   'high arousal': ['aroused', 'surprised', 'astonished']}

    # collects all the unique octants from the dataframe
    octants = df['octant'].unique()

    # splits the dataframe into Asian American and European American participant groups
    aa = df[df['group'] == 'Asian American']
    us = df[df['group'] == 'European American']

    # the overall correlations do not depend on octant or affect type, so
    # they are computed and printed once instead of on every loop iteration
    correlation, _ = pearsonr(aa['ideal'], aa['actual'])
    print(f'Overall correlation of Asian Americans: {correlation:.2f}')
    correlation, _ = pearsonr(us['ideal'], us['actual'])
    print(f'Overall correlation of European Americans: {correlation:.2f}')

    summary_rows = []
    for octant in octants:
        feelings = feeling_map[octant]
        # octant, its feelings and both alphas are stored in one record so
        # the summary columns cannot drift out of alignment
        row = {'octant': octant, 'feeling': feelings}

        for affect in ('ideal', 'actual'):
            # one list of ratings per feeling; every feeling of the octant is
            # used, including the fourth item of 'low-arousal positive'
            ratings = {feeling: list(aa[aa['feeling'] == feeling][affect])
                       for feeling in feelings}

            # items must have equal length to sit side by side, so all are
            # truncated to the shortest one
            # NOTE(review): this pairs ratings by position within each
            # feeling - confirm rows are ordered consistently per sample
            min_length = min(len(values) for values in ratings.values())

            if min_length < 2:
                print("Not enough data for Cronbach's Alpha calculation.")
                alpha = None
            else:
                # only the item columns are passed on; an extra 'id' column
                # would be treated as one more item
                items = pd.DataFrame({feeling: values[:min_length]
                                      for feeling, values in ratings.items()})
                alpha = pg.cronbach_alpha(data=items)
                if affect == 'ideal':
                    print(f"Cronbach's Alpha (AA) : {alpha} ")

            row[f'{affect} alpha'] = alpha

        summary_rows.append(row)

    new_df = pd.DataFrame(summary_rows,
                          columns=['octant', 'feeling', 'ideal alpha', 'actual alpha'])

    print(new_df)
    return new_df








In [21]:
def main():
    """Load, prepare and analyse every model's results, then export them.

    For each model the result files are combined and prepared, and the
    per-octant means are computed; the per-octant t-tests are printed for the
    last model ('gemma') only. All prepared frames are then concatenated and
    written to ``result.csv``.

    The other analysis and plotting helpers in this notebook
    (perform_ttest_on_means, fit_t_test, create_line_graph, create_bar_chart,
    create_pie_chart, create_bubble_chart, visualize_feeling_distribution,
    plot_mean_difference, calculate_cronbach_alpha) are not called here.
    """
    model_names = ['gpt3.5', 'gpt4', 'mistral', 'gemma']

    cleaned_frames = []
    for position, model in enumerate(model_names):
        combined_df = create_dataframe(model)
        cleaned_df = prepare_df(model, combined_df)
        cleaned_frames.append(cleaned_df)

        num = calculate_mean(cleaned_df)

        # only the fourth model (gemma) gets the per-octant t-test report
        if position == 3:
            perform_t_test(cleaned_df, num, model)

    result = pd.concat(cleaned_frames)
    result.to_csv('result.csv', index=False)
    final_num = calculate_mean(result)

if __name__ == '__main__':
    main()

Testing ideal affect states for low arousal: p-value = 0.1960333207560539
Ideal AA vs. Ideal US - The gemma accepts the null hypothesis with (p = 0.1960333207560539)
Testing actual affect states for low arousal: p-value = 0.9999974852932508
Actual AA vs. Actual US - The gemma accepts the null hypothesis with (p = 0.9999974852932508)
Testing ideal vs. actual affect states in Asian Americans for low arousal: p-value = 4.198461950443905e-26
Ideal vs. Actual in AA - ***
Testing if AA want ideal emotions less than they feel them for low arousal: p-value = 1.0
Ideal < Actual in AA - The gemma accepts the null hypothesis with (p = 1.0)
Testing ideal vs. actual affect states in European Americans for low arousal: p-value = 2.056544744848426e-56
Ideal vs. Actual in US - ***
Testing if US want ideal emotions less than they feel them for low arousal: p-value = 1.0
Ideal < Actual in US - The gemma accepts the null hypothesis with (p = 1.0)
length of p list
6
Testing ideal affect states for low-aro

  res = hypotest_fun_out(*samples, **kwds)
