In [None]:
# t-test for log_Img_Likes

import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats
import pingouin as pg

# Load the dataset; the "utf-8-sig" codec strips a leading byte-order mark if present
df = pd.read_csv('img-analysis-dataset.csv', encoding="utf-8-sig")

# Coerce the raw engagement counts to numeric (unparseable cells become NaN),
# then treat missing counts as 0
num_cols = ['Likes', 'Comments', 'Shares', 'Love', 'Wow', 'Haha', 'Sad', 'Angry', 'Care']
df[num_cols] = df[num_cols].apply(pd.to_numeric, errors='coerce')
df[num_cols] = df[num_cols].fillna(0)

# Dependent variable compared between the two groups of every indicator column.
# It is read straight from the CSV; this cell does not derive it.
dv = 'Likes_log'

# Replace boolean values in columns to be tested with 0 and 1
cols_to_test = ['animals_animal_homes', 'animals_animals_in_general',
                'animals_dogs', 'animals_farm_animals', 'animals_pets', 'animals_reptiles_and_amphibians',
                'animals_wild_mammals', 'animals_working_animals', 'birds_bird_homes', 'birds_birds',
                'farming_growing_crops', 'farming_on_the_farm', 'fish_and_shellfish_fish', 'fish_and_shellfish_other_sea_creatures',
                'fish_and_shellfish_shellfish', 'geography_coastlines_and_the_sea', 'geography_mountains_and_valleys',
                'geography_other_geographic_features', 'geography_rivers_and_lakes',
                'insects,_worms,_etc._insect_forms', 'insects,_worms,_etc._insect_homes', 'insects,_worms,_etc._insects', 'insects,_worms,_etc._other_invertebrates',
                'insects,_worms,_etc._spiders,_etc.', 'plants_and_trees_plants', 'plants_and_trees_trees',
                'the_environment_conservation', 'weather_rain', 'weather_sky', 'weather_snow_and_ice', 'weather_temperature',
                'weather_the_atmosphere', 'weather_wind']

df[cols_to_test] = df[cols_to_test].replace({False: 0, True: 1})

results = []

# Run one independent-samples t-test per indicator column
for col in cols_to_test:
    # Drop missing values of the dependent variable up front: ttest_ind propagates
    # NaN by default, and len() would otherwise count rows that the means ignore.
    group0 = df.loc[df[col] == 0, dv].dropna()
    group1 = df.loc[df[col] == 1, dv].dropna()

    # A comparison needs observations in both groups
    if group1.empty:
        print(f"Skipping {col} because it has no '1' entries.")
        continue
    if group0.empty:
        print(f"Skipping {col} because it has no '0' entries.")
        continue

    # Student's t-test (SciPy's default assumes equal variances)
    t_statistic, p_value = stats.ttest_ind(group0, group1)

    # Effect size (Cohen's d) for the same pair of groups
    cohen_d = pg.compute_effsize(group0, group1, eftype='cohen')

    # Build the descriptive-statistics helper once per group and reuse it
    desc0 = sm.stats.DescrStatsW(group0)
    desc1 = sm.stats.DescrStatsW(group1)
    ci_lower0, ci_upper0 = desc0.tconfint_mean()  # default alpha=0.05 -> 95% interval
    ci_lower1, ci_upper1 = desc1.tconfint_mean()

    # Field order must match the column list of the table built below.
    # np.std keeps its default ddof=0, i.e. the population standard deviation.
    results.append((col, len(group0), len(group1), group0.mean(), group1.mean(),
                    np.std(group0), np.std(group1), desc0.std_mean, desc1.std_mean,
                    ci_lower0, ci_lower1, ci_upper0, ci_upper1, t_statistic, p_value, cohen_d))

# Create a table to report the results (group sizes count non-missing observations)
table = pd.DataFrame(results, columns=['Column', 'Group0 N', 'Group1 N', 'Mean Likes Group0', 'Mean Likes Group1',
                                        'Std. Dev. Group0', 'Std. Dev. Group1', 'Std. Err. Group0', 'Std. Err. Group1',
                                        '95% CI Lower Group0', '95% CI Lower Group1', '95% CI Upper Group0', '95% CI Upper Group1',
                                        'T-Statistic', 'P-Value', 'Cohen\'s d'])

# Significance stars: thresholds are applied from loosest to strictest so that
# the strictest threshold a p-value satisfies is the one that sticks
table['Sig'] = ''
for threshold, stars in [(0.05, '*'), (0.01, '**'), (0.001, '***')]:
    table.loc[table['P-Value'] < threshold, 'Sig'] = stars

table.to_csv("log_Img_Likes.csv", encoding="utf-8-sig", index=False)


In [None]:
# t-test for log_Img_Comments

import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats
import pingouin as pg

# Load the dataset; the "utf-8-sig" codec strips a leading byte-order mark if present
df = pd.read_csv('img-analysis-dataset.csv', encoding="utf-8-sig")

# Coerce the raw engagement counts to numeric (unparseable cells become NaN),
# then treat missing counts as 0
num_cols = ['Likes', 'Comments', 'Shares', 'Love', 'Wow', 'Haha', 'Sad', 'Angry', 'Care']
df[num_cols] = df[num_cols].apply(pd.to_numeric, errors='coerce')
df[num_cols] = df[num_cols].fillna(0)

# Dependent variable compared between the two groups of every indicator column.
# It is read straight from the CSV; this cell does not derive it.
dv = 'Comments_log'

# Replace boolean values in columns to be tested with 0 and 1
cols_to_test = ['animals_animal_homes', 'animals_animals_in_general',
                'animals_dogs', 'animals_farm_animals', 'animals_pets', 'animals_reptiles_and_amphibians',
                'animals_wild_mammals', 'animals_working_animals', 'birds_bird_homes', 'birds_birds',
                'farming_growing_crops', 'farming_on_the_farm', 'fish_and_shellfish_fish', 'fish_and_shellfish_other_sea_creatures',
                'fish_and_shellfish_shellfish', 'geography_coastlines_and_the_sea', 'geography_mountains_and_valleys',
                'geography_other_geographic_features', 'geography_rivers_and_lakes',
                'insects,_worms,_etc._insect_forms', 'insects,_worms,_etc._insect_homes', 'insects,_worms,_etc._insects', 'insects,_worms,_etc._other_invertebrates',
                'insects,_worms,_etc._spiders,_etc.', 'plants_and_trees_plants', 'plants_and_trees_trees',
                'the_environment_conservation', 'weather_rain', 'weather_sky', 'weather_snow_and_ice', 'weather_temperature',
                'weather_the_atmosphere', 'weather_wind']

df[cols_to_test] = df[cols_to_test].replace({False: 0, True: 1})

results = []

# Run one independent-samples t-test per indicator column
for col in cols_to_test:
    # Drop missing values of the dependent variable up front: ttest_ind propagates
    # NaN by default, and len() would otherwise count rows that the means ignore.
    group0 = df.loc[df[col] == 0, dv].dropna()
    group1 = df.loc[df[col] == 1, dv].dropna()

    # A comparison needs observations in both groups
    if group1.empty:
        print(f"Skipping {col} because it has no '1' entries.")
        continue
    if group0.empty:
        print(f"Skipping {col} because it has no '0' entries.")
        continue

    # Student's t-test (SciPy's default assumes equal variances)
    t_statistic, p_value = stats.ttest_ind(group0, group1)

    # Effect size (Cohen's d) for the same pair of groups
    cohen_d = pg.compute_effsize(group0, group1, eftype='cohen')

    # Build the descriptive-statistics helper once per group and reuse it
    desc0 = sm.stats.DescrStatsW(group0)
    desc1 = sm.stats.DescrStatsW(group1)
    ci_lower0, ci_upper0 = desc0.tconfint_mean()  # default alpha=0.05 -> 95% interval
    ci_lower1, ci_upper1 = desc1.tconfint_mean()

    # Field order must match the column list of the table built below.
    # np.std keeps its default ddof=0, i.e. the population standard deviation.
    results.append((col, len(group0), len(group1), group0.mean(), group1.mean(),
                    np.std(group0), np.std(group1), desc0.std_mean, desc1.std_mean,
                    ci_lower0, ci_lower1, ci_upper0, ci_upper1, t_statistic, p_value, cohen_d))

# Create a table to report the results (group sizes count non-missing observations).
# The mean columns are labelled after the variable analysed here (Comments),
# not "Likes" as in the cell this one was copied from.
table = pd.DataFrame(results, columns=['Column', 'Group0 N', 'Group1 N', 'Mean Comments Group0', 'Mean Comments Group1',
                                        'Std. Dev. Group0', 'Std. Dev. Group1', 'Std. Err. Group0', 'Std. Err. Group1',
                                        '95% CI Lower Group0', '95% CI Lower Group1', '95% CI Upper Group0', '95% CI Upper Group1',
                                        'T-Statistic', 'P-Value', 'Cohen\'s d'])

# Significance stars: thresholds are applied from loosest to strictest so that
# the strictest threshold a p-value satisfies is the one that sticks
table['Sig'] = ''
for threshold, stars in [(0.05, '*'), (0.01, '**'), (0.001, '***')]:
    table.loc[table['P-Value'] < threshold, 'Sig'] = stars

table.to_csv("log_Img_Comments.csv", encoding="utf-8-sig", index=False)


In [None]:
# t-test for log_Img_Shares

import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats
import pingouin as pg

# Load the dataset; the "utf-8-sig" codec strips a leading byte-order mark if present
df = pd.read_csv('img-analysis-dataset.csv', encoding="utf-8-sig")

# Coerce the raw engagement counts to numeric (unparseable cells become NaN),
# then treat missing counts as 0
num_cols = ['Likes', 'Comments', 'Shares', 'Love', 'Wow', 'Haha', 'Sad', 'Angry', 'Care']
df[num_cols] = df[num_cols].apply(pd.to_numeric, errors='coerce')
df[num_cols] = df[num_cols].fillna(0)

# Dependent variable compared between the two groups of every indicator column.
# It is read straight from the CSV; this cell does not derive it.
dv = 'Shares_log'

# Replace boolean values in columns to be tested with 0 and 1
cols_to_test = ['animals_animal_homes', 'animals_animals_in_general',
                'animals_dogs', 'animals_farm_animals', 'animals_pets', 'animals_reptiles_and_amphibians',
                'animals_wild_mammals', 'animals_working_animals', 'birds_bird_homes', 'birds_birds',
                'farming_growing_crops', 'farming_on_the_farm', 'fish_and_shellfish_fish', 'fish_and_shellfish_other_sea_creatures',
                'fish_and_shellfish_shellfish', 'geography_coastlines_and_the_sea', 'geography_mountains_and_valleys',
                'geography_other_geographic_features', 'geography_rivers_and_lakes',
                'insects,_worms,_etc._insect_forms', 'insects,_worms,_etc._insect_homes', 'insects,_worms,_etc._insects', 'insects,_worms,_etc._other_invertebrates',
                'insects,_worms,_etc._spiders,_etc.', 'plants_and_trees_plants', 'plants_and_trees_trees',
                'the_environment_conservation', 'weather_rain', 'weather_sky', 'weather_snow_and_ice', 'weather_temperature',
                'weather_the_atmosphere', 'weather_wind']

df[cols_to_test] = df[cols_to_test].replace({False: 0, True: 1})

results = []

# Run one independent-samples t-test per indicator column
for col in cols_to_test:
    # Drop missing values of the dependent variable up front: ttest_ind propagates
    # NaN by default, and len() would otherwise count rows that the means ignore.
    group0 = df.loc[df[col] == 0, dv].dropna()
    group1 = df.loc[df[col] == 1, dv].dropna()

    # A comparison needs observations in both groups
    if group1.empty:
        print(f"Skipping {col} because it has no '1' entries.")
        continue
    if group0.empty:
        print(f"Skipping {col} because it has no '0' entries.")
        continue

    # Student's t-test (SciPy's default assumes equal variances)
    t_statistic, p_value = stats.ttest_ind(group0, group1)

    # Effect size (Cohen's d) for the same pair of groups
    cohen_d = pg.compute_effsize(group0, group1, eftype='cohen')

    # Build the descriptive-statistics helper once per group and reuse it
    desc0 = sm.stats.DescrStatsW(group0)
    desc1 = sm.stats.DescrStatsW(group1)
    ci_lower0, ci_upper0 = desc0.tconfint_mean()  # default alpha=0.05 -> 95% interval
    ci_lower1, ci_upper1 = desc1.tconfint_mean()

    # Field order must match the column list of the table built below.
    # np.std keeps its default ddof=0, i.e. the population standard deviation.
    results.append((col, len(group0), len(group1), group0.mean(), group1.mean(),
                    np.std(group0), np.std(group1), desc0.std_mean, desc1.std_mean,
                    ci_lower0, ci_lower1, ci_upper0, ci_upper1, t_statistic, p_value, cohen_d))

# Create a table to report the results (group sizes count non-missing observations).
# The mean columns are labelled after the variable analysed here (Shares),
# not "Likes" as in the cell this one was copied from.
table = pd.DataFrame(results, columns=['Column', 'Group0 N', 'Group1 N', 'Mean Shares Group0', 'Mean Shares Group1',
                                        'Std. Dev. Group0', 'Std. Dev. Group1', 'Std. Err. Group0', 'Std. Err. Group1',
                                        '95% CI Lower Group0', '95% CI Lower Group1', '95% CI Upper Group0', '95% CI Upper Group1',
                                        'T-Statistic', 'P-Value', 'Cohen\'s d'])

# Significance stars: thresholds are applied from loosest to strictest so that
# the strictest threshold a p-value satisfies is the one that sticks
table['Sig'] = ''
for threshold, stars in [(0.05, '*'), (0.01, '**'), (0.001, '***')]:
    table.loc[table['P-Value'] < threshold, 'Sig'] = stars

table.to_csv("log_Img_Shares.csv", encoding="utf-8-sig", index=False)


In [None]:
# t-test for log_Img_Love

import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats
import pingouin as pg

# Load the dataset; the "utf-8-sig" codec strips a leading byte-order mark if present
df = pd.read_csv('img-analysis-dataset.csv', encoding="utf-8-sig")

# Coerce the raw engagement counts to numeric (unparseable cells become NaN),
# then treat missing counts as 0
num_cols = ['Likes', 'Comments', 'Shares', 'Love', 'Wow', 'Haha', 'Sad', 'Angry', 'Care']
df[num_cols] = df[num_cols].apply(pd.to_numeric, errors='coerce')
df[num_cols] = df[num_cols].fillna(0)

# Dependent variable compared between the two groups of every indicator column.
# It is read straight from the CSV; this cell does not derive it.
dv = 'Love_log'

# Replace boolean values in columns to be tested with 0 and 1
cols_to_test = ['animals_animal_homes', 'animals_animals_in_general',
                'animals_dogs', 'animals_farm_animals', 'animals_pets', 'animals_reptiles_and_amphibians',
                'animals_wild_mammals', 'animals_working_animals', 'birds_bird_homes', 'birds_birds',
                'farming_growing_crops', 'farming_on_the_farm', 'fish_and_shellfish_fish', 'fish_and_shellfish_other_sea_creatures',
                'fish_and_shellfish_shellfish', 'geography_coastlines_and_the_sea', 'geography_mountains_and_valleys',
                'geography_other_geographic_features', 'geography_rivers_and_lakes',
                'insects,_worms,_etc._insect_forms', 'insects,_worms,_etc._insect_homes', 'insects,_worms,_etc._insects', 'insects,_worms,_etc._other_invertebrates',
                'insects,_worms,_etc._spiders,_etc.', 'plants_and_trees_plants', 'plants_and_trees_trees',
                'the_environment_conservation', 'weather_rain', 'weather_sky', 'weather_snow_and_ice', 'weather_temperature',
                'weather_the_atmosphere', 'weather_wind']

df[cols_to_test] = df[cols_to_test].replace({False: 0, True: 1})

results = []

# Run one independent-samples t-test per indicator column
for col in cols_to_test:
    # Drop missing values of the dependent variable up front: ttest_ind propagates
    # NaN by default, and len() would otherwise count rows that the means ignore.
    group0 = df.loc[df[col] == 0, dv].dropna()
    group1 = df.loc[df[col] == 1, dv].dropna()

    # A comparison needs observations in both groups
    if group1.empty:
        print(f"Skipping {col} because it has no '1' entries.")
        continue
    if group0.empty:
        print(f"Skipping {col} because it has no '0' entries.")
        continue

    # Student's t-test (SciPy's default assumes equal variances)
    t_statistic, p_value = stats.ttest_ind(group0, group1)

    # Effect size (Cohen's d) for the same pair of groups
    cohen_d = pg.compute_effsize(group0, group1, eftype='cohen')

    # Build the descriptive-statistics helper once per group and reuse it
    desc0 = sm.stats.DescrStatsW(group0)
    desc1 = sm.stats.DescrStatsW(group1)
    ci_lower0, ci_upper0 = desc0.tconfint_mean()  # default alpha=0.05 -> 95% interval
    ci_lower1, ci_upper1 = desc1.tconfint_mean()

    # Field order must match the column list of the table built below.
    # np.std keeps its default ddof=0, i.e. the population standard deviation.
    results.append((col, len(group0), len(group1), group0.mean(), group1.mean(),
                    np.std(group0), np.std(group1), desc0.std_mean, desc1.std_mean,
                    ci_lower0, ci_lower1, ci_upper0, ci_upper1, t_statistic, p_value, cohen_d))

# Create a table to report the results (group sizes count non-missing observations).
# The mean columns are labelled after the variable analysed here (Love),
# not "Likes" as in the cell this one was copied from.
table = pd.DataFrame(results, columns=['Column', 'Group0 N', 'Group1 N', 'Mean Love Group0', 'Mean Love Group1',
                                        'Std. Dev. Group0', 'Std. Dev. Group1', 'Std. Err. Group0', 'Std. Err. Group1',
                                        '95% CI Lower Group0', '95% CI Lower Group1', '95% CI Upper Group0', '95% CI Upper Group1',
                                        'T-Statistic', 'P-Value', 'Cohen\'s d'])

# Significance stars: thresholds are applied from loosest to strictest so that
# the strictest threshold a p-value satisfies is the one that sticks
table['Sig'] = ''
for threshold, stars in [(0.05, '*'), (0.01, '**'), (0.001, '***')]:
    table.loc[table['P-Value'] < threshold, 'Sig'] = stars

table.to_csv("log_Img_Love.csv", encoding="utf-8-sig", index=False)


In [None]:
# t-test for log_Img_Care

import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats
import pingouin as pg

# Load the dataset; the "utf-8-sig" codec strips a leading byte-order mark if present
df = pd.read_csv('img-analysis-dataset.csv', encoding="utf-8-sig")

# Coerce the raw engagement counts to numeric (unparseable cells become NaN),
# then treat missing counts as 0
num_cols = ['Likes', 'Comments', 'Shares', 'Love', 'Wow', 'Haha', 'Sad', 'Angry', 'Care']
df[num_cols] = df[num_cols].apply(pd.to_numeric, errors='coerce')
df[num_cols] = df[num_cols].fillna(0)

# Dependent variable compared between the two groups of every indicator column.
# It is read straight from the CSV; this cell does not derive it.
dv = 'Care_log'

# Replace boolean values in columns to be tested with 0 and 1
cols_to_test = ['animals_animal_homes', 'animals_animals_in_general',
                'animals_dogs', 'animals_farm_animals', 'animals_pets', 'animals_reptiles_and_amphibians',
                'animals_wild_mammals', 'animals_working_animals', 'birds_bird_homes', 'birds_birds',
                'farming_growing_crops', 'farming_on_the_farm', 'fish_and_shellfish_fish', 'fish_and_shellfish_other_sea_creatures',
                'fish_and_shellfish_shellfish', 'geography_coastlines_and_the_sea', 'geography_mountains_and_valleys',
                'geography_other_geographic_features', 'geography_rivers_and_lakes',
                'insects,_worms,_etc._insect_forms', 'insects,_worms,_etc._insect_homes', 'insects,_worms,_etc._insects', 'insects,_worms,_etc._other_invertebrates',
                'insects,_worms,_etc._spiders,_etc.', 'plants_and_trees_plants', 'plants_and_trees_trees',
                'the_environment_conservation', 'weather_rain', 'weather_sky', 'weather_snow_and_ice', 'weather_temperature',
                'weather_the_atmosphere', 'weather_wind']

df[cols_to_test] = df[cols_to_test].replace({False: 0, True: 1})

results = []

# Run one independent-samples t-test per indicator column
for col in cols_to_test:
    # Drop missing values of the dependent variable up front: ttest_ind propagates
    # NaN by default, and len() would otherwise count rows that the means ignore.
    group0 = df.loc[df[col] == 0, dv].dropna()
    group1 = df.loc[df[col] == 1, dv].dropna()

    # A comparison needs observations in both groups
    if group1.empty:
        print(f"Skipping {col} because it has no '1' entries.")
        continue
    if group0.empty:
        print(f"Skipping {col} because it has no '0' entries.")
        continue

    # Student's t-test (SciPy's default assumes equal variances)
    t_statistic, p_value = stats.ttest_ind(group0, group1)

    # Effect size (Cohen's d) for the same pair of groups
    cohen_d = pg.compute_effsize(group0, group1, eftype='cohen')

    # Build the descriptive-statistics helper once per group and reuse it
    desc0 = sm.stats.DescrStatsW(group0)
    desc1 = sm.stats.DescrStatsW(group1)
    ci_lower0, ci_upper0 = desc0.tconfint_mean()  # default alpha=0.05 -> 95% interval
    ci_lower1, ci_upper1 = desc1.tconfint_mean()

    # Field order must match the column list of the table built below.
    # np.std keeps its default ddof=0, i.e. the population standard deviation.
    results.append((col, len(group0), len(group1), group0.mean(), group1.mean(),
                    np.std(group0), np.std(group1), desc0.std_mean, desc1.std_mean,
                    ci_lower0, ci_lower1, ci_upper0, ci_upper1, t_statistic, p_value, cohen_d))

# Create a table to report the results (group sizes count non-missing observations).
# The mean columns are labelled after the variable analysed here (Care),
# not "Likes" as in the cell this one was copied from.
table = pd.DataFrame(results, columns=['Column', 'Group0 N', 'Group1 N', 'Mean Care Group0', 'Mean Care Group1',
                                        'Std. Dev. Group0', 'Std. Dev. Group1', 'Std. Err. Group0', 'Std. Err. Group1',
                                        '95% CI Lower Group0', '95% CI Lower Group1', '95% CI Upper Group0', '95% CI Upper Group1',
                                        'T-Statistic', 'P-Value', 'Cohen\'s d'])

# Significance stars: thresholds are applied from loosest to strictest so that
# the strictest threshold a p-value satisfies is the one that sticks
table['Sig'] = ''
for threshold, stars in [(0.05, '*'), (0.01, '**'), (0.001, '***')]:
    table.loc[table['P-Value'] < threshold, 'Sig'] = stars

table.to_csv("log_Img_Care.csv", encoding="utf-8-sig", index=False)


In [None]:
#Regression for img analysis

import pandas as pd
import statsmodels.api as sm
import xlsxwriter

# Load data; "utf-8-sig" matches how the t-test cells read this same file and
# keeps a leading byte-order mark out of the first column name
df = pd.read_csv("img-analysis-dataset.csv", encoding="utf-8-sig")

# Independent variables: Subcategories
independent_vars = [
    'animals_animal_homes', 'animals_animals_in_general', 'animals_dogs', 'animals_farm_animals',
    'animals_pets', 'animals_reptiles_and_amphibians', 'animals_wild_mammals', 'animals_working_animals',
    'birds_bird_homes', 'birds_birds', 'fish_and_shellfish_fish', 'fish_and_shellfish_other_sea_creatures',
    'fish_and_shellfish_shellfish', 'plants_and_trees_plants', 'plants_and_trees_trees', 'the_environment_conservation'
]

# Convert True/False columns to 1/0 in a single vectorised cast
df[independent_vars] = df[independent_vars].astype(int)

# Add constant (intercept column named 'const') to the independent variables
X = sm.add_constant(df[independent_vars])

dependent_vars = ['Likes_log', 'Shares_log', 'Comments_log', 'Love_log', 'Care_log']

# Include the constant term for the extraction of statistics
all_vars = ['const'] + independent_vars

# The context manager writes and closes the workbook on exit.
# ExcelWriter.save() no longer exists in pandas >= 2.0.
with pd.ExcelWriter("log_img1Regression.xlsx", engine='xlsxwriter') as writer:
    for dv in dependent_vars:
        # Fit each model only on rows where this dependent variable is present
        temp_df = df.dropna(subset=[dv])

        # Keep the dependent variable as float: casting to int would truncate the
        # fractional part of every value before the model is fitted
        Y = temp_df[dv].astype(float)
        X_temp = X.loc[temp_df.index]

        # Standard deviations used to standardise the coefficients below
        std_Y = Y.std()
        std_X = X_temp.std()

        # Fit the model using OLS (linear regression)
        result = sm.OLS(Y, X_temp).fit()

        # Extract, round, and collect the per-predictor statistics
        iv_stats = {
            'IV': [],
            'unstandardized_B': [],
            'SE': [],
            'standardized_beta': [],
            't': [],
            'p': []
        }

        for var in all_vars:
            iv_stats['IV'].append(var)
            iv_stats['unstandardized_B'].append(round(result.params[var], 3))
            iv_stats['SE'].append(round(result.bse[var], 3))

            # Standardised coefficient: B * sd(x) / sd(y); not reported for the intercept
            if var != 'const':
                iv_stats['standardized_beta'].append(round(result.params[var] * (std_X[var] / std_Y), 3))
            else:
                iv_stats['standardized_beta'].append(None)

            iv_stats['t'].append(round(result.tvalues[var], 3))
            iv_stats['p'].append(round(result.pvalues[var], 3))

        iv_df = pd.DataFrame(iv_stats)
        iv_df.to_excel(writer, sheet_name=f"{dv}_Stats", index=False, startrow=0)

        # Extract and round the model-level statistics
        regression_stats = {
            'Dependent_Var': [dv],
            'F': [round(result.fvalue, 3)],
            'df_regression': [round(result.df_model, 3)],
            'df_residual': [round(result.df_resid, 3)],
            'p': [round(result.f_pvalue, 3)],
            'R_square': [round(result.rsquared, 3)],
            'adjusted_R_square': [round(result.rsquared_adj, 3)]
        }

        regression_df = pd.DataFrame(regression_stats)

        # Write the model summary on the same sheet, below the IV table
        # (its header row plus one blank buffer row)
        start_row = iv_df.shape[0] + 2
        regression_df.to_excel(writer, sheet_name=f"{dv}_Stats", index=False, startrow=start_row)
