In [82]:
import pandas as pd
import os

# Participant IDs whose rows are extracted from the source files
participant_values = [3, 30, 36, 38, 90, 93, 119, 146, 167, 181, 216]

folder_path = 'native'  # Folder holding the source CSV files
new_folder_path = 'programming_none'  # Folder receiving one file per participant

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists before anything is written into it.
os.makedirs(new_folder_path, exist_ok=True)

# Iterate through all files in the source folder
for file_name in os.listdir(folder_path):
    # Only CSV files are considered
    if not file_name.endswith('.csv'):
        continue

    df = pd.read_csv(os.path.join(folder_path, file_name))

    # Files without a 'participant' column cannot be filtered; skip them
    if 'participant' not in df.columns:
        continue

    # Keep only the rows that belong to the selected participants
    filtered_df = df[df['participant'].isin(participant_values)]

    # Write one output file per participant present in this source file.
    # sort=False keeps the participants in order of first appearance; an empty
    # selection yields no groups, so nothing is written for it.
    # NOTE: the output name depends only on the participant, so if the same
    # participant occurs in several source files the last one processed wins.
    for participant, participant_rows in filtered_df.groupby('participant', sort=False):
        # The name deliberately carries no extension: the conversion cell
        # further down this notebook is the step that appends '.csv'.
        new_file_name = f"p_{participant}_fixations"
        new_file_path = os.path.join(new_folder_path, new_file_name)

        # Save this participant's rows (CSV content, without the index column)
        participant_rows.to_csv(new_file_path, index=False)
        print(f"Saved filtered data for participant {participant} to {new_file_name}")


Saved filtered data for participant 101 to p_101_fixations
Saved filtered data for participant 102 to p_102_fixations
Saved filtered data for participant 104 to p_104_fixations
Saved filtered data for participant 106 to p_106_fixations
Saved filtered data for participant 109 to p_109_fixations
Saved filtered data for participant 10 to p_10_fixations
Saved filtered data for participant 110 to p_110_fixations
Saved filtered data for participant 112 to p_112_fixations
Saved filtered data for participant 113 to p_113_fixations
Saved filtered data for participant 114 to p_114_fixations
Saved filtered data for participant 115 to p_115_fixations
Saved filtered data for participant 116 to p_116_fixations
Saved filtered data for participant 118 to p_118_fixations
Saved filtered data for participant 122 to p_122_fixations
Saved filtered data for participant 123 to p_123_fixations
Saved filtered data for participant 126 to p_126_fixations
Saved filtered data for participant 128 to p_128_fixations

In [85]:
import os
import pandas as pd

import csv

# Folder whose extension-less files are rewritten as '.csv' files
folder_path = 'programming_none'  # Replace with the actual folder path

# os.listdir returns a snapshot, so files created or removed below do not
# affect the iteration.
for file_name in os.listdir(folder_path):
    # Source path, and destination path: same base name with a '.csv' extension
    file_path = os.path.join(folder_path, file_name)
    new_file_path = os.path.join(folder_path, f"{os.path.splitext(file_name)[0]}.csv")

    try:
        # Sub-directories and other non-regular entries cannot be converted
        if not os.path.isfile(file_path):
            continue

        # A file that already ends in '.csv' maps onto itself. Converting it
        # would rewrite the file and then delete it again via os.remove, so it
        # must be left alone (this also makes re-running the cell harmless).
        if os.path.exists(new_file_path) and os.path.samefile(file_path, new_file_path):
            print(f"File {file_name} is already a CSV file; skipped.")
            continue

        # Parse the file as comma-separated text. csv.reader (unlike a plain
        # str.split(',')) keeps quoted fields that contain commas intact;
        # newline='' is what the csv module expects from the file object.
        with open(file_path, 'r', encoding='utf-8', newline='') as file:
            rows = list(csv.reader(file))

        # Rows of unequal length are padded with empty cells by the DataFrame
        df = pd.DataFrame(rows)

        # Write the rows verbatim: no index column and no synthetic header
        # (the first parsed row already is whatever header the file had)
        df.to_csv(new_file_path, index=False, header=False)

        # Only after the new file was written successfully, delete the old one
        os.remove(file_path)

        print(f"File {file_name} has been processed, saved as {new_file_path}, and deleted.")

    except Exception as e:
        # Best effort: report the failure and carry on with the next file
        print(f"Error processing file {file_name}: {e}")


File p_100_fixations has been processed, saved as ltr\p_100_fixations.csv, and deleted.
File p_101_fixations has been processed, saved as ltr\p_101_fixations.csv, and deleted.
File p_102_fixations has been processed, saved as ltr\p_102_fixations.csv, and deleted.
File p_103_fixations has been processed, saved as ltr\p_103_fixations.csv, and deleted.
File p_104_fixations has been processed, saved as ltr\p_104_fixations.csv, and deleted.
File p_105_fixations has been processed, saved as ltr\p_105_fixations.csv, and deleted.
File p_106_fixations has been processed, saved as ltr\p_106_fixations.csv, and deleted.
File p_107_fixations has been processed, saved as ltr\p_107_fixations.csv, and deleted.
File p_108_fixations has been processed, saved as ltr\p_108_fixations.csv, and deleted.
File p_109_fixations has been processed, saved as ltr\p_109_fixations.csv, and deleted.
File p_10_fixations has been processed, saved as ltr\p_10_fixations.csv, and deleted.
File p_110_fixations has been proc

In [87]:
import pandas as pd
import os

# Folder containing the per-participant CSV files to aggregate
folder_path = 'programming_none'  # Replace with the actual folder path

# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the aggregate file reproducible from run to run.
csv_file_names = sorted(
    file_name for file_name in os.listdir(folder_path) if file_name.endswith('.csv')
)

# pd.concat raises an unhelpful "No objects to concatenate" on an empty list,
# so fail early with a message that names the folder instead.
if not csv_file_names:
    raise ValueError(f"No CSV files found in '{folder_path}'; nothing to combine.")

# Read every CSV file into its own DataFrame
all_data = [
    pd.read_csv(os.path.join(folder_path, file_name))
    for file_name in csv_file_names
]

# Stack all frames into a single one with a fresh 0..n-1 index
combined_df = pd.concat(all_data, ignore_index=True)

# Save the combined dataframe to a new CSV file
combined_df.to_csv('programming_none_aggregate.csv', index=False)


All CSV files have been combined into 'combined_output.csv'.


In [92]:
import pandas as pd

def _count_regressions(fixations):
    """Count consecutive row pairs that move backwards in AOI position.

    A pair (row i, row i + 1) is counted when either
    * 'aoi_y' is equal in both rows and 'aoi_x' decreases, or
    * 'aoi_y' decreases.

    Parameters
    ----------
    fixations : pandas.DataFrame
        Rows of one participant, in order, with 'aoi_x' and 'aoi_y' columns.

    Returns
    -------
    int
        Number of qualifying pairs; 0 when there are fewer than two rows.
    """
    aoi_x = fixations['aoi_x'].to_numpy()
    aoi_y = fixations['aoi_y'].to_numpy()

    # Compare every row with its successor in one vectorised step:
    # [:-1] is "current", [1:] is "next". Comparisons involving NaN are False,
    # exactly as they are for scalar comparisons.
    same_y_smaller_x = (aoi_y[:-1] == aoi_y[1:]) & (aoi_x[:-1] > aoi_x[1:])
    smaller_y = aoi_y[:-1] > aoi_y[1:]
    return int((same_y_smaller_x | smaller_y).sum())


# Aggregate files to analyse
input_files = ['programming_none_aggregate.csv', 'programming_low_aggregate.csv', 'programming_medium_aggregate.csv', 'programming_high_aggregate.csv']

# One record per (participant, file) pair
all_results = []

# Loop over each input file
for file in input_files:
    df = pd.read_csv(file)

    # Process the participants in order of first appearance in the file
    for participant in df['participant'].unique():
        # Rows of the current participant, in file order
        participant_data = df[df['participant'] == participant]

        # Number of backward transitions between consecutive rows
        regression = _count_regressions(participant_data)

        # Store the result for the current participant and the current file
        all_results.append({'participant': participant, 'regression': regression, 'file': file})

# Convert results to a DataFrame
results_df = pd.DataFrame(all_results)

# Save the results to a new CSV file
results_df.to_csv('regression_experience.csv', index=False)


Vertical next text count for each participant from all files has been saved to 'combined_output.csv'.


In [97]:
import pandas as pd

# Load the per-participant counts
df = pd.read_csv('vertical_next_experience.csv')

# Sum the 'vertical_next' column within each 'file' group, then turn the
# group labels back into an ordinary 'file' column
rows_by_file = df.groupby('file')
totals_by_file = rows_by_file['vertical_next'].sum()
summed_df = totals_by_file.reset_index()

# Report one line per file
for index, row in summed_df.iterrows():
    file_label = row['file']
    file_total = row['vertical_next']
    print(f"File: {file_label} - Total regression: {file_total}")


File: ltr_aggregate.csv - Total regression: 5190
File: rtl_aggregate.csv - Total regression: 239


In [78]:
import pandas as pd

# One entry per experience group: (source file, output column name, divisor).
# NOTE(review): the divisors are presumably the number of participants in each
# group, turning a summed duration into a per-participant mean - confirm, and
# check the value 10 for 'Programming None' against the group's participant list.
duration_sources = [
    ('duration_experience_programming_high_file.csv', 'Programming High', 39),
    ('duration_experience_programming_medium_file.csv', 'Programming Medium', 118),
    ('duration_experience_programming_low_file.csv', 'Programming Low', 45),
    ('duration_experience_programming_none_file.csv', 'Programming None', 10),
    ('duration_experience_experiment_language_high_file.csv', 'Experiment Language High', 17),
    ('duration_experience_experiment_language_medium_file.csv', 'Experiment Language Medium', 98),
    ('duration_experience_experiment_language_low_file.csv', 'Experiment Language Low', 68),
    ('duration_experience_experiment_language_none_file.csv', 'Experiment Language None', 29),
]

# Read each file, rename its 'duration' column after the group it came from,
# and divide that column by the group's divisor
duration_frames = []
for source_file, column_name, divisor in duration_sources:
    group_df = pd.read_csv(source_file).rename(columns={'duration': column_name})
    group_df[column_name] = group_df[column_name] / divisor
    duration_frames.append(group_df)

# Keep the numbered names bound, in case other cells still refer to them
df1, df2, df3, df4, df5, df6, df7, df8 = duration_frames

# Outer-merge all frames on 'token', left to right, so that a token missing
# from some groups is kept and gets NaN in those groups' columns
merged_df = duration_frames[0]
for group_df in duration_frames[1:]:
    merged_df = merged_df.merge(group_df, on='token', how='outer')

# Save the merged dataframe to a new CSV file
merged_df.to_csv('duration_experience_combined_output.csv', index=False)

# Report the file that was actually written
print("CSV files combined successfully into 'duration_experience_combined_output.csv'.")


CSV files combined successfully into 'combined_output.csv'.
