In [1]:
import pandas as pd
from docx import Document
import os

In [2]:
file_path = 'android_data.csv'
data = pd.read_csv(file_path, encoding='ISO-8859-1')

In [3]:
pid_column = 'PID'  
date_column = 'date' 
columns_of_interest = [
    'parentEmotion', 'question1', 'question1Other', 'question_feature', 
    'question_feature_other', 'question_engagement', 'question_engagement_other', 
    'question2', 'question4', 'question5', 'question6'
]

In [4]:
data_filtered = data[[pid_column, date_column] + columns_of_interest]

In [5]:
data_filtered[pid_column] = data_filtered[pid_column].str.strip().str.lower()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_filtered[pid_column] = data_filtered[pid_column].str.strip().str.lower()


In [6]:
grouped_data = data_filtered.groupby(pid_column).apply(lambda df: df.to_dict('records'))

In [7]:
output_folder = 'cleaned_android'
os.makedirs(output_folder, exist_ok=True)

In [8]:
question_mapping = {
    'parentEmotion': 'Parent emotion',
    'question1': 'How would you describe the emotion you are feeling? (Select all that apply)',
    'question1Other': 'Other (emotion)',
    'question_feature': 'What were you interacting with on Instagram?',
    'question_feature_other': 'Other (feature)',
    'question_engagement': 'How were you interacting on Instagram?',
    'question_engagement_other': 'Other (interaction)',
    'question2': 'What made you feel that way?',
    'question4': 'How do you feel that the activity/interaction you described above impacts your digital well-being?',
    'question5': 'Do you feel that you want to experience more or less of the activity/interaction you described above?',
    'question6': 'Are you willing to share your screenshot with the research team?'
}

In [9]:
for pid, entries in grouped_data.items():
    try:
        doc = Document()
        doc.add_heading(f'Participant ID: {pid}', 0)
        
        for entry in entries:
            entry_date = entry.get(date_column, 'Unknown Date')
            doc.add_heading(f'[Entry: {entry_date}]', level=1)
            for col in columns_of_interest:
                question_text = question_mapping.get(col, col)  # Get the question text, default to col if not found
                if entry.get(col):
                    doc.add_paragraph(f"{question_text}: \n- {entry[col]}")
            doc.add_paragraph('\n')
        
        file_name = os.path.join(output_folder, f'_{pid}.docx')
        doc.save(file_name)
        
    except Exception as e:
        print(f"An error occurred while processing PID: {pid}. Error: {e}")

print("Export completed.")

Export completed.
