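### Convert the raw test CSV (`test_quelevel.csv`) into fixed-length sequences of 200 interactions, splitting longer sequences into chunks and padding shorter ones with `-1`.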

In [None]:
import pandas as pd 
import os

In [None]:
# Directory that both the input and the output CSV paths are joined onto.
main_dir = './'
# File name of the raw test CSV that is loaded into `df`.
orig_test_name = 'test_quelevel.csv'
# File name the fixed-length sequence CSV is written to.
test_seq_name = 'test_sequences_quelevel.csv'

In [None]:
# Resolve the input path, then read the raw test CSV into a DataFrame.
test_csv_path = os.path.join(main_dir, orig_test_name)
df = pd.read_csv(test_csv_path)

# Preview the first rows to check the column layout.
df.head()

In [None]:
# Names of the comma-separated sequence columns that get split into chunks
# and padded; every other column is carried over unchanged.
columns = [
    'questions',
    'concepts',
    'responses',
    'timestamps',
]

In [None]:
def process_user_data(user_row, max_length=200, seq_columns=None):
    """Split one row's comma-separated sequences into fixed-length chunks.

    Every sequence column is cut into consecutive chunks of ``max_length``
    items. The final (or any short) chunk is right-padded with ``'-1'`` so
    that each emitted sequence has exactly ``max_length`` items. One output
    row is produced per chunk; all other fields are copied from ``user_row``.

    Each output row also gets a ``'selectmasks'`` field: a comma-separated
    list with ``1`` at positions that hold real data and ``-1`` at padded
    positions. The mask is derived from how many real items the chunk
    contains, not from the item values, so a genuine ``-1`` in the data is
    not mistaken for padding.

    Args:
        user_row: Mapping-like row (e.g. a pandas Series or a dict) that
            supports ``[]`` access, item assignment and ``.copy()``, and
            whose sequence columns hold comma-separated strings.
        max_length: Number of items in every emitted sequence. Must be
            positive.
        seq_columns: Names of the sequence columns to process. Defaults to
            the module-level ``columns`` list.

    Returns:
        A list of row copies, one per chunk, in sequence order. The input
        row itself is not modified.

    Raises:
        ValueError: If ``max_length`` is not positive.
    """
    if max_length <= 0:
        raise ValueError(f"max_length must be positive, got {max_length}")
    if seq_columns is None:
        # Fall back to the notebook-level default list of sequence columns.
        seq_columns = columns

    sequences = {col: user_row[col].split(',') for col in seq_columns}

    # Enough chunks to cover the longest column (ceiling division).
    longest = max((len(seq) for seq in sequences.values()), default=0)
    n_chunks = (longest + max_length - 1) // max_length

    processed_rows = []
    for chunk_idx in range(n_chunks):
        start = chunk_idx * max_length
        stop = start + max_length
        new_row = user_row.copy()

        # Count of real (unpadded) positions in this chunk, taken over all
        # columns so the mask does not depend on the order of the columns.
        n_real = 0
        for col, seq in sequences.items():
            chunk = seq[start:stop]
            n_real = max(n_real, len(chunk))
            chunk.extend(['-1'] * (max_length - len(chunk)))
            new_row[col] = ','.join(chunk)

        selectmask = [1] * n_real + [-1] * (max_length - n_real)
        new_row['selectmasks'] = ','.join(map(str, selectmask))

        processed_rows.append(new_row)

    return processed_rows


In [None]:
# Expand every row of df into its fixed-length chunks and flatten the
# per-row lists into one list of rows (each a copy of the source row with
# the sequence columns replaced and a 'selectmasks' field added).
processed_rows = [
    chunk_row
    for _, user_row in df.iterrows()
    for chunk_row in process_user_data(user_row)
]

# Assemble the collected rows into the output DataFrame.
processed_df = pd.DataFrame(processed_rows)

In [None]:
# Preview the first rows of the chunked/padded result.
processed_df.head()

In [None]:
# Replace the index with a fresh 0..n-1 range. The previous index is kept
# as a regular 'index' column (drop=False is the default, spelled out here).
processed_df = processed_df.reset_index(drop=False)

In [None]:
# Write the sequences next to the input file, without the DataFrame index.
output_csv_path = os.path.join(main_dir, test_seq_name)
processed_df.to_csv(output_csv_path, index=False)