In [21]:
import requests
from datetime import datetime, timedelta
import zipfile
import os
import numpy as np
import pandas as pd
import json
import shutil
import subprocess

def get_met(tease):

    

    proxies = {
    'http': f'http:zjgilliam:{tease}@proxy.divms.uiowa.edu:8888',
    'https': f'http://zjgilliam:{tease}@proxy.divms.uiowa.edu:8888',
    }



    url = 'https://jatos.psychology.uiowa.edu/jatos/api/v1/results/metadata'
    headers = {
        'accept': 'application/json',
        'Authorization': 'Bearer jap_5ThOJ14yf7z1EPEUpAoZYMWoETZcmJk305719',
        'Content-Type': 'application/json',
    }
    data = {
        'studyIds': [999]
    }

    response = requests.post(url, headers=headers, json=data, #proxies=proxies
    )

    # If you want to print the response
    print(response.status_code)
    print(response.json())
    response_json = response.json()

    response = response_json

    # Get the current timestamp
    current_time = datetime.now().timestamp() * 1000  # Convert to milliseconds
    one_minute_ago = current_time - (60 * 60 * 24 * 7 * 1000)
    # Initialize an empty list to store study result IDs
    study_result_ids = []

    # Iterate through the data to check conditions and collect study result IDs
    for study in response['data']:
        for study_result in study['studyResults']:
            if study_result['studyState'] == 'FINISHED' and study_result['endDate'] >= one_minute_ago:
                study_result_ids.append(study_result['id'])
                break  # No need to check other component results for this study result

    # Print the list of study result IDs
    print(study_result_ids)

    if len(study_result_ids) == 0:
        print("No study results found.")
        exit()
    
    return study_result_ids

def get_data(study_result_ids, tease):


    

    proxies = {
    'http': f'http://zjgilliam:{tease}@proxy.divms.uiowa.edu:8888',
    'https': f'http://zjgilliam:{tease}@proxy.divms.uiowa.edu:8888',
    }

    headers = {
        'accept': 'application/octet-stream',
        'Authorization': 'Bearer jap_5ThOJ14yf7z1EPEUpAoZYMWoETZcmJk305719',
        'Content-Type': 'application/json',
    }
    # Get the data for each study result
    datas = {
        'studyIds': [999],
        'studyResultIds': study_result_ids
    }

    url = 'https://jatos.psychology.uiowa.edu/jatos/api/v1/results/data'
    response = requests.post(url, headers=headers, json=datas, #proxies=proxies
    )
    # Debugging information
    print(f"Status Code: {response.status_code}")


    # Save the unzip file and save .txt file to the current directory
    if response.status_code == 200:
        jrzip_file = 'response.jrzip'
        with open(jrzip_file, 'wb') as f:
            f.write(response.content)
        print(f"Downloaded file: {jrzip_file}")

        # Verify if the file is a valid zip file
        if zipfile.is_zipfile(jrzip_file):
            print("The file is a valid zip file.")

            # Create a new zip file with only the desired files
            filtered_jrzip_file = 'filtered_response.jrzip'
            with zipfile.ZipFile(jrzip_file, 'r') as zip_ref:
                with zipfile.ZipFile(filtered_jrzip_file, 'w') as filtered_zip_ref:
                    for zip_info in zip_ref.infolist():
                        # Check if the filename contains any of the study_result_ids
                        if any(str(study_result_id) in zip_info.filename for study_result_id in study_result_ids):
                            filtered_zip_ref.writestr(zip_info, zip_ref.read(zip_info.filename))
            print(f"Filtered zip file created: {filtered_jrzip_file}")

            # Extract the filtered zip file
            with zipfile.ZipFile(filtered_jrzip_file, 'r') as zip_ref:
                zip_ref.extractall('./../../data/raw')
            print(f"Unzipped file: {filtered_jrzip_file}")

            # Optionally, remove the original and filtered zip files after extraction
            os.remove(jrzip_file)
            os.remove(filtered_jrzip_file)

            # Walk through the directory and find all .txt files, save paths to a list
            txt_files = []
            for root, dirs, files in os.walk("./../../data/raw"):
                for file in files:
                    if file.endswith(".txt"):
                        txt_files.append(os.path.join(root, file))
            print(f"Found {len(txt_files)} .txt files.")
            #move the text file to the data folder

        else:
            print("The file is not a valid zip file.")
    else:
        print("Failed to retrieve or save the file.")
        print(f"Response Text: {response.text}")

    return txt_files

In [22]:
def get_next_run_dir(sub):
    base_dir = f'./../../data/{sub}/processed'
    i = 1
    while os.path.exists(os.path.join(base_dir, f'run-{i}')):
        i += 1
    return os.path.join(base_dir, f'run-{i}')



def convert_beh(txt_files):

            
    dic = {}
    for idx, b in enumerate(txt_files, start=1):
        tweets = []
        with open(b, 'r') as file:
            for line in file:
                tweets.append(json.loads(line))
        dic[idx] = pd.json_normalize(tweets, 'data')

    print("Data dictionaries created.")

    all_paths = []
    for i in dic:
        df = dic[i]
        for sub in np.unique(df['multichar_response']):
            print(f"Processing subject: {sub}")
            # Filter data for this subject
            sub_df = df[df['multichar_response'] == sub]

            # Get next run directory
            run_dir = get_next_run_dir(sub)
            os.makedirs(run_dir, exist_ok=True)

            # Build the CSV file path
            csv_filename = f"{sub}.csv"
            csv_path = os.path.join(run_dir, csv_filename)

            # Save CSV
            sub_df.to_csv(csv_path, index=False)
            print(f"Saved {csv_path}")

            all_paths.append(csv_path)

    return all_paths

In [25]:
def move_txt(txt_files):
    dic = {}
    for file_path in txt_files:
        tweets = []
        with open(file_path, 'r') as file:
            # Read text file and append each line as a JSON object to tweets
            for line in file:
                tweets.append(json.loads(line))
        dic[file_path] = pd.json_normalize(tweets, 'data')

    for file_path, df in dic.items():
        for sub in np.unique(df['multichar_response']):
            print(sub)
            target_dir = f'./../../data/{sub}/raw'
            os.makedirs(target_dir, exist_ok=True)
            # Save the DataFrame to a text file in the target directory
            output_file = os.path.join(target_dir, os.path.basename(file_path))
            with open(output_file, 'w') as f:
                f.write(df.to_string(index=False))
            print(f"Saved {output_file} to {target_dir}")
        os.remove(file_path)
        print(f"Removed {file_path}")

    # Move the directory removal outside the loop
    for root, dirs, files in os.walk('./../../data/raw'):
        for d in dirs:
            shutil.rmtree(os.path.join(root, d))
    # Optionally, remove the raw directory itself
    os.rmdir('./../../data/raw')

    return None


In [26]:
text_files = get_data(get_met('zjgill'), 'zjgilliam')
convert_beh(text_files)
move_txt(text_files)

200
{'apiVersion': '1.0.1', 'data': [{'studyId': 999, 'studyUuid': '93008b03-4ff3-4190-9db1-d71fb2c74af2', 'studyTitle': 'Pipe_Comparison ', 'studyResults': [{'id': 12256, 'uuid': 'b1fb7f82-e7b5-4099-aa83-15ba25d3dae8', 'studyCode': 'm752BMNbAjG', 'startDate': 1731022807000, 'endDate': 1731022880000, 'duration': '00:01:13', 'lastSeenDate': 1731022808000, 'studyState': 'FINISHED', 'workerId': 5187, 'workerType': 'Jatos', 'batchId': 1036, 'batchUuid': '784275e0-000b-4cd2-93f2-bd306b55dbcb', 'batchTitle': 'Default', 'groupId': None, 'componentResults': [{'id': 11864, 'componentId': 1000, 'componentUuid': 'b7583473-ed28-4dec-8175-f03157901667', 'startDate': 1731022808000, 'endDate': 1731022880000, 'duration': '00:01:12', 'componentState': 'FINISHED', 'path': '/study_result_12256/comp-result_11864', 'data': {'size': 38773, 'sizeHumanReadable': '38.8 kB'}, 'files': []}]}, {'id': 12258, 'uuid': '2973436b-3e6e-4146-bdaa-99b0dffc5a43', 'studyCode': 'cYe0OluHqvF', 'startDate': 1731024546000, 'en

In [41]:
csv = pd.DataFrame(columns=['sub_name', 'composite', 'rank'])
csv.to_csv('./../../data/results.csv')

In [50]:
df = pd.read_csv('./../../data/results.csv')
df

Unnamed: 0.1,Unnamed: 0,sub_name,composite,rank


In [None]:
#remove Unnamed: 0, composite_x and composite_y

df = df.drop(columns=['Unnamed: 0'])

KeyError: "['Unnamed: 0', 'index'] not found in axis"

In [52]:
df

Unnamed: 0,sub_name,composite,rank


In [53]:
df.to_csv('./../../data/results.csv', index=False)