In [1]:
pip install gspread oauth2client pandas

Collecting gspread
  Using cached gspread-6.2.0-py3-none-any.whl.metadata (11 kB)
Collecting oauth2client
  Using cached oauth2client-4.1.3-py2.py3-none-any.whl.metadata (1.2 kB)
Collecting google-auth-oauthlib>=0.4.1 (from gspread)
  Using cached google_auth_oauthlib-1.2.1-py2.py3-none-any.whl.metadata (2.7 kB)
Collecting httplib2>=0.9.1 (from oauth2client)
  Using cached httplib2-0.22.0-py3-none-any.whl.metadata (2.6 kB)
Collecting requests-oauthlib>=0.7.0 (from google-auth-oauthlib>=0.4.1->gspread)
  Using cached requests_oauthlib-2.0.0-py2.py3-none-any.whl.metadata (11 kB)
Collecting oauthlib>=3.0.0 (from requests-oauthlib>=0.7.0->google-auth-oauthlib>=0.4.1->gspread)
  Using cached oauthlib-3.2.2-py3-none-any.whl.metadata (7.5 kB)
Using cached gspread-6.2.0-py3-none-any.whl (59 kB)
Using cached oauth2client-4.1.3-py2.py3-none-any.whl (98 kB)
Using cached google_auth_oauthlib-1.2.1-py2.py3-none-any.whl (24 kB)
Using cached httplib2-0.22.0-py3-none-any.whl (96 kB)
Using cached reque

In [2]:
# 📌 Cell 1 – Input variable names
variable_names = [
    "ST059Q02JA",
    "ST230Q01JA",
    "ST251Q04JA",
    "ST255Q01JA",
    "ST268Q04JA",
    "ST259Q01JA",
    "ST251Q06JA",
    "ST263Q02JA",
    "ST296Q04JA",
    "FL170Q02JA",
    "ST256Q08JA",
    "ST292Q03JA",
    "SC217Q04JA",
    "SC211Q03JA",
    "ST290Q03WA",
    "SC189Q02WA",
    "ST268Q01JA",
    "ST253Q01JA",
    "GRADE",
    "ST004D01T"
]


In [3]:
# OLD AUTH BLOCK - replace this
# sheet = client.open(SHEET_NAME).sheet1
# data = sheet.get_all_values()

# ✅ NEW BLOCK
import pandas as pd
import requests
import io

SHEET_ID = "1nPLjWPaHIK-fFRYxoHTwdl6LL3DBi_vLqNIGs5KWHOQ"
csv_url = f"https://docs.google.com/spreadsheets/d/{SHEET_ID}/export?format=csv"

response = requests.get(csv_url)
df = pd.read_csv(io.StringIO(response.text), header=None)
data = df.values.tolist()
#df.to_csv("codebook.csv", index=False)

In [4]:
# 📌 Cell 3 – Process and Generate Output

import pandas as pd
import json

stop_words = {"Valid Skip", "Not Applicable", "Invalid", "No Response", "Missing"}
output_rows = []

for variable in variable_names:
    header_row_index = None

    # Find the header row
    for i, row in enumerate(data):
        if len(row) > 0 and str(row[0]).strip() == variable:
            header_row_index = i
            break

    if header_row_index is None:
        print(f"⚠️ Variable '{variable}' not found.")
        continue

    row = data[header_row_index]
    variable_label = str(row[1]).strip() if len(row) > 1 else ""

    # Check what the next row's text is
    next_row = data[header_row_index + 1] if header_row_index + 1 < len(data) else []
    first_text = str(next_row[7]).strip() if len(next_row) > 7 else ""

    # 👉 CASE 1: Use range if next row is stop word and range is in header
    if first_text in stop_words:
        range_text = str(row[5]).strip() if len(row) > 5 else ""
        try:
            print(f"📏 Range for '{variable}' = '{range_text}'")
            min_val, max_val = map(float, range_text.split('-'))
            answers_json = json.dumps({
                "range": {
                    "min": int(min_val),
                    "max": int(max_val),
                    "step": 1
                }
            }, ensure_ascii=False)
        except Exception as e:
            print(f"❌ Range parsing failed for variable '{variable}':", e)
            continue

    # 👉 CASE 2: Collect explicit answer options from cols G & H
    else:
        answers = []
        row_index = header_row_index + 1

        while row_index < len(data):
            row = data[row_index]
            if len(row) < 8:
                break
            print(f"📏 H for '{variable}' = '{row[7]}'")
            value = str(row[6]).strip()
            text = str(row[7]).strip()

            if text in stop_words:
                break

            # ✅ Allow 'None' as valid text (don’t drop it)
            if text.lower() in {"nan", ""}:
                text = 'None'

            try:
                answers.append({"value": int(float(value)), "text": text})
            except ValueError:
                pass  # Skip bad value rows

            row_index += 1

        answers_json = json.dumps(answers, ensure_ascii=False)

    # Append to output
    output_rows.append({
        "Variable_name": variable,
        "Variable_label": variable_label,
        "Variable_context": "",
        "Variable_answers": answers_json,
        "Recommendation_Threshold": "",
        "Recommended_Intervention": "",
        "Hide": "",
        "Hidden_Value": "",
        "Admin_Only": ""
    })


# Create DataFrame and write to CSV
df_output = pd.DataFrame(output_rows)
df_output

📏 Range for 'ST059Q02JA' = '0.0-90.0'
📏 H for 'ST230Q01JA' = 'nan'
📏 H for 'ST230Q01JA' = 'One'
📏 H for 'ST230Q01JA' = 'Two'
📏 H for 'ST230Q01JA' = 'Three or more'
📏 H for 'ST230Q01JA' = 'Valid Skip'
📏 H for 'ST251Q04JA' = 'nan'
📏 H for 'ST251Q04JA' = 'One'
📏 H for 'ST251Q04JA' = 'Two'
📏 H for 'ST251Q04JA' = 'Three or more'
📏 H for 'ST251Q04JA' = 'Valid Skip'
📏 H for 'ST255Q01JA' = 'There are no books.'
📏 H for 'ST255Q01JA' = '1-10 books'
📏 H for 'ST255Q01JA' = '11-25 books'
📏 H for 'ST255Q01JA' = '26-100 books'
📏 H for 'ST255Q01JA' = '101-200 books'
📏 H for 'ST255Q01JA' = '201-500 books'
📏 H for 'ST255Q01JA' = 'More than 500 books'
📏 H for 'ST255Q01JA' = 'Valid Skip'
📏 H for 'ST268Q04JA' = 'Strongly disagree'
📏 H for 'ST268Q04JA' = 'Disagree'
📏 H for 'ST268Q04JA' = 'Agree'
📏 H for 'ST268Q04JA' = 'Strongly agree'
📏 H for 'ST268Q04JA' = 'Valid Skip'
📏 H for 'ST259Q01JA' = '1'
📏 H for 'ST259Q01JA' = '10'
📏 H for 'ST259Q01JA' = '2'
📏 H for 'ST259Q01JA' = '3'
📏 H for 'ST259Q01JA' = '4'
📏 H

Unnamed: 0,Variable_name,Variable_label,Variable_context,Variable_answers,Recommendation_Threshold,Recommended_Intervention,Hide,Hidden_Value,Admin_Only
0,ST059Q02JA,Total number of [class periods] per week for a...,,"{""range"": {""min"": 0, ""max"": 90, ""step"": 1}}",,,,,
1,ST230Q01JA,"How many siblings (including brothers, sisters...",,"[{""value"": 1, ""text"": ""None""}, {""value"": 2, ""t...",,,,,
2,ST251Q04JA,How many of these items are there at your [hom...,,"[{""value"": 1, ""text"": ""None""}, {""value"": 2, ""t...",,,,,
3,ST255Q01JA,How many books are there in your [home]?,,"[{""value"": 1, ""text"": ""There are no books.""}, ...",,,,,
4,ST268Q04JA,Agree/disagree: Mathematics is easy for me.,,"[{""value"": 1, ""text"": ""Strongly disagree""}, {""...",,,,,
5,ST259Q01JA,Now think about where you would place your fam...,,"[{""value"": 1, ""text"": ""1""}, {""value"": 10, ""tex...",,,,,
6,ST251Q06JA,How many of these items are there at your [hom...,,"[{""value"": 1, ""text"": ""None""}, {""value"": 2, ""t...",,,,,
7,ST263Q02JA,Agree/disagree: Your intelligence is something...,,"[{""value"": 1, ""text"": ""Strongly disagree""}, {""...",,,,,
8,ST296Q04JA,How much time spent on homework in: Total time...,,"[{""value"": 1, ""text"": ""Up to 30 minutes a day""...",,,,,
9,ST256Q08JA,How many of these books at [home]: [Technical ...,,"[{""value"": 1, ""text"": ""None""}, {""value"": 2, ""t...",,,,,


In [5]:
df_output.to_csv("variable_metadata_output.csv", index=False)
print("✅ Output saved to 'variable_metadata_output_v3.csv'")

✅ Output saved to 'variable_metadata_output_v3.csv'
