In [2]:
import csv
import json
import logging
import os

import pandas as pd
import requests

# Thread-pool helpers used to fetch each student's data concurrently
from concurrent.futures import ThreadPoolExecutor, as_completed

def fetch_assignment_data_for_student_concurrent(student_id, base_url, token, timeout=30):
    """Fetch the assignment data of a single student from the API.

    Args:
        student_id: Identifier substituted into ``base_url``.
        base_url: URL template with one ``{}`` placeholder for the student ID.
        token: Bearer token sent in the ``Authorization`` header.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``. Defaults to 30.

    Returns:
        A ``(student_id, assignment_data)`` tuple. ``assignment_data`` is the
        decoded JSON body, or ``None`` when the request fails, times out,
        returns an error status, or the body is not valid JSON. Failures are
        reported with ``print`` rather than raised, so a single bad student
        does not stop a batch run.
    """
    headers = {"Authorization": f"Bearer {token}"}
    full_url = base_url.format(student_id)
    try:
        # Without a timeout a stalled connection would block this worker indefinitely.
        response = requests.get(full_url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError for 4xx/5xx responses
        # NOTE(review): only a single response is read; if this endpoint
        # paginates, later pages are not fetched — confirm against the API docs.
        assignment_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a response body that cannot be decoded as JSON.
        print(f"Failed to fetch data for student {student_id}: {e}")
        return student_id, None
    return student_id, assignment_data

def fetch_data_and_save_to_excel_concurrent(csv_path, base_url, token, json_file, excel_file, max_workers=10):
    """Fetch assignment data for every student in a roster and save it.

    Student IDs are read from the ``'Student ID'`` column of the CSV at
    ``csv_path``. Each ID is fetched on a thread pool, and the records of all
    students are combined. The combined records are always written to
    ``json_file`` (without the student ID). When at least one record was
    fetched they are also written to ``excel_file`` with a leading
    ``'Student ID'`` column.

    Args:
        csv_path: Path to the roster CSV; must contain a ``'Student ID'`` column.
        base_url: URL template with a ``{}`` placeholder for the student ID.
        token: Bearer token passed to the per-student fetch function.
        json_file: Output path for the combined records as JSON.
        excel_file: Output path for the Excel workbook.
        max_workers: Maximum number of concurrent fetch threads.

    Raises:
        KeyError: If the CSV has no ``'Student ID'`` column.
    """
    data = []
    student_ids = []

    # newline='' is what the csv module expects so quoted fields containing
    # line breaks are parsed correctly.
    with open(csv_path, mode='r', encoding='utf-8', newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        student_id_list = [row['Student ID'] for row in reader]

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_student_id = {
            executor.submit(fetch_assignment_data_for_student_concurrent, student_id, base_url, token): student_id
            for student_id in student_id_list
        }

        # Results arrive in completion order, not roster order.
        for future in as_completed(future_to_student_id):
            student_id = future_to_student_id[future]
            try:
                _, assignment_data = future.result()
            except Exception as e:
                # An unexpected error in one worker must not discard the
                # results already collected for the other students.
                print(f"Failed to fetch data for student {student_id}: {e}")
                assignment_data = None

            if assignment_data is None:
                print(f"No data fetched for student ID: {student_id}.")
                continue
            if not isinstance(assignment_data, list):
                # extend() on a dict would add its keys as bogus rows.
                print(f"Unexpected response format for student ID: {student_id}; skipping.")
                continue

            data.extend(assignment_data)
            student_ids.extend([student_id] * len(assignment_data))

    with open(json_file, 'w') as jsonfile:
        json.dump(data, jsonfile)

    if data:  # Proceed only if data is not empty
        df = pd.DataFrame(data)
        df['Student ID'] = student_ids
        # Move the freshly appended 'Student ID' column to the front.
        cols = df.columns.tolist()
        cols = cols[-1:] + cols[:-1]
        df = df[cols]
        df.to_excel(excel_file, index=False)
    else:
        print("No data to save.")

# Configuration: replace the placeholders below with your actual values.
csv_file_path = 'student_roster.csv'  # Path to the roster CSV; it must contain a 'Student ID' column.
api_base_url = "https://usflearn.instructure.com/api/v1/courses/1857108/analytics/users/{}/assignments"  # '{}' is filled in with each student ID.

# Never store an API token in the notebook: anyone who can read the file can
# use it. The token is read from the environment instead. Any token that was
# previously hard-coded in this cell must be treated as leaked and revoked.
bearer_token = os.environ.get("CANVAS_API_TOKEN")
if not bearer_token:
    raise RuntimeError(
        "Set the CANVAS_API_TOKEN environment variable to your API bearer token before running this cell."
    )

output_json_file = 'assignment_data.json'  # Filename or path for the JSON output.
output_excel_file = 'assignment_data.xlsx'  # Filename or path for the Excel output.

# Fetch every student's assignment data and write both output files.
fetch_data_and_save_to_excel_concurrent(
    csv_path=csv_file_path,
    base_url=api_base_url,
    token=bearer_token,
    json_file=output_json_file,
    excel_file=output_excel_file
)


In [1]:
import pandas as pd

# Read the workbook produced by the fetch step back into a DataFrame
df = pd.read_excel('assignment_data.xlsx')  # Update this path to your actual file path

# Collect each distinct value of the 'Student ID' column
student_id_column = df['Student ID']
unique_student_ids = student_id_column.unique()

# The number of distinct IDs is the length of that array
unique_student_id_count = unique_student_ids.size

print(f"Count of Unique Student IDs: {unique_student_id_count}")


Count of Unique Student IDs: 704


In [3]:
import pandas as pd
from ast import literal_eval

# Reload the assignment workbook from disk so this cell starts from the saved data
assignment_workbook_path = 'assignment_data.xlsx'  # Replace with your file path
df = pd.read_excel(assignment_workbook_path)

# Parse one 'submission' cell into a dictionary, tolerating missing or malformed values
def parse_submission(submission):
    """Convert a ``submission`` cell into a dictionary.

    Args:
        submission: The cell value. Usually the string form of a Python
            dictionary, but it may also already be a dict, or be NaN/None
            for an empty cell.

    Returns:
        The parsed dictionary. An empty dictionary is returned when the value
        is missing, is not text, cannot be parsed as a Python literal, or
        parses to something other than a dictionary. Callers can therefore
        always use ``.get`` on the result.
    """
    if isinstance(submission, dict):
        return submission  # Already parsed; nothing to do
    if not isinstance(submission, str):
        return {}  # NaN/None from empty cells, or any other non-text value
    try:
        parsed = literal_eval(submission)
    except (ValueError, SyntaxError, TypeError):
        # TypeError is raised for literals such as a dict with an unhashable key.
        return {}
    # A valid literal that is not a dict (list, number, ...) has no fields to extract.
    return parsed if isinstance(parsed, dict) else {}

# Turn every 'submission' cell into a dictionary (empty when missing or unparseable)
parsed_submissions = df['submission'].apply(parse_submission)

# Lift the submission fields of interest into their own top-level columns
for field in ('posted_at', 'score', 'submitted_at'):
    df[field] = parsed_submissions.apply(lambda parsed, key=field: parsed.get(key))

# The raw 'submission' column is no longer needed once its fields are extracted
df = df.drop(columns='submission')

# Keep only these columns, in this order; everything else is discarded
columns_to_keep = [
    'Student ID',
    'title',
    'points_possible',
    'due_at',
    'status',
    'score',
    'submitted_at',
]
df = df[columns_to_keep]

# Preview the first rows of the trimmed table
print(df.head())

# Persist the trimmed table to a new workbook, leaving out the index column
df.to_excel('modified_assignment_data.xlsx', index=False)  # Provide the desired path for your new file





   Student ID                                              title  \
0     5037219                        Complete Biz Cafe Case Quiz   
1     5037219                     Complete Biz Cafe Content Quiz   
2     5037219  Skills Training Module 01 Impact of Digital Te...   
3     5037219                  Skills Training Module 02 The Web   
4     5037219        Skills Training Module 03 Computer Hardware   

   points_possible                due_at    status  score  \
0               10  2024-02-05T04:59:00Z      late   10.0   
1               10  2024-02-05T04:59:00Z      late    9.0   
2              100  2024-02-12T04:59:00Z  floating    0.0   
3              100  2024-02-12T04:59:00Z  floating    0.0   
4              100  2024-02-12T04:59:00Z  floating    0.0   

           submitted_at  
0  2024-02-18T20:39:50Z  
1  2024-02-18T20:11:20Z  
2                  None  
3                  None  
4                  None  
