In [1]:
import nest_asyncio
nest_asyncio.apply()

import aiohttp
import asyncio
import json
import pandas as pd
from datetime import datetime  # Change the import statement
from datetime import timedelta

# Define global variables for dataframes
student_df = None
engagement_df = None
ums_att_df = None
ums_avg_att_df = None

ums_upload_avg_att=None

# Edoofy app information
edoofy_base_url = "https://edoofa-portal.bubbleapps.io/api/1.1/obj"
edoofy_bearer_token = "2cde31d8f48919a2db1467cc06a56132"
edoofy_headers = {'Authorization': f'Bearer {edoofy_bearer_token}'}

# UMS app information
ums_base_url = "https://edoofa-ums-90164.bubbleapps.io/version-test/api/1.1/obj"
ums_bearer_token = "786720e8eb68de7054d1149b56cc04f9"
ums_headers = {'Authorization': f'Bearer {ums_bearer_token}'}

# Asynchronous function to fetch data from a table
async def fetch_table_data(session, base_url, headers, table, constraints=None):
    records = []
    cursor = 0
    total_fetched = 0

    while True:
        params = {'limit': 100, 'cursor': cursor}
        if constraints:
            params['constraints'] = json.dumps(constraints)

        api_url = f"{base_url}/{table}"
        print(f"Fetching {table} data from {base_url}... Cursor: {cursor}")

        async with session.get(api_url, headers=headers, params=params) as response:
            if response.status != 200:
                print(f"Failed to fetch data from {table}: {await response.text()}")
                break

            data = await response.json()
            new_records = data['response']['results']
            records.extend(new_records)
            total_fetched += len(new_records)

            print(f"Fetched {len(new_records)} new records, Total fetched: {total_fetched}")

            cursor += 100

            if len(new_records) < 100:
                print(f"Exiting loop, fetched less than 100 records.")
                break

    df = pd.DataFrame(records)
    print(f"Fetched {len(df)} records for {table}.")
    return df

def map_students_to_engagement(student_df, engagement_df):
    # Ensure '_id' and 'EWYL-group-name' are in student_df
    if '_id' in student_df.columns and 'EWYL-group-name' in student_df.columns:
        student_id_to_EWYL = dict(zip(student_df['_id'], student_df['EWYL-group-name']))
        engagement_df['ewyl'] = engagement_df['student'].map(student_id_to_EWYL)
    else:
        print("Error: '_id' or 'EWYL-group-name' not in student_df columns")
    return engagement_df



def process_engagement_data(ums_att_df, engagement_df, student_df, student_id):
    # Create mappings
    student_to_kam_mapping = dict(zip(student_df['_id'], student_df['KAM-group-name']))
    student_to_ewyl_mapping = dict(zip(student_df['_id'], student_df['EWYL-group-name']))

    # Add KAM-group-name and ewyl columns to the engagement dataframe
    engagement_df['KAM-group-name'] = engagement_df['student'].map(student_to_kam_mapping)
    engagement_df['ewyl'] = engagement_df['student'].map(student_to_ewyl_mapping)

    # Find the latest date for the specific student in ums_att_df
    student_latest_ums_date = pd.to_datetime(ums_att_df[ums_att_df['ewyl-group-name'] == student_id]['date'].max())

    if pd.isnull(student_latest_ums_date):
        # Process all records from engagement_df for this student
        student_engagement_records = engagement_df[engagement_df['ewyl'] == student_id]
        print("adding all records")
        
    else:
        # Process only records newer than the latest in ums_att_df
        student_engagement_records = engagement_df[
            (engagement_df['student'] == student_id) &
            (pd.to_datetime(engagement_df['engagement-date']) > student_latest_ums_date)
        ]
        print("adding new records")
        
    return student_engagement_records
    

# Function to post processed engagement data
async def post_processed_data(session, base_url, headers, processed_data):
    api_url = f"{base_url}/Attendance"
    
    for index, row in processed_data.iterrows():
        data = {
            'admissions-group-name': row['KAM-group-name'],
            'ewyl-group-name': row['ewyl'],
            'attendance-type': row['engagement-type'],
            'present': row['daily-attendance'],
            'date': row['engagement-date']
        }
        
        async with session.post(api_url, headers=headers, json=data) as response:
            if response.status == 201:
                print(f"Successfully posted data for row {index}")
            else:
                print(f"Failed to post data for row {index}: {await response.text()}")



def get_percentage(number_of_present, total_sessions):
    if total_sessions == 0:
        return 0  # Avoid division by zero
    percent = (number_of_present / total_sessions) * 100
    #print(percent)
    return percent

# Function to check if a row exists for the current month for a student
async def check_if_row_exists(student, year, month, ums_avg_att_df):
    # Filtering the ums_avg_att_df to check if there's an existing record for the student
    existing_rows = ums_avg_att_df[
        (ums_avg_att_df['ewyl-group-name'] == student) &
        (ums_avg_att_df['year'] == year) &
        (ums_avg_att_df['month'] == month)
    ]

    if not existing_rows.empty:
        # Assuming '_id' is the column name for the row ID
        return existing_rows['_id'].iloc[0]
    else:
        return False

# Function to post a new attendance summary
async def post_new_attendance_summary(session, base_url, headers, data):
    api_url = f"{base_url}/Attendance-Summary"
    async with session.post(api_url, headers=headers, json=data) as response:
        if response.status == 201:
            print("Successfully created a new row in Attendance-Summary.")
        else:
            print(f"Failed to create a new row: {await response.text()}")

# Function to patch an existing attendance summary
async def patch_attendance_summary(session, base_url, headers, row_id, data):
    api_url = f"{base_url}/Attendance-Summary/{row_id}"
    async with session.patch(api_url, headers=headers, json=data) as response:
        if response.status == 200:
            print("Successfully updated the row in Attendance-Summary.")
        else:
            print(f"Failed to update the row: {await response.text()}")

            

async def process_avg_att(session, base_url, headers, combined_df, ums_avg_att_df, is_current_month):
    results_df = pd.DataFrame(columns=['student', 'attendance-percentage'])

    # Group by 'ewyl-group-name' and 'admissions-group-name'
    grouped = combined_df.groupby(['ewyl-group-name', 'admissions-group-name'])
    for (ewyl_group, kam_group_name), group in grouped:
        total_sessions = len(group)
        number_of_present = group['present'].sum()
        attendance_percentage = get_percentage(number_of_present, total_sessions)

        # Get the year and month for the latest engagement date
        latest_engagement_date = pd.to_datetime(group['date'].max())
        current_year = latest_engagement_date.year
        current_month = latest_engagement_date.strftime("%B")
        first_day_of_month = latest_engagement_date.replace(day=1).strftime("%Y-%m-%d")

        # Calculate avg-att-percent-till-last-month
        past_records = combined_df[
            (combined_df['ewyl-group-name'] == ewyl_group) &
            ((combined_df['date'] < first_day_of_month) | (combined_df['date'] == first_day_of_month))
        ]
        past_present = past_records['present'].sum()
        avg_att_percent_till_last_month = get_percentage(past_present, len(past_records))

        row_id = await check_if_row_exists(ewyl_group, current_year, current_month, ums_avg_att_df)

        data = {
            'ewyl-group-name': ewyl_group,
            'admissions-group-name': kam_group_name,
            'attendance-percentage': attendance_percentage,
            'avg-att-percent-till-last-month': avg_att_percent_till_last_month,
            'year': current_year,
            'month': current_month,
            'first-day-of-month': first_day_of_month
        }

        if row_id:
            await patch_attendance_summary(session, base_url, headers, row_id, data)
            print("patched")
        else:
            await post_new_attendance_summary(session, base_url, headers, data)
            print("posted")

        new_row = {'student': ewyl_group, 'attendance-percentage': attendance_percentage}
        results_df = pd.concat([results_df, pd.DataFrame([new_row])], ignore_index=True)

    return results_df




async def main():
    global student_df, engagement_this_month_df, engagement_previous_months_df, ums_att_df, ums_avg_att_df
    
    async with aiohttp.ClientSession() as session:
        # Fetch data from 'ums_att'
        ums_att_df = await fetch_table_data(session, ums_base_url, ums_headers, "Attendance")
        
        ums_avg_att_df = await fetch_table_data(session, ums_base_url, ums_headers, "Attendance-Summary")
        
        # Determine the latest date from 'ums_att'
        ums_latest = pd.to_datetime(ums_att_df['date'].max())
        
        # Fetch Student table from Edoofy
        student_constraints = [           
            {'key': '_id', 'constraint_type': 'equals', 'value': '1695736497533x818400363201798900'}
        ]
        student_df = await fetch_table_data(session, edoofy_base_url, edoofy_headers, "Student", constraints=student_constraints)
        
        # Fetch engagement data where 'engagement-date' is greater than 'ums_latest'
        engagement_constraints = [
            #{'key': 'engagement-date', 'constraint_type': 'greater than', 'value': ums_latest.isoformat()},
            {'key': 'student', 'constraint_type': 'equals', 'value': '1695736497533x818400363201798900'}
        ]
        engagement_df = await fetch_table_data(session, edoofy_base_url, edoofy_headers, "Engagement", constraints=engagement_constraints)
        engagement_df = engagement_df[engagement_df['engagement-type'].isin(['IE Call', 'IE Chat', 'Activity', 'Lesson'])]
        
        # Calculate the start and end date for the till last month (excluding this month)
        latest_engagement_date = pd.to_datetime(engagement_df['engagement-date'].max())
        end_date = latest_engagement_date.replace(day=1) - timedelta(days=1)
       # start_date = end_date - timedelta(days=365)

        # Filter engagement data for the till last month
        previous_months_df = engagement_df[            
            (pd.to_datetime(engagement_df['engagement-date']) <= end_date)
        ]

        # Split the data into this month and previous months
        this_month_start = latest_engagement_date.replace(day=1)
        engagement_this_month_df = engagement_df[
            pd.to_datetime(engagement_df['engagement-date']) >= this_month_start
        ]
        engagement_previous_months_df = previous_months_df
        
        # Apply mapping to engagement dataframes
        engagement_this_month_df = map_students_to_engagement(student_df, engagement_this_month_df)
        engagement_previous_months_df = map_students_to_engagement(student_df, engagement_previous_months_df)
        
        all_processed_data = []

        for _, student_row in student_df.iterrows():
            student_id = student_row['EWYL-group-name']
            processed_df = process_engagement_data(ums_att_df, engagement_df, student_df, student_id)

            # Post processed data for each student
            if not processed_df.empty:
                await post_processed_data(session, ums_base_url, ums_headers, processed_df)
                all_processed_data.append(processed_df)
            else:
                print(f"No data to post for student ID {student_id}")
        print(all_processed_data)
        combined_df = pd.concat([ums_att_df] + all_processed_data, ignore_index=True)
        print(combined_df)
        # Process average attendance
        await process_avg_att(session, ums_base_url, ums_headers, combined_df, ums_avg_att_df, True)


await main()


Fetching Attendance data from https://edoofa-ums-90164.bubbleapps.io/version-test/api/1.1/obj... Cursor: 0
Fetched 3 new records, Total fetched: 3
Exiting loop, fetched less than 100 records.
Fetched 3 records for Attendance.
Fetching Attendance-Summary data from https://edoofa-ums-90164.bubbleapps.io/version-test/api/1.1/obj... Cursor: 0
Fetched 12 new records, Total fetched: 12
Exiting loop, fetched less than 100 records.
Fetched 12 records for Attendance-Summary.
Fetching Student data from https://edoofa-portal.bubbleapps.io/api/1.1/obj... Cursor: 0
Fetched 1 new records, Total fetched: 1
Exiting loop, fetched less than 100 records.
Fetched 1 records for Student.
Fetching Engagement data from https://edoofa-portal.bubbleapps.io/api/1.1/obj... Cursor: 0
Fetched 22 new records, Total fetched: 22
Exiting loop, fetched less than 100 records.
Fetched 22 records for Engagement.
adding all records


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  engagement_df['ewyl'] = engagement_df['student'].map(student_id_to_EWYL)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  engagement_df['ewyl'] = engagement_df['student'].map(student_id_to_EWYL)


Successfully posted data for row 2
Successfully posted data for row 3
Successfully posted data for row 4
Successfully posted data for row 5
Successfully posted data for row 6
Successfully posted data for row 7
Successfully posted data for row 8
Successfully posted data for row 9
Successfully posted data for row 10
Successfully posted data for row 11
Successfully posted data for row 15
Successfully posted data for row 16
Successfully posted data for row 21
[               Modified Date              Created Date  \
2   2023-12-09T17:34:08.367Z  2023-11-23T06:32:00.883Z   
3   2023-11-24T13:42:28.548Z  2023-11-24T13:22:13.560Z   
4   2023-11-24T14:00:24.415Z  2023-11-24T13:42:27.815Z   
5   2023-11-24T17:07:05.930Z  2023-11-24T14:00:21.878Z   
6   2023-11-24T17:07:03.062Z  2023-11-24T17:07:03.062Z   
7   2023-11-24T17:20:03.691Z  2023-11-24T17:20:03.691Z   
8   2023-11-25T06:36:01.455Z  2023-11-25T06:36:01.455Z   
9   2023-11-25T07:17:29.691Z  2023-11-25T07:17:29.691Z   
10  2023-11-25T08

Successfully created a new row in Attendance-Summary.
posted
Successfully created a new row in Attendance-Summary.
posted


In [None]:
processed_df = pd.concat([ums_att_df, new_dataframe], ignore_index=True)


In [None]:
print(new_dataframe)

In [None]:
print(ums_att_df)

In [None]:
print(engagement_this_month_df)

In [None]:
ums_avg_att_df.head(1)