In [None]:
import os
from datetime import date, timedelta
from io import StringIO

import boto3
import pandas as pd
import requests

def fetch_data_for_date(url, headers, target_date, timeout=30, session=None):
    """Fetch every page of results for one date from a cursor-paginated API.

    Args:
        url: Endpoint prefix; ``target_date`` is appended to it verbatim, so
            it must already end with the query parameter
            (e.g. ``".../stats?dates[]="``).
        headers: HTTP headers sent with every request.
        target_date: Date string appended to ``url``.
        timeout: Seconds to wait for each response before the HTTP client
            gives up, so a stalled connection cannot hang the job forever.
        session: Optional object exposing a ``requests``-compatible ``get``
            (for example a ``requests.Session``). Defaults to the
            ``requests`` module itself.

    Returns:
        list: The ``data`` items of all fetched pages, in request order. If a
        request returns a non-200 status, a message is printed and the items
        collected so far are returned.
    """
    http = requests if session is None else session
    first_page_url = url + target_date
    all_data = []
    next_cursor = None

    while True:
        if next_cursor:
            page_url = f"{first_page_url}&cursor={next_cursor}"
        else:
            page_url = first_page_url

        response = http.get(page_url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            print("Failed to fetch data:", response.status_code)
            break

        payload = response.json()
        rows = payload.get('data')
        if not rows:
            break
        all_data.extend(rows)

        # Stop on a missing *or* falsy cursor. Continuing with a falsy cursor
        # would rebuild the first-page URL and re-request it forever.
        next_cursor = (payload.get('meta') or {}).get('next_cursor')
        if not next_cursor:
            break

    return all_data

def save_to_s3(data, bucket_name, file_name):
    """Write ``data`` as CSV to an S3 object.

    ``data`` is wrapped in a ``pandas.DataFrame``, serialized to CSV text
    without the index column, and uploaded with ``put_object`` to key
    ``file_name`` in bucket ``bucket_name``.
    """
    client = boto3.client('s3')
    frame = pd.DataFrame(data)
    with StringIO() as sink:
        frame.to_csv(sink, index=False)
        csv_text = sink.getvalue()
    client.put_object(Bucket=bucket_name, Key=file_name, Body=csv_text)

def read_from_s3(bucket_name, file_name):
    """Load a CSV object from S3 into a DataFrame.

    Fetches key ``file_name`` from bucket ``bucket_name`` with ``get_object``
    and parses the response body with ``pandas.read_csv``.
    """
    response = boto3.client('s3').get_object(Bucket=bucket_name, Key=file_name)
    return pd.read_csv(response['Body'])

# Daily job: fetch yesterday's stats and merge them into the season CSV on S3.

base_url = "https://api.balldontlie.io/v1/stats?dates[]="
# SECURITY: an API key is committed in this file. Supply it through the
# BALLDONTLIE_API_KEY environment variable instead; the literal below is kept
# only as a fallback so existing runs keep working, and should be rotated and
# removed.
headers = {
    "Authorization": os.environ.get(
        "BALLDONTLIE_API_KEY", "b550c0f1-670c-48dd-aba9-81e75cdd43b7"
    )
}

# "Yesterday" is relative to the local date of the machine running the job.
yesterday = date.today() - timedelta(days=1)
yesterday_date = yesterday.strftime("%Y-%m-%d")

data = fetch_data_for_date(base_url, headers, yesterday_date)

if data:
    df_new = pd.json_normalize(data)
    bucket_name = "rmb-glue"
    current_year_file_name = "Current_Year.csv"

    try:
        # Read existing data from S3. On the very first run the object does
        # not exist yet; treat that as "no existing rows" so the file gets
        # created instead of the job failing every day.
        try:
            existing_df = read_from_s3(bucket_name, current_year_file_name)
        except Exception as read_error:
            error_response = getattr(read_error, "response", None)
            error_code = (
                error_response.get("Error", {}).get("Code")
                if isinstance(error_response, dict)
                else None
            )
            if error_code != "NoSuchKey":
                raise
            existing_df = None

        # Concatenate new data with existing data
        frames = [df_new] if existing_df is None else [existing_df, df_new]
        updated_df = pd.concat(frames, ignore_index=True)
        # Remove duplicate rows
        # NOTE(review): rows read back from CSV may differ in dtype from freshly
        # normalized rows, so exact-row matching could miss repeats on a re-run;
        # consider deduplicating on a unique id column if one exists - confirm.
        updated_df = updated_df.drop_duplicates()
        # Save updated data back to S3 (save_to_s3 wraps its input in a
        # DataFrame, so the frame can be passed directly)
        save_to_s3(updated_df, bucket_name, current_year_file_name)
        print(f"Fetched data for yesterday: {yesterday_date}. Data appended to 'Current_Year.csv' in S3.")
    except Exception as e:
        print("Error:", e)
else:
    print("No data available for yesterday:", yesterday_date)
