In [9]:
import requests
import pandas as pd
import time
import os
from datetime import datetime, timedelta

# GraphQL endpoint that every request in this notebook is POSTed to.
GRAPHQL_URL = "https://ows.goszakup.gov.kz/v3/graphql"

# NOTE(review): a bearer token committed to source is a leaked credential.
# Prefer supplying it through the GOSZAKUP_API_TOKEN environment variable;
# the literal below is kept only so existing runs keep working and should be
# rotated and removed.
_API_TOKEN = os.environ.get("GOSZAKUP_API_TOKEN") or "d5c3d78fc111d88a0a37b4ab8f83cbd5"

# HTTP headers sent with every GraphQL request.
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {_API_TOKEN}",
}

# GraphQL document sent with every request.
# It asks for `Plans` with the caller-supplied `$filter`, a fixed `limit` of
# 200 items per call, and `$after` passed through as the `after` argument.
# For each plan it selects `id`, `dateCreate` and the nested `PlansKato`
# entries with the fields listed below.
query_template = """
query GetPlans($filter: PlansFiltersInput, $after: Int) {
    Plans(filter: $filter, limit: 200, after: $after) {
        id
        dateCreate
        PlansKato {
            id
            plnPointsId
            refKatoCode
            refCountriesCode
            fullDeliveryPlaceNameRu
            fullDeliveryPlaceNameKz
            count
            systemId
        }
    }
}
"""

# Storage layout: one combined parquet file plus a scratch directory that
# holds the per-batch parquet files written while a run is in progress.
output_dir = "plans_data"
os.makedirs(output_dir, exist_ok=True)
combined_file = os.path.join(output_dir, "plans_combined.parquet")
temp_dir = os.path.join(output_dir, "temp")
os.makedirs(temp_dir, exist_ok=True)

# Determine the starting dateCreate filter and the initial `after` value.
start_date_create = "2025-03-20 00:00:00"  # used when there is nothing to resume from
last_date_create = pd.NaT
if os.path.exists(combined_file):
    existing_df = pd.read_parquet(combined_file)
    # max() over an empty or all-null column is NaN, which to_datetime turns
    # into NaT; NaT cannot be formatted with strftime, so it is treated below
    # the same way as "no previous file".
    last_date_create = pd.to_datetime(existing_df["dateCreate"].max())

if pd.notna(last_date_create):
    # Resume one second after the newest dateCreate already stored.
    next_date_create = (last_date_create + timedelta(seconds=1)).strftime("%Y-%m-%d %H:%M:%S")
    variables = {"filter": {"dateCreate": next_date_create}, "after": None}
    print(f"üìÇ –ù–∞–π–¥–µ–Ω —Ñ–∞–π–ª —Å –ø–æ—Å–ª–µ–¥–Ω–µ–π –¥–∞—Ç–æ–π: {last_date_create}")
else:
    variables = {"filter": {"dateCreate": start_date_create}, "after": None}
    print(f"üìÇ –ù–∞—á–∏–Ω–∞–µ–º —Å–±–æ—Ä —Å –Ω–∞—á–∞–ª—å–Ω–æ–π –¥–∞—Ç—ã: {start_date_create}")

def _plan_rows(plan):
    """Flatten one `Plans` item into one row dict per nested `PlansKato` entry.

    A plan whose `PlansKato` is missing, null or empty yields no rows.
    """
    # GraphQL serialises an absent nullable field as an explicit null, so a
    # `.get("PlansKato", [])` default would still hand None to the loop.
    return [
        {
            "plan_id": plan["id"],
            "dateCreate": plan["dateCreate"],
            "kato_id": kato["id"],
            "plnPointsId": kato["plnPointsId"],
            "refKatoCode": kato["refKatoCode"],
            "refCountriesCode": kato["refCountriesCode"],
            "fullDeliveryPlaceNameRu": kato["fullDeliveryPlaceNameRu"],
            "fullDeliveryPlaceNameKz": kato["fullDeliveryPlaceNameKz"],
            "count": kato["count"],
            "systemId": kato["systemId"],
        }
        for kato in (plan.get("PlansKato") or [])
    ]


batch_size_limit = 100000  # Flush buffered rows to a temp file once this many accumulate
request_count = 0
batch_count = 0
plans_batch = []

# Page through the API until it returns no plans, reports errors, or a
# request fails. Rows are buffered in `plans_batch` and spilled to numbered
# parquet files in `temp_dir` so memory use stays bounded.
while True:
    request_count += 1
    print(f"üîÑ –ó–∞–ø—Ä–æ—Å #{request_count} (after={variables['after']})")

    try:
        response = requests.post(GRAPHQL_URL, json={"query": query_template, "variables": variables}, headers=HEADERS, timeout=10)
        response.raise_for_status()
        data = response.json()

        if "errors" in data:
            print(f"‚ùå –û—à–∏–±–∫–∞ API: {data['errors']}")
            break

        plans = data.get("data", {}).get("Plans", [])
        if not plans:
            print("‚úÖ –í—Å–µ –¥–∞–Ω–Ω—ã–µ –∑–∞–≥—Ä—É–∂–µ–Ω—ã.")
            break

        for plan in plans:
            plans_batch.extend(_plan_rows(plan))

        if len(plans_batch) >= batch_size_limit:
            batch_count += 1
            temp_file = os.path.join(temp_dir, f"plans_batch_{batch_count}.parquet")
            pd.DataFrame(plans_batch).to_parquet(temp_file, index=False)
            print(f"üíæ –°–æ—Ö—Ä–∞–Ω—ë–Ω –≤—Ä–µ–º–µ–Ω–Ω—ã–π —Ñ–∞–π–ª: {temp_file} ({len(plans_batch)} –∑–∞–ø–∏—Å–µ–π)")
            plans_batch = []

        # The id of the last plan on this page becomes the next `after` value.
        variables["after"] = plans[-1]["id"]
        time.sleep(1)  # pause between consecutive requests
    except Exception as e:
        # Best effort: report the failure and stop; rows buffered so far are
        # still saved by the code that follows the loop.
        print(f"‚ö†Ô∏è –û—à–∏–±–∫–∞: {e}")
        break

# Write out whatever rows are still buffered once the request loop has ended.
if len(plans_batch) > 0:
    batch_count = batch_count + 1
    temp_file = os.path.join(temp_dir, f"plans_batch_{batch_count}.parquet")
    leftover_frame = pd.DataFrame(plans_batch)
    leftover_frame.to_parquet(temp_file, index=False)
    print(f"üíæ –°–æ—Ö—Ä–∞–Ω—ë–Ω –≤—Ä–µ–º–µ–Ω–Ω—ã–π —Ñ–∞–π–ª: {temp_file} ({len(plans_batch)} –∑–∞–ø–∏—Å–µ–π)")

# Merge the per-batch temp files into the combined parquet file.
# os.listdir returns names in arbitrary order; sorting makes the concat order
# (and therefore which duplicate `keep="last"` retains) reproducible.
temp_files = sorted(
    os.path.join(temp_dir, f) for f in os.listdir(temp_dir) if f.endswith(".parquet")
)
if temp_files:
    print(f"üîÑ –û–±—ä–µ–¥–∏–Ω—è–µ–º {len(temp_files)} –≤—Ä–µ–º–µ–Ω–Ω—ã—Ö —Ñ–∞–π–ª–æ–≤...")
    df_list = [pd.read_parquet(f) for f in temp_files]
    if os.path.exists(combined_file):
        # Previously stored rows go first so freshly fetched rows win on duplicates.
        df_list.insert(0, pd.read_parquet(combined_file))
    # De-duplicate on every run, including the first one where no combined
    # file exists yet.
    all_data = pd.concat(df_list, ignore_index=True).drop_duplicates(
        subset=["plan_id", "kato_id"], keep="last"
    )

    # Write to a sibling file and swap it in, so an interrupted write cannot
    # leave a truncated combined file behind. The ".tmp" suffix also keeps the
    # staging file out of the "*.parquet" listing above.
    staging_file = combined_file + ".tmp"
    all_data.to_parquet(staging_file, index=False)
    os.replace(staging_file, combined_file)
    print(f"‚úÖ –î–∞–Ω–Ω—ã–µ –æ–±–Ω–æ–≤–ª–µ–Ω—ã –≤ {combined_file} ({len(all_data)} –∑–∞–ø–∏—Å–µ–π)")

    # Remove the temp files only after the combined file is safely in place.
    for temp_file in temp_files:
        os.remove(temp_file)
    print("üóëÔ∏è –í—Ä–µ–º–µ–Ω–Ω—ã–µ —Ñ–∞–π–ª—ã —É–¥–∞–ª–µ–Ω—ã.")

# Final report and a quick look at the result.
# The combined file does not exist if no run has ever produced data, so guard
# the read instead of failing with FileNotFoundError; read the file only once.
if os.path.exists(combined_file):
    df = pd.read_parquet(combined_file)
    print(f"‚úÖ –ó–∞–≤–µ—Ä—à–µ–Ω–æ. –í—Å–µ–≥–æ –∑–∞–ø–∏—Å–µ–π –≤ –∏—Ç–æ–≥–æ–≤–æ–º —Ñ–∞–π–ª–µ: {len(df)}")

    # Check the result
    print("\n–ü–µ—Ä–≤—ã–µ 5 —Å—Ç—Ä–æ–∫:")
    display(df.head(5))
    print("\n–ü–æ—Å–ª–µ–¥–Ω–∏–µ 5 —Å—Ç—Ä–æ–∫:")
    display(df.tail(5))
else:
    print(f"No combined file found at {combined_file}; nothing to preview.")

📂 Найден файл с последней датой: 2025-03-25 14:56:19
🔄 Запрос #1 (after=None)
✅ Все данные загружены.
✅ Завершено. Всего записей в итоговом файле: 1490


In [7]:
# Reload the combined file and show its first and last five rows.
df = pd.read_parquet(combined_file)
head_rows, tail_rows = df.head(5), df.tail(5)
print("\n–ü–µ—Ä–≤—ã–µ 5 —Å—Ç—Ä–æ–∫:")
display(head_rows)
print("\n–ü–æ—Å–ª–µ–¥–Ω–∏–µ 5 —Å—Ç—Ä–æ–∫:")
display(tail_rows)


Первые 5 строк:


Unnamed: 0,plan_id,dateCreate,kato_id,plnPointsId,refKatoCode,refCountriesCode,fullDeliveryPlaceNameRu,fullDeliveryPlaceNameKz,count,systemId
0,78319870,2025-03-25 14:56:19,97625939,78319870,614430100,398,"Туркестанская область, Мактааральский район, п...","Түркістан облысы, Мақтаарал ауданы, Мырзакент ...",4.0,3
1,78319854,2025-03-25 14:53:13,97625917,78319854,614430100,398,"Туркестанская область, Мактааральский район, п...","Түркістан облысы, Мақтаарал ауданы, Мырзакент ...",10.0,3
2,78319835,2025-03-25 14:49:51,97625892,78319835,614430100,398,"Туркестанская область, Мактааральский район, п...","Түркістан облысы, Мақтаарал ауданы, Мырзакент ...",20.0,3
3,78319795,2025-03-25 14:43:00,97625838,78319795,614430100,398,"Туркестанская область, Мактааральский район, п...","Түркістан облысы, Мақтаарал ауданы, Мырзакент ...",10.0,3
4,78319768,2025-03-25 14:38:02,97625810,78319768,614430100,398,"Туркестанская область, Мактааральский район, п...","Түркістан облысы, Мақтаарал ауданы, Мырзакент ...",8.0,3



Последние 5 строк:


Unnamed: 0,plan_id,dateCreate,kato_id,plnPointsId,refKatoCode,refCountriesCode,fullDeliveryPlaceNameRu,fullDeliveryPlaceNameKz,count,systemId
1485,78296914,2025-03-20 00:03:17,97595602,78296914,751210000,398,"г.Алматы, Алатауский район г.Алматы, Алатауски...","Алматы қ., Алатау ауданы Алматы қ., Алатау ауд...",1.0,3
1486,78296907,2025-03-20 00:02:13,97595594,78296907,751210000,398,"г.Алматы, Алатауский район г.Алматы, Алатауски...","Алматы қ., Алатау ауданы Алматы қ., Алатау ауд...",1.0,3
1487,78296904,2025-03-20 00:01:27,97595591,78296904,751210000,398,"г.Алматы, Алатауский район г.Алматы, Алатауски...","Алматы қ., Алатау ауданы Алматы қ., Алатау ауд...",1.0,3
1488,78296898,2025-03-20 00:00:39,97595585,78296898,751210000,398,"г.Алматы, Алатауский район г.Алматы, Алатауски...","Алматы қ., Алатау ауданы Алматы қ., Алатау ауд...",1.0,3
1489,78296894,2025-03-20 00:00:03,97595580,78296894,751210000,398,"г.Алматы, Алатауский район г.Алматы, Алатауски...","Алматы қ., Алатау ауданы Алматы қ., Алатау ауд...",1.0,3
