In [None]:
import requests
import json
import pandas as pd
import logging

# Configure the root logger at INFO level so the logging.info/warning/error
# calls made by getData below are emitted.
logging.basicConfig(level=logging.INFO)

def _fetch_answer_bodies(answer_ids, timeout=30):
    """Return ``{answer_id: body}`` for the given Stack Overflow answer ids.

    All ids are sent in a single ``/answers/{ids}`` request (semicolon
    delimited) instead of one request per answer, which keeps the number of
    API calls per page constant.

    Args:
        answer_ids: Iterable of answer ids; at most 100 are expected because
            the question page size is 100.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        dict mapping each returned ``answer_id`` to its ``body`` (``''`` when
        the body field is missing). Ids the API does not return are absent.

    Raises:
        requests.exceptions.RequestException: On network or HTTP errors.
    """
    ids = ";".join(str(answer_id) for answer_id in answer_ids)
    if not ids:
        return {}

    response = requests.get(
        f"https://api.stackexchange.com/2.3/answers/{ids}",
        params={
            "site": "stackoverflow",
            "filter": "withbody",
            # Ask for a full page so that all requested ids fit into one
            # response rather than being split across pages.
            "pagesize": 100,
        },
        timeout=timeout,
    )
    response.raise_for_status()
    return {
        answer["answer_id"]: answer.get("body", "")
        for answer in response.json().get("items", [])
        if "answer_id" in answer
    }


def getData(page, tags="nlp", file_name="questions_data.csv", timeout=30):
    """Fetch one page of tagged questions and append them to a CSV file.

    Requests page ``page`` of the newest Stack Overflow questions carrying
    ``tags``, keeps the questions that have an accepted answer, fetches the
    bodies of those answers in one batched request, and appends one row per
    question to ``file_name`` (the header is written only when the file is
    empty).

    Args:
        page: 1-based page number passed to the API.
        tags: Semicolon-separated tag list for the ``tagged`` parameter.
        file_name: Path of the CSV file to append to.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        The ``pandas.DataFrame`` that was appended, or ``None`` when nothing
        was written (no questions, no accepted answers, or an error, which is
        logged rather than raised).
    """
    base_url = "https://api.stackexchange.com/2.3/questions"

    # NOTE(review): the original also sent "answers": "1"; that does not appear
    # to be a documented /questions parameter (confirm against the API docs).
    # Questions without an accepted answer are filtered out below instead.
    params = {
        "order": "desc",
        "sort": "creation",
        "tagged": tags,
        "site": "stackoverflow",
        "pagesize": 100,
        "page": page,
        "filter": "withbody",
    }

    try:
        response = requests.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()

        if "items" not in data or not data["items"]:
            logging.warning("No questions found.")
            return None

        # A question without an accepted answer has no "accepted_answer_id"
        # key; skip it instead of letting a KeyError discard the whole page.
        questions = [
            question for question in data["items"]
            if question.get("accepted_answer_id") is not None
        ]

        try:
            answer_bodies = _fetch_answer_bodies(
                [question["accepted_answer_id"] for question in questions],
                timeout=timeout,
            )
        except (requests.exceptions.RequestException, ValueError) as e:
            # Best effort, as before: a failed answer lookup is logged and the
            # affected questions are simply not written.
            logging.error(f"Error fetching accepted answers for page {page}: {e}")
            answer_bodies = {}

        data_list = [
            [
                question.get('title', ''),
                question.get('body', ''),
                ", ".join(question.get('tags', [])),
                answer_bodies[question["accepted_answer_id"]],
                question.get('score', 0),
                question.get('creation_date', ''),
            ]
            for question in questions
            if question["accepted_answer_id"] in answer_bodies
        ]

        if not data_list:
            logging.warning("No data to write to CSV.")
            return None

        df = pd.DataFrame(data_list, columns=[
            "Title", "Description", "Tags", "Accepted Answer", "Question Score", "Question Time"
        ])

        # newline='' lets the CSV writer control line endings, so no blank
        # rows appear on platforms that translate "\n".
        with open(file_name, 'a', encoding='utf-8', newline='') as f:
            # In append mode the position starts at end-of-file, so 0 means
            # the file is empty and still needs its header row.
            df.to_csv(f, header=f.tell() == 0, index=False)
        logging.info(f"Data appended to {file_name}")
        return df

    except requests.exceptions.RequestException as e:
        logging.error(f"Request error: {e}")
        # The response body may carry the API's own explanation of the
        # failure; surface it when there is one.
        failed_response = getattr(e, "response", None)
        if failed_response is not None and failed_response.text:
            logging.error(f"API response: {failed_response.text}")
    except json.JSONDecodeError as e:
        logging.error(f"JSON decode error: {e}")
    except Exception as e:
        logging.error(f"Unexpected error: {e}")

    return None


# Request pages 1 and 2; print a summary only for pages where getData
# returned a non-empty DataFrame.
for page in range(1, 3):
    result = getData(page)
    if result is None or result.empty:
        continue
    print(f"Page {page}: {len(result)} questions retrieved")

ERROR:root:Request error: 400 Client Error: Bad Request for url: https://api.stackexchange.com/2.3/questions?order=desc&sort=creation&tagged=nlp&site=stackoverflow&pagesize=100&page=1&filter=withbody&answers=1
ERROR:root:Request error: 400 Client Error: Bad Request for url: https://api.stackexchange.com/2.3/questions?order=desc&sort=creation&tagged=nlp&site=stackoverflow&pagesize=100&page=2&filter=withbody&answers=1
ERROR:root:Request error: 400 Client Error: Bad Request for url: https://api.stackexchange.com/2.3/questions?order=desc&sort=creation&tagged=nlp&site=stackoverflow&pagesize=100&page=3&filter=withbody&answers=1
ERROR:root:Request error: 400 Client Error: Bad Request for url: https://api.stackexchange.com/2.3/questions?order=desc&sort=creation&tagged=nlp&site=stackoverflow&pagesize=100&page=4&filter=withbody&answers=1
ERROR:root:Request error: 400 Client Error: Bad Request for url: https://api.stackexchange.com/2.3/questions?order=desc&sort=creation&tagged=nlp&site=stackoverf