In [42]:
import requests
import json
import pandas as pd
import logging

logging.basicConfig(level=logging.INFO)

def getData(page, tag):
    
    base_url = "https://api.stackexchange.com/2.3/questions"
    params = {
        "order": "asc",
        "sort": "creation",
        "tagged": tag,
        "site": "stackoverflow",
        "pagesize": 100,
        "page": page,
        "filter": "withbody",
        "answers": "1"
    }
    
    try:
        response = requests.get(base_url, params=params)
        response.raise_for_status()
        data = response.json()
        
        if "items" not in data or not data["items"]:
            logging.warning("No questions found.")
            return None
        
        data_list = []
        for question in data["items"]:
            answer_id = question.get("accepted_answer_id",'')
                                     
            if answer_id:
                answers_url = f"https://api.stackexchange.com/2.3/answers/{answer_id}"
                answers_params = {
                    "site": "stackoverflow",
                    "filter": "withbody",
                }

                try:
                    answers_response = requests.get(answers_url, params=answers_params)
                    answers_response.raise_for_status()
                    answers_data = answers_response.json()
                    answers = answers_data.get('items', False)
                    if answers[0].get('is_accepted', False):
                        accepted_answer=answers[0]
                        data_list.append([
                            question.get('title', ''),
                            question.get('body', ''),
                            ", ".join(question.get('tags', [])),
                            accepted_answer.get('body', ''),
                            question.get('score', 0),
                            question.get('creation_date','')
                        ])

                except Exception as e:
                    logging.error(f"Error fetching answers for question {answer_id}: {e}")
        
            if data_list:
                df = pd.DataFrame(data_list, columns=[
                    "Title", "Description", "Tags", "Accepted Answer", "Question Score", "Question Time"
                ])

                file_name = "questions_data.csv"

                with open(file_name, 'a', encoding='utf-8') as f:
                        df.to_csv(f, header=f.tell() == 0, index=False,lineterminator='\n')
                logging.info(f"Data appended to {file_name}")
        
    except requests.exceptions.RequestException as e:
        logging.error(f"Request error: {e}")
    except json.JSONDecodeError as e:
        logging.error(f"JSON decode error: {e}")
    except Exception as e:
        logging.error(f"Unexpected error: {e}")
    
    return None


In [None]:
for page in range(1,5):
    result = getData(page,'nlp')


{'tags': ['.net', 'datetime', 'nlp'], 'owner': {'account_id': 411, 'reputation': 5842, 'user_id': 521, 'user_type': 'registered', 'accept_rate': 100, 'profile_image': 'https://www.gravatar.com/avatar/3893360ae5ac6c06ade059fce126bc51?s=256&d=identicon&r=PG', 'display_name': 'palmsey', 'link': 'https://stackoverflow.com/users/521/palmsey'}, 'is_answered': True, 'view_count': 6470, 'closed_date': 1492171912, 'accepted_answer_id': 631134, 'answer_count': 9, 'score': 27, 'last_activity_date': 1438422109, 'creation_date': 1219445110, 'question_id': 23689, 'link': 'https://stackoverflow.com/questions/23689/natural-language-date-time-parser-for-net', 'closed_reason': 'Not suitable for this site', 'title': 'Natural language date/time parser for .NET?', 'body': '<p>Does anyone know of a .NET date/time parser similar to <a href="http://chronic.rubyforge.org/" rel="noreferrer">Chronic for Ruby</a> (handles stuff like "tomorrow" or "3pm next thursday")?</p>\n\n<p>Note: I do write Ruby (which is how

INFO:root:Data appended to questions_data.csv
INFO:root:Data appended to questions_data.csv


{'tags': ['language-agnostic', 'nlp'], 'owner': {'account_id': 1196, 'reputation': 2297, 'user_id': 1592, 'user_type': 'registered', 'accept_rate': 60, 'profile_image': 'https://www.gravatar.com/avatar/2886583fab3929c95a68e339e73d61d8?s=256&d=identicon&r=PG', 'display_name': 'jeffreypriebe', 'link': 'https://stackoverflow.com/users/1592/jeffreypriebe'}, 'is_answered': True, 'view_count': 6486, 'closed_date': 1451720842, 'answer_count': 4, 'score': 14, 'last_activity_date': 1396481617, 'creation_date': 1219611453, 'last_edit_date': 1233102270, 'question_id': 25332, 'link': 'https://stackoverflow.com/questions/25332/whats-a-good-natural-language-library-to-use-for-paraphrasing', 'closed_reason': 'Not suitable for this site', 'title': 'What&#39;s a good natural language library to use for paraphrasing?', 'body': "<p>I'm looking for an existing library to summarize or paraphrase content (I'm aiming at blog posts) - any experience with existing natural language processing libraries?</p>\n\n

INFO:root:Data appended to questions_data.csv


{'tags': ['nlp'], 'owner': {'account_id': 1917, 'reputation': 18035, 'user_id': 2644, 'user_type': 'registered', 'accept_rate': 82, 'profile_image': 'https://www.gravatar.com/avatar/a2d07b1a0a6ddcfed5c18ddfafd5a887?s=256&d=identicon&r=PG', 'display_name': 'pek', 'link': 'https://stackoverflow.com/users/2644/pek'}, 'is_answered': True, 'view_count': 33164, 'closed_date': 1357631818, 'accepted_answer_id': 41448, 'answer_count': 17, 'score': 118, 'last_activity_date': 1353304626, 'creation_date': 1220438173, 'last_edit_date': 1495541392, 'question_id': 41424, 'link': 'https://stackoverflow.com/questions/41424/how-do-you-implement-a-did-you-mean', 'closed_reason': 'exact duplicate', 'title': 'How do you implement a &quot;Did you mean&quot;?', 'body': '<blockquote>\n  <p><strong>Possible Duplicate:</strong><br>\n  <a href="https://stackoverflow.com/questions/307291/how-does-the-google-did-you-mean-algorithm-work">How does the Google “Did you mean?” Algorithm work?</a>  </p>\n</blockquote>\n

INFO:root:Data appended to questions_data.csv


{'tags': ['algorithm', 'nlp', 'semantics'], 'owner': {'account_id': 2809, 'reputation': 3636, 'user_id': 1925263, 'user_type': 'registered', 'accept_rate': 89, 'profile_image': 'https://www.gravatar.com/avatar/38fa6a316b34296bb2532f09e13e8e2e?s=256&d=identicon&r=PG', 'display_name': 'btw0', 'link': 'https://stackoverflow.com/users/1925263/btw0'}, 'is_answered': True, 'view_count': 49878, 'accepted_answer_id': 63076, 'answer_count': 11, 'score': 65, 'last_activity_date': 1593283252, 'creation_date': 1221481602, 'last_edit_date': 1221483755, 'question_id': 62328, 'content_license': 'CC BY-SA 2.5', 'link': 'https://stackoverflow.com/questions/62328/is-there-an-algorithm-that-tells-the-semantic-similarity-of-two-phrases', 'title': 'Is there an algorithm that tells the semantic similarity of two phrases', 'body': '<p>input: phrase 1, phrase 2</p>\n\n<p>output: semantic similarity value (between 0 and 1), or the probability these two phrases are talking about the same thing</p>\n'}


INFO:root:Data appended to questions_data.csv
INFO:root:Data appended to questions_data.csv


{'tags': ['algorithm', 'cluster-analysis', 'machine-learning', 'nlp'], 'owner': {'user_type': 'does_not_exist', 'display_name': 'adityaw'}, 'is_answered': True, 'view_count': 5257, 'answer_count': 5, 'community_owned_date': 1221503797, 'score': 3, 'last_activity_date': 1398538698, 'creation_date': 1221503797, 'last_edit_date': 1233102218, 'question_id': 65487, 'content_license': 'CC BY-SA 2.5', 'link': 'https://stackoverflow.com/questions/65487/how-do-you-categorize-based-on-text-content', 'title': 'How Do You Categorize Based On Text Content?', 'body': '<p>How does one automatically find categories for text based on content?</p>\n'}
{'tags': ['algorithm', 'language-agnostic', 'parsing', 'numbers', 'nlp'], 'owner': {'account_id': 6714, 'reputation': 6967, 'user_id': 11414, 'user_type': 'registered', 'accept_rate': 73, 'profile_image': 'https://i.sstatic.net/s8vyO.png?s=256', 'display_name': 'Evgeny Zislis', 'link': 'https://stackoverflow.com/users/11414/evgeny-zislis'}, 'is_answered': 

INFO:root:Data appended to questions_data.csv


{'tags': ['algorithm', 'nlp', 'word-frequency'], 'owner': {'account_id': 9391, 'reputation': 8250, 'user_id': 17328, 'user_type': 'registered', 'accept_rate': 80, 'profile_image': 'https://www.gravatar.com/avatar/0b8f5cf3259414d6b33897ffc6430df2?s=256&d=identicon&r=PG', 'display_name': 'Mark McDonald', 'link': 'https://stackoverflow.com/users/17328/mark-mcdonald'}, 'is_answered': True, 'view_count': 22642, 'accepted_answer_id': 90846, 'answer_count': 8, 'score': 33, 'last_activity_date': 1741687724, 'creation_date': 1221720566, 'last_edit_date': 1434981418, 'question_id': 90580, 'content_license': 'CC BY-SA 3.0', 'link': 'https://stackoverflow.com/questions/90580/word-frequency-algorithm-for-natural-language-processing', 'title': 'Word frequency algorithm for natural language processing', 'body': '<p>Without getting a degree in information retrieval, I\'d like to know if there exists any algorithms for counting the frequency that words occur in a given body of text.  The goal is to get

INFO:root:Data appended to questions_data.csv


{'tags': ['string', 'linguistics', 'nlp'], 'owner': {'account_id': 747, 'reputation': 10629, 'user_id': 976, 'user_type': 'registered', 'accept_rate': 100, 'profile_image': 'https://www.gravatar.com/avatar/ee07c455591be3321bdd8305aab15880?s=256&d=identicon&r=PG', 'display_name': 'Ozgur Ozcitak', 'link': 'https://stackoverflow.com/users/976/ozgur-ozcitak'}, 'is_answered': True, 'view_count': 5849, 'accepted_answer_id': 92033, 'answer_count': 13, 'score': 24, 'last_activity_date': 1320568799, 'creation_date': 1221740420, 'last_edit_date': 1320568799, 'question_id': 92006, 'content_license': 'CC BY-SA 3.0', 'link': 'https://stackoverflow.com/questions/92006/how-do-i-determine-if-a-random-string-sounds-like-english', 'title': 'How do I determine if a random string sounds like English?', 'body': '<p>I have an algorithm that generates strings based on a list of input words. How do I separate only the strings that sounds like English words? ie. discard <strong>RDLO</strong> while keeping <str

INFO:root:Data appended to questions_data.csv


{'tags': ['nlp', 'text-analysis'], 'owner': {'account_id': 9101, 'reputation': 9917, 'user_id': 16668, 'user_type': 'registered', 'accept_rate': 64, 'profile_image': 'https://www.gravatar.com/avatar/662b99660e8446a5f56aa754dd7b11a3?s=256&d=identicon&r=PG', 'display_name': 'Alex Weinstein', 'link': 'https://stackoverflow.com/users/16668/alex-weinstein'}, 'is_answered': True, 'view_count': 13430, 'protected_date': 1496512315, 'accepted_answer_id': 126378, 'answer_count': 2, 'score': 13, 'last_activity_date': 1496422293, 'creation_date': 1222192307, 'question_id': 122595, 'content_license': 'CC BY-SA 2.5', 'link': 'https://stackoverflow.com/questions/122595/nlp-qualitatively-positive-vs-negative-sentence', 'title': 'NLP: Qualitatively &quot;positive&quot; vs &quot;negative&quot; sentence', 'body': '<p>I need your help in determining the best approach for analyzing industry-specific sentences (i.e. movie reviews) for "positive" vs "negative". I\'ve seen libraries such as OpenNLP before, bu

INFO:root:Data appended to questions_data.csv


{'tags': ['.net', 'nlp'], 'owner': {'account_id': 9959, 'reputation': 6753, 'user_id': 18619, 'user_type': 'registered', 'accept_rate': 87, 'profile_image': 'https://www.gravatar.com/avatar/0ad58350b46012b50b00c5e61d0dc7d3?s=256&d=identicon&r=PG', 'display_name': 'Alexandre Brisebois', 'link': 'https://stackoverflow.com/users/18619/alexandre-brisebois'}, 'is_answered': True, 'view_count': 323, 'accepted_answer_id': 127503, 'answer_count': 4, 'score': 2, 'last_activity_date': 1233101781, 'creation_date': 1222264104, 'last_edit_date': 1233101781, 'question_id': 127238, 'content_license': 'CC BY-SA 2.5', 'link': 'https://stackoverflow.com/questions/127238/contextual-natural-language-resources-where-do-i-start', 'title': 'Contextual Natural Language Resources, Where Do I Start?', 'body': '<p>Where can i find some .Net or conceptual resources to start working with Natural Language where I can pull context and subjects from text. I wish not to work with word frequency algorithms.</p>\n'}


INFO:root:Data appended to questions_data.csv


{'tags': ['language-agnostic', 'parsing', 'nlp'], 'owner': {'account_id': 3370, 'reputation': 7212, 'user_id': 4857, 'user_type': 'registered', 'accept_rate': 96, 'profile_image': 'https://i.sstatic.net/cxMVR.jpg?s=256', 'display_name': 'Ande Turner', 'link': 'https://stackoverflow.com/users/4857/ande-turner'}, 'is_answered': True, 'view_count': 1904, 'accepted_answer_id': 707502, 'answer_count': 8, 'score': 5, 'last_activity_date': 1274560532, 'creation_date': 1222373983, 'last_edit_date': 1274560532, 'question_id': 135777, 'content_license': 'CC BY-SA 2.5', 'link': 'https://stackoverflow.com/questions/135777/a-stringtoken-parser-which-gives-google-search-style-did-you-mean-suggestions', 'title': 'A StringToken Parser which gives Google Search style &quot;Did you mean:&quot; Suggestions', 'body': '<h2>Seeking a method to:</h2>\n\n<h2>Take whitespace separated tokens in a String; return a suggested Word</h2>\n\n<p><br>\n<strong>ie:</strong><br>\nGoogle Search can take <em>"fonetic wrd 

INFO:root:Data appended to questions_data.csv


{'tags': ['nlp', 'linguistics', 'corpus'], 'owner': {'account_id': 6824, 'reputation': 34478, 'user_id': 11596, 'user_type': 'registered', 'accept_rate': 94, 'profile_image': 'https://www.gravatar.com/avatar/e78cebfe065dac6f667b1b8259d9299c?s=256&d=identicon&r=PG', 'display_name': 'unmounted', 'link': 'https://stackoverflow.com/users/11596/unmounted'}, 'is_answered': True, 'view_count': 1658, 'accepted_answer_id': 138310, 'answer_count': 7, 'score': 5, 'last_activity_date': 1391084995, 'creation_date': 1222395349, 'last_edit_date': 1495540883, 'question_id': 137380, 'content_license': 'CC BY-SA 3.0', 'link': 'https://stackoverflow.com/questions/137380/nlp-building-small-corpora-or-where-to-get-lots-of-not-too-specialized-engl', 'title': 'NLP: Building (small) corpora, or &quot;Where to get lots of not-too-specialized English-language text files?&quot;', 'body': '<p>Does anyone have a suggestion for where to find archives or collections of everyday English text for use in a small corpus

INFO:root:Data appended to questions_data.csv


{'tags': ['java', 'dsl', 'nlp', 'parsing'], 'owner': {'account_id': 8161, 'reputation': 12742, 'user_id': 14540, 'user_type': 'registered', 'accept_rate': 91, 'profile_image': 'https://www.gravatar.com/avatar/c4ceadae4f61dac8ff3ababc3faa6c42?s=256&d=identicon&r=PG', 'display_name': 'kolrie', 'link': 'https://stackoverflow.com/users/14540/kolrie'}, 'is_answered': True, 'view_count': 14601, 'closed_date': 1580482119, 'accepted_answer_id': 144374, 'answer_count': 6, 'score': 18, 'last_activity_date': 1580481644, 'creation_date': 1222545372, 'last_edit_date': 1233101128, 'question_id': 144339, 'link': 'https://stackoverflow.com/questions/144339/what-would-the-best-tool-to-create-a-natural-dsl-in-java', 'closed_reason': 'Not suitable for this site', 'title': 'What would the best tool to create a natural DSL in Java?', 'body': '<p>A couple of days ago, I read a blog entry (<a href="http://ayende.com/Blog/archive/2008/09/08/Implementing-generic-natural-language-DSL.aspx" rel="noreferrer">http

INFO:root:Data appended to questions_data.csv


{'tags': ['algorithm', 'statistics', 'nlp', 'named-entity-recognition'], 'owner': {'account_id': 8717, 'reputation': 21332, 'user_id': 15842, 'user_type': 'registered', 'accept_rate': 59, 'profile_image': 'https://www.gravatar.com/avatar/58d77327a8f46041462b31c117e5c51a?s=256&d=identicon&r=PG', 'display_name': 'Gregg Lind', 'link': 'https://stackoverflow.com/users/15842/gregg-lind'}, 'is_answered': True, 'view_count': 6591, 'accepted_answer_id': 164722, 'answer_count': 2, 'community_owned_date': 1222973072, 'score': 9, 'last_activity_date': 1692196803, 'creation_date': 1222973072, 'last_edit_date': 1257179414, 'question_id': 163923, 'content_license': 'CC BY-SA 2.5', 'link': 'https://stackoverflow.com/questions/163923/methods-for-geotagging-or-geolabelling-text-content', 'title': 'Methods for Geotagging or Geolabelling Text Content', 'body': "<p>What are some good algorithms for automatically labeling text with the city / region  or origin?  That is, if a blog is about New York, how ca

INFO:root:Data appended to questions_data.csv


{'tags': ['encoding', 'theory', 'nlp', 'linguistics'], 'owner': {'account_id': 3370, 'reputation': 7212, 'user_id': 4857, 'user_type': 'registered', 'accept_rate': 96, 'profile_image': 'https://i.sstatic.net/cxMVR.jpg?s=256', 'display_name': 'Ande Turner', 'link': 'https://stackoverflow.com/users/4857/ande-turner'}, 'is_answered': True, 'view_count': 2497, 'accepted_answer_id': 173946, 'answer_count': 8, 'community_owned_date': 1223295041, 'score': 4, 'last_activity_date': 1274560520, 'creation_date': 1223131686, 'last_edit_date': 1495535271, 'question_id': 170452, 'content_license': 'CC BY-SA 3.0', 'link': 'https://stackoverflow.com/questions/170452/theory-lexical-encoding', 'title': 'Theory: &quot;Lexical Encoding&quot;', 'body': '<p><strong>I am using the term "Lexical Encoding" for my lack of a better one.</strong></p>\n\n<p>A Word is arguably the fundamental unit of communication as opposed to a Letter.  Unicode tries to assign a numeric value to each Letter of all known Alphabets

INFO:root:Data appended to questions_data.csv


{'tags': ['java', 'nlp', 'named-entity-recognition'], 'owner': {'account_id': 11931, 'reputation': 2640, 'user_id': 23238, 'user_type': 'registered', 'accept_rate': 67, 'profile_image': 'https://i.sstatic.net/5uVzs.jpg?s=256', 'display_name': 'webclimber', 'link': 'https://stackoverflow.com/users/23238/webclimber'}, 'is_answered': True, 'view_count': 23773, 'closed_date': 1398109959, 'accepted_answer_id': 390090, 'answer_count': 4, 'score': 27, 'last_activity_date': 1398108583, 'creation_date': 1223571294, 'question_id': 188176, 'link': 'https://stackoverflow.com/questions/188176/named-entity-recognition-libraries-for-java', 'closed_reason': 'Not suitable for this site', 'title': 'Named Entity Recognition Libraries for Java', 'body': '<p>I am looking for a simple but "good enough" Named Entity Recognition library (and dictionary) for java, I am looking to process emails and documents and extract some "basic information" like:\nNames, places, Address and Dates</p>\n\n<p>I\'ve been looki

INFO:root:Data appended to questions_data.csv


{'tags': ['php', 'nlp', 'stemming', 'snowball', 'porter-stemmer'], 'owner': {'account_id': 8483, 'reputation': 838, 'user_id': 15318, 'user_type': 'registered', 'accept_rate': 100, 'profile_image': 'https://www.gravatar.com/avatar/d25bf284b1ee8e827f1a22f01c66082e?s=256&d=identicon&r=PG', 'display_name': 'Dave', 'link': 'https://stackoverflow.com/users/15318/dave'}, 'is_answered': True, 'view_count': 37295, 'accepted_answer_id': 190885, 'answer_count': 3, 'score': 36, 'last_activity_date': 1441217372, 'creation_date': 1223635409, 'last_edit_date': 1237394608, 'question_id': 190775, 'content_license': 'CC BY-SA 2.5', 'link': 'https://stackoverflow.com/questions/190775/stemming-algorithm-that-produces-real-words', 'title': 'Stemming algorithm that produces real words', 'body': '<p>I need to take a paragraph of text and extract from it a list of "tags".  Most of this is quite straight forward. However I need some help now stemming the resulting word list to avoid duplicates. Example: Commu

INFO:root:Data appended to questions_data.csv
INFO:root:Data appended to questions_data.csv


{'tags': ['algorithm', 'statistics', 'nlp'], 'owner': {'account_id': 8717, 'reputation': 21332, 'user_id': 15842, 'user_type': 'registered', 'accept_rate': 59, 'profile_image': 'https://www.gravatar.com/avatar/58d77327a8f46041462b31c117e5c51a?s=256&d=identicon&r=PG', 'display_name': 'Gregg Lind', 'link': 'https://stackoverflow.com/users/15842/gregg-lind'}, 'is_answered': True, 'view_count': 9706, 'answer_count': 6, 'score': 21, 'last_activity_date': 1645638404, 'creation_date': 1223644987, 'last_edit_date': 1223673055, 'question_id': 191248, 'content_license': 'CC BY-SA 2.5', 'link': 'https://stackoverflow.com/questions/191248/latent-dirichlet-allocation-pitfalls-tips-and-programs', 'title': 'Latent Dirichlet Allocation, pitfalls, tips and programs', 'body': '<p>I\'m experimenting with <a href="http://en.wikipedia.org/wiki/Latent_Dirichlet_allocation" rel="noreferrer">Latent Dirichlet Allocation</a> for topic disambiguation and assignment, and I\'m looking for advice.</p>\n\n<ol>\n<li>

INFO:root:Data appended to questions_data.csv


{'tags': ['javascript', 'python', 'nlp'], 'owner': {'account_id': 2809, 'reputation': 3636, 'user_id': 1925263, 'user_type': 'registered', 'accept_rate': 89, 'profile_image': 'https://www.gravatar.com/avatar/38fa6a316b34296bb2532f09e13e8e2e?s=256&d=identicon&r=PG', 'display_name': 'btw0', 'link': 'https://stackoverflow.com/users/1925263/btw0'}, 'is_answered': True, 'view_count': 1250, 'accepted_answer_id': 196950, 'answer_count': 10, 'score': 9, 'last_activity_date': 1300858863, 'creation_date': 1223883132, 'question_id': 196924, 'content_license': 'CC BY-SA 2.5', 'link': 'https://stackoverflow.com/questions/196924/how-to-ensure-user-submit-only-english-text', 'title': 'How to ensure user submit only english text', 'body': '<p>I am building a project involving natural language processing, since the nlp module currently only deal with english text, so I have to make sure the user submitted content (not long, only several words) is in english. Are there established ways to achieve this? 

INFO:root:Data appended to questions_data.csv


{'tags': ['nlp', 'grammar'], 'owner': {'account_id': 26309, 'reputation': 2331, 'user_id': 68336, 'user_type': 'registered', 'accept_rate': 88, 'profile_image': 'https://i.sstatic.net/1UVjt.jpg?s=256', 'display_name': 'Enrico Murru', 'link': 'https://stackoverflow.com/users/68336/enrico-murru'}, 'is_answered': True, 'view_count': 17810, 'closed_date': 1320772230, 'accepted_answer_id': 202771, 'answer_count': 51, 'community_owned_date': 1224082933, 'score': 55, 'last_activity_date': 1495993330, 'creation_date': 1224017473, 'last_edit_date': 1335895584, 'question_id': 202750, 'link': 'https://stackoverflow.com/questions/202750/is-there-a-human-readable-programming-language', 'closed_reason': 'not constructive', 'title': 'Is there a human readable programming language?', 'body': "<p>I mean, is there a coded language with human style coding?\nFor example:</p>\n\n<pre><code>Create an object called MyVar and initialize it to 10;\nTake MyVar and call MyMethod() with parameters. . .\n</code></

INFO:root:Data appended to questions_data.csv
INFO:root:Data appended to questions_data.csv


{'tags': ['parsing', 'nlp'], 'owner': {'account_id': 10104, 'reputation': 7393, 'user_id': 18926, 'user_type': 'registered', 'accept_rate': 63, 'profile_image': 'https://www.gravatar.com/avatar/793978804ee7797173c64ea661ad6dda?s=256&d=identicon&r=PG', 'display_name': 'Greg', 'link': 'https://stackoverflow.com/users/18926/greg'}, 'is_answered': True, 'view_count': 18685, 'answer_count': 5, 'score': 31, 'last_activity_date': 1555903599, 'creation_date': 1224043039, 'last_edit_date': 1233101716, 'question_id': 203684, 'content_license': 'CC BY-SA 2.5', 'link': 'https://stackoverflow.com/questions/203684/how-can-i-use-nlp-to-parse-recipe-ingredients', 'title': 'How can I use NLP to parse recipe ingredients?', 'body': '<p>I need to parse recipe ingredients into amount, measurement, item, and description as applicable to the line, such as 1 cup flour, the peel of 2 lemons and 1 cup packed brown sugar etc. What would be the best way of doing this? I am interested in using python for the proje

INFO:root:Data appended to questions_data.csv
INFO:root:Data appended to questions_data.csv
INFO:root:Data appended to questions_data.csv


{'tags': ['algorithm', 'text', 'nlp', 'analysis', 'lexical-analysis'], 'owner': {'user_type': 'does_not_exist', 'display_name': 'Michael Julson'}, 'is_answered': True, 'view_count': 11035, 'answer_count': 7, 'score': 21, 'last_activity_date': 1302439869, 'creation_date': 1224542302, 'last_edit_date': 1224552868, 'question_id': 220187, 'content_license': 'CC BY-SA 2.5', 'link': 'https://stackoverflow.com/questions/220187/algorithms-or-libraries-for-textual-analysis-specifically-dominant-words-phra', 'title': 'Algorithms or libraries for textual analysis, specifically: dominant words, phrases across text, and collection of text', 'body': "<p>I'm working on a project where I need to analyze a page of text and collections of pages of text to determine dominant words.   I'd like to know if there is a library (prefer c# or java) that will handle the heavy lifting for me.  If not, is there an algorithm or multiple that would achieve my goals below.  </p>\n\n<p>What I want to do is similar to 

INFO:root:Data appended to questions_data.csv
INFO:root:Data appended to questions_data.csv
INFO:root:Data appended to questions_data.csv


{'tags': ['algorithm', 'nlp'], 'owner': {'user_type': 'does_not_exist', 'display_name': 'rouli'}, 'is_answered': True, 'view_count': 611, 'answer_count': 3, 'score': 4, 'last_activity_date': 1230211278, 'creation_date': 1224955538, 'last_edit_date': 1224969255, 'question_id': 236722, 'content_license': 'CC BY-SA 2.5', 'link': 'https://stackoverflow.com/questions/236722/how-does-googles-in-quotes-work', 'title': 'How does Google&#39;s In Quotes work?', 'body': '<p>I find Google\'s <a href="http://labs.google.com/inquotes/" rel="nofollow noreferrer">In Quotes</a> a really nifty application, and as a CS guy, I have to understand how it works. How do you think it turns news articles into a list of quotes attributed to specific persons?\nSure, there are some mistakes, but their algorithm seems to be smarter than just a simple heuristic or multiple regular expressions. For example, a quote can be attributed to someone even though his/her name was only mentioned in the last paragraph.</p>\n\n