# StackAPI

#### Import the necessary libraries here:

In [4]:
# pip install stackapi

Note: you may need to restart the kernel to use updated packages.


In [1]:
import json
import requests
import pandas as pd 
from stackapi import StackAPI
from datetime import datetime
from random import randint
from time import sleep


In [2]:
SITE = StackAPI('stackoverflow')

#### Question 1: Find the questions and answers of last month.

In [39]:
# Getting questions
# Due to the large dataset created in 1 month in stackoverflow just searched 2 days

SITE.max_pages=22

sleep(randint(2,4))
questions1 = SITE.fetch('questions', fromdate=datetime(2022,10,1), todate=datetime(2022,10,2))
sleep(randint(2,4))
questions2 = SITE.fetch('questions', fromdate=datetime(2022,10,2), todate=datetime(2022,10,3))


In [44]:
df1 = pd.DataFrame.from_dict(questions1)
df1 = pd.json_normalize(questions1['items'])
df1 = df1[['question_id','creation_date','title','owner.display_name']]
df1["creation_date"] = pd.to_datetime(df1["creation_date"], unit='s')
df1 = df1.sort_values(by=['creation_date'], ascending=True)

In [45]:
df2 = pd.DataFrame.from_dict(questions2)
df2 = pd.json_normalize(questions2['items'])
df2 = df2[['question_id','creation_date','title','owner.display_name']]
df2["creation_date"] = pd.to_datetime(df2["creation_date"], unit='s')
df2 = df2.sort_values(by=['creation_date'], ascending=True)

In [46]:
questions_df = pd.concat([df1, df2], axis=0) 
questions_df

Unnamed: 0,question_id,creation_date,title,owner.display_name
1685,73914952,2022-10-01 00:03:28,Configuring Filebeat to ingest and parse .ndjs...,Feng
354,73914960,2022-10-01 00:06:24,Exclude items from navbar toggle,Shaho
1370,73914968,2022-10-01 00:09:04,How to use spaces and periods in an Ansible pl...,David Hill
73,73914969,2022-10-01 00:09:12,.NET MAUI: Check checkbox when clicking on label,Terra
114,73914970,2022-10-01 00:09:20,PyQt5 QCheckbox with image,Rimuto
...,...,...,...,...
303,73930104,2022-10-02 23:58:31,Set timeout function not slowing down,Grmln
229,73930106,2022-10-02 23:59:03,Why is Updating Data In MySQL Table throws Err...,JCprog
289,73930107,2022-10-02 23:59:04,Conditional Column: Order of Tests Matters?,Michael Sheaver
853,73930108,2022-10-02 23:59:05,How to properly capture/consume onTouchEvent i...,user1865027


In [12]:
# Getting answers
# Due to the large dataset created in 1 month in stackoverflow just searched 2 days

SITE.max_pages=22

sleep(randint(6,10))
answers1 = SITE.fetch('answers', fromdate=datetime(2022,10,1), todate=datetime(2022,10,2))
sleep(randint(6,10))
answers2 = SITE.fetch('answers', fromdate=datetime(2022,10,2), todate=datetime(2022,10,3))

In [14]:
df3 = pd.DataFrame.from_dict(answers1)
df3 = pd.json_normalize(answers1['items'])
df3 = df3[['answer_id','question_id','creation_date','owner.display_name']]
df3["creation_date"] = pd.to_datetime(df3["creation_date"], unit='s')
df3 = df3.sort_values(by=['creation_date'], ascending=True)

In [15]:
df4 = pd.DataFrame.from_dict(answers2)
df4 = pd.json_normalize(answers2['items'])
df4 = df4[['answer_id','question_id','creation_date','owner.display_name']]
df4["creation_date"] = pd.to_datetime(df4["creation_date"], unit='s')
df4 = df4.sort_values(by=['creation_date'], ascending=True)

In [16]:
answers_df = pd.concat([df3, df4], axis=0) 
answers_df

Unnamed: 0,answer_id,question_id,creation_date,owner.display_name
219,73914956,73914414,2022-10-01 00:05:24,Christian
75,73914962,73684217,2022-10-01 00:06:36,Silverhoft
127,73915027,73904491,2022-10-01 00:20:51,Davislor
217,73915063,73904491,2022-10-01 00:31:18,Peter Cordes
107,73915075,73913020,2022-10-01 00:35:01,Nabil A Navab
...,...,...,...,...
249,73930101,73761366,2022-10-02 23:58:09,ajuice443
248,73930105,73930022,2022-10-02 23:58:57,Marcin
247,73930109,73929944,2022-10-02 23:59:16,r2evans
246,73930112,73930070,2022-10-02 23:59:33,pranayroni


#### Question 2: Find the most voted question today with at least a score of 5 and tagged with 'python'. 

In [47]:
# your code here

sleep(randint(6,10))
voted_question = SITE.fetch('questions', fromdate=datetime(2022,10,30), todate=datetime(2022,11,4), min=5, sort='votes', tagged='python')
len(voted_question['items'])

In [59]:
df = pd.DataFrame.from_dict(voted_question)
df = pd.json_normalize(voted_question['items'])
df.head(1)

Unnamed: 0,tags,is_answered,view_count,answer_count,score,last_activity_date,creation_date,last_edit_date,question_id,content_license,link,title,owner.reputation,owner.user_id,owner.user_type,owner.accept_rate,owner.profile_image,owner.display_name,owner.link,accepted_answer_id
0,"[python, python-dataclasses]",True,139,3,8,1667497326,1667213232,1667213000.0,74262112,CC BY-SA 4.0,https://stackoverflow.com/questions/74262112/d...,dataclasses: how to ignore default values usin...,2507,412137,registered,99.0,https://www.gravatar.com/avatar/708f77715d7732...,Nadav,https://stackoverflow.com/users/412137/nadav,


#### Question 3: Find the answers with id 6784 and 6473.

In [18]:
# your code here

sleep(randint(6,10))
id_answers = SITE.fetch('answers', ids=[6784, 6473])

In [21]:
df = pd.DataFrame.from_dict(id_answers)
df = pd.json_normalize(id_answers['items'])
df

Unnamed: 0,is_accepted,score,last_activity_date,creation_date,answer_id,question_id,content_license,owner.reputation,owner.user_id,owner.user_type,owner.accept_rate,owner.profile_image,owner.display_name,owner.link
0,False,0,1218291583,1218291583,6784,6778,CC BY-SA 2.5,30451,758,registered,77,https://www.gravatar.com/avatar/0b4adc19284ed0...,Mark Renouf,https://stackoverflow.com/users/758/mark-renouf
