# StackAPI

#### Import the necessary libraries here:

In [1]:
# your code here
import pandas as pd
import numpy as np

In [2]:
!pip install stackapi

Defaulting to user installation because normal site-packages is not writeable


In [3]:
from stackapi import StackAPI

In [4]:
SITE = StackAPI('stackoverflow')
#SITE.page_size = 100
#SITE.max_pages = 10
comments = SITE.fetch('comments')

In [5]:
comments

{'backoff': 0,
 'has_more': True,
 'page': 5,
 'quota_max': 300,
 'quota_remaining': 300,
 'total': 0,
 'items': [{'owner': {'reputation': 39938,
    'user_id': 106104,
    'user_type': 'registered',
    'accept_rate': 62,
    'profile_image': 'https://www.gravatar.com/avatar/c88b9310b92a937108ec4336c2832f92?s=128&d=identicon&r=PG',
    'display_name': 'user253751',
    'link': 'https://stackoverflow.com/users/106104/user253751'},
   'edited': False,
   'score': 0,
   'creation_date': 1584622112,
   'post_id': 60757105,
   'comment_id': 107496394},
  {'owner': {'reputation': 63133,
    'user_id': 2541560,
    'user_type': 'registered',
    'profile_image': 'https://i.stack.imgur.com/7H9TF.png?s=128&g=1',
    'display_name': 'Kayaman',
    'link': 'https://stackoverflow.com/users/2541560/kayaman'},
   'edited': False,
   'score': 0,
   'creation_date': 1584622111,
   'post_id': 60757069,
   'comment_id': 107496393},
  {'owner': {'reputation': 51,
    'user_id': 11172846,
    'user_type'

In [6]:
# Transforming imported comments into a DataFrame:

In [7]:
# First, create a dataframe with all the information from the posts 
posts = comments.get('items')

In [8]:
posts_df = pd.DataFrame(posts)
posts_df.head()

Unnamed: 0,owner,edited,score,creation_date,post_id,comment_id,reply_to_user
0,"{'reputation': 39938, 'user_id': 106104, 'user...",False,0,1584622112,60757105,107496394,
1,"{'reputation': 63133, 'user_id': 2541560, 'use...",False,0,1584622111,60757069,107496393,
2,"{'reputation': 51, 'user_id': 11172846, 'user_...",False,0,1584622111,60757346,107496392,
3,"{'reputation': 5524, 'user_id': 218640, 'user_...",False,0,1584622108,60756826,107496391,"{'reputation': 1588, 'user_id': 5746236, 'user..."
4,"{'reputation': 8413, 'user_id': 4111404, 'user...",False,0,1584622106,60757122,107496390,


In [9]:
posts_df.columns

Index(['owner', 'edited', 'score', 'creation_date', 'post_id', 'comment_id',
       'reply_to_user'],
      dtype='object')

In [10]:
# There are two information that are still dictionary types: `owner` and `reply_to_user` in 2 STEPS.

In [11]:
# STEP 1: retrieve data from 'owner' column and transform in a new DataFrame 'owner_df':

cols = pd.DataFrame([i.get('owner') for i in posts]).columns
new_cols = ['owner_'+col for col in cols] 

owner_df = pd.DataFrame([i.get('owner') for i in posts])
owner_df.columns = new_cols
owner_df.head()

Unnamed: 0,owner_reputation,owner_user_id,owner_user_type,owner_accept_rate,owner_profile_image,owner_display_name,owner_link
0,39938,106104,registered,62.0,https://www.gravatar.com/avatar/c88b9310b92a93...,user253751,https://stackoverflow.com/users/106104/user253751
1,63133,2541560,registered,,https://i.stack.imgur.com/7H9TF.png?s=128&g=1,Kayaman,https://stackoverflow.com/users/2541560/kayaman
2,51,11172846,registered,,https://www.gravatar.com/avatar/c3c88014b0dc5d...,J.Furmanek,https://stackoverflow.com/users/11172846/j-fur...
3,5524,218640,registered,67.0,https://www.gravatar.com/avatar/b46c5c5c70b433...,char m,https://stackoverflow.com/users/218640/char-m
4,8413,4111404,registered,80.0,https://lh5.googleusercontent.com/-kzPWJe4orps...,Luis Miguel Mej&#237;a Su&#225;rez,https://stackoverflow.com/users/4111404/luis-m...


In [12]:
# verifying if the 2 DataFrames have the same number of rows: 
owner_df.shape[0] == posts_df.shape[0]

True

In [56]:
# STEP 2: retrieve data from 'reply_to_user' column and transform in a new DataFrame 'reply_df':

In [45]:
# Creating new DataFrame `reply_df` from the post's df column `reply_to_user`
reply_df = pd.DataFrame([i.get('reply_to_user') for i in posts])
reply_df

Unnamed: 0,0
0,
1,
2,
3,"{'reputation': 1588, 'user_id': 5746236, 'user..."
4,
...,...
495,
496,
497,
498,


In [57]:
# filling NaN rows with an unavailable dictionary, in order to create a pattern for all the columns:
mask = posts_df.reply_to_user.isna()

empty_dict = ({
 'reputation': 'unavailable',
 'user_id': 'unavailable',
 'user_type': 'unavailable',
 'profile_image': 'unavailable',
 'display_name': 'unavailable',
 'link': 'unavailable'})

reply_df.loc[mask,:] = [empty_dict]
reply_df

Unnamed: 0,0
0,"{'reputation': 'unavailable', 'user_id': 'unav..."
1,"{'reputation': 'unavailable', 'user_id': 'unav..."
2,"{'reputation': 'unavailable', 'user_id': 'unav..."
3,"{'reputation': 1588, 'user_id': 5746236, 'user..."
4,"{'reputation': 'unavailable', 'user_id': 'unav..."
...,...
495,"{'reputation': 'unavailable', 'user_id': 'unav..."
496,"{'reputation': 'unavailable', 'user_id': 'unav..."
497,"{'reputation': 'unavailable', 'user_id': 'unav..."
498,"{'reputation': 'unavailable', 'user_id': 'unav..."


In [158]:
# creating new columns names:

cols = list(reply_df.loc[0,0].keys())
new_cols = ['reply2user_'+col for col in cols] 
new_cols

['reply2user_reputation',
 'reply2user_user_id',
 'reply2user_user_type',
 'reply2user_profile_image',
 'reply2user_display_name',
 'reply2user_link']

In [159]:
reply_df.loc[0,0]

{'reputation': 'unavailable',
 'user_id': 'unavailable',
 'user_type': 'unavailable',
 'profile_image': 'unavailable',
 'display_name': 'unavailable',
 'link': 'unavailable'}

In [160]:
reply_values = []
for i in range (reply_df.shape[0]):
    val = list(reply_df.loc[i,0].values())
    reply_values.append(val)
reply_values

[['unavailable',
  'unavailable',
  'unavailable',
  'unavailable',
  'unavailable',
  'unavailable'],
 ['unavailable',
  'unavailable',
  'unavailable',
  'unavailable',
  'unavailable',
  'unavailable'],
 ['unavailable',
  'unavailable',
  'unavailable',
  'unavailable',
  'unavailable',
  'unavailable'],
 [1588,
  5746236,
  'registered',
  'https://www.gravatar.com/avatar/d9e5b2b5e435d4f91a4d28c3892347d7?s=128&d=identicon&r=PG&f=1',
  'Yash Rami',
  'https://stackoverflow.com/users/5746236/yash-rami'],
 ['unavailable',
  'unavailable',
  'unavailable',
  'unavailable',
  'unavailable',
  'unavailable'],
 ['unavailable',
  'unavailable',
  'unavailable',
  'unavailable',
  'unavailable',
  'unavailable'],
 ['unavailable',
  'unavailable',
  'unavailable',
  'unavailable',
  'unavailable',
  'unavailable'],
 [1599,
  8188060,
  'registered',
  'https://i.stack.imgur.com/cxRLb.jpg?s=128&g=1',
  'JO3-W3B-D3V',
  'https://stackoverflow.com/users/8188060/jo3-w3b-d3v'],
 ['unavailable',
 

In [164]:
pd.DataFrame(reply_values)

Unnamed: 0,0,1,2,3,4,5,6
0,unavailable,unavailable,unavailable,unavailable,unavailable,unavailable,
1,unavailable,unavailable,unavailable,unavailable,unavailable,unavailable,
2,unavailable,unavailable,unavailable,unavailable,unavailable,unavailable,
3,1588,5746236,registered,https://www.gravatar.com/avatar/d9e5b2b5e435d4...,Yash Rami,https://stackoverflow.com/users/5746236/yash-rami,
4,unavailable,unavailable,unavailable,unavailable,unavailable,unavailable,
...,...,...,...,...,...,...,...
495,unavailable,unavailable,unavailable,unavailable,unavailable,unavailable,
496,unavailable,unavailable,unavailable,unavailable,unavailable,unavailable,
497,unavailable,unavailable,unavailable,unavailable,unavailable,unavailable,
498,unavailable,unavailable,unavailable,unavailable,unavailable,unavailable,


In [165]:
# verifying if the 2 DataFrames have the same number of rows: 
reply_df.shape[0] == posts_df.shape[0]

True

In [166]:
# Concatanating owner's information in a new DataFrame called `new_posts_df`:

new_posts_df = pd.concat([owner_df, posts_df[['edited', 'score', 'creation_date', 'post_id', 'comment_id','reply_to_user']]], axis=1)
new_posts_df.head(2)

Unnamed: 0,owner_reputation,owner_user_id,owner_user_type,owner_accept_rate,owner_profile_image,owner_display_name,owner_link,edited,score,creation_date,post_id,comment_id,reply_to_user
0,39938,106104,registered,62.0,https://www.gravatar.com/avatar/c88b9310b92a93...,user253751,https://stackoverflow.com/users/106104/user253751,False,0,1584622112,60757105,107496394,
1,63133,2541560,registered,,https://i.stack.imgur.com/7H9TF.png?s=128&g=1,Kayaman,https://stackoverflow.com/users/2541560/kayaman,False,0,1584622111,60757069,107496393,


In [167]:
new_posts_df.shape

(500, 13)

#### Question 1: Find the questions and answers of last month.

In [168]:
# your code here

# Transforming the 'creation_date' column in a readable DATE and TIME format
date_hour = pd.to_datetime(new_posts_df.loc[:,'creation_date'],unit='s')

# Storing these 2 types of information in 2 different variables 
date = date_hour.apply(lambda x: str(x).split(' ')[0].strip())
hour = date_hour.apply(lambda x: str(x).split(' ')[1].strip())

# creating 2 new columns to the `new_posts_df` DataFrame
new_posts_df['post_date'] = date
new_posts_df['post_hour'] = hour

In [169]:
new_posts_df.head(2)

Unnamed: 0,owner_reputation,owner_user_id,owner_user_type,owner_accept_rate,owner_profile_image,owner_display_name,owner_link,edited,score,creation_date,post_id,comment_id,reply_to_user,post_date,post_hour
0,39938,106104,registered,62.0,https://www.gravatar.com/avatar/c88b9310b92a93...,user253751,https://stackoverflow.com/users/106104/user253751,False,0,1584622112,60757105,107496394,,2020-03-19,12:48:32
1,63133,2541560,registered,,https://i.stack.imgur.com/7H9TF.png?s=128&g=1,Kayaman,https://stackoverflow.com/users/2541560/kayaman,False,0,1584622111,60757069,107496393,,2020-03-19,12:48:31


#### Question 2: Find the most voted question today with at least a score of 5 and tagged with 'python'. 

In [None]:
# your code here

#### Question 3: Find the answers with id 6784 and 6473.

In [177]:
# your code here
(new_posts_df.loc[:,'owner_user_id'] == 6473).sum()


0

In [172]:
new_posts_df.loc[:, :]

Unnamed: 0,owner_reputation,owner_user_id,owner_user_type,owner_accept_rate,owner_profile_image,owner_display_name,owner_link,edited,score,creation_date,post_id,comment_id,reply_to_user,post_date,post_hour
0,39938,106104,registered,62.0,https://www.gravatar.com/avatar/c88b9310b92a93...,user253751,https://stackoverflow.com/users/106104/user253751,False,0,1584622112,60757105,107496394,,2020-03-19,12:48:32
1,63133,2541560,registered,,https://i.stack.imgur.com/7H9TF.png?s=128&g=1,Kayaman,https://stackoverflow.com/users/2541560/kayaman,False,0,1584622111,60757069,107496393,,2020-03-19,12:48:31
2,51,11172846,registered,,https://www.gravatar.com/avatar/c3c88014b0dc5d...,J.Furmanek,https://stackoverflow.com/users/11172846/j-fur...,False,0,1584622111,60757346,107496392,,2020-03-19,12:48:31
3,5524,218640,registered,67.0,https://www.gravatar.com/avatar/b46c5c5c70b433...,char m,https://stackoverflow.com/users/218640/char-m,False,0,1584622108,60756826,107496391,"{'reputation': 1588, 'user_id': 5746236, 'user...",2020-03-19,12:48:28
4,8413,4111404,registered,80.0,https://lh5.googleusercontent.com/-kzPWJe4orps...,Luis Miguel Mej&#237;a Su&#225;rez,https://stackoverflow.com/users/4111404/luis-m...,False,0,1584622106,60757122,107496390,,2020-03-19,12:48:26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,1189,8462962,registered,43.0,https://www.gravatar.com/avatar/78712f98200d77...,excelguy,https://stackoverflow.com/users/8462962/excelguy,False,0,1584621089,39939158,107495866,,2020-03-19,12:31:29
496,2305,1269732,registered,,https://i.stack.imgur.com/BrHDr.png?s=128&g=1,Stock Overflaw,https://stackoverflow.com/users/1269732/stock-...,False,0,1584621088,60730866,107495865,,2020-03-19,12:31:28
497,5,12954906,registered,,https://www.gravatar.com/avatar/2a04372276e0f7...,AlexFAHM,https://stackoverflow.com/users/12954906/alexfahm,False,0,1584621086,60751289,107495864,,2020-03-19,12:31:26
498,39,10116894,registered,,https://www.gravatar.com/avatar/36203731b78fb8...,Martin Jaskulla,https://stackoverflow.com/users/10116894/marti...,False,0,1584621084,60756494,107495862,,2020-03-19,12:31:24
