# Lesson - JSON Module, List Comprehensions and Lambda Functions

In [1]:
# Sample JSON document (an array of two match objects) held as a Python string,
# used as the input for json.loads().
world_cup_str = """
[
    {
        "team_1": "France",
        "team_2": "Croatia",
        "game_type": "Final",
        "score" : [4, 2]
    },
    {
        "team_1": "Belgium",
        "team_2": "England",
        "game_type": "3rd/4th Playoff",
        "score" : [2, 0]
    }
]
"""


In [2]:
import json

# json.loads() ("load string") converts JSON text into the equivalent Python
# objects - here a list of dictionaries.
world_cup_obj = json.loads(world_cup_str)
print(world_cup_obj)

[{'team_1': 'France', 'team_2': 'Croatia', 'game_type': 'Final', 'score': [4, 2]}, {'team_1': 'Belgium', 'team_2': 'England', 'game_type': '3rd/4th Playoff', 'score': [2, 0]}]


In [3]:
# Read the JSON file and do some preliminary exploration.
# The context manager closes the file handle as soon as parsing is done, and
# the encoding is pinned to UTF-8 (the standard JSON encoding) instead of
# relying on the platform default.
with open("hn_2014.json", encoding="utf-8") as file:
    hn = json.load(file)
print(type(hn))
print(len(hn))
print(type(hn[0]))  # list entries are dictionaries
print(hn[0].keys())

<class 'list'>
35806
<class 'dict'>
dict_keys(['author', 'numComments', 'points', 'url', 'storyText', 'createdAt', 'tags', 'createdAtI', 'title', 'objectId'])


In [4]:
# Deleting Dictionary Keys
def del_key(dict_, key):
    """Return a shallow copy of ``dict_`` with ``key`` removed.

    The dictionary passed in is left untouched. Raises ``KeyError`` when
    ``key`` is not present.
    """
    trimmed = dict_.copy()
    trimmed.pop(key)  # no default supplied, so a missing key still raises KeyError
    return trimmed
# Loop version: build a new list in which every story has lost its
# 'createdAtI' key (del_key returns a copy, so hn itself is not modified).
hn_clean = []
for story in hn:
    hn_clean.append(del_key(story, 'createdAtI'))

In [5]:
# List comprehension: one expression that applies del_key to every story in hn
# and collects the results in a new list.
hn_clean = [del_key(story, 'createdAtI') for story in hn]
print(hn_clean[0])

{'author': 'dragongraphics', 'numComments': 0, 'points': 2, 'url': 'http://ashleynolan.co.uk/blog/are-we-getting-too-sassy', 'storyText': '', 'createdAt': '2014-05-29T08:07:50Z', 'tags': ['story', 'author_dragongraphics', 'story_7815238'], 'title': 'Are we getting too Sassy? Weighing up micro-optimisation vs. maintainability', 'objectId': '7815238'}


#### List comprehensions for transforming and creating lists
List comprehensions can be used for many different things. Three common applications are:

- Transforming a list
- Creating a new list
- Reducing a list

Below, we use a list comprehension to extract the `url` value from each dictionary in `hn_clean` and assign the result to `urls`.

In [6]:
# Transform: pull the 'url' value out of every story dictionary.
urls = [story['url'] for story in hn_clean]
print(urls[:2])
print(type(urls))

['http://ashleynolan.co.uk/blog/are-we-getting-too-sassy', 'http://spectrum.ieee.org/automaton/robotics/home-robots/telemba-telepresence-robot']
<class 'list'>


In [7]:
# Reducing a list with an "if" clause in a list comprehension

# Version 1 - explicit loop: keep only the stories that have comments
has_comments = []
for story in hn_clean:
    if story['numComments'] > 0:
        has_comments.append(story)
num_comments = len(has_comments)
print(num_comments)

# Version 2 - the same filter written as a list comprehension
has_comments = [story for story in hn_clean if story['numComments'] > 0]
num_comments = len(has_comments)
print(num_comments)

# Stories with more than 1000 points
thousand_points = [story for story in hn_clean if story['points'] > 1000]
num_thousand_points = len(thousand_points)
print(num_thousand_points)


9279
9279
8


In [8]:
# Passing Functions as Arguments
def key_func(hn_dict):
    """Key function: return the 'numComments' value of one story dictionary."""
    comment_count = hn_dict['numComments']
    return comment_count
# The function object itself (no parentheses) is passed as the key argument;
# max() calls it on each story to decide which one is the largest.
most_comments = max(hn_clean, key=key_func)
print(most_comments)

{'author': 'platz', 'numComments': 1208, 'points': 889, 'url': 'https://blog.mozilla.org/blog/2014/04/03/brendan-eich-steps-down-as-mozilla-ceo/', 'storyText': None, 'createdAt': '2014-04-03T19:02:53Z', 'tags': ['story', 'author_platz', 'story_7525198'], 'title': 'Brendan Eich Steps Down as Mozilla CEO', 'objectId': '7525198'}


In [9]:
# Lambda functions are anonymous, single-expression functions,
# e.g. lambda x: x or lambda x: x + 1
# Exercise: write the lambda equivalent of the following function:
# def multiply(a, b):
#    return a * b
# The lambda is deliberately left as the cell's last expression (not assigned
# to a name), so the notebook displays the resulting function object.
lambda a,b:a*b

<function __main__.<lambda>(a, b)>

In [10]:
# Using lambda functions to analyze JSON data:
# sort the stories by points, highest first, using an inline key function.
hn_sorted_points = sorted(
    hn_clean,
    key=lambda story: story['points'],
    reverse=True,
)

# Titles of the five highest-scoring stories.
top_5_titles = [story['title'] for story in hn_sorted_points[:5]]

print(top_5_titles)


['2048', 'Today is The Day We Fight Back', 'Wozniak: “Actually, the movie was largely a lie about me”', 'Microsoft Open Sources C# Compiler', 'Elon Musk: To the People of New Jersey']


### Reading JSON files into pandas
- Pandas has the `pandas.read_json()` [function](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_json.html), which is designed to read JSON from either a file or a JSON string.
- If the JSON has already been converted to a Python object with `json.loads()` or `json.load()`, we can simply pass the resulting list of dictionaries directly to the `pandas.DataFrame()` [constructor](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html#pandas.DataFrame) to convert it to a dataframe:
```
import json
json_obj=json.loads(json_str)
print(json_obj)

import pandas as pd
json_df = pd.DataFrame(json_obj)
print(json_df)

age                 favorite_foods    name
0   36             [Pumpkin, Oatmeal]  Sabine
1   40    [Chicken, Pizza, Chocolate]     Zoe
2   40                 [Caesar Salad]   Heidi
```

In [11]:
import pandas as pd

# Pass the list of dictionaries straight to the DataFrame constructor.
hn_df = pd.DataFrame(hn_clean)
print(hn_df.head(2))

           author  numComments  points  \
0  dragongraphics            0       2   
1             jcr            0       1   

                                                 url storyText  \
0  http://ashleynolan.co.uk/blog/are-we-getting-t...             
1  http://spectrum.ieee.org/automaton/robotics/ho...             

              createdAt                                           tags  \
0  2014-05-29T08:07:50Z  [story, author_dragongraphics, story_7815238]   
1  2014-05-29T08:05:58Z             [story, author_jcr, story_7815234]   

                                               title objectId  
0  Are we getting too Sassy? Weighing up micro-op...  7815238  
1  Telemba Turns Your Old Roomba and Tablet Into ...  7815234  


In [12]:
# In the preview above, the 'tags' column looks like it holds lists -
# check what is actually stored in it.
tags = hn_df['tags']
print(tags.dtype)  # pandas reports the generic "object" dtype

tags_types = tags.apply(type)  # Python type of each individual entry
type_counts = tags_types.value_counts(dropna=False)
print(type_counts)

object
<class 'list'>    35806
Name: tags, dtype: int64


In [13]:
# Keep only the entries whose tag list has exactly four items and count them.
tags = hn_df['tags']
four_tags = tags.loc[tags.apply(len) == 4]
print(four_tags.count())

2347


#### Extracting tags using apply() with a lambda function
- We could use the following function to extract the last (4th) item of a `tags` list, returning `None` for lists that do not have exactly four items:
```
def extract_tag(l):
    if len(l) == 4:
        return l[-1]
    else:
        return None
 ```
We could then use `Series.apply()` to apply this function as is to the series.
- But to practice working with lambda functions, we will instead use the `ternary operator`, a compact one-line form of the `if` statement: `[on_true] if [expression] else [on_false]`.

In [14]:
# Keep only the 4th tag of each story (None when the list does not have
# exactly four entries). On a list input the lambda is equivalent to:
# def extract_tag(l):
#     return l[-1] if len(l) == 4 else None
#
# This cell overwrites hn_df['tags'] in place, so when it is run a second time
# the column already holds strings / None instead of lists, and a plain
# len(l) would raise TypeError on None. Values that are already extracted
# (strings) are therefore passed through unchanged, which makes the cell safe
# to re-run; the result of the first run is unaffected.
cleaned_tags = hn_df['tags'].apply(
    lambda l: l[-1] if isinstance(l, list) and len(l) == 4
    else l if isinstance(l, str)
    else None
)
hn_df['tags'] = cleaned_tags
print(hn_df['tags'][:10])

0    None
1    None
2    None
3    None
4    None
5    None
6    None
7    None
8    None
9    None
Name: tags, dtype: object
