In [1]:
import requests
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
url = 'https://jsonplaceholder.typicode.com/users'
REQUEST_TIMEOUT_SECONDS = 10  # upper bound so a stalled server cannot hang the kernel

try:
    # Send an HTTP GET request to the URL. requests applies no timeout unless
    # one is passed, so it is given explicitly here.
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)

    if response.status_code == 200:
        data = response.json()  # Parse the JSON response
        print("Data fetched successfully.")
    else:
        # `data` is intentionally left unassigned on failure.
        print("HTTP request failed with status code:", response.status_code)

except (requests.RequestException, ValueError) as e:
    # RequestException covers connection, timeout and other request-level
    # failures; ValueError covers a response body that is not valid JSON.
    # Any other exception is a programming error and is allowed to propagate
    # instead of being hidden behind a printed message.
    print(f"An error occurred: {e}")

Data fetched successfully.


In [6]:
# Show the first element of the parsed JSON response to inspect its structure
print("First entry in the data:", data[0])

First entry in the data: {'id': 1, 'name': 'Leanne Graham', 'username': 'Bret', 'email': 'Sincere@april.biz', 'address': {'street': 'Kulas Light', 'suite': 'Apt. 556', 'city': 'Gwenborough', 'zipcode': '92998-3874', 'geo': {'lat': '-37.3159', 'lng': '81.1496'}}, 'phone': '1-770-736-8031 x56442', 'website': 'hildegard.org', 'company': {'name': 'Romaguera-Crona', 'catchPhrase': 'Multi-layered client-server neural-net', 'bs': 'harness real-time e-markets'}}


In [7]:
# Build a table from the user records: one row per user, one column per top-level key
df = pd.DataFrame(data=data)

# Preview the first five rows
df.head(n=5)

Unnamed: 0,id,name,username,email,address,phone,website,company
0,1,Leanne Graham,Bret,Sincere@april.biz,"{'street': 'Kulas Light', 'suite': 'Apt. 556',...",1-770-736-8031 x56442,hildegard.org,"{'name': 'Romaguera-Crona', 'catchPhrase': 'Mu..."
1,2,Ervin Howell,Antonette,Shanna@melissa.tv,"{'street': 'Victor Plains', 'suite': 'Suite 87...",010-692-6593 x09125,anastasia.net,"{'name': 'Deckow-Crist', 'catchPhrase': 'Proac..."
2,3,Clementine Bauch,Samantha,Nathan@yesenia.net,"{'street': 'Douglas Extension', 'suite': 'Suit...",1-463-123-4447,ramiro.info,"{'name': 'Romaguera-Jacobson', 'catchPhrase': ..."
3,4,Patricia Lebsack,Karianne,Julianne.OConner@kory.org,"{'street': 'Hoeger Mall', 'suite': 'Apt. 692',...",493-170-9623 x156,kale.biz,"{'name': 'Robel-Corkery', 'catchPhrase': 'Mult..."
4,5,Chelsey Dietrich,Kamren,Lucio_Hettinger@annie.ca,"{'street': 'Skiles Walks', 'suite': 'Suite 351...",(254)954-1289,demarco.info,"{'name': 'Keebler LLC', 'catchPhrase': 'User-c..."


In the original JSON data, *address* and *company* are nested dictionaries.

Flattening means converting nested JSON objects (dictionaries inside dictionaries) into a flat table format where each nested key becomes its own column.

In [9]:
# Expand nested objects such as 'address' and 'company' into their own
# dot-separated columns (e.g. 'address.city', 'company.name')
df_flat = pd.json_normalize(data, sep='.')

# Preview the first five rows of the flattened table
df_flat.head(n=5)

Unnamed: 0,id,name,username,email,phone,website,address.street,address.suite,address.city,address.zipcode,address.geo.lat,address.geo.lng,company.name,company.catchPhrase,company.bs
0,1,Leanne Graham,Bret,Sincere@april.biz,1-770-736-8031 x56442,hildegard.org,Kulas Light,Apt. 556,Gwenborough,92998-3874,-37.3159,81.1496,Romaguera-Crona,Multi-layered client-server neural-net,harness real-time e-markets
1,2,Ervin Howell,Antonette,Shanna@melissa.tv,010-692-6593 x09125,anastasia.net,Victor Plains,Suite 879,Wisokyburgh,90566-7771,-43.9509,-34.4618,Deckow-Crist,Proactive didactic contingency,synergize scalable supply-chains
2,3,Clementine Bauch,Samantha,Nathan@yesenia.net,1-463-123-4447,ramiro.info,Douglas Extension,Suite 847,McKenziehaven,59590-4157,-68.6102,-47.0653,Romaguera-Jacobson,Face to face bifurcated interface,e-enable strategic applications
3,4,Patricia Lebsack,Karianne,Julianne.OConner@kory.org,493-170-9623 x156,kale.biz,Hoeger Mall,Apt. 692,South Elvis,53919-4257,29.4572,-164.299,Robel-Corkery,Multi-tiered zero tolerance productivity,transition cutting-edge web services
4,5,Chelsey Dietrich,Kamren,Lucio_Hettinger@annie.ca,(254)954-1289,demarco.info,Skiles Walks,Suite 351,Roscoeview,33263,-31.8129,62.5342,Keebler LLC,User-centric fault-tolerant solution,revolutionize end-to-end systems


In [12]:
# Display basic information about the DataFrame:
# index range, column names, non-null counts, dtypes and memory usage
df_flat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   id                   10 non-null     int64 
 1   name                 10 non-null     object
 2   username             10 non-null     object
 3   email                10 non-null     object
 4   phone                10 non-null     object
 5   website              10 non-null     object
 6   address.street       10 non-null     object
 7   address.suite        10 non-null     object
 8   address.city         10 non-null     object
 9   address.zipcode      10 non-null     object
 10  address.geo.lat      10 non-null     object
 11  address.geo.lng      10 non-null     object
 12  company.name         10 non-null     object
 13  company.catchPhrase  10 non-null     object
 14  company.bs           10 non-null     object
dtypes: int64(1), object(14)
memory usage: 1.3+ KB


In [14]:
# Tally how many users live in each city
city_counts = df_flat['address.city'].value_counts()

# Heading and counts go out in one call; sep="\n" puts each on its own line
print("\nNumber of users per city:", city_counts, sep="\n")


Number of users per city:
address.city
Gwenborough       1
Wisokyburgh       1
McKenziehaven     1
South Elvis       1
Roscoeview        1
South Christy     1
Howemouth         1
Aliyaview         1
Bartholomebury    1
Lebsackbury       1
Name: count, dtype: int64


In [17]:
# Collect the distinct company names
unique_companies = df_flat['company.name'].unique()

# Heading and names go out in one call; sep="\n" puts each on its own line
print("\nUnique companies:", unique_companies, sep="\n")


Unique companies:
['Romaguera-Crona' 'Deckow-Crist' 'Romaguera-Jacobson' 'Robel-Corkery'
 'Keebler LLC' 'Considine-Lockman' 'Johns Group' 'Abernathy Group'
 'Yost and Sons' 'Hoeger LLC']


In [18]:
# Visit every row of the DataFrame and report the users who live in Aliyaview
for index, row in df_flat.iterrows():
    # Skip rows for any other city
    if row['address.city'] != 'Aliyaview':
        continue
    print(f"User {row['name']} lives in {row['address.city']}")

User Nicholas Runolfsdottir V lives in Aliyaview


In the statement `for index, row in df_flat.iterrows():`

the loop visits each **row** of the DataFrame in turn.

*index* is the row’s index label, and *row* is a pandas Series holding all the data in that row.

In [19]:
# Calculate length of each catchPhrase string
# (.str.len() is applied element-wise, giving one character count per row)
catchphrase_lengths = df_flat['company.catchPhrase'].str.len()

# Calculate the average length across all rows
average_length = catchphrase_lengths.mean()

# Report the average formatted to two decimal places
print(f"The average length of company catch phrases is {average_length:.2f} characters.")

The average length of company catch phrases is 34.40 characters.
