###Normalização e Manipulação de Arquivo JSON 🔪


##### **Etapa 01:** *Importação e Normalização dos Dados*

In [138]:
# Importação de bibliotecas
import requests
import json
import pandas as pd

In [139]:
# Realização a requisição do conjunto de dados
url = 'https://jsonplaceholder.typicode.com/users'
resultado = requests.get(url)

In [140]:
# Transformando a string JSON em um objeto Python
dados = json.loads(resultado.text)

In [141]:
# Convertendo a estrutura dos dados para dataframe
pd.DataFrame(dados)

Unnamed: 0,id,name,username,email,address,phone,website,company
0,1,Leanne Graham,Bret,Sincere@april.biz,"{'street': 'Kulas Light', 'suite': 'Apt. 556',...",1-770-736-8031 x56442,hildegard.org,"{'name': 'Romaguera-Crona', 'catchPhrase': 'Mu..."
1,2,Ervin Howell,Antonette,Shanna@melissa.tv,"{'street': 'Victor Plains', 'suite': 'Suite 87...",010-692-6593 x09125,anastasia.net,"{'name': 'Deckow-Crist', 'catchPhrase': 'Proac..."
2,3,Clementine Bauch,Samantha,Nathan@yesenia.net,"{'street': 'Douglas Extension', 'suite': 'Suit...",1-463-123-4447,ramiro.info,"{'name': 'Romaguera-Jacobson', 'catchPhrase': ..."
3,4,Patricia Lebsack,Karianne,Julianne.OConner@kory.org,"{'street': 'Hoeger Mall', 'suite': 'Apt. 692',...",493-170-9623 x156,kale.biz,"{'name': 'Robel-Corkery', 'catchPhrase': 'Mult..."
4,5,Chelsey Dietrich,Kamren,Lucio_Hettinger@annie.ca,"{'street': 'Skiles Walks', 'suite': 'Suite 351...",(254)954-1289,demarco.info,"{'name': 'Keebler LLC', 'catchPhrase': 'User-c..."
5,6,Mrs. Dennis Schulist,Leopoldo_Corkery,Karley_Dach@jasper.info,"{'street': 'Norberto Crossing', 'suite': 'Apt....",1-477-935-8478 x6430,ola.org,"{'name': 'Considine-Lockman', 'catchPhrase': '..."
6,7,Kurtis Weissnat,Elwyn.Skiles,Telly.Hoeger@billy.biz,"{'street': 'Rex Trail', 'suite': 'Suite 280', ...",210.067.6132,elvis.io,"{'name': 'Johns Group', 'catchPhrase': 'Config..."
7,8,Nicholas Runolfsdottir V,Maxime_Nienow,Sherwood@rosamond.me,"{'street': 'Ellsworth Summit', 'suite': 'Suite...",586.493.6943 x140,jacynthe.com,"{'name': 'Abernathy Group', 'catchPhrase': 'Im..."
8,9,Glenna Reichert,Delphine,Chaim_McDermott@dana.io,"{'street': 'Dayna Park', 'suite': 'Suite 449',...",(775)976-6794 x41206,conrad.com,"{'name': 'Yost and Sons', 'catchPhrase': 'Swit..."
9,10,Clementina DuBuque,Moriah.Stanton,Rey.Padberg@karina.biz,"{'street': 'Kattie Turnpike', 'suite': 'Suite ...",024-648-3804,ambrose.net,"{'name': 'Hoeger LLC', 'catchPhrase': 'Central..."


In [142]:
# Normalizando o dataframe
df_users = pd.json_normalize(dados)

####**Etapa 02:** *Manipulação do Dataframe*

In [143]:
# Obtendo as informações gerais do dataframe
df_users.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   id                   10 non-null     int64 
 1   name                 10 non-null     object
 2   username             10 non-null     object
 3   email                10 non-null     object
 4   phone                10 non-null     object
 5   website              10 non-null     object
 6   address.street       10 non-null     object
 7   address.suite        10 non-null     object
 8   address.city         10 non-null     object
 9   address.zipcode      10 non-null     object
 10  address.geo.lat      10 non-null     object
 11  address.geo.lng      10 non-null     object
 12  company.name         10 non-null     object
 13  company.catchPhrase  10 non-null     object
 14  company.bs           10 non-null     object
dtypes: int64(1), object(14)
memory usage: 1.3+ KB


In [144]:
# Alterando os nomes das colunas de latitude e longitude
df_users.rename(columns={'address.geo.lat': 'latitude', 'address.geo.lng': 'longitude'}, inplace=True)
df_users.head()

Unnamed: 0,id,name,username,email,phone,website,address.street,address.suite,address.city,address.zipcode,latitude,longitude,company.name,company.catchPhrase,company.bs
0,1,Leanne Graham,Bret,Sincere@april.biz,1-770-736-8031 x56442,hildegard.org,Kulas Light,Apt. 556,Gwenborough,92998-3874,-37.3159,81.1496,Romaguera-Crona,Multi-layered client-server neural-net,harness real-time e-markets
1,2,Ervin Howell,Antonette,Shanna@melissa.tv,010-692-6593 x09125,anastasia.net,Victor Plains,Suite 879,Wisokyburgh,90566-7771,-43.9509,-34.4618,Deckow-Crist,Proactive didactic contingency,synergize scalable supply-chains
2,3,Clementine Bauch,Samantha,Nathan@yesenia.net,1-463-123-4447,ramiro.info,Douglas Extension,Suite 847,McKenziehaven,59590-4157,-68.6102,-47.0653,Romaguera-Jacobson,Face to face bifurcated interface,e-enable strategic applications
3,4,Patricia Lebsack,Karianne,Julianne.OConner@kory.org,493-170-9623 x156,kale.biz,Hoeger Mall,Apt. 692,South Elvis,53919-4257,29.4572,-164.299,Robel-Corkery,Multi-tiered zero tolerance productivity,transition cutting-edge web services
4,5,Chelsey Dietrich,Kamren,Lucio_Hettinger@annie.ca,(254)954-1289,demarco.info,Skiles Walks,Suite 351,Roscoeview,33263,-31.8129,62.5342,Keebler LLC,User-centric fault-tolerant solution,revolutionize end-to-end systems


In [145]:
# Criando um novo dataframe para armazenar os dados importantes
colunas_importantes = ['name', 'username',
                       'latitude', 'longitude']
df_users_location = df_users[colunas_importantes]
df_users_location.head()

Unnamed: 0,name,username,latitude,longitude
0,Leanne Graham,Bret,-37.3159,81.1496
1,Ervin Howell,Antonette,-43.9509,-34.4618
2,Clementine Bauch,Samantha,-68.6102,-47.0653
3,Patricia Lebsack,Karianne,29.4572,-164.299
4,Chelsey Dietrich,Kamren,-31.8129,62.5342


In [146]:
# Unindo as duas primeiras colunas do dataframe
df_users_location['full_name'] = df_users_location['name'] + ' ' + df_users_location['username']
df_users_location.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_users_location['full_name'] = df_users_location['name'] + ' ' + df_users_location['username']


Unnamed: 0,name,username,latitude,longitude,full_name
0,Leanne Graham,Bret,-37.3159,81.1496,Leanne Graham Bret
1,Ervin Howell,Antonette,-43.9509,-34.4618,Ervin Howell Antonette
2,Clementine Bauch,Samantha,-68.6102,-47.0653,Clementine Bauch Samantha
3,Patricia Lebsack,Karianne,29.4572,-164.299,Patricia Lebsack Karianne
4,Chelsey Dietrich,Kamren,-31.8129,62.5342,Chelsey Dietrich Kamren


In [147]:
# Excluindo as duas primeiras colunas do dataframe
df_users_location.drop(columns=['name', 'username'], inplace=True)
df_users_location.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_users_location.drop(columns=['name', 'username'], inplace=True)


Unnamed: 0,latitude,longitude,full_name
0,-37.3159,81.1496,Leanne Graham Bret
1,-43.9509,-34.4618,Ervin Howell Antonette
2,-68.6102,-47.0653,Clementine Bauch Samantha
3,29.4572,-164.299,Patricia Lebsack Karianne
4,-31.8129,62.5342,Chelsey Dietrich Kamren


In [148]:
# Ordenando o dataframe pelos valores da latitude e longitude
df_users_location.sort_values(by=['latitude', 'longitude'], ascending=True, inplace=True)
df_users_location.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_users_location.sort_values(by=['latitude', 'longitude'], ascending=True, inplace=True)


Unnamed: 0,latitude,longitude,full_name
7,-14.399,-120.7677,Nicholas Runolfsdottir V Maxime_Nienow
4,-31.8129,62.5342,Chelsey Dietrich Kamren
0,-37.3159,81.1496,Leanne Graham Bret
9,-38.2386,57.2232,Clementina DuBuque Moriah.Stanton
1,-43.9509,-34.4618,Ervin Howell Antonette


In [149]:
# Alterando o tipo das colunas de latitude e longitude para float
df_users_location['latitude'] = df_users_location['latitude'].astype(float)
df_users_location['longitude'] = df_users_location['longitude'].astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_users_location['latitude'] = df_users_location['latitude'].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_users_location['longitude'] = df_users_location['longitude'].astype(float)


In [150]:
# Arredondando os valores das colunas de latitude e longitude
df_users_location[['latitude', 'longitude']].round(3)

Unnamed: 0,latitude,longitude
7,-14.399,-120.768
4,-31.813,62.534
0,-37.316,81.15
9,-38.239,57.223
1,-43.951,-34.462
2,-68.61,-47.065
5,-71.42,71.748
8,24.646,-168.889
6,24.892,21.898
3,29.457,-164.299


In [None]:
# Visualizando geograficamente a distribuição dos clientes


####**Etapa 03:** *Exportação dos Dados*

In [151]:
# Visualizando as informações do dataframe antes de exportá-lo
df_users_location.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 7 to 3
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   latitude   10 non-null     float64
 1   longitude  10 non-null     float64
 2   full_name  10 non-null     object 
dtypes: float64(2), object(1)
memory usage: 320.0+ bytes


In [152]:
# Exportando o conjunto de dados necessário para o contexto
df_users_location.to_csv('users_location.csv', index=False)