# Projeto API Carbon Dioxide

## Importação da Biblioteca

In [2]:
import pandas as pd

from pandas.io.json import json_normalize

In [3]:
# Read the source JSON file into a DataFrame.
# NOTE(review): the absolute /content path looks like a Google Colab location — confirm before running elsewhere.

dfjson = pd.read_json('/content/carbon_dioxide.json')


In [4]:
# List the dtype of every column in the raw JSON frame.

dfjson.dtypes

co2    object
dtype: object

In [5]:
# Flatten the records stored in the 'co2' column into a tabular frame
# (one column per record key).

dfcsv = pd.json_normalize(dfjson['co2'])

In [6]:
# Display the normalized frame.

dfcsv

Unnamed: 0,year,month,day,cycle,trend
0,2012,1,20,393.35,391.59
1,2012,1,21,393.36,391.59
2,2012,1,22,393.37,391.60
3,2012,1,23,393.38,391.61
4,2012,1,24,393.40,391.61
...,...,...,...,...,...
3956,2022,11,19,418.04,418.01
3957,2022,11,20,418.08,418.01
3958,2022,11,21,418.13,418.02
3959,2022,11,22,418.18,418.02


In [7]:
# Save the normalized frame as a ';'-separated, UTF-8 CSV without the index column.
# The file name is relative, so it is written to the current working directory.

dfcsv.to_csv("carbon_dioxide.csv", index=False, sep=';', encoding='utf-8')

In [8]:
# Read back the CSV produced by dfcsv.to_csv. The same relative file name is
# used for writing and reading, so the round trip does not depend on the
# working directory being /content.

dfler = pd.read_csv('carbon_dioxide.csv', sep=';', encoding='utf-8')

In [9]:
# Display the frame loaded from the CSV file.

dfler

Unnamed: 0,year,month,day,cycle,trend
0,2012,1,20,393.35,391.59
1,2012,1,21,393.36,391.59
2,2012,1,22,393.37,391.60
3,2012,1,23,393.38,391.61
4,2012,1,24,393.40,391.61
...,...,...,...,...,...
3956,2022,11,19,418.04,418.01
3957,2022,11,20,418.08,418.01
3958,2022,11,21,418.13,418.02
3959,2022,11,22,418.18,418.02


## Análise e Tratamento

In [10]:
# Check the dtype of every column loaded from the CSV.

dfler.dtypes

year       int64
month      int64
day        int64
cycle    float64
trend    float64
dtype: object

In [11]:
# Translate the column names to Portuguese; the two measurement columns
# also receive a "_ppm" suffix.

dfler = dfler.rename(
    columns={
        'year': 'ano',
        'month': 'mes',
        'day': 'dia',
        'cycle': 'ciclo_ppm',
        'trend': 'tendencia_ppm',
    }
)

In [12]:
# Column "ano": print every distinct value in ascending order,
# each one starting on a new line and followed by ", ".

ano = sorted(dfler['ano'].unique())
for value in ano:
    print()
    print(value, end=', ')


2012, 
2013, 
2014, 
2015, 
2016, 
2017, 
2018, 
2019, 
2020, 
2021, 
2022, 

In [13]:
# Column "mes": print every distinct value in ascending order,
# each one starting on a new line and followed by ", ".

mes = sorted(dfler['mes'].unique())
for value in mes:
    print()
    print(value, end=', ')


1, 
2, 
3, 
4, 
5, 
6, 
7, 
8, 
9, 
10, 
11, 
12, 

In [14]:
# Column "dia": print every distinct value in ascending order,
# each one starting on a new line and followed by ", ".

dia = sorted(dfler['dia'].unique())
for value in dia:
    print()
    print(value, end=', ')


1, 
2, 
3, 
4, 
5, 
6, 
7, 
8, 
9, 
10, 
11, 
12, 
13, 
14, 
15, 
16, 
17, 
18, 
19, 
20, 
21, 
22, 
23, 
24, 
25, 
26, 
27, 
28, 
29, 
30, 
31, 

In [15]:
# Column "ciclo_ppm": print every distinct value in ascending order,
# each one starting on a new line and followed by ", ".

ciclo = sorted(dfler['ciclo_ppm'].unique())
for value in ciclo:
    print()
    print(value, end=', ')


389.16, 
389.17, 
389.18, 
389.19, 
389.2, 
389.22, 
389.24, 
389.26, 
389.28, 
389.31, 
389.34, 
389.36, 
389.37, 
389.4, 
389.41, 
389.43, 
389.45, 
389.46, 
389.49, 
389.5, 
389.53, 
389.54, 
389.58, 
389.62, 
389.63, 
389.67, 
389.68, 
389.71, 
389.74, 
389.76, 
389.8, 
389.81, 
389.86, 
389.91, 
389.92, 
389.96, 
389.98, 
390.01, 
390.05, 
390.07, 
390.12, 
390.13, 
390.18, 
390.19, 
390.24, 
390.26, 
390.3, 
390.34, 
390.36, 
390.41, 
390.42, 
390.48, 
390.49, 
390.54, 
390.57, 
390.61, 
390.65, 
390.67, 
390.73, 
390.74, 
390.8, 
390.82, 
390.87, 
390.9, 
390.93, 
390.99, 
391.0, 
391.07, 
391.13, 
391.16, 
391.2, 
391.25, 
391.27, 
391.34, 
391.41, 
391.42, 
391.48, 
391.51, 
391.55, 
391.6, 
391.62, 
391.69, 
391.76, 
391.78, 
391.83, 
391.87, 
391.9, 
391.96, 
391.97, 
392.04, 
392.05, 
392.12, 
392.13, 
392.19, 
392.22, 
392.26, 
392.31, 
392.33, 
392.39, 
392.4, 
392.47, 
392.48, 
392.54, 
392.56, 
392.6, 
392.61, 
392.62, 
392.63, 
392.64, 
392.65, 
392.66, 
392.67, 
392.

In [16]:
# Format every ciclo_ppm value with exactly two decimal places. The result is a
# column of strings (e.g. 389.2 -> "389.20").
# The column is assigned directly instead of through dfler.loc[:, ...]: the .loc
# form tries to write the strings into the existing float64 column in place,
# which newer pandas versions flag as an incompatible-dtype assignment.

dfler["ciclo_ppm"] = dfler["ciclo_ppm"].map('{:.2f}'.format)

In [17]:
# Check the column dtypes after the formatting step (ciclo_ppm now holds strings).
dfler.dtypes

ano                int64
mes                int64
dia                int64
ciclo_ppm         object
tendencia_ppm    float64
dtype: object

In [18]:
# Column "ciclo_ppm" after formatting: print every distinct value in sorted
# order, each one starting on a new line and followed by ", ".

ciclo = sorted(dfler['ciclo_ppm'].unique())
for value in ciclo:
    print()
    print(value, end=', ')


389.16, 
389.17, 
389.18, 
389.19, 
389.20, 
389.22, 
389.24, 
389.26, 
389.28, 
389.31, 
389.34, 
389.36, 
389.37, 
389.40, 
389.41, 
389.43, 
389.45, 
389.46, 
389.49, 
389.50, 
389.53, 
389.54, 
389.58, 
389.62, 
389.63, 
389.67, 
389.68, 
389.71, 
389.74, 
389.76, 
389.80, 
389.81, 
389.86, 
389.91, 
389.92, 
389.96, 
389.98, 
390.01, 
390.05, 
390.07, 
390.12, 
390.13, 
390.18, 
390.19, 
390.24, 
390.26, 
390.30, 
390.34, 
390.36, 
390.41, 
390.42, 
390.48, 
390.49, 
390.54, 
390.57, 
390.61, 
390.65, 
390.67, 
390.73, 
390.74, 
390.80, 
390.82, 
390.87, 
390.90, 
390.93, 
390.99, 
391.00, 
391.07, 
391.13, 
391.16, 
391.20, 
391.25, 
391.27, 
391.34, 
391.41, 
391.42, 
391.48, 
391.51, 
391.55, 
391.60, 
391.62, 
391.69, 
391.76, 
391.78, 
391.83, 
391.87, 
391.90, 
391.96, 
391.97, 
392.04, 
392.05, 
392.12, 
392.13, 
392.19, 
392.22, 
392.26, 
392.31, 
392.33, 
392.39, 
392.40, 
392.47, 
392.48, 
392.54, 
392.56, 
392.60, 
392.61, 
392.62, 
392.63, 
392.64, 
392.65, 
392.66, 


In [19]:
# Convert ciclo_ppm from string back to float.

dfler['ciclo_ppm'] = dfler['ciclo_ppm'].astype(float)

In [20]:
# Check the column dtypes after converting ciclo_ppm back to float.

dfler.dtypes

ano                int64
mes                int64
dia                int64
ciclo_ppm        float64
tendencia_ppm    float64
dtype: object

In [21]:
# Column "tendencia_ppm": print every distinct value in ascending order,
# each one starting on a new line and followed by ", ".

tendencia = sorted(dfler['tendencia_ppm'].unique())
for value in tendencia:
    print()
    print(value, end=', ')


391.59, 
391.6, 
391.61, 
391.62, 
391.63, 
391.64, 
391.65, 
391.66, 
391.67, 
391.68, 
391.69, 
391.7, 
391.71, 
391.72, 
391.73, 
391.74, 
391.75, 
391.76, 
391.77, 
391.78, 
391.79, 
391.8, 
391.81, 
391.82, 
391.83, 
391.84, 
391.85, 
391.86, 
391.87, 
391.88, 
391.89, 
391.9, 
391.91, 
391.92, 
391.93, 
391.94, 
391.95, 
391.96, 
391.97, 
391.98, 
391.99, 
392.0, 
392.01, 
392.02, 
392.03, 
392.04, 
392.05, 
392.06, 
392.07, 
392.08, 
392.09, 
392.1, 
392.11, 
392.12, 
392.13, 
392.14, 
392.15, 
392.16, 
392.17, 
392.18, 
392.19, 
392.2, 
392.21, 
392.22, 
392.23, 
392.24, 
392.25, 
392.26, 
392.27, 
392.28, 
392.29, 
392.3, 
392.31, 
392.32, 
392.33, 
392.34, 
392.35, 
392.36, 
392.37, 
392.38, 
392.39, 
392.4, 
392.41, 
392.42, 
392.43, 
392.44, 
392.45, 
392.46, 
392.47, 
392.48, 
392.49, 
392.5, 
392.51, 
392.52, 
392.53, 
392.54, 
392.55, 
392.56, 
392.57, 
392.58, 
392.59, 
392.6, 
392.61, 
392.62, 
392.63, 
392.64, 
392.65, 
392.66, 
392.67, 
392.68, 
392.69, 
392.7, 
392

In [22]:
# Format every tendencia_ppm value with exactly two decimal places. The result
# is a column of strings (e.g. 391.6 -> "391.60").
# The column is assigned directly instead of through dfler.loc[:, ...]: the .loc
# form tries to write the strings into the existing float64 column in place,
# which newer pandas versions flag as an incompatible-dtype assignment.

dfler["tendencia_ppm"] = dfler["tendencia_ppm"].map('{:.2f}'.format)

In [23]:
# Convert tendencia_ppm from string back to float.

dfler['tendencia_ppm'] = dfler['tendencia_ppm'].astype(float)

In [24]:
# Check the column dtypes after converting tendencia_ppm back to float.

dfler.dtypes

ano                int64
mes                int64
dia                int64
ciclo_ppm        float64
tendencia_ppm    float64
dtype: object

In [25]:
# Per-year totals of the daily ciclo_ppm and tendencia_ppm values, with the
# year returned as a regular column.
# The columns are selected with a list (double brackets): indexing a GroupBy
# with several bare keys is deprecated in pandas 1.x and an error in pandas 2.x.
# NOTE(review): a sum of daily concentrations grows with the number of rows in
# each year — confirm whether a yearly mean was intended.

dfanalise = dfler.groupby(['ano'])[['ciclo_ppm', 'tendencia_ppm']].sum().reset_index()

  This is separate from the ipykernel package so we can avoid doing imports until


In [26]:
# Display the frame produced by the groupby aggregation.

dfanalise

Unnamed: 0,ano,ciclo_ppm,tendencia_ppm
0,2012,136278.66,136307.09
1,2013,144382.93,144372.83
2,2014,145110.71,145126.78
3,2015,145925.79,145921.57
4,2016,147502.5,147503.69
5,2017,148004.95,148005.76
6,2018,148819.39,148813.04
7,2019,149753.26,149757.84
8,2020,151083.34,151069.28
9,2021,151469.14,151489.6


In [27]:
# List the dtype of every column in the aggregated frame.

dfanalise.dtypes

ano                int64
ciclo_ppm        float64
tendencia_ppm    float64
dtype: object