# read_json

* [pandas.read_json](https://pandas.pydata.org/docs/reference/api/pandas.read_json.html)

In [1]:
import numpy as np
import pandas as pd

## Specify data types

In [3]:
# Read the recovery log and pin column dtypes at load time instead of
# converting afterwards: three text columns become categoricals and the
# two *M3 columns are read as 32-bit floats.
df = pd.read_json(
    "../data/recovery.json",
    dtype=dict(
        # NOTE: passing the pd.CategoricalDtype class itself here does not work
        # (https://stackoverflow.com/questions/75089569); use the "category" alias.
        facility="category",
        supplier="category",
        supplierCode="category",
        suppliedM3=np.float32,
        recoveredM3=np.float32,
    ),
    convert_dates=["date"],  # parse only the explicitly named column as a date
)
df

Unnamed: 0,facility,timeStart,processTime,supplier,suppliedM3,recoveredM3,date,timeEnd,supplierCode
0,Bundaberg,9/1/22 8:16 AM,4:05,Mary,5.09,4.13,NaT,,
1,Newcastle,8:29:00 AM,,,2.00,1.55,2022-09-01,9:07:00 AM,har
2,Newcastle,9:27:00 AM,,,6.80,4.15,2022-09-01,11:28:00 AM,dic
3,Newcastle,11:38:00 AM,,,1.95,1.55,2022-09-01,12:21:00 PM,har
4,Bundaberg,9/1/22 12:34 PM,1:50,Mary Therese,3.78,2.56,NaT,,
...,...,...,...,...,...,...,...,...,...
227,Newcastle,11:40:00 AM,,,3.70,2.35,2022-09-30,12:41:00 PM,tom
228,Newcastle,12:52:00 PM,,,6.35,4.55,2022-09-30,2:36:00 PM,dic
229,Bundaberg,9/30/22 1:48 PM,3:40,Mary Therese,4.53,2.73,NaT,,
230,Newcastle,3:02:00 PM,,,2.00,1.45,2022-09-30,3:42:00 PM,har


# Date/Time 

## Auto detection

* `convert_dates` (quoted from the pandas `read_json` documentation)  
> bool or list of str, default True  
>  
> If True then default datelike columns may be converted (depending on keep_default_dates). If False, no dates will be converted. If a list of column names, then those columns will be converted and default datelike columns may also be converted (depending on keep_default_dates).

In [6]:
# Same load as before, but let pandas decide which columns look like dates.
# convert_dates=True is the documented default; it is spelled out here only
# to make the auto-detection behaviour explicit.
df = pd.read_json(
    "../data/recovery.json",
    dtype=dict(
        # NOTE: passing the pd.CategoricalDtype class itself here does not work
        # (https://stackoverflow.com/questions/75089569); use the "category" alias.
        facility="category",
        supplier="category",
        supplierCode="category",
        suppliedM3=np.float32,
        recoveredM3=np.float32,
    ),
    convert_dates=True,
)
# Inspect the resulting dtypes to see which columns were converted.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 232 entries, 0 to 231
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   facility      232 non-null    category      
 1   timeStart     232 non-null    object        
 2   processTime   111 non-null    object        
 3   supplier      111 non-null    category      
 4   suppliedM3    232 non-null    float32       
 5   recoveredM3   232 non-null    float32       
 6   date          121 non-null    datetime64[ns]
 7   timeEnd       121 non-null    object        
 8   supplierCode  121 non-null    category      
dtypes: category(3), datetime64[ns](1), float32(2), object(3)
memory usage: 10.3+ KB


In [7]:
# Display the frame produced by the previous read_json call
# (the rendered table elides the middle rows).
df

Unnamed: 0,facility,timeStart,processTime,supplier,suppliedM3,recoveredM3,date,timeEnd,supplierCode
0,Bundaberg,9/1/22 8:16 AM,4:05,Mary,5.09,4.13,NaT,,
1,Newcastle,8:29:00 AM,,,2.00,1.55,2022-09-01,9:07:00 AM,har
2,Newcastle,9:27:00 AM,,,6.80,4.15,2022-09-01,11:28:00 AM,dic
3,Newcastle,11:38:00 AM,,,1.95,1.55,2022-09-01,12:21:00 PM,har
4,Bundaberg,9/1/22 12:34 PM,1:50,Mary Therese,3.78,2.56,NaT,,
...,...,...,...,...,...,...,...,...,...
227,Newcastle,11:40:00 AM,,,3.70,2.35,2022-09-30,12:41:00 PM,tom
228,Newcastle,12:52:00 PM,,,6.35,4.55,2022-09-30,2:36:00 PM,dic
229,Bundaberg,9/30/22 1:48 PM,3:40,Mary Therese,4.53,2.73,NaT,,
230,Newcastle,3:02:00 PM,,,2.00,1.45,2022-09-30,3:42:00 PM,har


## Specify date/time column

In [9]:
# Name the date column explicitly instead of relying on auto-detection.
# NOTE(review): the dtypes reported below match the convert_dates=True run,
# because a column called "date" is also picked up by the default detection;
# confirm against keep_default_dates if a stricter demonstration is wanted.
df = pd.read_json(
    "../data/recovery.json",
    dtype=dict(
        facility="category",
        supplier="category",
        supplierCode="category",
        suppliedM3=np.float32,
        recoveredM3=np.float32,
    ),
    convert_dates=["date"],
)
# Inspect the resulting dtypes to confirm "date" was parsed as datetime.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 232 entries, 0 to 231
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   facility      232 non-null    category      
 1   timeStart     232 non-null    object        
 2   processTime   111 non-null    object        
 3   supplier      111 non-null    category      
 4   suppliedM3    232 non-null    float32       
 5   recoveredM3   232 non-null    float32       
 6   date          121 non-null    datetime64[ns]
 7   timeEnd       121 non-null    object        
 8   supplierCode  121 non-null    category      
dtypes: category(3), datetime64[ns](1), float32(2), object(3)
memory usage: 10.3+ KB
