# Uvoz podatkov

## Text encoding: ASCII, Unicode, and others

In [1]:
with open('data/out2.txt', 'wb') as f:
    f.write(bytes([65,66,67,255,12,193]))

In [9]:
!cat data/out2.txt

ABCćčđ

In [10]:
with open('data/out2.txt', 'wb') as f:
    f.read()

UnsupportedOperation: read

In [11]:
with open('data/out2.txt', 'rb') as f:
    print(f.read())

b''


In [12]:
with open('data/out2.txt', errors='ignore') as f:
    print(f.read())




In [13]:
with open('data/out2.txt', errors='replace') as f:
    print(f.read())




In [8]:
with open('data/out2.txt', errors='backslashreplace') as f:
    print(f.read())




## Reading and Writing Data with pandas

In [14]:
import pandas as pd
import numpy as np

[IO tools (text, CSV, HDF5, …)](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html)

<table class="colwidths-given table">
<colgroup>
<col style="width: 12%">
<col style="width: 40%">
<col style="width: 24%">
<col style="width: 24%">
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Format Type</p></th>
<th class="head"><p>Data Description</p></th>
<th class="head"><p>Reader</p></th>
<th class="head"><p>Writer</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>text</p></td>
<td><p><a class="reference external" href="https://en.wikipedia.org/wiki/Comma-separated_values">CSV</a></p></td>
<td><p><a class="reference internal" href="#io-read-csv-table"><span class="std std-ref">read_csv</span></a></p></td>
<td><p><a class="reference internal" href="#io-store-in-csv"><span class="std std-ref">to_csv</span></a></p></td>
</tr>
<tr class="row-odd"><td><p>text</p></td>
<td><p>Fixed-Width Text File</p></td>
<td><p><a class="reference internal" href="#io-fwf-reader"><span class="std std-ref">read_fwf</span></a></p></td>
<td></td>
</tr>
<tr class="row-even"><td><p>text</p></td>
<td><p><a class="reference external" href="https://www.json.org/">JSON</a></p></td>
<td><p><a class="reference internal" href="#io-json-reader"><span class="std std-ref">read_json</span></a></p></td>
<td><p><a class="reference internal" href="#io-json-writer"><span class="std std-ref">to_json</span></a></p></td>
</tr>
<tr class="row-odd"><td><p>text</p></td>
<td><p><a class="reference external" href="https://en.wikipedia.org/wiki/HTML">HTML</a></p></td>
<td><p><a class="reference internal" href="#io-read-html"><span class="std std-ref">read_html</span></a></p></td>
<td><p><a class="reference internal" href="#io-html"><span class="std std-ref">to_html</span></a></p></td>
</tr>
<tr class="row-even"><td><p>text</p></td>
<td><p>Local clipboard</p></td>
<td><p><a class="reference internal" href="#io-clipboard"><span class="std std-ref">read_clipboard</span></a></p></td>
<td><p><a class="reference internal" href="#io-clipboard"><span class="std std-ref">to_clipboard</span></a></p></td>
</tr>
<tr class="row-odd"><td></td>
<td><p><a class="reference external" href="https://en.wikipedia.org/wiki/Microsoft_Excel">MS Excel</a></p></td>
<td><p><a class="reference internal" href="#io-excel-reader"><span class="std std-ref">read_excel</span></a></p></td>
<td><p><a class="reference internal" href="#io-excel-writer"><span class="std std-ref">to_excel</span></a></p></td>
</tr>
<tr class="row-even"><td><p>binary</p></td>
<td><p><a class="reference external" href="http://www.opendocumentformat.org">OpenDocument</a></p></td>
<td><p><a class="reference internal" href="#io-ods"><span class="std std-ref">read_excel</span></a></p></td>
<td></td>
</tr>
<tr class="row-odd"><td><p>binary</p></td>
<td><p><a class="reference external" href="https://support.hdfgroup.org/HDF5/whatishdf5.html">HDF5 Format</a></p></td>
<td><p><a class="reference internal" href="#io-hdf5"><span class="std std-ref">read_hdf</span></a></p></td>
<td><p><a class="reference internal" href="#io-hdf5"><span class="std std-ref">to_hdf</span></a></p></td>
</tr>
<tr class="row-even"><td><p>binary</p></td>
<td><p><a class="reference external" href="https://github.com/wesm/feather">Feather Format</a></p></td>
<td><p><a class="reference internal" href="#io-feather"><span class="std std-ref">read_feather</span></a></p></td>
<td><p><a class="reference internal" href="#io-feather"><span class="std std-ref">to_feather</span></a></p></td>
</tr>
<tr class="row-odd"><td><p>binary</p></td>
<td><p><a class="reference external" href="https://parquet.apache.org/">Parquet Format</a></p></td>
<td><p><a class="reference internal" href="#io-parquet"><span class="std std-ref">read_parquet</span></a></p></td>
<td><p><a class="reference internal" href="#io-parquet"><span class="std std-ref">to_parquet</span></a></p></td>
</tr>
<tr class="row-even"><td><p>binary</p></td>
<td><p><a class="reference external" href="//https://orc.apache.org/">ORC Format</a></p></td>
<td><p><a class="reference internal" href="#io-orc"><span class="std std-ref">read_orc</span></a></p></td>
<td></td>
</tr>
<tr class="row-odd"><td><p>binary</p></td>
<td><p><a class="reference external" href="https://msgpack.org/index.html">Msgpack</a></p></td>
<td><p><a class="reference internal" href="#io-msgpack"><span class="std std-ref">read_msgpack</span></a></p></td>
<td><p><a class="reference internal" href="#io-msgpack"><span class="std std-ref">to_msgpack</span></a></p></td>
</tr>
<tr class="row-even"><td><p>binary</p></td>
<td><p><a class="reference external" href="https://en.wikipedia.org/wiki/Stata">Stata</a></p></td>
<td><p><a class="reference internal" href="#io-stata-reader"><span class="std std-ref">read_stata</span></a></p></td>
<td><p><a class="reference internal" href="#io-stata-writer"><span class="std std-ref">to_stata</span></a></p></td>
</tr>
<tr class="row-odd"><td><p>binary</p></td>
<td><p><a class="reference external" href="https://en.wikipedia.org/wiki/SAS_(software)">SAS</a></p></td>
<td><p><a class="reference internal" href="#io-sas-reader"><span class="std std-ref">read_sas</span></a></p></td>
<td></td>
</tr>
<tr class="row-even"><td><p>binary</p></td>
<td><p><a class="reference external" href="https://en.wikipedia.org/wiki/SPSS">SPSS</a></p></td>
<td><p><a class="reference internal" href="#io-spss-reader"><span class="std std-ref">read_spss</span></a></p></td>
<td></td>
</tr>
<tr class="row-odd"><td><p>binary</p></td>
<td><p><a class="reference external" href="https://docs.python.org/3/library/pickle.html">Python Pickle Format</a></p></td>
<td><p><a class="reference internal" href="#io-pickle"><span class="std std-ref">read_pickle</span></a></p></td>
<td><p><a class="reference internal" href="#io-pickle"><span class="std std-ref">to_pickle</span></a></p></td>
</tr>
<tr class="row-even"><td><p>SQL</p></td>
<td><p><a class="reference external" href="https://en.wikipedia.org/wiki/SQL">SQL</a></p></td>
<td><p><a class="reference internal" href="#io-sql"><span class="std std-ref">read_sql</span></a></p></td>
<td><p><a class="reference internal" href="#io-sql"><span class="std std-ref">to_sql</span></a></p></td>
</tr>
<tr class="row-odd"><td><p>SQL</p></td>
<td><p><a class="reference external" href="https://en.wikipedia.org/wiki/BigQuery">Google BigQuery</a></p></td>
<td><p><a class="reference internal" href="#io-bigquery"><span class="std std-ref">read_gbq</span></a></p></td>
<td><p><a class="reference internal" href="#io-bigquery"><span class="std std-ref">to_gbq</span></a></p></td>
</tr>
</tbody>
</table>

### CSV files

#### Primer 1: seaslug.txt

In [15]:
!head -n 5 data/seaslug.txt

Time	Percent
99	0.067
99	0.133
99	0.067
99	0


In [18]:
pd.read_csv('data/seaslug.txt', sep='\t').head()

Unnamed: 0,Time,Percent
0,99,0.067
1,99,0.133
2,99,0.067
3,99,0.0
4,99,0.0


#### Primer 2: FOOD_DES.txt

Encoding: 'iso-8859-1'

In [19]:
!head -n 5 data/FOOD_DES.txt

~01001~^~0100~^~Butter, salted~^~BUTTER,WITH SALT~^~~^~~^~Y~^~~^0^~~^6.38^4.27^8.79^3.87
~01002~^~0100~^~Butter, whipped, with salt~^~BUTTER,WHIPPED,W/ SALT~^~~^~~^~Y~^~~^0^~~^6.38^^^
~01003~^~0100~^~Butter oil, anhydrous~^~BUTTER OIL,ANHYDROUS~^~~^~~^~Y~^~~^0^~~^6.38^4.27^8.79^3.87
~01004~^~0100~^~Cheese, blue~^~CHEESE,BLUE~^~~^~~^~Y~^~~^0^~~^6.38^4.27^8.79^3.87
~01005~^~0100~^~Cheese, brick~^~CHEESE,BRICK~^~~^~~^~Y~^~~^0^~~^6.38^4.27^8.79^3.87


In [23]:
pd.read_csv('data/FOOD_DES.txt', encoding='iso-8859-1', sep='^', nrows=10, header=None, quotechar='~')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1001,100,"Butter, salted","BUTTER,WITH SALT",,,Y,,0,,6.38,4.27,8.79,3.87
1,1002,100,"Butter, whipped, with salt","BUTTER,WHIPPED,W/ SALT",,,Y,,0,,6.38,,,
2,1003,100,"Butter oil, anhydrous","BUTTER OIL,ANHYDROUS",,,Y,,0,,6.38,4.27,8.79,3.87
3,1004,100,"Cheese, blue","CHEESE,BLUE",,,Y,,0,,6.38,4.27,8.79,3.87
4,1005,100,"Cheese, brick","CHEESE,BRICK",,,Y,,0,,6.38,4.27,8.79,3.87
5,1006,100,"Cheese, brie","CHEESE,BRIE",,,Y,,0,,6.38,4.27,8.79,3.87
6,1007,100,"Cheese, camembert","CHEESE,CAMEMBERT",,,Y,,0,,6.38,4.27,8.79,3.87
7,1008,100,"Cheese, caraway","CHEESE,CARAWAY",,,,,0,,6.38,4.27,8.79,3.87
8,1009,100,"Cheese, cheddar","CHEESE,CHEDDAR",,,Y,,0,,,,,
9,1010,100,"Cheese, cheshire","CHEESE,CHESHIRE",,,,,0,,6.38,4.27,8.79,3.87


#### Primer 3: MplsStops.csv

In [24]:
!head -n 3 ./data/mpls_stops.csv

Unnamed: 0,id Num,date,problem,MDC,citation Issued,person Search,vehicle Search,pre Race,race,gender,lat,long,police Precinct,neighborhood
,idNum,date,problem,MDC,citationIssued,personSearch,vehicleSearch,preRace,race,gender,lat,long,policePrecinct,neighborhood
6823.0,17-000003,2017-01-01 00:00:42,suspicious,MDC,,NO,NO,Unknown,Unknown,Unknown,44.96661711,-93.24645826,1,Cedar Riverside


In [25]:
mpls = pd.read_csv('data/mpls_stops.csv', nrows=3)
mpls

Unnamed: 0.1,Unnamed: 0,id Num,date,problem,MDC,citation Issued,person Search,vehicle Search,pre Race,race,gender,lat,long,police Precinct,neighborhood
0,,idNum,date,problem,MDC,citationIssued,personSearch,vehicleSearch,preRace,race,gender,lat,long,policePrecinct,neighborhood
1,6823.0,17-000003,2017-01-01 00:00:42,suspicious,MDC,,NO,NO,Unknown,Unknown,Unknown,44.96661711,-93.24645826,1,Cedar Riverside
2,6824.0,17-000007,2017-01-01 00:03:07,suspicious,MDC,,NO,NO,Unknown,Unknown,Male,44.98045,-93.27134,1,Downtown West


In [26]:
mpls.columns

Index(['Unnamed: 0', 'id Num', 'date', 'problem', 'MDC', 'citation Issued',
       'person Search', 'vehicle Search', 'pre Race', 'race', 'gender', 'lat',
       'long', 'police Precinct', 'neighborhood'],
      dtype='object')

In [27]:
new_columns_names = ['Unnamed: 0', 'id Num', 'date', 'problem', 'MDC', 'citation Issued',
       'person Search', 'vehicle Search', 'pre Race', 'race', 'gender', 'lat',
       'long', 'police Precinct', 'neighborhood']

In [29]:
new_columns_names = [name.lower().replace(' ', '_') for name in new_columns_names]
new_columns_names[0] = 'case_number_id'
print(new_columns_names)

['case_number_id', 'id_num', 'date', 'problem', 'mdc', 'citation_issued', 'person_search', 'vehicle_search', 'pre_race', 'race', 'gender', 'lat', 'long', 'police_precinct', 'neighborhood']


In [46]:
mpls = pd.read_csv('data/mpls_stops.csv', 
                  nrows=6,
                  names=new_columns_names,
                  skiprows=2,
                  engine='c',
                  true_values=['YES'],
                  false_values=['NO'],
                  dtype={'mdc':'category', 'problem':'category', 'citation_issued':'float',
                         'person_search':'float', 'pre_race':'category'},
                  index_col='case_number_id',
                  na_values=['Unknown'],
                  parse_dates=['date'])
mpls

Unnamed: 0_level_0,id_num,date,problem,mdc,citation_issued,person_search,vehicle_search,pre_race,race,gender,lat,long,police_precinct,neighborhood
case_number_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
6823.0,17-000003,2017-01-01 00:00:42,suspicious,MDC,,0.0,False,,,,44.966617,-93.246458,1,Cedar Riverside
6824.0,17-000007,2017-01-01 00:03:07,suspicious,MDC,,0.0,False,,,Male,44.98045,-93.27134,1,Downtown West
6825.0,17-000073,2017-01-01 00:23:15,traffic,MDC,,0.0,False,,White,Female,44.94835,-93.27538,5,Whittier
6826.0,17-000092,2017-01-01 00:33:48,suspicious,MDC,,0.0,False,,East African,Male,44.94836,-93.28135,5,Whittier
6827.0,17-000098,2017-01-01 00:37:58,traffic,MDC,,0.0,False,,White,Female,44.979078,-93.262076,1,Downtown West
6828.0,17-000111,2017-01-01 00:46:48,traffic,MDC,,0.0,False,,East African,Male,44.980535,-93.263627,1,Downtown West


In [37]:
%timeit mpls = pd.read_csv('data/mpls_stops.csv', names=new_columns_names, skiprows=2, engine='python')

1.21 s ± 196 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [38]:
%timeit mpls = pd.read_csv('data/mpls_stops.csv', names=new_columns_names, skiprows=2, engine='c')

289 ms ± 100 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [45]:
mpls['vehicle_search'].unique()

array([False])

#### Primer 4: iperf.txt

In [48]:
!head -n 20 data/iperf.txt

Wed Aug 15 19:35:11 CEST 2018
Connecting to host x.x.x.x, port 5201
[  4] local x.x.x.x port 48944 connected to x.x.x.x port 5201
[ ID] Interval           Transfer     Bandwidth       Retr  Cwnd
[  4]   0.00-1.00   sec   375 MBytes  3.14 Gbits/sec  273    471 KBytes
[  4]   1.00-2.00   sec   428 MBytes  3.59 Gbits/sec  145    376 KBytes
[  4]   2.00-3.00   sec   360 MBytes  3.02 Gbits/sec  148    454 KBytes
[  4]   3.00-4.00   sec   339 MBytes  2.84 Gbits/sec   83    407 KBytes
[  4]   4.00-5.00   sec   305 MBytes  2.56 Gbits/sec  104    414 KBytes
[  4]   5.00-6.00   sec   301 MBytes  2.53 Gbits/sec  186    440 KBytes
[  4]   6.00-7.00   sec   325 MBytes  2.73 Gbits/sec  174    485 KBytes
[  4]   7.00-8.00   sec   434 MBytes  3.64 Gbits/sec   81    677 KBytes
[  4]   8.00-9.00   sec   412 MBytes  3.46 Gbits/sec  226    537 KBytes
[  4]   9.00-10.00  sec   409 MBytes  3.43 Gbits/sec   47    372 KBytes
[  4]   10.00-11.00  sec   523 MBytes  3.81 Gbits/sec   96    422 KByte

In [51]:
pd.read_csv('data/iperf.txt', sep='s+', skiprows=4)

  """Entry point for launching an IPython kernel.


Unnamed: 0,[ 4] 0.00-1.00,ec 375 MByte,3.14 Gbit,/,ec 273 471 KByte,Unnamed: 5
0,[ 4] 1.00-2.00,ec 428 MByte,3.59 Gbit,/,ec 145 376 KByte,
1,[ 4] 2.00-3.00,ec 360 MByte,3.02 Gbit,/,ec 148 454 KByte,
2,[ 4] 3.00-4.00,ec 339 MByte,2.84 Gbit,/,ec 83 407 KByte,
3,[ 4] 4.00-5.00,ec 305 MByte,2.56 Gbit,/,ec 104 414 KByte,
4,[ 4] 5.00-6.00,ec 301 MByte,2.53 Gbit,/,ec 186 440 KByte,
5,[ 4] 6.00-7.00,ec 325 MByte,2.73 Gbit,/,ec 174 485 KByte,
6,[ 4] 7.00-8.00,ec 434 MByte,3.64 Gbit,/,ec 81 677 KByte,
7,[ 4] 8.00-9.00,ec 412 MByte,3.46 Gbit,/,ec 226 537 KByte,
8,[ 4] 9.00-10.00,ec 409 MByte,3.43 Gbit,/,ec 47 372 KByte,
9,[ 4] 10.00-11.00,ec 523 MByte,3.81 Gbit,/,ec 96 422 KByte,


In [52]:
with open('data/iperf.txt', 'r') as f:
    data = f.readlines()
    data = [line.strip() for line in data]
    print(data[:8])

['Wed Aug 15 19:35:11 CEST 2018', 'Connecting to host x.x.x.x, port 5201', '[  4] local x.x.x.x port 48944 connected to x.x.x.x port 5201', '[ ID] Interval           Transfer     Bandwidth       Retr  Cwnd', '[  4]   0.00-1.00   sec   375 MBytes  3.14 Gbits/sec  273    471 KBytes', '[  4]   1.00-2.00   sec   428 MBytes  3.59 Gbits/sec  145    376 KBytes', '[  4]   2.00-3.00   sec   360 MBytes  3.02 Gbits/sec  148    454 KBytes', '[  4]   3.00-4.00   sec   339 MBytes  2.84 Gbits/sec   83    407 KBytes']


In [53]:
print(data)

['Wed Aug 15 19:35:11 CEST 2018', 'Connecting to host x.x.x.x, port 5201', '[  4] local x.x.x.x port 48944 connected to x.x.x.x port 5201', '[ ID] Interval           Transfer     Bandwidth       Retr  Cwnd', '[  4]   0.00-1.00   sec   375 MBytes  3.14 Gbits/sec  273    471 KBytes', '[  4]   1.00-2.00   sec   428 MBytes  3.59 Gbits/sec  145    376 KBytes', '[  4]   2.00-3.00   sec   360 MBytes  3.02 Gbits/sec  148    454 KBytes', '[  4]   3.00-4.00   sec   339 MBytes  2.84 Gbits/sec   83    407 KBytes', '[  4]   4.00-5.00   sec   305 MBytes  2.56 Gbits/sec  104    414 KBytes', '[  4]   5.00-6.00   sec   301 MBytes  2.53 Gbits/sec  186    440 KBytes', '[  4]   6.00-7.00   sec   325 MBytes  2.73 Gbits/sec  174    485 KBytes', '[  4]   7.00-8.00   sec   434 MBytes  3.64 Gbits/sec   81    677 KBytes', '[  4]   8.00-9.00   sec   412 MBytes  3.46 Gbits/sec  226    537 KBytes', '[  4]   9.00-10.00  sec   409 MBytes  3.43 Gbits/sec   47    372 KBytes', '[  4]   10.00-11.00  sec   523 MBytes  3.

In [83]:
import datetime

start_time = datetime.datetime.strptime(data[0], '%a %b %d %H:%M:%S CEST %Y')
print(start_time, type(start_time))

2018-08-15 19:35:11 <class 'datetime.datetime'>


In [87]:
rows = []

for line in data[4:]:
    line_splited = line.split()
    add_seconds = int(line_splited[2].split('.')[0])
    timestamp = start_time + datetime.timedelta(seconds=add_seconds)
    transfer_mbytesec = int(line_splited[4])
    bandwidth_gbitsec = float(line_splited[6])
    retr = int(line_splited[8])
    cwnd_kbytes = int(line_splited[9]
    rows.append((timestamp, transfer_mbytesec, bandwidth_gbitsec, retr, cwnd_kbytes))

print(rows)

SyntaxError: invalid syntax (<ipython-input-87-531f138bc604>, line 11)

In [70]:
import csv

In [77]:
headers = ['timestamp', 'transfer_mbytesec', 'bandwidth_gbitsec', 'retr', 'cwnd_kbytes']

with open('data/iperf_clean.csv', 'w') as f:
    f_csv = csv.writer(f)
    f_csv.writerow(headers)
    f_csv.writerows(rows)

Error: iterable expected, not datetime.datetime

In [72]:
!cat data/iperf_clean.csv

timestamp,transfer_mbytesec,bandwidth_gbitsec,retr,cwnd_kbytes
2018-08-15 19:35:11,2018-08-15 19:35:12,2018-08-15 19:35:13,2018-08-15 19:35:14,2018-08-15 19:35:15,2018-08-15 19:35:16,2018-08-15 19:35:17,2018-08-15 19:35:18,2018-08-15 19:35:19,2018-08-15 19:35:20,2018-08-15 19:35:21


In [75]:
!head -n 2 data/iperf_clean.csv

timestamp,transfer_mbytesec,bandwidth_gbitsec,retr,cwnd_kbytes
2018-08-15 19:35:11,2018-08-15 19:35:12,2018-08-15 19:35:13,2018-08-15 19:35:14,2018-08-15 19:35:15,2018-08-15 19:35:16,2018-08-15 19:35:17,2018-08-15 19:35:18,2018-08-15 19:35:19,2018-08-15 19:35:20,2018-08-15 19:35:21


In [76]:
iperf_data = pd.read_csv('data/iperf_clean.csv')
iperf_data

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,timestamp,transfer_mbytesec,bandwidth_gbitsec,retr,cwnd_kbytes
2018-08-15 19:35:11,2018-08-15 19:35:12,2018-08-15 19:35:13,2018-08-15 19:35:14,2018-08-15 19:35:15,2018-08-15 19:35:16,2018-08-15 19:35:17,2018-08-15 19:35:18,2018-08-15 19:35:19,2018-08-15 19:35:20,2018-08-15 19:35:21


### Reading JSON files

#### Orient options

In [62]:
dfjo = pd.DataFrame(dict(A=range(1, 4), B=range(4, 7), C=range(7, 10)), columns=list('ABC'), index=list('xyz'))

In [63]:
dfjo

Unnamed: 0,A,B,C
x,1,4,7
y,2,5,8
z,3,6,9


<table class="colwidths-given table">
<colgroup>
<col style="width: 12%">
<col style="width: 88%">
</colgroup>
<tbody>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">split</span></code></p></td>
<td><p>dict like {index -&gt; [index], columns -&gt; [columns], data -&gt; [values]}</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">records</span></code></p></td>
<td><p>list like [{column -&gt; value}, … , {column -&gt; value}]</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">index</span></code></p></td>
<td><p>dict like {index -&gt; {column -&gt; value}}</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">columns</span></code></p></td>
<td><p>dict like {column -&gt; {index -&gt; value}}</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">values</span></code></p></td>
<td><p>just the values array</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">table</span></code></p></td>
<td><p>adhering to the JSON <a class="reference external" href="https://specs.frictionlessdata.io/json-table-schema/">Table Schema</a></p></td>
</tr>
</tbody>
</table>

In [64]:
dfjo.to_json(orient="columns")

'{"A":{"x":1,"y":2,"z":3},"B":{"x":4,"y":5,"z":6},"C":{"x":7,"y":8,"z":9}}'

In [65]:
dfjo.to_json(orient="index")

'{"x":{"A":1,"B":4,"C":7},"y":{"A":2,"B":5,"C":8},"z":{"A":3,"B":6,"C":9}}'

In [66]:
dfjo.to_json(orient="records")

'[{"A":1,"B":4,"C":7},{"A":2,"B":5,"C":8},{"A":3,"B":6,"C":9}]'

In [67]:
dfjo.to_json(orient="values")

'[[1,4,7],[2,5,8],[3,6,9]]'

In [68]:
dfjo.to_json(orient="split")

'{"columns":["A","B","C"],"index":["x","y","z"],"data":[[1,4,7],[2,5,8],[3,6,9]]}'

In [69]:
dfjo.to_json(orient="table")

'{"schema": {"fields":[{"name":"index","type":"string"},{"name":"A","type":"integer"},{"name":"B","type":"integer"},{"name":"C","type":"integer"}],"primaryKey":["index"],"pandas_version":"0.20.0"}, "data": [{"index":"x","A":1,"B":4,"C":7},{"index":"y","A":2,"B":5,"C":8},{"index":"z","A":3,"B":6,"C":9}]}'

#### Primer: ocenas.json

#### Primer: temperatures.json

#### Primer: cities.json

#### Primer: transactions.json

#### Primer: all_hour_geo.json

#### Primer: rates.json

### Python Pickle Format

In [14]:
# pripravimo datoteko za pisnaje v pickle format
titanic = pd.read_csv('data/titanic_sub.csv', 
                     index_col='PassengerId',
                     usecols=['PassengerId', 'Survived', 'Pclass', 'Sex', 'Age', 'Fare', 'Cabin', 'Embarked'])

### Excel files