# Uvoz podatkov

## Text encoding: ASCII, Unicode, and others

In [8]:
with open('data/out2.txt', 'wb') as f: # write binary
    f.write(bytes([65,66,67,255,192,193]))

In [10]:
with open('data/out2.txt', 'rb') as f: # read binary
    print(f.read())

b'ABC\xff\xc0\xc1'


In [4]:
# ignorirajmo tiste dele, ki jih ne zmore decodirat
with open('data/out2.txt', errors="ignore") as f:
    print(f.read())


ABC


In [5]:
with open('data/out2.txt', errors="replace") as f:
    print(f.read())

ABC���


In [6]:
with open('data/out2.txt', errors="backslashreplace") as f:
    print(f.read())

ABC\xff\xc0\xc1


## Reading and Writing Data with pandas

In [11]:
import pandas as pd
import numpy as np

[IO tools (text, CSV, HDF5, …)](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html)

<table class="colwidths-given table">
<colgroup>
<col style="width: 12%">
<col style="width: 40%">
<col style="width: 24%">
<col style="width: 24%">
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Format Type</p></th>
<th class="head"><p>Data Description</p></th>
<th class="head"><p>Reader</p></th>
<th class="head"><p>Writer</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>text</p></td>
<td><p><a class="reference external" href="https://en.wikipedia.org/wiki/Comma-separated_values">CSV</a></p></td>
<td><p><a class="reference internal" href="#io-read-csv-table"><span class="std std-ref">read_csv</span></a></p></td>
<td><p><a class="reference internal" href="#io-store-in-csv"><span class="std std-ref">to_csv</span></a></p></td>
</tr>
<tr class="row-odd"><td><p>text</p></td>
<td><p>Fixed-Width Text File</p></td>
<td><p><a class="reference internal" href="#io-fwf-reader"><span class="std std-ref">read_fwf</span></a></p></td>
<td></td>
</tr>
<tr class="row-even"><td><p>text</p></td>
<td><p><a class="reference external" href="https://www.json.org/">JSON</a></p></td>
<td><p><a class="reference internal" href="#io-json-reader"><span class="std std-ref">read_json</span></a></p></td>
<td><p><a class="reference internal" href="#io-json-writer"><span class="std std-ref">to_json</span></a></p></td>
</tr>
<tr class="row-odd"><td><p>text</p></td>
<td><p><a class="reference external" href="https://en.wikipedia.org/wiki/HTML">HTML</a></p></td>
<td><p><a class="reference internal" href="#io-read-html"><span class="std std-ref">read_html</span></a></p></td>
<td><p><a class="reference internal" href="#io-html"><span class="std std-ref">to_html</span></a></p></td>
</tr>
<tr class="row-even"><td><p>text</p></td>
<td><p><a class="reference external" href="https://en.wikipedia.org/wiki/LaTeX">LaTeX</a></p></td>
<td></td>
<td><p><a class="reference internal" href="#io-latex"><span class="std std-ref">Styler.to_latex</span></a></p></td>
</tr>
<tr class="row-odd"><td><p>text</p></td>
<td><p><a class="reference external" href="https://www.w3.org/standards/xml/core">XML</a></p></td>
<td><p><a class="reference internal" href="#io-read-xml"><span class="std std-ref">read_xml</span></a></p></td>
<td><p><a class="reference internal" href="#io-xml"><span class="std std-ref">to_xml</span></a></p></td>
</tr>
<tr class="row-even"><td><p>text</p></td>
<td><p>Local clipboard</p></td>
<td><p><a class="reference internal" href="#io-clipboard"><span class="std std-ref">read_clipboard</span></a></p></td>
<td><p><a class="reference internal" href="#io-clipboard"><span class="std std-ref">to_clipboard</span></a></p></td>
</tr>
<tr class="row-odd"><td><p>binary</p></td>
<td><p><a class="reference external" href="https://en.wikipedia.org/wiki/Microsoft_Excel">MS Excel</a></p></td>
<td><p><a class="reference internal" href="#io-excel-reader"><span class="std std-ref">read_excel</span></a></p></td>
<td><p><a class="reference internal" href="#io-excel-writer"><span class="std std-ref">to_excel</span></a></p></td>
</tr>
<tr class="row-even"><td><p>binary</p></td>
<td><p><a class="reference external" href="http://www.opendocumentformat.org">OpenDocument</a></p></td>
<td><p><a class="reference internal" href="#io-ods"><span class="std std-ref">read_excel</span></a></p></td>
<td></td>
</tr>
<tr class="row-odd"><td><p>binary</p></td>
<td><p><a class="reference external" href="https://support.hdfgroup.org/HDF5/whatishdf5.html">HDF5 Format</a></p></td>
<td><p><a class="reference internal" href="#io-hdf5"><span class="std std-ref">read_hdf</span></a></p></td>
<td><p><a class="reference internal" href="#io-hdf5"><span class="std std-ref">to_hdf</span></a></p></td>
</tr>
<tr class="row-even"><td><p>binary</p></td>
<td><p><a class="reference external" href="https://github.com/wesm/feather">Feather Format</a></p></td>
<td><p><a class="reference internal" href="#io-feather"><span class="std std-ref">read_feather</span></a></p></td>
<td><p><a class="reference internal" href="#io-feather"><span class="std std-ref">to_feather</span></a></p></td>
</tr>
<tr class="row-odd"><td><p>binary</p></td>
<td><p><a class="reference external" href="https://parquet.apache.org/">Parquet Format</a></p></td>
<td><p><a class="reference internal" href="#io-parquet"><span class="std std-ref">read_parquet</span></a></p></td>
<td><p><a class="reference internal" href="#io-parquet"><span class="std std-ref">to_parquet</span></a></p></td>
</tr>
<tr class="row-even"><td><p>binary</p></td>
<td><p><a class="reference external" href="https://orc.apache.org/">ORC Format</a></p></td>
<td><p><a class="reference internal" href="#io-orc"><span class="std std-ref">read_orc</span></a></p></td>
<td></td>
</tr>
<tr class="row-odd"><td><p>binary</p></td>
<td><p><a class="reference external" href="https://en.wikipedia.org/wiki/Stata">Stata</a></p></td>
<td><p><a class="reference internal" href="#io-stata-reader"><span class="std std-ref">read_stata</span></a></p></td>
<td><p><a class="reference internal" href="#io-stata-writer"><span class="std std-ref">to_stata</span></a></p></td>
</tr>
<tr class="row-even"><td><p>binary</p></td>
<td><p><a class="reference external" href="https://en.wikipedia.org/wiki/SAS_(software)">SAS</a></p></td>
<td><p><a class="reference internal" href="#io-sas-reader"><span class="std std-ref">read_sas</span></a></p></td>
<td></td>
</tr>
<tr class="row-odd"><td><p>binary</p></td>
<td><p><a class="reference external" href="https://en.wikipedia.org/wiki/SPSS">SPSS</a></p></td>
<td><p><a class="reference internal" href="#io-spss-reader"><span class="std std-ref">read_spss</span></a></p></td>
<td></td>
</tr>
<tr class="row-even"><td><p>binary</p></td>
<td><p><a class="reference external" href="https://docs.python.org/3/library/pickle.html">Python Pickle Format</a></p></td>
<td><p><a class="reference internal" href="#io-pickle"><span class="std std-ref">read_pickle</span></a></p></td>
<td><p><a class="reference internal" href="#io-pickle"><span class="std std-ref">to_pickle</span></a></p></td>
</tr>
<tr class="row-odd"><td><p>SQL</p></td>
<td><p><a class="reference external" href="https://en.wikipedia.org/wiki/SQL">SQL</a></p></td>
<td><p><a class="reference internal" href="#io-sql"><span class="std std-ref">read_sql</span></a></p></td>
<td><p><a class="reference internal" href="#io-sql"><span class="std std-ref">to_sql</span></a></p></td>
</tr>
<tr class="row-even"><td><p>SQL</p></td>
<td><p><a class="reference external" href="https://en.wikipedia.org/wiki/BigQuery">Google BigQuery</a></p></td>
<td><p><a class="reference internal" href="#io-bigquery"><span class="std std-ref">read_gbq</span></a></p></td>
<td><p><a class="reference internal" href="#io-bigquery"><span class="std std-ref">to_gbq</span></a></p></td>
</tr>
</tbody>
</table>

### CSV files

#### Primer 1: seaslug.txt

In [15]:
pd.read_csv("data/seaslug.txt",sep="\t")
# tab separator encoded as \t -> ce tega ne podamo, ne prebere pravilno
# alias za argument "sep" je tudi "delimiter"

Unnamed: 0,Time,Percent
0,99,0.067
1,99,0.133
2,99,0.067
3,99,0.0
4,99,0.0
5,0,0.5
6,0,0.467
7,0,0.857
8,0,0.5
9,0,0.357


#### Primer 2: FOOD_DES.txt

In [None]:
pd.read_csv("data/FOOD_DES.txt", encoding="iso-8859-1").head() # tezava - v naslednjem koaku dekodiraj samo nekaj vrstic

In [19]:
pd.read_csv("data/FOOD_DES.txt", encoding="iso-8859-1",nrows=10).head() # izpisemo samo nekaj vrstic. Ugotovimo da je morda tezava v separatorju

Unnamed: 0,Unnamed: 1,~01001~^~0100~^~Butter,salted~^~BUTTER,WITH SALT~^~~^~~^~Y~^~~^0^~~^6.38^4.27^8.79^3.87
~01002~^~0100~^~Butter,whipped,with salt~^~BUTTER,WHIPPED,W/ SALT~^~~^~~^~Y~^~~^0^~~^6.38^^^
~01003~^~0100~^~Butter oil,anhydrous~^~BUTTER OIL,ANHYDROUS~^~~^~~^~Y~^~~^0^~~^6.38^4.27^8.79^3.87,,
~01004~^~0100~^~Cheese,blue~^~CHEESE,BLUE~^~~^~~^~Y~^~~^0^~~^6.38^4.27^8.79^3.87,,
~01005~^~0100~^~Cheese,brick~^~CHEESE,BRICK~^~~^~~^~Y~^~~^0^~~^6.38^4.27^8.79^3.87,,
~01006~^~0100~^~Cheese,brie~^~CHEESE,BRIE~^~~^~~^~Y~^~~^0^~~^6.38^4.27^8.79^3.87,,


In [21]:
pd.read_csv("data/FOOD_DES.txt", encoding="iso-8859-1",sep="^").head(10) # vidimo, da je uporabljen nestandarden qoutechar

Unnamed: 0,~01001~,~0100~,"~Butter, salted~","~BUTTER,WITH SALT~",~~,~~.1,~Y~,~~.2,0,~~.3,6.38,4.27,8.79,3.87
0,~01002~,~0100~,"~Butter, whipped, with salt~","~BUTTER,WHIPPED,W/ SALT~",~~,~~,~Y~,~~,0.0,~~,6.38,,,
1,~01003~,~0100~,"~Butter oil, anhydrous~","~BUTTER OIL,ANHYDROUS~",~~,~~,~Y~,~~,0.0,~~,6.38,4.27,8.79,3.87
2,~01004~,~0100~,"~Cheese, blue~","~CHEESE,BLUE~",~~,~~,~Y~,~~,0.0,~~,6.38,4.27,8.79,3.87
3,~01005~,~0100~,"~Cheese, brick~","~CHEESE,BRICK~",~~,~~,~Y~,~~,0.0,~~,6.38,4.27,8.79,3.87
4,~01006~,~0100~,"~Cheese, brie~","~CHEESE,BRIE~",~~,~~,~Y~,~~,0.0,~~,6.38,4.27,8.79,3.87
5,~01007~,~0100~,"~Cheese, camembert~","~CHEESE,CAMEMBERT~",~~,~~,~Y~,~~,0.0,~~,6.38,4.27,8.79,3.87
6,~01008~,~0100~,"~Cheese, caraway~","~CHEESE,CARAWAY~",~~,~~,~~,~~,0.0,~~,6.38,4.27,8.79,3.87
7,~01009~,~0100~,"~Cheese, cheddar~","~CHEESE,CHEDDAR~",~~,~~,~Y~,~~,0.0,~~,,,,
8,~01010~,~0100~,"~Cheese, cheshire~","~CHEESE,CHESHIRE~",~~,~~,~~,~~,0.0,~~,6.38,4.27,8.79,3.87
9,~01011~,~0100~,"~Cheese, colby~","~CHEESE,COLBY~",~~,~~,~Y~,~~,0.0,~~,6.38,4.27,8.79,3.87


In [22]:
pd.read_csv("data/FOOD_DES.txt", encoding="iso-8859-1",sep="^", quotechar="~").head(10) 

Unnamed: 0,01001,0100,"Butter, salted","BUTTER,WITH SALT",Unnamed: 4,Unnamed: 5,Y,Unnamed: 7,0,Unnamed: 9,6.38,4.27,8.79,3.87
0,1002,100,"Butter, whipped, with salt","BUTTER,WHIPPED,W/ SALT",,,Y,,0.0,,6.38,,,
1,1003,100,"Butter oil, anhydrous","BUTTER OIL,ANHYDROUS",,,Y,,0.0,,6.38,4.27,8.79,3.87
2,1004,100,"Cheese, blue","CHEESE,BLUE",,,Y,,0.0,,6.38,4.27,8.79,3.87
3,1005,100,"Cheese, brick","CHEESE,BRICK",,,Y,,0.0,,6.38,4.27,8.79,3.87
4,1006,100,"Cheese, brie","CHEESE,BRIE",,,Y,,0.0,,6.38,4.27,8.79,3.87
5,1007,100,"Cheese, camembert","CHEESE,CAMEMBERT",,,Y,,0.0,,6.38,4.27,8.79,3.87
6,1008,100,"Cheese, caraway","CHEESE,CARAWAY",,,,,0.0,,6.38,4.27,8.79,3.87
7,1009,100,"Cheese, cheddar","CHEESE,CHEDDAR",,,Y,,0.0,,,,,
8,1010,100,"Cheese, cheshire","CHEESE,CHESHIRE",,,,,0.0,,6.38,4.27,8.79,3.87
9,1011,100,"Cheese, colby","CHEESE,COLBY",,,Y,,0.0,,6.38,4.27,8.79,3.87


In [23]:
pd.read_csv("data/FOOD_DES.txt", encoding="iso-8859-1",sep="^", quotechar="~", header=None).head(10) 

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1001,100,"Butter, salted","BUTTER,WITH SALT",,,Y,,0.0,,6.38,4.27,8.79,3.87
1,1002,100,"Butter, whipped, with salt","BUTTER,WHIPPED,W/ SALT",,,Y,,0.0,,6.38,,,
2,1003,100,"Butter oil, anhydrous","BUTTER OIL,ANHYDROUS",,,Y,,0.0,,6.38,4.27,8.79,3.87
3,1004,100,"Cheese, blue","CHEESE,BLUE",,,Y,,0.0,,6.38,4.27,8.79,3.87
4,1005,100,"Cheese, brick","CHEESE,BRICK",,,Y,,0.0,,6.38,4.27,8.79,3.87
5,1006,100,"Cheese, brie","CHEESE,BRIE",,,Y,,0.0,,6.38,4.27,8.79,3.87
6,1007,100,"Cheese, camembert","CHEESE,CAMEMBERT",,,Y,,0.0,,6.38,4.27,8.79,3.87
7,1008,100,"Cheese, caraway","CHEESE,CARAWAY",,,,,0.0,,6.38,4.27,8.79,3.87
8,1009,100,"Cheese, cheddar","CHEESE,CHEDDAR",,,Y,,0.0,,,,,
9,1010,100,"Cheese, cheshire","CHEESE,CHESHIRE",,,,,0.0,,6.38,4.27,8.79,3.87


#### Primer 3: mpls_stops.csv

In [26]:
pd.read_csv("data/mpls_stops.csv", nrows=5)

Unnamed: 0.1,Unnamed: 0,id Num,date,problem,MDC,citation Issued,person Search,vehicle Search,pre Race,race,gender,lat,long,police Precinct,neighborhood
0,,idNum,date,problem,MDC,citationIssued,personSearch,vehicleSearch,preRace,race,gender,lat,long,policePrecinct,neighborhood
1,6823.0,17-000003,2017-01-01 00:00:42,suspicious,MDC,,NO,NO,Unknown,Unknown,Unknown,44.96661711,-93.24645826,1,Cedar Riverside
2,6824.0,17-000007,2017-01-01 00:03:07,suspicious,MDC,,NO,NO,Unknown,Unknown,Male,44.98045,-93.27134,1,Downtown West
3,6825.0,17-000073,2017-01-01 00:23:15,traffic,MDC,,NO,NO,Unknown,White,Female,44.94835,-93.27538,5,Whittier
4,6826.0,17-000092,2017-01-01 00:33:48,suspicious,MDC,,NO,NO,Unknown,East African,Male,44.94836,-93.28135,5,Whittier


najprej preberemo samo za to da si uredimo poimenovanje stolpcev

In [30]:
mpls = pd.read_csv("data/mpls_stops.csv", nrows=2)
mpls_columns = mpls.columns
new_mpls_columns = [name.lower().replace(" ","_") for name in mpls_columns]
new_mpls_columns[0]="case_number_id"
print(new_mpls_columns)

['case_number_id', 'id_num', 'date', 'problem', 'mdc', 'citation_issued', 'person_search', 'vehicle_search', 'pre_race', 'race', 'gender', 'lat', 'long', 'police_precinct', 'neighborhood']


Sedaj zares preberemo in sproti še počistimo:
- določimo kaj so imena (names)
- skipamo vrstice (skiprows)
- vnaprej določimo katere vrednosti so True in katere False (true_values, false_values)
- vnaprej določimo datetime stolpce (parse_dates)
- naredimo svoj date parser z lamda funkcijo (date_parser)
- določimo kaj so NaN vrednosti (na_values)
- sprememba podatkovnega tipa ob branju (dtype)
- pretvorbe podatkovnega tipa po branju v tem primeru za index: npr: float -> int

In [50]:
from datetime import datetime

mpls = pd.read_csv("data/mpls_stops.csv",
names=new_mpls_columns,
skiprows=2,
true_values=["YES"],
false_values=["NO"],
index_col="case_number_id",
parse_dates=["date"],
date_parser= lambda datum: datetime.strptime(datum, '%Y-%m-%d %H:%M:%S'),
dtype={"problem":"category", "citation_issued": "float", "gender": "category"},
na_values=["Unknown"],
nrows=10
)

mpls.index = mpls.index.astype("int")

mpls.info()
mpls.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10 entries, 6823 to 6832
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   id_num           10 non-null     object        
 1   date             10 non-null     datetime64[ns]
 2   problem          10 non-null     category      
 3   mdc              10 non-null     object        
 4   citation_issued  0 non-null      float64       
 5   person_search    10 non-null     bool          
 6   vehicle_search   10 non-null     bool          
 7   pre_race         0 non-null      float64       
 8   race             8 non-null      object        
 9   gender           9 non-null      category      
 10  lat              10 non-null     float64       
 11  long             10 non-null     float64       
 12  police_precinct  10 non-null     int64         
 13  neighborhood     10 non-null     object        
dtypes: bool(2), category(2), datetime64[ns]

Unnamed: 0_level_0,id_num,date,problem,mdc,citation_issued,person_search,vehicle_search,pre_race,race,gender,lat,long,police_precinct,neighborhood
case_number_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
6823,17-000003,2017-01-01 00:00:42,suspicious,MDC,,False,False,,,,44.966617,-93.246458,1,Cedar Riverside
6824,17-000007,2017-01-01 00:03:07,suspicious,MDC,,False,False,,,Male,44.98045,-93.27134,1,Downtown West
6825,17-000073,2017-01-01 00:23:15,traffic,MDC,,False,False,,White,Female,44.94835,-93.27538,5,Whittier
6826,17-000092,2017-01-01 00:33:48,suspicious,MDC,,False,False,,East African,Male,44.94836,-93.28135,5,Whittier
6827,17-000098,2017-01-01 00:37:58,traffic,MDC,,False,False,,White,Female,44.979078,-93.262076,1,Downtown West


#### Izberemo engine ob branju:
C dela hitreje, podpira pa manj argumentov

In [54]:
%timeit mpls = pd.read_csv('data/mpls_stops.csv', names=new_mpls_columns, skiprows=2, engine='python')

224 ms ± 5.85 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [55]:
%timeit mpls = pd.read_csv('data/mpls_stops.csv', names=new_mpls_columns, skiprows=2, engine='c')

80.6 ms ± 536 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


#### Primer 4: iperf.txt

Težava pri tovrstnih podatkih -> ni standarden tip/format
- polno predlsedkov brez separatorjev
- na vrhu datoteke nekaj tekstovnih opisov, ki niso podatki

V teh primerih si pomagamo s python string funkcijami, datetime knjižnico itd... še predno vse skupaj naložimo v pandas

In [None]:
# skopiraj si ta del kode iz Leonovega repozitorija

### Reading JSON files

#### Orient options

In [56]:
dfjo = pd.DataFrame(dict(A=range(1, 4), B=range(4, 7), C=range(7, 10)), columns=list('ABC'), index=list('xyz'))

In [57]:
dfjo

Unnamed: 0,A,B,C
x,1,4,7
y,2,5,8
z,3,6,9


<table class="colwidths-given table">
<colgroup>
<col style="width: 12%">
<col style="width: 88%">
</colgroup>
<tbody>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">split</span></code></p></td>
<td><p>dict like {index -&gt; [index], columns -&gt; [columns], data -&gt; [values]}</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">records</span></code></p></td>
<td><p>list like [{column -&gt; value}, … , {column -&gt; value}]</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">index</span></code></p></td>
<td><p>dict like {index -&gt; {column -&gt; value}}</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">columns</span></code></p></td>
<td><p>dict like {column -&gt; {index -&gt; value}}</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">values</span></code></p></td>
<td><p>just the values array</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">table</span></code></p></td>
<td><p>adhering to the JSON <a class="reference external" href="https://specs.frictionlessdata.io/json-table-schema/">Table Schema</a></p></td>
</tr>
</tbody>
</table>

In [66]:
dfjo.to_json(orient="columns")

'{"A":{"x":1,"y":2,"z":3},"B":{"x":4,"y":5,"z":6},"C":{"x":7,"y":8,"z":9}}'

In [65]:
dfjo.to_json(orient="index")

'{"x":{"A":1,"B":4,"C":7},"y":{"A":2,"B":5,"C":8},"z":{"A":3,"B":6,"C":9}}'

In [64]:
dfjo.to_json(orient="records") # slabost: ne kodira indeksov

'[{"A":1,"B":4,"C":7},{"A":2,"B":5,"C":8},{"A":3,"B":6,"C":9}]'

In [67]:
dfjo.to_json(orient="values")

'[[1,4,7],[2,5,8],[3,6,9]]'

In [62]:
dfjo.to_json(orient="split")

'{"columns":["A","B","C"],"index":["x","y","z"],"data":[[1,4,7],[2,5,8],[3,6,9]]}'

In [61]:
dfjo.to_json(orient="table")

'{"schema":{"fields":[{"name":"index","type":"string"},{"name":"A","type":"integer"},{"name":"B","type":"integer"},{"name":"C","type":"integer"}],"primaryKey":["index"],"pandas_version":"1.4.0"},"data":[{"index":"x","A":1,"B":4,"C":7},{"index":"y","A":2,"B":5,"C":8},{"index":"z","A":3,"B":6,"C":9}]}'

#### Primer: ocenas.json

#### Primer: temperatures.json

#### Primer: cities.json

#### Primer: transactions.json

#### Primer: all_hour_geo.json

#### Primer: rates.json

### Python Pickle Format

In [69]:
# pripravimo datoteko za pisnaje v pickle format
titanic = pd.read_csv('data/titanic_sub.csv', 
                     index_col='PassengerId',
                     usecols=['PassengerId', 'Survived', 'Pclass', 'Sex', 'Age', 'Fare', 'Cabin', 'Embarked'])

In [70]:
titanic.head()

Unnamed: 0_level_0,Survived,Pclass,Sex,Age,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,0,3,male,22.0,7.25,,S
2,1,1,female,38.0,71.2833,C85,C
3,1,3,female,26.0,7.925,,S
4,1,1,female,35.0,53.1,C123,S
5,0,3,male,35.0,8.05,,S


In [71]:
titanic.to_pickle("data/titanic_sub.pkl")

In [74]:
titanic_read = pd.read_pickle('data/titanic_sub.pkl')
titanic_read

Unnamed: 0_level_0,Survived,Pclass,Sex,Age,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,0,3,male,22.0,7.2500,,S
2,1,1,female,38.0,71.2833,C85,C
3,1,3,female,26.0,7.9250,,S
4,1,1,female,35.0,53.1000,C123,S
5,0,3,male,35.0,8.0500,,S
...,...,...,...,...,...,...,...
887,0,2,male,27.0,13.0000,,S
888,1,1,female,19.0,30.0000,B42,S
889,0,3,female,,23.4500,,S
890,1,1,male,26.0,30.0000,C148,C


### Excel files

In [78]:
# opcija 2
df_2002 = pd.read_excel("data/battledeath.xlsx",sheet_name="2002")


In [None]:
df_2004 = pd.read_excel("data/battledeath.xlsx",sheet_name="2004")

In [85]:
# opcija 2
with pd.ExcelFile("data/battledeath.xlsx") as xls:
    # dir(xls) # ugotovimo metode ki so na voljo
    xls.sheet_names
    df = pd.read_excel(xls, sheet_name="2004")
    print(df)

# domača naloga - DN ali lahko beremo direktno tabele?


    War(country)      2004
0    Afghanistan  9.451028
1        Albania  0.130354
2        Algeria  3.407277
3        Andorra  0.000000
4         Angola  2.597931
..           ...       ...
187    Venezuela  0.000000
188      Vietnam  0.037507
189  Yemen, Rep.  3.602868
190       Zambia  0.041963
191     Zimbabwe  0.509568

[192 rows x 2 columns]
