In [4]:
import pandas as pd
import numpy as np

### 텍스트 파일 읽기/쓰기

In [65]:
%%writefile example1.csv
a, b, c, d, e, text
1, 2, 3, 4, 5, hi
6, 7, 8, 9, 10, pandas
11, 12, 13, 14, 15, csv

Overwriting example1.csv


In [2]:
!ls

example1.csv  sample_data


In [5]:
pd.read_csv('example1.csv')

Unnamed: 0,a,b,c,d,e,text
0,1,2,3,4,5,hi
1,6,7,8,9,10,pandas
2,11,12,13,14,15,csv


In [6]:
%%writefile example2.csv
1, 2, 3, 4, 5, hi
6, 7, 8, 9, 10, pandas
11, 12, 13, 14, 15, csv

Writing example2.csv


In [7]:
pd.read_csv('example2.csv', header=None)

Unnamed: 0,0,1,2,3,4,5
0,1,2,3,4,5,hi
1,6,7,8,9,10,pandas
2,11,12,13,14,15,csv


In [9]:
pd.read_csv('example2.csv', names=['a', 'b', 'c', 'd', 'e', 'text'])

Unnamed: 0,a,b,c,d,e,text
0,1,2,3,4,5,hi
1,6,7,8,9,10,pandas
2,11,12,13,14,15,csv


In [12]:
pd.read_csv('example2.csv', names=['a', 'b', 'c', 'd', 'e', 'text'], index_col='text')

Unnamed: 0_level_0,a,b,c,d,e
text,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
hi,1,2,3,4,5
pandas,6,7,8,9,10
csv,11,12,13,14,15


In [13]:
%%writefile example3.txt
  a     b     c
1 0.1  0.2  0.3
2 0.4  0.5  0.6
3 0.7 0.8 0.9

Writing example3.txt


In [14]:
pd.read_table('example3.txt', sep='\s+')

Unnamed: 0,a,b,c
1,0.1,0.2,0.3
2,0.4,0.5,0.6
3,0.7,0.8,0.9


In [16]:
%%writefile example4.csv
# 파일 설명
a, b, c, d, e, text
# 컬럼은 a, b, c, d, e와 text가 있음
1, 2, 3, 4, 5, hi
6, 7, 8, 9, 10, pandas
11, 12, 13, 14, 15, csv

Writing example4.csv


In [19]:
pd.read_csv('example4.csv', skiprows=[0, 2])

Unnamed: 0,a,b,c,d,e,text
0,1,2,3,4,5,hi
1,6,7,8,9,10,pandas
2,11,12,13,14,15,csv


In [22]:
%%writefile example5.csv
a, b, c, d, e, text
1, 2, NA, 4, 5, hi
6, 7, 8, NULL, 10, pandas
11, NA, 13, 14, 15, csv

Overwriting example5.csv


In [23]:
pd.read_csv('example5.csv')

Unnamed: 0,a,b,c,d,e,text
0,1,2.0,,4.0,5,hi
1,6,7.0,8.0,,10,pandas
2,11,,13.0,14.0,15,csv


In [24]:
%%writefile example6.csv
a, b, c, d, e, text
1, 2, 3, 4, 5, hi
6, 7, 8, 9, 10, pandas
11, 12, 13, 14, 15, csv
1, 2, 3, 4, 5, hi
6, 7, 8, 9, 10, pandas
11, 12, 13, 14, 15, csv
1, 2, 3, 4, 5, hi
6, 7, 8, 9, 10, pandas
11, 12, 13, 14, 15, csv
1, 2, 3, 4, 5, hi
6, 7, 8, 9, 10, pandas
11, 12, 13, 14, 15, csv
1, 2, 3, 4, 5, hi
6, 7, 8, 9, 10, pandas
11, 12, 13, 14, 15, csv

Writing example6.csv


In [25]:
pd.read_csv('example6.csv', nrows=5)

Unnamed: 0,a,b,c,d,e,text
0,1,2,3,4,5,hi
1,6,7,8,9,10,pandas
2,11,12,13,14,15,csv
3,1,2,3,4,5,hi
4,6,7,8,9,10,pandas


In [26]:
df = pd.read_csv('example6.csv')
df

Unnamed: 0,a,b,c,d,e,text
0,1,2,3,4,5,hi
1,6,7,8,9,10,pandas
2,11,12,13,14,15,csv
3,1,2,3,4,5,hi
4,6,7,8,9,10,pandas
5,11,12,13,14,15,csv
6,1,2,3,4,5,hi
7,6,7,8,9,10,pandas
8,11,12,13,14,15,csv
9,1,2,3,4,5,hi


In [27]:
df.to_csv('output.csv')

In [28]:
!cat output.csv

,a, b, c, d, e, text
0,1,2,3,4,5, hi
1,6,7,8,9,10, pandas
2,11,12,13,14,15, csv
3,1,2,3,4,5, hi
4,6,7,8,9,10, pandas
5,11,12,13,14,15, csv
6,1,2,3,4,5, hi
7,6,7,8,9,10, pandas
8,11,12,13,14,15, csv
9,1,2,3,4,5, hi
10,6,7,8,9,10, pandas
11,11,12,13,14,15, csv
12,1,2,3,4,5, hi
13,6,7,8,9,10, pandas
14,11,12,13,14,15, csv


In [29]:
dr = pd.date_range('2020-01-01', periods=10)
ts = pd.Series(np.arange(10), index=dr)
ts

2020-01-01    0
2020-01-02    1
2020-01-03    2
2020-01-04    3
2020-01-05    4
2020-01-06    5
2020-01-07    6
2020-01-08    7
2020-01-09    8
2020-01-10    9
Freq: D, dtype: int64

In [30]:
ts.to_csv('ts.csv', header=['value'])

In [31]:
!cat ts.csv

,value
2020-01-01,0
2020-01-02,1
2020-01-03,2
2020-01-04,3
2020-01-05,4
2020-01-06,5
2020-01-07,6
2020-01-08,7
2020-01-09,8
2020-01-10,9


In [57]:
%%writefile example.json
[{"a":1, "b":2, "c":3, "d":4, "e":5},
 {"a":6, "b":7, "c":8, "d":9, "e":10},
 {"a":11, "b":12, "c":13, "d":14, "e":15}]

 # json 파일 만들 때 ''(작은따옴표) 사용하면 read_json 할 때 ValueError 남

Overwriting example.json


In [58]:
!cat example.json

[{"a":1, "b":2, "c":3, "d":4, "e":5},
 {"a":6, "b":7, "c":8, "d":9, "e":10},
 {"a":11, "b":12, "c":13, "d":14, "e":15}]

In [59]:
pd.read_json('example.json')  

Unnamed: 0,a,b,c,d,e
0,1,2,3,4,5
1,6,7,8,9,10
2,11,12,13,14,15


In [60]:
ts.to_json('output.json')

In [61]:
!cat output.json

{"1577836800000":0,"1577923200000":1,"1578009600000":2,"1578096000000":3,"1578182400000":4,"1578268800000":5,"1578355200000":6,"1578441600000":7,"1578528000000":8,"1578614400000":9}

In [62]:
df.to_json('output.json')

In [63]:
!cat output.json

{"a":{"0":1,"1":6,"2":11,"3":1,"4":6,"5":11,"6":1,"7":6,"8":11,"9":1,"10":6,"11":11,"12":1,"13":6,"14":11}," b":{"0":2,"1":7,"2":12,"3":2,"4":7,"5":12,"6":2,"7":7,"8":12,"9":2,"10":7,"11":12,"12":2,"13":7,"14":12}," c":{"0":3,"1":8,"2":13,"3":3,"4":8,"5":13,"6":3,"7":8,"8":13,"9":3,"10":8,"11":13,"12":3,"13":8,"14":13}," d":{"0":4,"1":9,"2":14,"3":4,"4":9,"5":14,"6":4,"7":9,"8":14,"9":4,"10":9,"11":14,"12":4,"13":9,"14":14}," e":{"0":5,"1":10,"2":15,"3":5,"4":10,"5":15,"6":5,"7":10,"8":15,"9":5,"10":10,"11":15,"12":5,"13":10,"14":15}," text":{"0":" hi","1":" pandas","2":" csv","3":" hi","4":" pandas","5":" csv","6":" hi","7":" pandas","8":" csv","9":" hi","10":" pandas","11":" csv","12":" hi","13":" pandas","14":" csv"}}

### 이진 데이터 파일 읽기/쓰기

In [66]:
df = pd.read_csv('example1.csv')
df

Unnamed: 0,a,b,c,d,e,text
0,1,2,3,4,5,hi
1,6,7,8,9,10,pandas
2,11,12,13,14,15,csv


In [67]:
df.to_pickle('df_pickle')
pd.read_pickle('df_pickle')

Unnamed: 0,a,b,c,d,e,text
0,1,2,3,4,5,hi
1,6,7,8,9,10,pandas
2,11,12,13,14,15,csv


In [68]:
df = pd.DataFrame({'a':np.random.randn(100),
                   'b':np.random.randn(100),
                   'c':np.random.randn(100)})
df

Unnamed: 0,a,b,c
0,1.204988,-0.198334,-0.536155
1,1.334320,0.276890,0.221561
2,1.298035,-1.543549,0.695465
3,-0.727722,0.937370,-0.384016
4,0.033393,0.201834,-2.171184
...,...,...,...
95,-1.227406,-0.325282,-0.411787
96,-0.455761,-1.156967,0.326606
97,-0.196427,-0.974067,0.645184
98,-0.435536,0.493858,-0.470976


In [69]:
h = pd.HDFStore('data.h5')
h['obj1'] = df
h['obj1_col1'] = df['a']
h['obj1_col2'] = df['b']
h['obj1_col3'] = df['c']
h

<class 'pandas.io.pytables.HDFStore'>
File path: data.h5

In [70]:
h['obj1']

Unnamed: 0,a,b,c
0,1.204988,-0.198334,-0.536155
1,1.334320,0.276890,0.221561
2,1.298035,-1.543549,0.695465
3,-0.727722,0.937370,-0.384016
4,0.033393,0.201834,-2.171184
...,...,...,...
95,-1.227406,-0.325282,-0.411787
96,-0.455761,-1.156967,0.326606
97,-0.196427,-0.974067,0.645184
98,-0.435536,0.493858,-0.470976


In [71]:
h.put('obj2', df, format='table')

In [72]:
h.select('obj2', where=['index > 50 and index <= 60'])

Unnamed: 0,a,b,c
51,0.053989,-0.322717,0.910188
52,-2.318915,0.128592,1.220326
53,1.590394,-1.11582,-0.770429
54,0.122549,0.128416,-0.169774
55,-0.464706,-0.545434,1.025356
56,0.789919,0.996167,1.105495
57,-0.936007,-0.635059,3.051835
58,0.779845,0.691458,-0.59885
59,-1.763304,-2.859792,-0.275393
60,-0.945685,0.919806,1.370618


In [73]:
h.close()

In [74]:
df.to_hdf('data.h5', 'obj3', format='table')

In [75]:
pd.read_hdf('data.h5', 'obj3', where=['index < 10'])

Unnamed: 0,a,b,c
0,1.204988,-0.198334,-0.536155
1,1.33432,0.27689,0.221561
2,1.298035,-1.543549,0.695465
3,-0.727722,0.93737,-0.384016
4,0.033393,0.201834,-2.171184
5,1.35902,-0.907481,1.616598
6,-1.512363,0.104897,-0.095284
7,-1.929285,0.664853,0.431489
8,-0.735501,-0.375128,0.097968
9,-1.575761,-0.097121,-1.378763


In [79]:
df.to_excel('example.xlsx', 'Sheet1')

In [80]:
!ls

data.h5       example2.csv  example5.csv  example.xlsx	sample_data
df_pickle     example3.txt  example6.csv  output.csv	ts.csv
example1.csv  example4.csv  example.json  output.json


In [81]:
pd.read_excel('example.xlsx', 'Sheet1')

Unnamed: 0.1,Unnamed: 0,a,b,c
0,0,1.204988,-0.198334,-0.536155
1,1,1.334320,0.276890,0.221561
2,2,1.298035,-1.543549,0.695465
3,3,-0.727722,0.937370,-0.384016
4,4,0.033393,0.201834,-2.171184
...,...,...,...,...
95,95,-1.227406,-0.325282,-0.411787
96,96,-0.455761,-1.156967,0.326606
97,97,-0.196427,-0.974067,0.645184
98,98,-0.435536,0.493858,-0.470976
