# 판다스 데이터 입출력

In [1]:
import pandas as pd

## CSV 파일 입력

### 1. Magic 명령어
> % 로 시작하는 명령어 

> 파일 만들고 내용넣기

In [3]:
%%writefile sample1.csv
c1, c2, c3
1, 1.11, one
2, 2.22, two
3, 3.33, three

Overwriting sample1.csv


In [10]:
%%read_csv('sample1.csv')

UsageError: Cell magic `%%read_csv('//sample1.csv')` not found.


### 2. read_csv
> csv 파일 읽기

In [18]:
pd.read_csv('sample1.csv',header=None)

Unnamed: 0,0,1,2
0,c1,c2,c3
1,1,1.11,one
2,2,2.22,two
3,3,3.33,three


In [20]:
pd.read_csv('sample1.csv',names=['c1', 'c2', 'c3'])

Unnamed: 0,c1,c2,c3
0,c1,c2,c3
1,1,1.11,one
2,2,2.22,two
3,3,3.33,three


In [22]:
pd.read_csv('sample1.csv', index_col='c1')

Unnamed: 0_level_0,c2,c3
c1,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1.11,one
2,2.22,two
3,3.33,three


### 3. csv 아닌 파일 read_csv

In [24]:
%%writefile sample3.txt
c1        c2        c3        c4
0.179181 -1.538472  1.347553  0.43381
1.024209  0.087307 -1.281997  0.49265
0.417899 -2.002308  0.255245 -1.10515

Overwriting sample3.txt


In [28]:
pd.read_csv('sample3.txt') #제대로 실행 x

Unnamed: 0,c1 c2 c3 c4
0,0.179181 -1.538472 1.347553 0.43381
1,1.024209 0.087307 -1.281997 0.49265
2,0.417899 -2.002308 0.255245 -1.10515


In [31]:
pd.read_csv('sample3.txt', sep='\s+') #txt 일떄 스페이스 공간 구분방법

Unnamed: 0,c1,c2,c3,c4
0,0.179181,-1.538472,1.347553,0.43381
1,1.024209,0.087307,-1.281997,0.49265
2,0.417899,-2.002308,0.255245,-1.10515


### 4. 자료파일중 불필요한 정보 있을시
> skiprows

In [32]:
%%writefile sample4.txt
파일 제목: sample4.txt
데이터 포맷의 설명:
c1, c2, c3
1, 1.11, one
2, 2.22, two
3, 3.33, three

Writing sample4.txt


In [36]:
pd.read_csv('sample4.txt', skiprows=[0,2])

Unnamed: 0,c1,c2,c3
0,1,1.11,one
1,2,2.22,two
2,3,3.33,three


### 5. 파일에 특정한 값을 NaN으로 취급하고 싶으면
> na_values 

In [37]:
%%writefile sample5.csv
c1, c2, c3
1, 1.11, one
2, , two
누락, 3.33, three

Writing sample5.csv


In [41]:
pd.read_csv('sample5.csv', na_values=['누락']) ## , 만 제대로 찍혀있으면 공백생김

Unnamed: 0,c1,c2,c3
0,1.0,1.11,one
1,2.0,,two
2,,3.33,three


In [42]:
pd.read_csv('sample5.csv', na_values=['누락',' ']) # 공백으로 두는거는 좋지 않다

Unnamed: 0,c1,c2,c3
0,1.0,1.11,one
1,2.0,,two
2,,3.33,three


### 6. csv 파일 데이터 프레임화

In [44]:
df = pd.read_csv('sample5.csv', na_values=['누락',' '])
df

Unnamed: 0,c1,c2,c3
0,1.0,1.11,one
1,2.0,,two
2,,3.33,three


## CSV 파일 출력

In [59]:
df = pd.read_csv('sample5.csv', na_values=['누락',' '])
df

Unnamed: 0,c1,c2,c3
0,1.0,1.11,one
1,2.0,,two
2,,3.33,three


In [64]:
df.to_csv('sample6.csv')

In [65]:
pd.read_csv('sample6.csv')

Unnamed: 0.1,Unnamed: 0,c1,c2,c3
0,0,1.0,1.11,one
1,1,2.0,,two
2,2,,3.33,three


In [63]:
df.to_csv('sample7.csv', sep='|', index=False) ## 위에 unnamed 없애기 위해 index=

In [58]:
pd.read_csv('sample7.csv', sep='|')

Unnamed: 0,c1,c2,c3
0,1.0,1.11,one
1,2.0,,two
2,,3.33,three


## 인터넷 상의 CSV 파일 입력

In [66]:
df = pd.read_csv("https://raw.githubusercontent.com/datascienceschool/docker_rpython/master/data/titanic.csv")
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C
