In [3]:
import pandas as pd

In [2]:
%%writefile sample.csv
num1, num2, num3
1, 일, one
2, 이, two
3, 삼, three

Overwriting sample.csv


In [4]:
pd.read_csv('sample.csv')

Unnamed: 0,num1,num2,num3
0,1,일,one
1,2,이,two
2,3,삼,three


In [6]:
#특정한 열을 행의 인덱스로 사용할 때 'index_col'
pd.read_csv('sample.csv', index_col='num1')

Unnamed: 0_level_0,num2,num3
num1,Unnamed: 1_level_1,Unnamed: 2_level_1
1,일,one
2,이,two
3,삼,three


In [7]:
%%writefile sample3.txt
c1        c2        c3        c4
0.179181 -1.538472  1.347553  0.43381
1.024209  0.087307 -1.281997  0.49265
0.417899 -2.002308  0.255245 -1.10515

Writing sample3.txt


In [9]:
#데이터의 구분자가 쉼표가 아닐경우
# sep 인수를 지정하여 테이블로 나타낼 수 있다.
# \s+ 는 정규식
pd.read_table('sample3.txt', sep='\s+')

Unnamed: 0,c1,c2,c3,c4
0,0.179181,-1.538472,1.347553,0.43381
1,1.024209,0.087307,-1.281997,0.49265
2,0.417899,-2.002308,0.255245,-1.10515


In [10]:
%%writefile sample4.txt
파일제목 : sample4.txt
데이터 포맷 설명 :
num1, num2, num3
1,일,one
2,이,two
3,삼,three

Writing sample4.txt


In [11]:
 특정 행을 건너뛸때는 sikprow를 사용한다.
pd.read_csv('sample4.txt', skiprows=[0,1])

Unnamed: 0,num1,num2,num3
0,1,일,one
1,2,이,two
2,3,삼,three


In [15]:
%%writefile sample5.txt
num1, num2, num3
1,일,one
2, ,two
누락,삼,three

Overwriting sample5.txt


In [17]:
#특정 값을 NAN으로 처리할때는 no_value
data = pd.read_csv('sample5.txt', na_values=['누락'])
data

Unnamed: 0,num1,num2,num3
0,1.0,일,one
1,2.0,,two
2,,삼,three


In [23]:
# csv파일로 내보낼때는 to_csv(파일명)
data.to_csv('sample6.csv')
# type는 파일 내용을 읽어오는 함수
!type sample6.csv

,num1, num2, num3
0,1.0,�씪,one
1,2.0, ,two
2,,�궪,three


In [21]:
# 파일을 불러올때도 sep 인수 사용 가능
data.to_csv('sample7.txt', sep="|")
# type는 파일 내용을 읽어오는 함수
!type sample7.txt

|num1| num2| num3
0|1.0|�씪|one
1|2.0| |two
2||�궪|three


In [25]:
# NAN값 변경
data.to_csv('sample8.csv', na_rep='누락')
!type sample8.txt

,num1, num2, num3
0,1.0,�씪,one
1,2.0, ,two
2,�늻�씫,�궪,three


In [26]:
# 행 인덱스 바꾸기
data.index = ['a', 'b','c']
data

Unnamed: 0,num1,num2,num3
a,1.0,일,one
b,2.0,,two
c,,삼,three


In [29]:
# 외부 데이터 읽어오기
df = pd.read_csv("https://raw.githubusercontent.com/pandas-dev/pandas/master/doc/data/tips.csv")
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [30]:
# 보여줄 행의 개수 정하는 옵션.
pd.set_option("display.max_rows", 20)
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [31]:
# 데이터의 시작부분 출력
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [32]:
# 데이터의 끝부분 출력(행의 개수 선택)
df.tail(2)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
242,17.82,1.75,Male,No,Sat,Dinner,2
243,18.78,3.0,Female,No,Thur,Dinner,2


In [35]:
!pip install pandas-datareader

Collecting pandas-datareader

You should consider upgrading via the 'c:\users\user\anaconda3\python.exe -m pip install --upgrade pip' command.



  Downloading pandas_datareader-0.10.0-py3-none-any.whl (109 kB)
Installing collected packages: pandas-datareader
Successfully installed pandas-datareader-0.10.0


In [38]:
!pip install --user --upgrade pip



In [45]:
import datetime
dt_start = datetime.datetime(2015,1,1)
dt_end = "2015,12,31"

In [47]:
import pandas_datareader as pdr
gdp = pdr.get_data_fred('GS10')
gdp.tail()

Unnamed: 0_level_0,GS10
DATE,Unnamed: 1_level_1
2021-05-01,1.62
2021-06-01,1.52
2021-07-01,1.32
2021-08-01,1.28
2021-09-01,1.37
