In [1]:
import numpy as np
import pandas as pd

In [2]:
# One-dimensional data: a Series.
# The NaN entry means the values are stored as float64.
s = pd.Series(data=[1, 2, np.nan])

In [3]:
# Display the Series: index labels on the left, values on the right, dtype at the bottom.
s

0    1.0
1    2.0
2    NaN
dtype: float64

In [4]:
# Fetch the element whose index label is 0 (the first element here).
s[0]

1.0

In [5]:
# Sum of the values; the NaN entry is skipped, so the result is 1 + 2 = 3.0.
s.sum()

3.0

In [6]:
# Two-dimensional data: a DataFrame built from a dict that maps each
# column name to that column's values.
df = pd.DataFrame(data=dict(A=[1, 2], B=[3, 4]))
df

Unnamed: 0,A,B
0,1,3
1,2,4


In [7]:
# Data type of each column (both columns hold integers here).
df.dtypes

A    int64
B    int64
dtype: object

In [8]:
# A 6 x 4 frame of random values.
# No index or column names are given, so both default to 0, 1, 2, ...
# NOTE: no random seed is set, so the numbers differ on every run.
df = pd.DataFrame(data=np.random.randn(6, 4))
df

Unnamed: 0,0,1,2,3
0,-2.154644,1.186934,-1.021715,1.477142
1,-0.329277,0.725475,-0.701764,-0.660422
2,-0.555419,0.398304,-1.17025,1.948584
3,-0.300914,-0.237531,0.762758,0.067884
4,1.179006,-0.4444,-1.083847,0.057104
5,0.130221,1.057053,-1.685407,-0.812048


In [9]:
# Dates can be used as the row labels (index):
# here six consecutive days starting on 2022-01-01.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=pd.date_range(start='20220101', periods=6),
)

In [10]:
# Show the frame; the row labels are now dates.
df

Unnamed: 0,0,1,2,3
2022-01-01,0.965022,-0.782042,-1.562792,0.217369
2022-01-02,0.774866,-0.04605,-0.999756,0.80319
2022-01-03,0.002557,-1.925166,-1.544157,0.196664
2022-01-04,-0.930639,0.161214,0.499294,0.921899
2022-01-05,-0.453628,0.25961,-0.490956,-0.876658
2022-01-06,0.002772,-1.95691,0.808614,1.451701


In [11]:
# Same layout as before, but the columns are now named A-D
# in addition to the date index on the rows.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=pd.date_range(start='20220101', periods=6),
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
2022-01-01,1.098956,0.502086,-1.04226,0.071623
2022-01-02,-0.08178,-1.575831,-1.312229,-0.448505
2022-01-03,-0.980117,0.614178,-1.194256,-0.473033
2022-01-04,1.545466,-0.756314,-0.678088,1.074305
2022-01-05,0.883891,-0.980443,0.9928,-0.39495
2022-01-06,0.707297,-0.924857,0.324163,-0.833199


In [12]:
# Show rows from the top of the frame.
# The argument is how many rows to return, counted from the top.
df.head(2)

Unnamed: 0,A,B,C,D
2022-01-01,1.098956,0.502086,-1.04226,0.071623
2022-01-02,-0.08178,-1.575831,-1.312229,-0.448505


In [13]:
# Show rows from the bottom of the frame (here the last three).
df.tail(3)

Unnamed: 0,A,B,C,D
2022-01-04,1.545466,-0.756314,-0.678088,1.074305
2022-01-05,0.883891,-0.980443,0.9928,-0.39495
2022-01-06,0.707297,-0.924857,0.324163,-0.833199


In [14]:
# Row labels: a DatetimeIndex with daily frequency.
df.index

DatetimeIndex(['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04',
               '2022-01-05', '2022-01-06'],
              dtype='datetime64[ns]', freq='D')

In [15]:
# Column labels.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [16]:
# The underlying data as a NumPy array, without the row and column labels.
df.values

array([[ 1.09895636,  0.50208646, -1.04226027,  0.07162276],
       [-0.08177983, -1.57583122, -1.31222937, -0.44850516],
       [-0.98011672,  0.61417777, -1.1942564 , -0.47303333],
       [ 1.54546597, -0.75631416, -0.67808785,  1.07430514],
       [ 0.88389077, -0.98044252,  0.9928    , -0.39495034],
       [ 0.70729725, -0.92485731,  0.32416315, -0.83319899]])

In [17]:
# Summary statistics per column: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.528952,-0.520197,-0.484978,-0.167293
std,0.912828,0.880632,0.935287,0.67327
min,-0.980117,-1.575831,-1.312229,-0.833199
25%,0.115489,-0.966546,-1.156257,-0.466901
50%,0.795594,-0.840586,-0.860174,-0.421728
75%,1.04519,0.187486,0.0736,-0.045021
max,1.545466,0.614178,0.9928,1.074305


In [18]:
# Transpose the frame: rows become columns and columns become rows.
df.T

Unnamed: 0,2022-01-01,2022-01-02,2022-01-03,2022-01-04,2022-01-05,2022-01-06
A,1.098956,-0.08178,-0.980117,1.545466,0.883891,0.707297
B,0.502086,-1.575831,0.614178,-0.756314,-0.980443,-0.924857
C,-1.04226,-1.312229,-1.194256,-0.678088,0.9928,0.324163
D,0.071623,-0.448505,-0.473033,1.074305,-0.39495,-0.833199


In [19]:
# Show the rows sorted by the values in column B, in ascending order.
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2022-01-02,-0.08178,-1.575831,-1.312229,-0.448505
2022-01-05,0.883891,-0.980443,0.9928,-0.39495
2022-01-06,0.707297,-0.924857,0.324163,-0.833199
2022-01-04,1.545466,-0.756314,-0.678088,1.074305
2022-01-01,1.098956,0.502086,-1.04226,0.071623
2022-01-03,-0.980117,0.614178,-1.194256,-0.473033


In [20]:
# Slice by position: the three rows starting at position 0 (positions 0, 1 and 2).
df[0:3]

Unnamed: 0,A,B,C,D
2022-01-01,1.098956,0.502086,-1.04226,0.071623
2022-01-02,-0.08178,-1.575831,-1.312229,-0.448505
2022-01-03,-0.980117,0.614178,-1.194256,-0.473033


In [21]:
# Slice rows by date label; unlike a positional slice, the end label (2022-01-04) is included.
df['20220102':'20220104']

Unnamed: 0,A,B,C,D
2022-01-02,-0.08178,-1.575831,-1.312229,-0.448505
2022-01-03,-0.980117,0.614178,-1.194256,-0.473033
2022-01-04,1.545466,-0.756314,-0.678088,1.074305


In [22]:
# Select the row labelled 2022-01-01; the result is a Series indexed by column name.
df.loc['20220101']

A    1.098956
B    0.502086
C   -1.042260
D    0.071623
Name: 2022-01-01 00:00:00, dtype: float64

In [23]:
# One row (2022-01-02), restricted to columns A and B.
df.loc['20220102', ['A', 'B']]

A   -0.081780
B   -1.575831
Name: 2022-01-02 00:00:00, dtype: float64

In [24]:
# A range of dates (both endpoints included), restricted to columns A and B.
df.loc['20220102':'20220104', ['A', 'B']]

Unnamed: 0,A,B
2022-01-02,-0.08178,-1.575831
2022-01-03,-0.980117,0.614178
2022-01-04,1.545466,-0.756314


In [25]:
# All rows (":"), columns A and B only.
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2022-01-01,1.098956,0.502086
2022-01-02,-0.08178,-1.575831
2022-01-03,-0.980117,0.614178
2022-01-04,1.545466,-0.756314
2022-01-05,0.883891,-0.980443
2022-01-06,0.707297,-0.924857


In [26]:
# The value at row position 0, column position 0 (i.e. 2022-01-01, column A).
# iloc selects by integer position instead of by label.
df.iloc[0, 0]

1.0989563572031302

In [27]:
# The value at row position 2, column position 3 (i.e. 2022-01-03, column D).
df.iloc[2, 3]

-0.4730333287976815

In [28]:
# Rows at positions 0-1 and columns at positions 0-2.
# The end of each slice is excluded, so 0:2 gives two rows and 0:3 gives three columns.
df.iloc[0:2, 0:3]

Unnamed: 0,A,B,C
2022-01-01,1.098956,0.502086,-1.04226
2022-01-02,-0.08178,-1.575831,-1.312229


In [29]:
# Keep only the rows where the value in column A is greater than 0.
df[df.A > 0]

Unnamed: 0,A,B,C,D
2022-01-01,1.098956,0.502086,-1.04226,0.071623
2022-01-04,1.545466,-0.756314,-0.678088,1.074305
2022-01-05,0.883891,-0.980443,0.9928,-0.39495
2022-01-06,0.707297,-0.924857,0.324163,-0.833199


In [30]:
# Element-wise condition: values that are not greater than 0 come back as NaN.
df[df > 0]

Unnamed: 0,A,B,C,D
2022-01-01,1.098956,0.502086,,0.071623
2022-01-02,,,,
2022-01-03,,0.614178,,
2022-01-04,1.545466,,,1.074305
2022-01-05,0.883891,,0.9928,
2022-01-06,0.707297,,0.324163,


In [31]:
# df itself is unchanged by the filtering in the previous cells.
df

Unnamed: 0,A,B,C,D
2022-01-01,1.098956,0.502086,-1.04226,0.071623
2022-01-02,-0.08178,-1.575831,-1.312229,-0.448505
2022-01-03,-0.980117,0.614178,-1.194256,-0.473033
2022-01-04,1.545466,-0.756314,-0.678088,1.074305
2022-01-05,0.883891,-0.980443,0.9928,-0.39495
2022-01-06,0.707297,-0.924857,0.324163,-0.833199


In [32]:
# Make a copy of df and add a new column E (strings) to the copy.
df2 = df.copy()
df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
df2

Unnamed: 0,A,B,C,D,E
2022-01-01,1.098956,0.502086,-1.04226,0.071623,one
2022-01-02,-0.08178,-1.575831,-1.312229,-0.448505,one
2022-01-03,-0.980117,0.614178,-1.194256,-0.473033,two
2022-01-04,1.545466,-0.756314,-0.678088,1.074305,three
2022-01-05,0.883891,-0.980443,0.9928,-0.39495,four
2022-01-06,0.707297,-0.924857,0.324163,-0.833199,three


In [33]:
# Rows of df2 whose value in column E is 'one' or 'four'.
df2[df2['E'].isin(['one', 'four'])]

Unnamed: 0,A,B,C,D,E
2022-01-01,1.098956,0.502086,-1.04226,0.071623,one
2022-01-02,-0.08178,-1.575831,-1.312229,-0.448505,one
2022-01-05,0.883891,-0.980443,0.9928,-0.39495,four


In [34]:
# The original df still has only columns A-D; column E was added to the copy df2.
df

Unnamed: 0,A,B,C,D
2022-01-01,1.098956,0.502086,-1.04226,0.071623
2022-01-02,-0.08178,-1.575831,-1.312229,-0.448505
2022-01-03,-0.980117,0.614178,-1.194256,-0.473033
2022-01-04,1.545466,-0.756314,-0.678088,1.074305
2022-01-05,0.883891,-0.980443,0.9928,-0.39495
2022-01-06,0.707297,-0.924857,0.324163,-0.833199


In [35]:
# A Series holding 1..6, indexed by six consecutive days starting on 2022-01-01.
s = pd.Series(
    data=list(range(1, 7)),
    index=pd.date_range(start='20220101', periods=6),
)

In [36]:
# Show the new date-indexed Series.
s

2022-01-01    1
2022-01-02    2
2022-01-03    3
2022-01-04    4
2022-01-05    5
2022-01-06    6
Freq: D, dtype: int64

In [37]:
# Add the Series to df as a new column E; this modifies df itself.
# The values are matched to df's rows by their shared date index.
df['E'] = s

In [38]:
# df now has the extra column E.
df

Unnamed: 0,A,B,C,D,E
2022-01-01,1.098956,0.502086,-1.04226,0.071623,1
2022-01-02,-0.08178,-1.575831,-1.312229,-0.448505,2
2022-01-03,-0.980117,0.614178,-1.194256,-0.473033,3
2022-01-04,1.545466,-0.756314,-0.678088,1.074305,4
2022-01-05,0.883891,-0.980443,0.9928,-0.39495,5
2022-01-06,0.707297,-0.924857,0.324163,-0.833199,6


In [39]:
# Shift all values down by one row: the first row becomes NaN and the
# last row's values drop out of the result. The index itself does not move.
df.shift(1)

Unnamed: 0,A,B,C,D,E
2022-01-01,,,,,
2022-01-02,1.098956,0.502086,-1.04226,0.071623,1.0
2022-01-03,-0.08178,-1.575831,-1.312229,-0.448505,2.0
2022-01-04,-0.980117,0.614178,-1.194256,-0.473033,3.0
2022-01-05,1.545466,-0.756314,-0.678088,1.074305,4.0
2022-01-06,0.883891,-0.980443,0.9928,-0.39495,5.0


In [40]:
# Start over with a small 2 x 2 frame of random values (this replaces the previous df).
df = pd.DataFrame(data=np.random.randn(2, 2))

In [41]:
# Show the 2 x 2 frame.
df

Unnamed: 0,0,1
0,0.225227,0.377279
1,0.205236,0.155443


In [42]:
# Frames can be joined together: here df is stacked on top of itself.
# The original index labels are kept, so 0 and 1 each appear twice.
pd.concat([df, df])

Unnamed: 0,0,1
0,0.225227,0.377279
1,0.205236,0.155443
0,0.225227,0.377279
1,0.205236,0.155443


In [43]:
# An 8 x 4 frame of random values between 0 and 1, with columns named A-D.
df = pd.DataFrame(
    data=np.random.rand(8, 4),
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
0,0.558699,0.830106,0.605302,0.628776
1,0.974276,0.203647,0.432165,0.059277
2,0.041422,0.245126,0.513718,0.173761
3,0.243906,0.888911,0.450714,0.119645
4,0.083429,0.331614,0.879413,0.097733
5,0.84835,0.104232,0.522244,0.121101
6,0.925025,0.649225,0.812027,0.980325
7,0.618044,0.764951,0.193637,0.224037


In [44]:
# Take the first row (position 0) as a Series; its index is the column names A-D.
s = df.iloc[0]
s

A    0.558699
B    0.830106
C    0.605302
D    0.628776
Name: 0, dtype: float64

In [45]:
# DataFrame.append is deprecated (and removed in pandas 2.0), so pd.concat is used instead.
# Passing the Series as-is would not line up with the frame's columns,
# so it is wrapped as a one-row frame with pd.DataFrame([s]) before being added.
# NOTE: df is reassigned here, so re-running this cell appends the row again.
df = pd.concat([df, pd.DataFrame([s])])
df

Unnamed: 0,A,B,C,D
0,0.558699,0.830106,0.605302,0.628776
1,0.974276,0.203647,0.432165,0.059277
2,0.041422,0.245126,0.513718,0.173761
3,0.243906,0.888911,0.450714,0.119645
4,0.083429,0.331614,0.879413,0.097733
5,0.84835,0.104232,0.522244,0.121101
6,0.925025,0.649225,0.812027,0.980325
7,0.618044,0.764951,0.193637,0.224037
0,0.558699,0.830106,0.605302,0.628776


In [46]:
# A plain concat keeps the original index labels, so they are no longer sequential;
# ignore_index=True renumbers the rows from 0.
# NOTE: df already contains the row appended in the previous cell, so the result
# holds that row twice (labels 8 and 9). Re-running this cell appends it yet again.
df = pd.concat([df, pd.DataFrame([s])], ignore_index=True)
df

Unnamed: 0,A,B,C,D
0,0.558699,0.830106,0.605302,0.628776
1,0.974276,0.203647,0.432165,0.059277
2,0.041422,0.245126,0.513718,0.173761
3,0.243906,0.888911,0.450714,0.119645
4,0.083429,0.331614,0.879413,0.097733
5,0.84835,0.104232,0.522244,0.121101
6,0.925025,0.649225,0.812027,0.980325
7,0.618044,0.764951,0.193637,0.224037
8,0.558699,0.830106,0.605302,0.628776
9,0.558699,0.830106,0.605302,0.628776


In [47]:
# Grouping: build a frame with a key column A ('foo' / 'bar' alternating)
# and a column B of random values.
df = pd.DataFrame(
    data={
        'A': ['foo', 'bar'] * 2,
        'B': np.random.randn(4),
    }
)

In [48]:
# Show the frame that will be grouped.
df

Unnamed: 0,A,B
0,foo,-1.105524
1,bar,0.567248
2,foo,-0.849216
3,bar,0.043105


In [49]:
# Group the rows by the value in column A and sum column B within each group.
df.groupby('A').sum()

Unnamed: 0_level_0,B
A,Unnamed: 1_level_1
bar,0.610354
foo,-1.95474


In [50]:
import pandas_datareader

In [53]:
# Fetch Apple (ticker AAPL) stock prices from the 'yahoo' data source,
# starting from 2022-01-01; the result is returned as a DataFrame.
# Relies on `import pandas_datareader` having been run in the preceding cell.
# NOTE(review): the recorded output begins at 2021-12-31, one day before the
# requested start date — confirm whether this comes from the data source.
df = pandas_datareader.data.DataReader(
    name='AAPL',
    data_source='yahoo',
    start='2022-01-01',
)

In [54]:
# Show the downloaded price table: one row per date, with columns
# High, Low, Open, Close, Volume and Adj Close.
df

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-12-31,179.229996,177.259995,178.089996,177.570007,64062300.0,176.838242
2022-01-03,182.880005,177.710007,177.830002,182.009995,104487900.0,181.259918
2022-01-04,182.940002,179.119995,182.630005,179.699997,99310400.0,178.959457
2022-01-05,180.169998,174.639999,179.610001,174.919998,94537600.0,174.199158
2022-01-06,175.300003,171.639999,172.699997,172.000000,96904000.0,171.291183
...,...,...,...,...,...,...
2022-10-28,157.500000,147.820007,148.199997,155.740005,164762400.0,155.740005
2022-10-31,154.240005,151.919998,153.160004,153.339996,97943200.0,153.339996
2022-11-01,155.449997,149.130005,155.080002,150.649994,80379300.0,150.649994
2022-11-02,152.169998,145.000000,148.949997,145.029999,93604600.0,145.029999
