In [1]:
import numpy as np
import pandas as pd  # pandas itself imports and uses numpy internally

# Build a Series from a plain list. No index is passed; the output below shows
# the default labels 0..2 and dtype int64.
s = pd.Series([1, 2, 3])
s

0    1
1    2
2    3
dtype: int64

In [2]:
# Sum of all values in the Series (1 + 2 + 3).
s.sum()

6

In [3]:
# Two-column frame given row by row: column A holds 1, 2 and column B holds 3, 4.
df = pd.DataFrame([[1, 3], [2, 4]], columns=['A', 'B'])
df

Unnamed: 0,A,B
0,1,3
1,2,4


In [4]:
# Demo frame: 6 rows x 4 columns of standard-normal samples, indexed by six
# consecutive calendar days starting 2024-01-01, with columns A-D.
# The samples come from a generator with a fixed seed so that Restart & Run All
# reproduces the same table; the previous np.random.randn call drew from
# NumPy's unseeded global state and gave different values on every run.
# NOTE(review): re-run the notebook after this change to refresh the outputs.
df = pd.DataFrame(
    np.random.default_rng(0).standard_normal((6, 4)),
    index=pd.date_range('20240101', periods=6),  # six consecutive days
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
2024-01-01,-0.874699,1.455105,1.422594,0.301177
2024-01-02,0.365047,0.130609,0.120021,-1.477649
2024-01-03,0.901611,-0.71319,-0.153988,-2.25282
2024-01-04,1.391324,-0.3148,2.210178,-0.030623
2024-01-05,-1.15775,1.252721,-1.593243,-1.921778
2024-01-06,-0.078472,-1.350248,0.296741,-0.116334


In [5]:
# Swap rows and columns: the dates become column labels and A-D the row labels.
df.transpose()

Unnamed: 0,2024-01-01,2024-01-02,2024-01-03,2024-01-04,2024-01-05,2024-01-06
A,-0.874699,0.365047,0.901611,1.391324,-1.15775,-0.078472
B,1.455105,0.130609,-0.71319,-0.3148,1.252721,-1.350248
C,1.422594,0.120021,-0.153988,2.210178,-1.593243,0.296741
D,0.301177,-1.477649,-2.25282,-0.030623,-1.921778,-0.116334


In [6]:
# First row only.
df.head(n=1)

Unnamed: 0,A,B,C,D
2024-01-01,-0.874699,1.455105,1.422594,0.301177


In [7]:
# Last three rows.
df.tail(n=3)

Unnamed: 0,A,B,C,D
2024-01-04,1.391324,-0.3148,2.210178,-0.030623
2024-01-05,-1.15775,1.252721,-1.593243,-1.921778
2024-01-06,-0.078472,-1.350248,0.296741,-0.116334


In [8]:
# Row labels of the frame (shown below as a DatetimeIndex with daily frequency).
df.index

DatetimeIndex(['2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04',
               '2024-01-05', '2024-01-06'],
              dtype='datetime64[ns]', freq='D')

In [9]:
# Column labels of the frame.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [10]:
# Underlying data as a NumPy array (row/column labels are dropped).
# .to_numpy() is the accessor the pandas documentation recommends over .values.
df.to_numpy()

array([[-0.87469942,  1.45510502,  1.42259413,  0.30117695],
       [ 0.36504742,  0.13060925,  0.12002119, -1.4776487 ],
       [ 0.90161052, -0.71318971, -0.15398762, -2.25281991],
       [ 1.39132388, -0.31479953,  2.21017846, -0.03062266],
       [-1.15775032,  1.25272122, -1.59324274, -1.92177816],
       [-0.07847167, -1.35024833,  0.29674125, -0.11633381]])

In [11]:
# Per-column summary statistics: count, mean, std, min, quartiles and max
# (see the output below).
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.091177,0.0767,0.383717,-0.916338
std,0.994355,1.104474,1.318319,1.097179
min,-1.15775,-1.350248,-1.593243,-2.25282
25%,-0.675642,-0.613592,-0.085485,-1.810746
50%,0.143288,-0.092095,0.208381,-0.796991
75%,0.76747,0.972193,1.141131,-0.05205
max,1.391324,1.455105,2.210178,0.301177


In [12]:
# Rows reordered by the values in column B, smallest first.
df.sort_values('B', ascending=True)

Unnamed: 0,A,B,C,D
2024-01-06,-0.078472,-1.350248,0.296741,-0.116334
2024-01-03,0.901611,-0.71319,-0.153988,-2.25282
2024-01-04,1.391324,-0.3148,2.210178,-0.030623
2024-01-02,0.365047,0.130609,0.120021,-1.477649
2024-01-05,-1.15775,1.252721,-1.593243,-1.921778
2024-01-01,-0.874699,1.455105,1.422594,0.301177


In [13]:
# First three rows by integer position (the end position 3 is excluded).
# .iloc makes it explicit that the slice is positional rather than label-based.
df.iloc[0:3]

Unnamed: 0,A,B,C,D
2024-01-01,-0.874699,1.455105,1.422594,0.301177
2024-01-02,0.365047,0.130609,0.120021,-1.477649
2024-01-03,0.901611,-0.71319,-0.153988,-2.25282


In [14]:
# Rows selected by date label. .loc makes it explicit that this is a label
# slice; unlike a positional slice, the end label is included, so the 4th day
# (2024-01-04) is part of the result.
df.loc['20240101':'20240104']

Unnamed: 0,A,B,C,D
2024-01-01,-0.874699,1.455105,1.422594,0.301177
2024-01-02,0.365047,0.130609,0.120021,-1.477649
2024-01-03,0.901611,-0.71319,-0.153988,-2.25282
2024-01-04,1.391324,-0.3148,2.210178,-0.030623


In [15]:
# Select a single row by its date label; the result is a Series holding that
# row's value for each column.
df.loc['20240102']

A    0.365047
B    0.130609
C    0.120021
D   -1.477649
Name: 2024-01-02 00:00:00, dtype: float64

In [16]:
# Same row by label, restricted to columns A and B.
df.loc['20240102', ['A', 'B']]

A    0.365047
B    0.130609
Name: 2024-01-02 00:00:00, dtype: float64

In [17]:
# Label slice of rows (both end dates appear in the output) combined with a
# list of column labels.
df.loc['20240101':'20240104', ['A', 'B']]

Unnamed: 0,A,B
2024-01-01,-0.874699,1.455105
2024-01-02,0.365047,0.130609
2024-01-03,0.901611,-0.71319
2024-01-04,1.391324,-0.3148


In [18]:
# All rows, columns A and B only.
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2024-01-01,-0.874699,1.455105
2024-01-02,0.365047,0.130609
2024-01-03,0.901611,-0.71319
2024-01-04,1.391324,-0.3148
2024-01-05,-1.15775,1.252721
2024-01-06,-0.078472,-1.350248


In [19]:
# Scalar at the first row and first column, addressed by integer position.
df.iloc[0, 0]

-0.874699417741453

In [20]:
# Positional block: first two rows and first two columns (end positions are
# excluded, as the 2x2 output below shows).
df.iloc[0:2, 0:2]

Unnamed: 0,A,B
2024-01-01,-0.874699,1.455105
2024-01-02,0.365047,0.130609


In [21]:
# Boolean indexing: keep only the rows for which the condition on column A
# (value greater than zero) is True.
df.loc[df['A'] > 0]

Unnamed: 0,A,B,C,D
2024-01-02,0.365047,0.130609,0.120021,-1.477649
2024-01-03,0.901611,-0.71319,-0.153988,-2.25282
2024-01-04,1.391324,-0.3148,2.210178,-0.030623


In [22]:
# Element-wise filter over the whole frame: values satisfying the condition are
# kept, every other cell becomes NaN, and the frame keeps its shape.
df.where(df > 0)

Unnamed: 0,A,B,C,D
2024-01-01,,1.455105,1.422594,0.301177
2024-01-02,0.365047,0.130609,0.120021,
2024-01-03,0.901611,,,
2024-01-04,1.391324,,2.210178,
2024-01-05,,1.252721,,
2024-01-06,,,0.296741,


In [23]:
# Derive df2 from df with an additional string column E; df itself is not modified.
df2 = df.assign(E=['one', 'one', 'two', 'three', 'four', 'three'])
df2

Unnamed: 0,A,B,C,D,E
2024-01-01,-0.874699,1.455105,1.422594,0.301177,one
2024-01-02,0.365047,0.130609,0.120021,-1.477649,one
2024-01-03,0.901611,-0.71319,-0.153988,-2.25282,two
2024-01-04,1.391324,-0.3148,2.210178,-0.030623,three
2024-01-05,-1.15775,1.252721,-1.593243,-1.921778,four
2024-01-06,-0.078472,-1.350248,0.296741,-0.116334,three


In [24]:
# Keep only the rows whose E value is 'one' or 'four'.
df2.loc[df2['E'].isin(['one', 'four'])]

Unnamed: 0,A,B,C,D,E
2024-01-01,-0.874699,1.455105,1.422594,0.301177,one
2024-01-02,0.365047,0.130609,0.120021,-1.477649,one
2024-01-05,-1.15775,1.252721,-1.593243,-1.921778,four


In [25]:
# Shift every column down by one row: the first row becomes empty and the last
# row's values drop off (useful, e.g., when making room for a new row).
df2.shift(periods=1)

Unnamed: 0,A,B,C,D,E
2024-01-01,,,,,
2024-01-02,-0.874699,1.455105,1.422594,0.301177,one
2024-01-03,0.365047,0.130609,0.120021,-1.477649,one
2024-01-04,0.901611,-0.71319,-0.153988,-2.25282,two
2024-01-05,1.391324,-0.3148,2.210178,-0.030623,three
2024-01-06,-1.15775,1.252721,-1.593243,-1.921778,four


In [26]:
# Concatenation
# Small 2x2 frame of standard-normal samples used by the concat examples.
# Note: this rebinds df, replacing the date-indexed frame from the earlier cells.
# A fixed-seed generator replaces the unseeded np.random.randn call so the
# values are identical on every run.
# NOTE(review): re-run the notebook after this change to refresh the outputs.
df = pd.DataFrame(np.random.default_rng(1).standard_normal((2, 2)))
df

Unnamed: 0,0,1
0,0.747319,0.093683
1,0.794083,0.507585


In [27]:
# Stack two copies of df vertically. The original row labels are kept, so they
# repeat in the result.
pd.concat([df, df], axis='index')

Unnamed: 0,0,1
0,0.747319,0.093683
1,0.794083,0.507585
0,0.747319,0.093683
1,0.794083,0.507585


In [28]:
# Place two copies of df side by side; rows are matched on their labels.
pd.concat([df, df], axis='columns')

Unnamed: 0,0,1,0.1,1.1
0,0.747319,0.093683,0.747319,0.093683
1,0.794083,0.507585,0.794083,0.507585


In [29]:
# Frame for the groupby example: A is the grouping key ('foo' / 'bar') and B
# holds four standard-normal samples. This rebinds df again.
# A fixed-seed generator replaces the unseeded np.random.randn call so the
# values are identical on every run.
# NOTE(review): re-run the notebook after this change to refresh the outputs.
df = pd.DataFrame({
    'A': ['foo', 'bar', 'foo', 'bar'],
    'B': np.random.default_rng(2).standard_normal(4),
})
df

Unnamed: 0,A,B
0,foo,-0.558642
1,bar,2.765794
2,foo,-1.453033
3,bar,0.324757


In [30]:
# Total of column B within each group of column A.
df.groupby(by='A').agg('sum')

Unnamed: 0_level_0,B
A,Unnamed: 1_level_1
bar,3.090551
foo,-2.011675
