In [129]:
import pandas as pd

### リストからDataFrameを作る

In [130]:
# Nested list: each inner list becomes one row of the DataFrame built from it.
data = [
    [1, 2],
    [3, 4],
]

In [131]:
# Wrap the nested list in a DataFrame, naming the rows and the columns explicitly.
df = pd.DataFrame(
    data,
    index=["row1", "row2"],
    columns=["col1", "col2"],
)

In [132]:
df

Unnamed: 0,col1,col2
row1,1,2
row2,3,4


### 辞書のリストからデータフレームを作る

In [133]:
# A list of dicts: each dict is one row and its keys become the column names.
data = [
    {"p1": 1, "p2": 2},
    {"p1": 1, "p2": 2},
]

In [134]:
# Column names come from the dict keys, so only the row labels are supplied here.
df = pd.DataFrame(
    data,
    index=["test1", "test2"],
)

In [135]:
df

Unnamed: 0,p1,p2
test1,1,2
test2,1,2


### 辞書の1列をindexにする

In [136]:
# Two records that each carry a "pin" value alongside the p1/p2 measurements.
data = [
    {"p1": 1, "p2": 2, "pin": 1857},
    {"p1": 3, "p2": 4, "pin": 1858},
]

In [137]:
# No index is passed, so the rows get default integer labels and "pin" is an
# ordinary column at this point.
df = pd.DataFrame(data)

In [138]:
df

Unnamed: 0,p1,p2,pin
0,1,2,1857
1,3,4,1858


In [139]:
# Promote the "pin" column to the row index. set_index returns a new frame,
# so the result is bound back to df.
df = df.set_index("pin")

In [140]:
df

Unnamed: 0_level_0,p1,p2
pin,Unnamed: 1_level_1,Unnamed: 2_level_1
1857,1,2
1858,3,4


### 列を並び替える

In [172]:
# Reorder the columns by listing them in the desired order.
# reindex does not raise on a label that is missing from the frame; it creates
# that column filled with NaN instead.
# NOTE(review): the recorded output shows p2 as empty (NaN) even though the frame
# built in the cells above has a p2 column. The execution count also jumps from
# 140 to 172, so the output presumably reflects intermediate kernel state --
# re-run from a fresh kernel to confirm.
df = df.reindex(columns=["p2", "p1"])

In [173]:
df

Unnamed: 0_level_0,p2,p1
pin,Unnamed: 1_level_1,Unnamed: 2_level_1
1857,,1
1858,,3


### 列を抽出する

In [174]:
# Sample records with four measurement fields (p1..p4) plus a "pin" identifier.
data = [
    dict(p1=1, p2=2, p3=1, p4=2, pin=1857),
    dict(p1=3, p2=4, p3=3, p4=4, pin=1858),
]

In [175]:
# Build the frame from the list of dicts; rows get default integer labels.
df = pd.DataFrame(data)

In [176]:
df

Unnamed: 0,p1,p2,p3,p4,pin
0,1,2,1,2,1857
1,3,4,3,4,1858


In [177]:
# Use "pin" as the row index so the remaining columns are p1..p4 only.
df = df.set_index("pin")

In [178]:
df

Unnamed: 0_level_0,p1,p2,p3,p4
pin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1857,1,2,1,2
1858,3,4,3,4


In [184]:
# Select columns by label with .loc: ":" keeps every row, and the list both
# picks the columns and fixes their order (p2 before p1).
df_tmp = df.loc[:, ["p2", "p1"]]

In [185]:
df_tmp

Unnamed: 0_level_0,p2,p1
pin,Unnamed: 1_level_1,Unnamed: 2_level_1
1857,2,1
1858,4,3


In [186]:
# reindex can also act as a column selector; a one-element list still yields a
# DataFrame (with the single column p1). Unlike label indexing, a label that
# does not exist would come back as an all-NaN column rather than an error.
df_tmp2 = df.reindex(columns=["p1"])

In [187]:
df_tmp2

Unnamed: 0_level_0,p1
pin,Unnamed: 1_level_1
1857,1
1858,3


In [189]:
# Select columns by integer position with .iloc: positions 1 and 2 are the
# p2 and p3 columns of this frame.
df_tmp3 = df.iloc[:, [1, 2]]

In [190]:
df_tmp3

Unnamed: 0_level_0,p2,p3
pin,Unnamed: 1_level_1,Unnamed: 2_level_1
1857,2,1
1858,4,3


### dfのindexをresetする

In [191]:
# Sample records with four measurement fields (p1..p4) plus a "pin" identifier.
data = [
    {"p1": 1, "p2": 2, "p3": 1, "p4": 2, "pin": 1857},
    {"p1": 3, "p2": 4, "p3": 3, "p4": 4, "pin": 1858},
]

In [192]:
# Fresh frame with default integer row labels; "pin" is a regular column.
df = pd.DataFrame(data)

In [193]:
# Shows what the frame looks like with "pin" as the index.
# The result is only displayed, not assigned: df itself keeps its default
# integer index, which is why the later reset_index() call adds an "index"
# column and leaves "pin" as a regular column.
df.set_index("pin")

Unnamed: 0_level_0,p1,p2,p3,p4
pin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1857,1,2,1,2
1858,3,4,3,4


In [196]:
# reset_index moves the current index into a regular column and installs a new
# default integer index. Here the current index is still the default one, so
# the new column is named "index" (see the output of the next cell).
df = df.reset_index()

In [197]:
df

Unnamed: 0,index,p1,p2,p3,p4,pin
0,0,1,2,1,2,1857
1,1,3,4,3,4,1858


In [199]:
# Pick the p1 and p2 columns by label for display only; df is not modified.
df.loc[:, ["p1", "p2"]]

Unnamed: 0,p1,p2
0,1,2
1,3,4


### カラム名を取得する

In [201]:
df

Unnamed: 0,index,p1,p2,p3,p4,pin
0,0,1,2,1,2,1857
1,1,3,4,3,4,1858


In [205]:
# Column labels as a plain Python list.
df.columns.tolist()

['index', 'p1', 'p2', 'p3', 'p4', 'pin']

In [206]:
# The raw .columns attribute is a pandas Index object rather than a list.
df.columns

Index(['index', 'p1', 'p2', 'p3', 'p4', 'pin'], dtype='object')

### 値を抽出する

In [207]:
# Extract the underlying data as a NumPy array. to_numpy() is the accessor the
# pandas documentation recommends over the older .values attribute; for this
# all-integer frame the resulting array is the same.
df.to_numpy()

array([[   0,    1,    2,    1,    2, 1857],
       [   1,    3,    4,    3,    4, 1858]], dtype=int64)

In [209]:
# Same data as nested Python lists (one inner list per row), going through
# to_numpy(), the accessor pandas recommends over the older .values attribute.
df.to_numpy().tolist()

[[0, 1, 2, 1, 2, 1857], [1, 3, 4, 3, 4, 1858]]

### csvファイルを読み込む

In [230]:
# Load the CSV without treating its first line as a header, so pandas assigns
# integer column labels. pandas is already imported as `pd` in the first cell,
# so it is not imported again here.
df = pd.read_csv("./data/wavedata_wfx.csv", header=None)

In [235]:
# Label the two columns "a" and "b" (axis=1 targets the columns).
# The relabelled frame is only displayed, not assigned, so df keeps its
# integer column labels in the cells that follow.
df.set_axis(["a", "b"], axis=1)

Unnamed: 0,a,b
0,1,2
1,3,4
2,5,6


### 結合

In [238]:
# Concatenate the frame with itself side by side (axis=1). Both inputs
# contribute their columns, so the column labels appear twice in the result.
# NOTE(review): the "0.1" / "1.1" labels in the recorded table look like an
# export artifact of the duplicated labels 0 and 1 -- confirm by re-running.
df_new = pd.concat([df, df], axis=1)

In [239]:
df_new

Unnamed: 0,0,1,0.1,1.1
0,1,2,1,2
1,3,4,3,4
2,5,6,5,6


### 欠損値の対応

In [255]:
# The first record has no "p4" key, so that cell ends up as a missing value
# (NaN) once the records are turned into a DataFrame.
data = [
    {"p1": 1, "p2": 2, "p3": 1, "pin": 1857},
    {"p1": 3, "p2": 4, "p3": 3, "p4": 4, "pin": 1858},
]

In [256]:
# Build the frame; the key missing from the first record becomes NaN.
df = pd.DataFrame(data)

In [257]:
# Index the rows by "pin" so the missing-value checks below cover p1..p4 only.
df = df.set_index("pin")

In [258]:
df

Unnamed: 0_level_0,p1,p2,p3,p4
pin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1857,1,2,1,
1858,3,4,3,4.0


In [259]:
# Boolean frame of the same shape: True marks a missing value.
df.isnull()

Unnamed: 0_level_0,p1,p2,p3,p4
pin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1857,False,False,False,True
1858,False,False,False,False


In [262]:
# Drop every row (axis=0) that contains at least one missing value (how="any").
# The filtered frame is only displayed; it is not assigned back to df.
df.dropna(how="any", axis=0)

Unnamed: 0_level_0,p1,p2,p3,p4
pin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1858,3,4,3,4.0


### copy

In [263]:
# A flat list: it becomes a single column (labelled 0) with one row per element.
data = [1, 2]

In [264]:
# One-column frame used to demonstrate aliasing versus copying below.
df = pd.DataFrame(data)

In [265]:
df

Unnamed: 0,0
0,1
1,2


In [266]:
# Plain assignment does NOT copy: tmp and df are now two names for the same
# DataFrame object. Use df.copy() when an independent frame is needed.
tmp = df

In [269]:
# Adds column "a" (all 1) in place. Because tmp and df refer to the same
# object, df gains the column as well.
tmp["a"] = 1

In [272]:
tmp

Unnamed: 0,0,a
0,1,1
1,2,1


In [276]:
# drop returns a NEW frame without column "a" and tmp is rebound to it.
# The original object, still referenced by df, keeps the column.
tmp = tmp.drop("a", axis=1)

In [277]:
tmp

Unnamed: 0,0
0,1
1,2


In [278]:
df

Unnamed: 0,0,a
0,1,1
1,2,1


In [279]:
# Two records; only the second one has a "p4" key.
data = [
    {"p1": 1, "p2": 2, "p3": 1, "pin": 1857},
    {"p1": 3, "p2": 4, "p3": 3, "p4": 4, "pin": 1858},
]

In [280]:
# Columns follow the order in which keys are first seen, so "p4" (present only
# in the second record) ends up as the last column, with NaN in the first row.
df = pd.DataFrame(data)

In [281]:
df

Unnamed: 0,p1,p2,p3,pin,p4
0,1,2,1,1857,
1,3,4,3,1858,4.0


In [282]:
# Keep only the rows whose "pin" equals 1857, using a boolean mask for the row
# selection; df is rebound to the filtered frame.
df = df.loc[df["pin"] == 1857]

In [283]:
df

Unnamed: 0,p1,p2,p3,pin,p4
0,1,2,1,1857,
