## 7.10. ファイルから読み込む/保存する

### 7.10.1 ファイルから読み込む

In [1]:
import pandas as pd
pd.set_option('display.notebook_repr_html', False)

df = pd.read_csv('OTg6QzA_activities.csv', header=0, names=['t','x','y','z'])
df.head()

                         t   x   y   z
0  2022-01-01 00:01:40.363  33  76  56
1  2022-01-01 00:01:42.961  34  87  56
2  2022-01-01 00:01:45.562  25  89  47
3  2022-01-01 00:01:48.163  11  70  50
4  2022-01-01 00:02:08.864  33  72  58

In [2]:
# usecolsで必要な列データだけを読み込むことができる
df = pd.read_csv('OTg6QzA_activities.csv', header=0, names=['t','y','z'], usecols=[0,2,3])
df.head()

                         t   y   z
0  2022-01-01 00:01:40.363  76  56
1  2022-01-01 00:01:42.961  87  56
2  2022-01-01 00:01:45.562  89  47
3  2022-01-01 00:01:48.163  70  50
4  2022-01-01 00:02:08.864  72  58

In [3]:
# index_colを用いてインデックスを生成できる
df = pd.read_csv('OTg6QzA_activities.csv', header=0, names=['t','x','y','z'], index_col='t')
df.head()

                          x   y   z
t                                  
2022-01-01 00:01:40.363  33  76  56
2022-01-01 00:01:42.961  34  87  56
2022-01-01 00:01:45.562  25  89  47
2022-01-01 00:01:48.163  11  70  50
2022-01-01 00:02:08.864  33  72  58

In [4]:
df.index

Index(['2022-01-01 00:01:40.363', '2022-01-01 00:01:42.961',
       '2022-01-01 00:01:45.562', '2022-01-01 00:01:48.163',
       '2022-01-01 00:02:08.864', '2022-01-01 00:02:21.765',
       '2022-01-01 00:02:32.092', '2022-01-01 00:02:37.297',
       '2022-01-01 00:03:10.795', '2022-01-01 00:03:31.301',
       ...
       '2022-01-31 23:25:15.974', '2022-01-31 23:27:32.381',
       '2022-01-31 23:27:42.580', '2022-01-31 23:27:50.282',
       '2022-01-31 23:27:55.381', '2022-01-31 23:28:00.481',
       '2022-01-31 23:28:41.683', '2022-01-31 23:28:49.383',
       '2022-01-31 23:32:36.426', '2022-01-31 23:32:44.126'],
      dtype='object', name='t', length=66281)

In [5]:
df1 = pd.read_csv('OTg6QzA_activities.csv', header=0,
                  names=['t','x','y','z'])
df1

                             t   x    y    z
0      2022-01-01 00:01:40.363  33   76   56
1      2022-01-01 00:01:42.961  34   87   56
2      2022-01-01 00:01:45.562  25   89   47
3      2022-01-01 00:01:48.163  11   70   50
4      2022-01-01 00:02:08.864  33   72   58
...                        ...  ..  ...  ...
66276  2022-01-31 23:28:00.481 -91  143  136
66277  2022-01-31 23:28:41.683 -89  145  138
66278  2022-01-31 23:28:49.383 -93  138  137
66279  2022-01-31 23:32:36.426 -93  131  137
66280  2022-01-31 23:32:44.126 -91  129  138

[66281 rows x 4 columns]

In [6]:
df1[60000:]

                             t   x    y    z
60000  2022-01-29 11:33:16.265 -94  112  140
60001  2022-01-29 11:33:18.866 -92  121  141
60002  2022-01-29 11:33:21.466 -82  115  143
60003  2022-01-29 11:33:24.066 -96  120  141
60004  2022-01-29 11:33:26.666 -94  120  142
...                        ...  ..  ...  ...
66276  2022-01-31 23:28:00.481 -91  143  136
66277  2022-01-31 23:28:41.683 -89  145  138
66278  2022-01-31 23:28:49.383 -93  138  137
66279  2022-01-31 23:32:36.426 -93  131  137
66280  2022-01-31 23:32:44.126 -91  129  138

[6281 rows x 4 columns]

In [7]:
# skiprows引数を付与し、一部のデータのみを読み込む
df2 = pd.read_csv('OTg6QzA_activities.csv', header=0,
                  names=['t','x','y','z'], skiprows=60000)
df2

                            t   x    y    z
0     2022-01-29 11:33:16.265 -94  112  140
1     2022-01-29 11:33:18.866 -92  121  141
2     2022-01-29 11:33:21.466 -82  115  143
3     2022-01-29 11:33:24.066 -96  120  141
4     2022-01-29 11:33:26.666 -94  120  142
...                       ...  ..  ...  ...
6276  2022-01-31 23:28:00.481 -91  143  136
6277  2022-01-31 23:28:41.683 -89  145  138
6278  2022-01-31 23:28:49.383 -93  138  137
6279  2022-01-31 23:32:36.426 -93  131  137
6280  2022-01-31 23:32:44.126 -91  129  138

[6281 rows x 4 columns]

In [8]:
df3 = pd.read_csv('OTg6QzA_activities.csv', header=0,
                  names=['t','x','y','z'])
df3[:10]

                         t   x   y   z
0  2022-01-01 00:01:40.363  33  76  56
1  2022-01-01 00:01:42.961  34  87  56
2  2022-01-01 00:01:45.562  25  89  47
3  2022-01-01 00:01:48.163  11  70  50
4  2022-01-01 00:02:08.864  33  72  58
5  2022-01-01 00:02:21.765  21  68  58
6  2022-01-01 00:02:32.092  24  69  56
7  2022-01-01 00:02:37.297  27  74  56
8  2022-01-01 00:03:10.795  29  66  58
9  2022-01-01 00:03:31.301  27  78  57

In [9]:
# skiprows引数を付与し、一部のデータのみを読み込む
df4 = pd.read_csv('OTg6QzA_activities.csv', header=0,
                  names=['t','x','y','z'], skiprows=[1,3,5] )
df4[:10]

                         t   x   y   z
0  2022-01-01 00:01:42.961  34  87  56
1  2022-01-01 00:01:48.163  11  70  50
2  2022-01-01 00:02:21.765  21  68  58
3  2022-01-01 00:02:32.092  24  69  56
4  2022-01-01 00:02:37.297  27  74  56
5  2022-01-01 00:03:10.795  29  66  58
6  2022-01-01 00:03:31.301  27  78  57
7  2022-01-01 00:03:41.495  19  62  51
8  2022-01-01 00:03:43.995  29  69  59
9  2022-01-01 00:04:30.098  25  74  52

In [10]:
df5 = pd.read_csv('OTg6QzA_activities.csv', header=0, skiprows=60000,
                  nrows=5)
df5

   2022-01-29 11:33:13.665  -92  118  142
0  2022-01-29 11:33:16.265  -94  112  140
1  2022-01-29 11:33:18.866  -92  121  141
2  2022-01-29 11:33:21.466  -82  115  143
3  2022-01-29 11:33:24.066  -96  120  141
4  2022-01-29 11:33:26.666  -94  120  142

### 7.10.2 ファイルの保存

In [11]:
df = pd.read_csv('OTg6QzA_activities.csv', header=0, names=['t','x','y','z'])
df

                             t   x    y    z
0      2022-01-01 00:01:40.363  33   76   56
1      2022-01-01 00:01:42.961  34   87   56
2      2022-01-01 00:01:45.562  25   89   47
3      2022-01-01 00:01:48.163  11   70   50
4      2022-01-01 00:02:08.864  33   72   58
...                        ...  ..  ...  ...
66276  2022-01-31 23:28:00.481 -91  143  136
66277  2022-01-31 23:28:41.683 -89  145  138
66278  2022-01-31 23:28:49.383 -93  138  137
66279  2022-01-31 23:32:36.426 -93  131  137
66280  2022-01-31 23:32:44.126 -91  129  138

[66281 rows x 4 columns]

In [12]:
df.eval('FQ = (x >= 0 and y >= 200)', inplace=True)

In [13]:
df.query('FQ == True')

                             t    x    y    z    FQ
78     2022-01-01 04:05:01.958   62  559    5  True
79     2022-01-01 04:05:09.559   61  551    4  True
80     2022-01-01 04:34:16.548   17  469   20  True
81     2022-01-01 04:34:19.148   16  476   24  True
82     2022-01-01 04:34:21.751    4  473   31  True
...                        ...  ...  ...  ...   ...
65876  2022-01-31 15:18:52.945   96  233  114  True
65877  2022-01-31 15:18:55.545   75  239   80  True
65878  2022-01-31 15:18:58.144   95  243   90  True
65879  2022-01-31 15:19:00.746  111  255   78  True
66220  2022-01-31 22:36:03.919    1  234  101  True

[7823 rows x 5 columns]

In [14]:
df

                             t   x    y    z     FQ
0      2022-01-01 00:01:40.363  33   76   56  False
1      2022-01-01 00:01:42.961  34   87   56  False
2      2022-01-01 00:01:45.562  25   89   47  False
3      2022-01-01 00:01:48.163  11   70   50  False
4      2022-01-01 00:02:08.864  33   72   58  False
...                        ...  ..  ...  ...    ...
66276  2022-01-31 23:28:00.481 -91  143  136  False
66277  2022-01-31 23:28:41.683 -89  145  138  False
66278  2022-01-31 23:28:49.383 -93  138  137  False
66279  2022-01-31 23:32:36.426 -93  131  137  False
66280  2022-01-31 23:32:44.126 -91  129  138  False

[66281 rows x 5 columns]

In [15]:
# df.query('FQ == True')

In [16]:
df.to_csv('OTg6QzA_FQ.csv' , index_label='No')

In [17]:
df = pd.read_csv('OTg6QzA_activities.csv', header=0, names=['t','x','y','z'])
df.to_json('OTg6QzA_FQ.json')