## テキストファイルを読み込む

### 全データを読み込む

In [None]:
# Read the entire file into one string.
# The encoding is passed explicitly so decoding does not depend on the
# platform's locale default (which is not UTF-8 on every system).
with open("./sample_data/README.md", "r", encoding="utf-8") as f:
    text = f.read()

print(text)

This directory includes a few sample datasets to get you started.

*   `california_housing_data*.csv` is California housing data from the 1990 US
    Census; more information is available at:
    https://developers.google.com/machine-learning/crash-course/california-housing-data-description

*   `mnist_*.csv` is a small sample of the
    [MNIST database](https://en.wikipedia.org/wiki/MNIST_database), which is
    described at: http://yann.lecun.com/exdb/mnist/

*   `anscombe.json` contains a copy of
    [Anscombe's quartet](https://en.wikipedia.org/wiki/Anscombe%27s_quartet); it
    was originally described in

    Anscombe, F. J. (1973). 'Graphs in Statistical Analysis'. American
    Statistician. 27 (1): 17-21. JSTOR 2682899.

    and our copy was prepared by the
    [vega_datasets library](https://github.com/altair-viz/vega_datasets/blob/4f67bdaad10f45e3549984e17e1b3088c731503d/vega_datasets/_data/anscombe.json).



### リストとして読み込む

In [None]:
# Read the file as a list with one string per line (each keeps its trailing "\n").
# The encoding is passed explicitly so decoding does not depend on the
# platform's locale default.
with open("./sample_data/README.md", encoding="utf-8") as f:
    lines = f.readlines()

lines

['This directory includes a few sample datasets to get you started.\n',
 '\n',
 '*   `california_housing_data*.csv` is California housing data from the 1990 US\n',
 '    Census; more information is available at:\n',
 '    https://developers.google.com/machine-learning/crash-course/california-housing-data-description\n',
 '\n',
 '*   `mnist_*.csv` is a small sample of the\n',
 '    [MNIST database](https://en.wikipedia.org/wiki/MNIST_database), which is\n',
 '    described at: http://yann.lecun.com/exdb/mnist/\n',
 '\n',
 '*   `anscombe.json` contains a copy of\n',
 "    [Anscombe's quartet](https://en.wikipedia.org/wiki/Anscombe%27s_quartet); it\n",
 '    was originally described in\n',
 '\n',
 "    Anscombe, F. J. (1973). 'Graphs in Statistical Analysis'. American\n",
 '    Statistician. 27 (1): 17-21. JSTOR 2682899.\n',
 '\n',
 '    and our copy was prepared by the\n',
 '    [vega_datasets library](https://github.com/altair-viz/vega_datasets/blob/4f67bdaad10f45e3549984e17e1b3088c7315

## テキストファイルに書き込む

In [None]:
# Write a single Japanese sentence to a new file ("w" truncates any existing file).
# Without an explicit encoding the bytes written depend on the OS locale
# (e.g. cp932 on Japanese Windows), so UTF-8 is requested explicitly.
with open("kakikomi.txt", "w", encoding="utf-8") as f:
    f.write("これはサンプル文です。")

In [None]:
# Write several lines at once; each "\n" in the string ends one line in the file.
# UTF-8 is requested explicitly so the Japanese text is encoded the same way
# regardless of the OS locale.
with open("kakikomi-multiline.txt", "w", encoding="utf-8") as f:
    f.write("おはよう\nこんにちは\nおやすみ\n")

In [None]:
# A triple-quoted string keeps its line breaks, so it can be written as-is
# to produce a multi-line file.
sample = """これはサンプル文です。
このように...
複数行にまたがる複数行の文字列も
しっかり書き込めます。"""
# UTF-8 is requested explicitly so the output does not depend on the OS locale.
with open("kakikomi-here.txt", "w", encoding="utf-8") as f:
    f.write(sample)

## さまざまなデータを読み書きする


### テーブルデータ（CSV）

#### 読み込み

In [None]:
import csv

In [None]:
# Parse the CSV file into a list of rows; every row is a list of strings.
# newline="" is what the csv module documentation requires for file objects,
# so that newlines embedded in quoted fields are interpreted correctly.
with open("./sample_data/california_housing_test.csv", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    content = list(reader)
    # Equivalent alternative: content = [line for line in reader]

# Show only the first five rows (header + four data rows) instead of the whole table.
content[:5]

[['longitude',
  'latitude',
  'housing_median_age',
  'total_rooms',
  'total_bedrooms',
  'population',
  'households',
  'median_income',
  'median_house_value'],
 ['-122.050000',
  '37.370000',
  '27.000000',
  '3885.000000',
  '661.000000',
  '1537.000000',
  '606.000000',
  '6.608500',
  '344700.000000'],
 ['-118.300000',
  '34.260000',
  '43.000000',
  '1510.000000',
  '310.000000',
  '809.000000',
  '277.000000',
  '3.599000',
  '176500.000000'],
 ['-117.810000',
  '33.780000',
  '27.000000',
  '3589.000000',
  '507.000000',
  '1484.000000',
  '495.000000',
  '5.793400',
  '270500.000000'],
 ['-118.360000',
  '33.820000',
  '28.000000',
  '67.000000',
  '15.000000',
  '49.000000',
  '11.000000',
  '6.135900',
  '330000.000000']]

In [None]:
# Number of rows that were read, counting the header row as well.
len(content)

3001

In [None]:
# Print the first field of the header row and of the first data row,
# one per line.
print(content[0][0], content[1][0], sep="\n")

longitude
-122.050000


#### 書き込み

In [None]:
import csv

# Table to export: the first row is the header, the remaining rows are menu items
# (name, description, price).
menu_data = [
    ["商品名", "説明", "値段"],
    ["特製ハンバーガー", "特製パティを使ったジューシーなハンバーガー", 1200],
    ["シーフードパスタ", "甘みとコクが特長のシーフードパスタ", 1800],
    ["野菜たっぷりサラダ", "新鮮な季節野菜がたっぷりのヘルシーサラダ", 900],
    ["シェフ特製デザート", "シェフ自慢のデザートの盛り合わせ", 1500],
    ["贅沢フルコースコース", "前菜、スープ、メイン、デザートが楽しめるフルコース", 4500],
    ["季節限定ドリンク", "旬のフルーツを使用した季節限定の特製ドリンク", 800]
]

# newline="" stops the text layer from translating the writer's own "\r\n" row
# terminator again (which produces blank rows on Windows); the explicit encoding
# keeps the Japanese text independent of the OS locale.
with open("menu.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    # writerows() writes every row of the table in one call.
    writer.writerows(menu_data)

In [None]:
# Table to export: header row followed by archery products (name, description, price).
archery_data = [
    ["商品名", "説明", "価格"],
    ["トップレベルリカーブボウ", "プロフェッショナル向けのトップレベルリカーブボウ", 35000],
    ["エントリーレベルリカーブボウ", "初心者向けのエントリーレベルリカーブボウ", 12000],
    ["コンパウンドボウ", "調整が容易で高い命中精度を持つコンパウンドボウ", 28000],
    ["矢（アロー）10本セット", "信頼性のある矢（アロー）のセット（10本）", 5000],
    ["アーチェリーグローブ", "手の保護に適したアーチェリーグローブ", 2500],
    ["アーチェリーターゲット", "耐久性が高く矢を止めやすいアーチェリーターゲット", 8000]
]

# newline="" avoids doubled line endings on Windows; the explicit encoding keeps
# the Japanese text independent of the OS locale.
with open("archery.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    # Write the table one row at a time with writerow().
    for row in archery_data:
        writer.writerow(row)

In [None]:
# Table to filter: header row followed by archery products (name, description, price).
archery_data = [
    ["商品名", "説明", "価格"],
    ["トップレベルリカーブボウ", "プロフェッショナル向けのトップレベルリカーブボウ", 35000],
    ["エントリーレベルリカーブボウ", "初心者向けのエントリーレベルリカーブボウ", 12000],
    ["コンパウンドボウ", "調整が容易で高い命中精度を持つコンパウンドボウ", 28000],
    ["矢（アロー）10本セット", "信頼性のある矢（アロー）のセット（10本）", 5000],
    ["アーチェリーグローブ", "手の保護に適したアーチェリーグローブ", 2500],
    ["アーチェリーターゲット", "耐久性が高く矢を止めやすいアーチェリーターゲット", 8000]
]

budget = 10000  # Upper price limit; rows priced above this are left out.
# newline="" avoids doubled line endings on Windows; the explicit encoding keeps
# the Japanese text independent of the OS locale.
with open("archery-cheap.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    for i, row in enumerate(archery_data):
        # Index 0 is the header row and is always written; for every other row
        # the last column holds the price, and over-budget rows are skipped.
        is_data_row = i > 0
        if is_data_row and row[-1] > budget:
            continue
        writer.writerow(row)

### JSONデータ

In [None]:
!wget https://huggingface.co/google/flan-t5-xxl/raw/main/config.json

--2023-08-29 09:40:54--  https://huggingface.co/google/flan-t5-xxl/raw/main/config.json
Resolving huggingface.co (huggingface.co)... 18.154.227.87, 18.154.227.69, 18.154.227.7, ...
Connecting to huggingface.co (huggingface.co)|18.154.227.87|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 674 [text/plain]
Saving to: ‘config.json’


2023-08-29 09:40:54 (421 MB/s) - ‘config.json’ saved [674/674]



In [None]:
import json

# Parse the JSON file into Python objects. JSON files are UTF-8, so the
# encoding is given explicitly rather than left to the OS locale default.
with open("./config.json", encoding="utf-8") as f:
    data = json.load(f)

data

{'architectures': ['T5ForConditionalGeneration'],
 'd_ff': 10240,
 'd_kv': 64,
 'd_model': 4096,
 'decoder_start_token_id': 0,
 'dropout_rate': 0.1,
 'eos_token_id': 1,
 'feed_forward_proj': 'gated-gelu',
 'initializer_factor': 1.0,
 'is_encoder_decoder': True,
 'layer_norm_epsilon': 1e-06,
 'model_type': 't5',
 'num_decoder_layers': 24,
 'num_heads': 64,
 'num_layers': 24,
 'output_past': True,
 'pad_token_id': 0,
 'relative_attention_max_distance': 128,
 'relative_attention_num_buckets': 32,
 'tie_word_embeddings': False,
 'torch_dtype': 'float32',
 'transformers_version': '4.24.0.dev0',
 'use_cache': True,
 'vocab_size': 32128}

In [None]:
# Inspect the Python type of the object that was loaded into `data`.
type(data)

dict

In [None]:
# Look up a single value by its key.
data["feed_forward_proj"]

'gated-gelu'

In [None]:
# A JSON document held in a Python string: an object with scalar fields and
# a "hobby" array of nested objects.
person_str = """{
    "name": "田中",
    "age": 25,
    "prefecture": "東京都",
    "hobby": [
        {
            "id": 1,
            "name": "映画鑑賞"
        },
        {
            "id": 2,
            "name": "ジョギング"
        }
    ]
}"""

# json.loads (with an "s") parses from a string rather than from a file object.
person_data = json.loads(person_str)
type(person_data)

dict

In [None]:
# Drill into the nested structure: "hobby" list -> first element -> its "name".
person_data["hobby"][0]["name"]

'映画鑑賞'

In [None]:
# Nested sample data (dicts, lists and strings) to be serialized as JSON.
company_data = {
  "company_name": "株式会社〇〇",
  "establishment_date": "2023-07-21",
  "founders": [
    {
      "name": "田中太郎",
      "position": "代表取締役社長"
    },
    {
      "name": "山田花子",
      "position": "取締役"
    }
  ],
  "capital": "1,000万円",
  "address": "東京都渋谷区〇〇町1-2-3",
  "contact": {
    "phone": "03-1234-5678",
    "email": "info@example.com"
  },
  "business_description": "情報技術に関するサービスの提供",
  "business_activities": [
    "ソフトウェア開発",
    "ウェブデザイン",
    "ITコンサルティング"
  ],
  "shareholders": [
    {
      "name": "株式会社ABC",
      "shares": "500株"
    },
    {
      "name": "山田花子",
      "shares": "300株"
    },
    {
      "name": "田中太郎",
      "shares": "200株"
    }
  ],
  "website": "https://example.com"
}

# Write with json.dump's default settings: with the default ensure_ascii=True,
# non-ASCII characters are written as \uXXXX escapes and everything goes on one line.
with open("company_data.json", "w") as f:
    json.dump(company_data, f)

In [None]:
# ensure_ascii=False writes non-ASCII characters as-is instead of \uXXXX escapes.
# Because raw Japanese text now reaches the file, the encoding is set explicitly
# to UTF-8 rather than left to the OS locale default.
with open("company_data.json", "w", encoding="utf-8") as f:
    json.dump(company_data, f, ensure_ascii=False)

In [None]:
# indent=2 pretty-prints the output with two-space indentation per nesting level.
# ensure_ascii=False writes raw Japanese text, so the file encoding is set
# explicitly to UTF-8 rather than left to the OS locale default.
with open("company_data.json", "w", encoding="utf-8") as f:
    json.dump(company_data, f, ensure_ascii=False, indent=2)

In [None]:
# A list of dicts serializes to a JSON array of objects; None becomes JSON null.
founders = [
    {'name': 'Aristotle Amedas', "share": 0.50, 'note': None},
    {'name': 'Erno Emerald', "share": 0.25, 'note': '譲渡制限付'},
]

# ensure_ascii=False writes the Japanese note as raw text, so the file encoding
# is set explicitly to UTF-8 rather than left to the OS locale default.
with open("founders.json", "w", encoding="utf-8") as f:
    json.dump(founders, f, ensure_ascii=False, indent=2)

### Pythonオブジェクト

In [None]:
class Suit:
    """A suit described by its color, size and brand.

    Instances are callable: calling one returns a sentence recommending
    the suit for a given occasion.
    """

    def __init__(self, color, size, brand):
        """Store the three descriptive attributes on the instance."""
        self.color, self.size, self.brand = color, size, brand

    def __call__(self, occasion="ビジネス"):
        """Return the recommendation sentence for ``occasion``."""
        message = f"この{self.color}のスーツ（サイズ：{self.size}、ブランド：{self.brand}）は、{occasion}にぴったりです。"
        return message


# Sample instance reused by the following cells.
my_suit = Suit(
    color="ピンク",
    size="L",
    brand="サンプルブランド",
)

In [None]:
import pickle

# Serialize the object to a file. Pickle data is binary, so the file is
# opened in "wb" (write-binary) mode.
with open('suit.pkl', 'wb') as f:
    pickle.dump(my_suit, f)

In [None]:
# Load the object back from the file ("rb" = read-binary).
# NOTE: unpickling can execute arbitrary code, so only call pickle.load on
# files you created yourself or otherwise trust.
with open('suit.pkl', 'rb') as f:
    my_suit_reloaded = pickle.load(f)

In [None]:
# Call the restored object like the original to check that it still works.
my_suit_reloaded("パーティー")

'このピンクのスーツ（サイズ：L、ブランド：サンプルブランド）は、パーティーにぴったりです。'