In [None]:
# Check whether PyTorch can see a CUDA device in this runtime.
import torch
torch.cuda.is_available()

In [None]:
# Print the GPU status reported by the NVIDIA command-line tool.
!nvidia-smi

In [None]:
# 1. Install the PyTorch / torchvision / torchaudio versions specified for the model (cu118 wheel index).
!pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118


In [None]:
# 2. Install the xformers version specified for the model.
!pip install xformers==0.0.22


In [None]:
#モデルの指定numpyバージョン
pip install "numpy<2"

In [None]:
# Main environment setup: install the deepseek-vl2 package straight from its GitHub repository.
!pip install git+https://github.com/deepseek-ai/deepseek-vl2.git

In [None]:
# Dependency check: for each required package, run `pip show`; if that fails, install the package quietly,
# then print the package metadata.
!pip show transformers || pip install -q transformers && pip show transformers
!pip show accelerate || pip install -q accelerate && pip show accelerate
!pip show deepseek-vl2 || pip install -q git+https://github.com/deepseek-ai/deepseek-vl2.git && pip show deepseek-vl2


In [None]:
import torch
from google.colab import files
import os, csv, re
from deepseek_vl2.models import DeepseekVLV2Processor, DeepseekVLV2ForCausalLM
from deepseek_vl2.utils.io import load_pil_images

# ------------------------------
# ✅ OCR model wrapper (DeepSeek-VL2)
# ------------------------------
class OCRModel:
    """Wrapper around a DeepSeek-VL2 checkpoint used to read residence-card images.

    The processor, tokenizer and model are loaded once in ``__init__``;
    ``predict`` then answers a fixed Japanese prompt for a single image.
    """

    def __init__(self, model_path="deepseek-ai/deepseek-vl2-tiny"):
        """Load the processor and model from ``model_path`` and switch the model to eval mode.

        Args:
            model_path: Identifier or path passed to ``from_pretrained`` for
                both the processor and the model.
        """
        self.processor = DeepseekVLV2Processor.from_pretrained(model_path)
        self.tokenizer = self.processor.tokenizer
        self.model = DeepseekVLV2ForCausalLM.from_pretrained(model_path, trust_remote_code=True)
        # NOTE(review): this flag is set after the weights are loaded; confirm the
        # model actually consults it at inference time.
        self.model.config.use_flash_attention = False
        # Use the GPU when one is available, otherwise fall back to the CPU.
        self.model = self.model.to("cuda" if torch.cuda.is_available() else "cpu").eval()

    def predict(self, image_path, max_new_tokens=300):
        """Run the fixed residence-card prompt on one image and return the decoded answer.

        Args:
            image_path: Path of the image file; it is placed in the
                conversation's ``images`` list and loaded by ``load_pil_images``.
            max_new_tokens: Upper bound on generated tokens (default 300).

        Returns:
            The generated token ids decoded to a string with special tokens skipped.
        """
        conversation = [
            {
                "role": "<|User|>",
                "content": """<image>\n
                言語設定：日本語。
                以下の項目だけ答えてください：
                - 国籍
                - 氏名
                - 性別:(男|女)
                - 生年月日
                - 住居地
                - 在留資格
                - 右上の番号は在留カード番号です:
                - 在留期間(PERIOD OF STAY):X年 (XXXX年XX月XX日)
                それ以外の説明や解説は不要です。""",
                "images": [image_path],
            },
            {"role": "<|Assistant|>", "content": ""},
        ]

        pil_images = load_pil_images(conversation)
        inputs = self.processor(conversations=conversation, images=pil_images, force_batchify=True)
        inputs = inputs.to(self.model.device)

        inputs_dict = dict(inputs)

        # Only the image tensors are cast to float32; every other entry keeps its dtype.
        for key in ("pixel_values", "images"):
            value = inputs_dict.get(key)
            if isinstance(value, torch.Tensor):
                inputs_dict[key] = value.to(torch.float32)

        # Inference only: disable autograd so building the input embeddings
        # does not record a graph that is never used.
        with torch.no_grad():
            inputs_embeds = self.model.prepare_inputs_embeds(**inputs_dict)

            outputs = self.model.generate(
                inputs_embeds=inputs_embeds,
                attention_mask=inputs_dict.get("attention_mask"),
                pad_token_id=self.tokenizer.eos_token_id,
                bos_token_id=self.tokenizer.bos_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
                max_new_tokens=max_new_tokens,
                do_sample=False,
                use_cache=True,
            )

        return self.tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True)

# ------------------------------
# ✅ Field extraction from the OCR text
# ------------------------------
def extract_fields(text):
    """Pull the residence-card fields out of the model's free-text answer.

    Each field is located with a regular expression keyed on its Japanese
    label followed by an optional half- or full-width colon. A field that
    cannot be found is returned as an empty string, so the result always
    contains the same nine keys.

    Args:
        text: Decoded model output. ``None`` or ``""`` yields a dict whose
            values are all empty strings.

    Returns:
        dict with the keys 国籍, 氏名, 性別, 生年月日, 住居地, 在留資格,
        在留カード番号, 在留期間_期間 and 在留期間_満了日.
    """
    # Treat a missing answer as empty text so every field falls back to "".
    text = text or ""

    def first_group(pattern):
        """Return capture group 1 of the first match of ``pattern`` in ``text``, or ""."""
        match = re.search(pattern, text)
        return match.group(1) if match else ""

    # The stay period carries two values (length and expiry date); search once
    # and unpack both groups.
    stay = re.search(
        r"在留期間(?:\(PERIOD OF STAY\))?.*?[:：]?\s*(\d+年).*?\(?(\d{4}年\d{1,2}月\d{1,2}日)",
        text,
    )
    stay_length, stay_expiry = stay.groups() if stay else ("", "")

    return {
        "国籍": first_group(r"国籍[:：]?\s*([^\s\n]+)"),
        "氏名": first_group(r"氏名[:：]?\s*(.+)").strip(),
        "性別": first_group(r"性別[:：]?\s*(男|女)"),
        "生年月日": first_group(r"生年月日[:：]?\s*(\d{4}年\d{1,2}月\d{1,2}日)"),
        # `.` does not cross newlines, so only the rest of the label's line is captured.
        "住居地": first_group(r"住居地[:：]?\s*(.+)").strip(),
        "在留資格": first_group(r"在留資格[:：]?\s*(\S+)"),
        # Accept both the full prompt phrase and the plain "在留カード番号" label,
        # since the answer may not repeat the prompt wording verbatim.
        "在留カード番号": first_group(
            r"(?:右上の番号は)?在留カード番号(?:です)?[:：]?\s*([A-Z0-9]+)"
        ),
        "在留期間_期間": stay_length,
        "在留期間_満了日": stay_expiry,
    }

# ------------------------------
# ✅ Save results to CSV
# ------------------------------
def save_to_csv(data_list, path):
    """Write a list of row dicts to a UTF-8 CSV file with ``学籍番号`` as the first column.

    The header is ``学籍番号`` followed by every other key in first-seen order
    across all rows, so rows with differing keys are written with blanks for
    the missing cells. An empty ``data_list`` produces a file containing only
    the ``学籍番号`` header.

    Args:
        data_list: List of dicts, one per CSV row.
        path: Destination file path; an existing file is overwritten.
    """
    # dict.fromkeys de-duplicates while preserving the order keys were first seen.
    other_keys = dict.fromkeys(
        key for row in data_list for key in row if key != "学籍番号"
    )
    keys = ["学籍番号", *other_keys]
    # newline="" lets the csv module control line endings itself.
    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=keys)
        writer.writeheader()
        writer.writerows(data_list)

# ------------------------------
# ✅ Single-file processing
# ------------------------------
def process_single():
    """Upload through the Colab file dialog, OCR each file, and download a one-row CSV for it.

    For every uploaded file the global ``ocr`` model is run, the answer is
    parsed with ``extract_fields``, the file's base name (without extension)
    is stored under ``学籍番号``, and the row is written to
    ``result_single.csv`` and downloaded right away. The CSV is rewritten for
    each uploaded file.
    """
    output_csv = "result_single.csv"
    for upload_name in files.upload():
        record = extract_fields(ocr.predict(upload_name))
        # Base name without its extension becomes the row's ID column.
        stem, _extension = os.path.splitext(os.path.basename(upload_name))
        record["学籍番号"] = stem
        save_to_csv([record], output_csv)
        files.download(output_csv)

# ------------------------------
# ✅ Batch (multi-file) processing
# ------------------------------
def process_batch():
    """Upload several images, OCR each one, and download a single combined CSV.

    Every uploaded file is passed to the global ``ocr`` model, parsed with
    ``extract_fields`` and tagged with its base name (without extension)
    under ``学籍番号``. All rows are written to ``result_batch.csv``, which is
    then downloaded. If nothing was uploaded the function prints a notice and
    returns without writing or downloading anything.
    """
    uploaded = files.upload()
    if not uploaded:
        # Nothing was uploaded (e.g. the dialog was cancelled): skip writing
        # and downloading an empty result.
        print("⚠️ ファイルがアップロードされていません。処理を中止します。")
        return

    results = []
    for filename in uploaded.keys():
        fields = extract_fields(ocr.predict(filename))
        # Base name without its extension becomes the row's ID column.
        fields["学籍番号"] = os.path.splitext(os.path.basename(filename))[0]
        results.append(fields)
    save_to_csv(results, "result_batch.csv")
    files.download("result_batch.csv")

# ------------------------------
# ✅ OCR model initialization
# ------------------------------
# Build the shared model instance that process_single() and process_batch() read as the global `ocr`,
# then print the usage hint.
ocr = OCRModel("deepseek-ai/deepseek-vl2-tiny")
print(
    "✅ OCRモデル初期化完了。以下を実行してください：",
    "- process_single()：1枚の画像を認識",
    "- process_batch()：複数画像を一括認識",
    sep="\n",
)


In [None]:
# Single-file OCR: upload an image and download its CSV result.
process_single()

In [None]:
# Multi-file OCR: upload several images and download one combined CSV.
process_batch()

In [None]:
# Restart the runtime: send signal 9 to the current kernel process so it terminates immediately.
# NOTE(review): presumably the notebook front end then starts a fresh kernel — run this cell manually only.
import os
os.kill(os.getpid(), 9)

In [3]:
# Delete all files and directories matched by /content/* (irreversible).
!rm -rf /content/*

In [5]:
# List the current contents of /content.
!ls /content