# TEI Tools

> Tools for working with TEI/XML

In [None]:
#| default_exp api

In [None]:
#| hide
from nbdev.showdoc import *
from bs4 import BeautifulSoup

In [None]:
#| export
class TeiTools:
    '''
    Collection of static helpers for inspecting TEI/XML documents.
    '''

    def __init__(self):
        '''
        This is the main class for the tei-tools package.'''
        pass

    @staticmethod
    def hello_world(title):
        '''
        This is a static method that prints "Hello World"
        
        Parameters:
        * title: str  -  The title of the book (accepted for API compatibility; currently unused)
        '''
        
        print("Hello World")

    @staticmethod
    def analyze(path):
        '''
        Extract the elements and attributes contained in the specified file.

        Every direct child of the `TEI` element is treated as a section. For each section,
        all tags nested inside it (at any depth) are tallied by tag name and attribute name.
        
        Parameters:
        * path: str  -  Path to the TEI/XML file

        Returns:
        * dict  -  `{section: {element: {attribute: count}}}`. `section` is the name of a direct
          child of `TEI`, `element` is the name of a tag nested inside that section, and
          `attribute` is an attribute name found on that tag. A tag that carries no attributes
          is counted once under the key `"none"`.

        Raises:
        * ValueError  -  If the document contains no `TEI` element
        '''

        # Read in binary mode so the parser can take the encoding from the document itself
        # rather than from the platform default; the `with` block closes the handle.
        with open(path, 'rb') as fh:
            soup = BeautifulSoup(fh, "xml")

        root = soup.find("TEI")
        if root is None:
            raise ValueError(f"No <TEI> element found in {path!r}")

        freq = {}

        for section in root.find_all(True, recursive=False):
            # Sections sharing a name are merged into one entry; a section with no
            # descendants still gets an (empty) entry.
            per_section = freq.setdefault(section.name, {})

            for element in section.find_all(True):
                per_element = per_section.setdefault(element.name, {})

                # One tally per attribute name; attribute-less tags fall back to "none".
                for field in (list(element.attrs) or ["none"]):
                    per_element[field] = per_element.get(field, 0) + 1
        
        return freq

In [None]:
# Render the signature and docstring of TeiTools.hello_world as documentation output.
show_doc(TeiTools.hello_world)

---

### TeiTools.hello_world

>      TeiTools.hello_world (title)

This is a static method that prints "Hello World"

Parameters:
* title: str  -  The title of the book

In [None]:
# Usage example: prints "Hello World"; the argument does not affect the output.
TeiTools.hello_world("abc")

Hello World


サンプルデータとして、校異源氏物語のTEI/XMLをダウンロード

In [None]:
!git clone https://github.com/kouigenjimonogatari/kouigenjimonogatari.github.io sample

fatal: destination path 'sample' already exists and is not an empty directory.


## 要素の分析

In [None]:
# Render the signature and docstring of TeiTools.analyze as documentation output.
show_doc(TeiTools.analyze)

---

### TeiTools.analyze

>      TeiTools.analyze (path)

指定したファイルに含まれる要素および属性を抽出する

Parameters:
* path: str  -  Path to the TEI/XML file

In [None]:
# Analyze one file from the cloned sample repository; `results` holds the nested
# {section: {element: {attribute: count}}} dict returned by analyze and is not displayed here.
# NOTE(review): the saved output of this cell (three integers) looks like it came from the
# now commented-out debug print inside analyze -- re-run the cell to refresh it.
input_path = "sample/tei/01.xml"
results = TeiTools.analyze(input_path)

33
51
706


In [None]:
#| hide
# Run nbdev's export step; presumably this writes the `#| export` cells to the module
# named by `#| default_exp` -- confirm against the nbdev documentation.
import nbdev; nbdev.nbdev_export()